# 수화 동영상 파일 -> 텐서 파일(+ 미디어파이프 처리 영상 파일)

In [1]:
import os
import torch
import mediapipe as mp
import numpy as np
import cv2

In [2]:
def make_tensro_and_video(input_video_path, output_tensor_path, output_video_path, video_save=False):
    """Run MediaPipe Holistic over a video and save the landmarks as tensors.

    Each frame is flipped horizontally, converted to RGB and passed to
    Holistic. For every frame the left-hand (21), right-hand (21), face (468)
    and pose (33) landmarks are stored as ``[x, y, z]`` rows; a part that is
    not detected in a frame is stored as all-zero rows. The four sequences are
    written to ``lt.pt``, ``rt.pt``, ``ft.pt`` and ``pt.pt`` inside
    ``output_tensor_path``.

    Args:
        input_video_path: Path of the video to read.
        output_tensor_path: Directory that receives the four ``.pt`` files.
            It is created (including parents) when missing.
        output_video_path: Path of the annotated video to write. Only used
            when ``video_save`` is true.
        video_save: When true, also write the frames with the landmarks
            drawn on them to ``output_video_path``.
    """
    mp_drawing = mp.solutions.drawing_utils
    mp_holistic = mp.solutions.holistic

    # Size the output video is opened with (width, height).
    frame_size = (1280, 720)

    # Per-frame coordinates of each part (left hand, right hand, face, pose).
    left_hand_lists = []
    right_hand_lists = []
    face_lists = []
    pose_lists = []

    cap = cv2.VideoCapture(input_video_path)
    out = None
    try:
        if not cap.isOpened():
            # Nothing can be read; the saved tensors will contain zero frames.
            print("Warning: could not open video {}".format(input_video_path))

        if video_save:
            fourcc = cv2.VideoWriter_fourcc(*'MP4V')  # video format
            # path, format, frames per second, (width, height)
            out = cv2.VideoWriter(output_video_path, fourcc, 30.0, frame_size)

        # Request the capture width and height.
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, frame_size[0])
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, frame_size[1])

        # The context manager closes the Holistic graph even if a frame fails.
        with mp_holistic.Holistic(
                min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
            while cap.isOpened():
                success, image = cap.read()
                if not success:  # end of the video
                    break

                # Flip the image horizontally (selfie view) and convert the
                # BGR image to RGB.
                image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)

                # To improve performance, mark the image as not writeable to
                # pass by reference.
                image.flags.writeable = False
                results = holistic.process(image)

                left_hand_lists.append(
                    _landmark_rows(results.left_hand_landmarks, 21))
                right_hand_lists.append(
                    _landmark_rows(results.right_hand_landmarks, 21))
                face_lists.append(_landmark_rows(results.face_landmarks, 468))
                pose_lists.append(_landmark_rows(results.pose_landmarks, 33))

                # The annotated frame is only needed for the saved video.
                if out is not None:
                    image.flags.writeable = True
                    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
                    mp_drawing.draw_landmarks(
                        image, results.face_landmarks, mp_holistic.FACE_CONNECTIONS)
                    mp_drawing.draw_landmarks(
                        image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
                    mp_drawing.draw_landmarks(
                        image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
                    mp_drawing.draw_landmarks(
                        image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS)

                    # The writer was opened with frame_size; resize frames
                    # whose size differs so they are not dropped.
                    if (image.shape[1], image.shape[0]) != frame_size:
                        image = cv2.resize(image, frame_size)
                    out.write(image)
    finally:
        if out is not None:
            out.release()
        cap.release()

    # Create the directory that receives the tensors.
    os.makedirs(output_tensor_path, exist_ok=True)

    # Convert to tensors and save them.
    torch.save(_frames_to_tensor(left_hand_lists, 21),
               os.path.join(output_tensor_path, "lt.pt"))
    torch.save(_frames_to_tensor(right_hand_lists, 21),
               os.path.join(output_tensor_path, "rt.pt"))
    torch.save(_frames_to_tensor(face_lists, 468),
               os.path.join(output_tensor_path, "ft.pt"))
    torch.save(_frames_to_tensor(pose_lists, 33),
               os.path.join(output_tensor_path, "pt.pt"))


def _landmark_rows(landmark_list, count):
    """Return ``[x, y, z]`` rows of a landmark list, or ``count`` zero rows if it is missing."""
    if landmark_list:
        return [[lm.x, lm.y, lm.z] for lm in landmark_list.landmark]
    return [[0] * 3 for _ in range(count)]


def _frames_to_tensor(frames, count):
    """Convert per-frame landmark rows to a float tensor, keeping the landmark shape when empty."""
    if not frames:
        return torch.zeros((0, count, 3))
    return torch.FloatTensor(frames)


# Correctly spelled alias; the original (misspelled) name stays available.
make_tensor_and_video = make_tensro_and_video

In [3]:
def mkdirs(file_path_list: list, verbose=True):
    """Create a nested directory one path component at a time.

    The components are joined cumulatively, so ``[".", "output", "images"]``
    checks ``.``, then ``./output``, then ``./output/images`` and creates
    each level that is not already a directory.

    Args:
        file_path_list: Path components, outermost first.
        verbose: When true, print a notice for every directory created.
    """
    current_dir = ""  # path built so far
    for component in file_path_list:
        current_dir = os.path.join(current_dir, component)

        # Nothing to do for levels that already exist.
        if os.path.isdir(current_dir):
            continue

        # Announce the directory that is about to be created.
        if verbose:
            print("No {}".format(current_dir))
            print("Make directory {}".format(current_dir))
        os.mkdir(current_dir)

In [4]:
def convert_videos_to_tensor(input_video_path_list=[".", "videos"],
                             output_tensor_path_list=[".", "output", "tensor"],
                             output_video_path_list=[".", "output", "video"],
                             videos_save=False):
    """Convert every video in a directory to landmark tensors.

    For each file in the input directory a sub-directory named after the
    file (without its extension) is filled with the tensors of that video.
    When ``videos_save`` is true the annotated video is written under the
    same file name into the output video directory.

    Args:
        input_video_path_list: Path components of the directory to read
            videos from.
        output_tensor_path_list: Path components of the directory that
            receives one tensor sub-directory per video.
        output_video_path_list: Path components of the directory that
            receives the annotated videos. Only used when ``videos_save``
            is true.
        videos_save: When true, also save the annotated videos.
    """
    # NOTE: the list defaults are only read here, never mutated.

    # Build the full directory paths.
    input_video_path = os.path.join(*input_video_path_list)
    output_tensor_path = os.path.join(*output_tensor_path_list)
    output_video_path = None
    if videos_save:
        output_video_path = os.path.join(*output_video_path_list)

    # Print the configuration.
    print("{:20}{}".format("intput_video_path: ", input_video_path))
    print("{:20}{}".format("output_tensor_path: ", output_tensor_path))
    print("{:20}{}".format("videos_save: ", str(videos_save)))
    if videos_save:
        print("{:20}{}".format("output_video_path: ", output_video_path))
    print()

    # Create the directories.
    mkdirs(input_video_path_list)
    mkdirs(output_tensor_path_list)
    if videos_save:
        mkdirs(output_video_path_list)

    # Video files in name order; sub-directories are skipped.
    videos = sorted(
        name for name in os.listdir(input_video_path)
        if os.path.isfile(os.path.join(input_video_path, name))
    )
    total = len(videos)

    for done, video in enumerate(videos, start=1):
        stem = os.path.splitext(video)[0]
        # Trailing separator: the per-video function may append the tensor
        # file names directly onto this path.
        tensor_dir = os.path.join(output_tensor_path, stem, "")
        video_out = os.path.join(output_video_path, video) if videos_save else None

        # The per-video function is defined in this file under this spelling.
        make_tensro_and_video(os.path.join(input_video_path, video),
                              tensor_dir,
                              video_out,
                              videos_save)

        # Report every fifth finished video and the final one.
        if done % 5 == 0 or done == total:
            print("{}/{} videos completed".format(done, total))

In [5]:
# Takes roughly 3-5 seconds per video.
# e.g. 50 videos: about 4 minutes.
convert_videos_to_tensor()

intput_video_path:  ./videos
output_tensor_path: ./output/tensor
videos_save:        False

0/51 videos completed
5/51 videos completed
10/51 videos completed
15/51 videos completed
20/51 videos completed
25/51 videos completed
30/51 videos completed
35/51 videos completed
40/51 videos completed
45/51 videos completed
50/51 videos completed


In [6]:
# Check the shapes of the tensors saved for one video
# (left hand, right hand, pose, face).
print(torch.load("./output/tensor/KETI_SL_0000000002/lt.pt").shape)
print(torch.load("./output/tensor/KETI_SL_0000000002/rt.pt").shape)
print(torch.load("./output/tensor/KETI_SL_0000000002/pt.pt").shape)
print(torch.load("./output/tensor/KETI_SL_0000000002/ft.pt").shape)

torch.Size([160, 21, 3])
torch.Size([160, 21, 3])
torch.Size([160, 33, 3])
torch.Size([160, 468, 3])


In [7]:
# Inspect the saved face landmark coordinates of one video.
ft = torch.load("./output/tensor/KETI_SL_0000000002/ft.pt")
print(ft)

tensor([[[ 0.4906,  0.2412, -0.0145],
         [ 0.4891,  0.2166, -0.0271],
         [ 0.4897,  0.2243, -0.0141],
         ...,
         [ 0.4974,  0.1712, -0.0031],
         [ 0.5260,  0.1595,  0.0065],
         [ 0.5284,  0.1572,  0.0066]],

        [[ 0.4911,  0.2400, -0.0150],
         [ 0.4897,  0.2175, -0.0268],
         [ 0.4902,  0.2248, -0.0140],
         ...,
         [ 0.4969,  0.1719, -0.0024],
         [ 0.5247,  0.1603,  0.0076],
         [ 0.5271,  0.1578,  0.0077]],

        [[ 0.4913,  0.2394, -0.0154],
         [ 0.4902,  0.2161, -0.0269],
         [ 0.4905,  0.2238, -0.0142],
         ...,
         [ 0.4972,  0.1717, -0.0020],
         [ 0.5247,  0.1602,  0.0083],
         [ 0.5271,  0.1577,  0.0084]],

        ...,

        [[ 0.4902,  0.2401, -0.0154],
         [ 0.4884,  0.2171, -0.0269],
         [ 0.4892,  0.2247, -0.0143],
         ...,
         [ 0.4957,  0.1719, -0.0020],
         [ 0.5233,  0.1599,  0.0076],
         [ 0.5257,  0.1574,  0.0077]],

        [[