In [2]:
import numpy as np
import pandas as pd
import mediapipe as mp
import cv2
import pytube
import os
import glob
import pickle
import imageio.v2 as imageio
import json
import time
from ffpyplayer.player import MediaPlayer

### Crawling Data  

In [14]:
def download_video(url, res="720p", path="./"):
    """Download a YouTube video at the requested resolution.

    Args:
        url: Watch URL of the video.
        res: Resolution label handed to pytube's stream filter, e.g. "720p".
        path: Directory the file is written to.

    Returns:
        Whatever ``stream.download`` returns (pytube documents this as the
        path of the downloaded file).

    Raises:
        ValueError: If the video offers no stream at resolution ``res``.
    """
    yt = pytube.YouTube(url)
    stream = yt.streams.filter(res=res).first()
    if stream is None:
        # ``first()`` yields None when the filter matches nothing; fail with a
        # readable message instead of an AttributeError on ``None.download``.
        raise ValueError(f"No stream with resolution {res!r} available for {url}")
    return stream.download(path)

In [53]:
# Sample clip: "[BLACKPINK] The currently trending Jiggle Jiggle dance at the British Embassy" (24s)
download_video("https://www.youtube.com/watch?v=JfGFx9tDVpc")

### KeyPoint 추출

In [3]:
def frames_to_gif(frames, output_name, output_dir=None):
    """Combine saved frame images into one animated GIF.

    Args:
        frames: Iterable of image file paths, in playback order.
        output_name: File name of the GIF without the ".gif" extension.
        output_dir: Directory the GIF is written to. When omitted, the
            notebook-level ``output_path`` variable is used, which is what
            this function always read implicitly before.

    Returns:
        Path of the written GIF file.

    Raises:
        ValueError: If ``frames`` is empty.
    """
    frame_files = list(frames)
    if not frame_files:
        raise ValueError("frames_to_gif needs at least one frame image")

    if output_dir is None:
        # Backward-compatible default: global set by the calling cell.
        output_dir = output_path
    if output_dir:
        # Writing the GIF fails if the target directory does not exist yet.
        os.makedirs(output_dir, exist_ok=True)

    gif_path = os.path.join(output_dir, output_name + ".gif")
    images = [imageio.imread(frame_file) for frame_file in frame_files]
    imageio.mimsave(gif_path, images)
    return gif_path

In [54]:
mp_drawing = mp.solutions.drawing_utils
mp_drawing_style = mp.solutions.drawing_styles
mp_pose = mp.solutions.pose

# TODO: use the os module to create and fill a separate image directory per video
video_name = "220421 아이브 안유진 직캠 LOVE DIVE (IVE YUJIN FanCam)  @MCOUNTDOWN_2022421.mp4"
video_path = "./" + video_name

# Annotated frames, one PNG per frame
frames_path = "./keypoint_extraction/frames/"
# GIF and keypoint JSON
output_path = "./keypoint_extraction/output/"
# cv2.imwrite reports a missing directory only through its return value, so
# make sure both targets exist before any frame is written.
os.makedirs(frames_path, exist_ok=True)
os.makedirs(output_path, exist_ok=True)

# Load the video
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    cap.release()
    raise IOError(f"Could not open video: {video_path}")

annotate_frames = []
# One {landmark index (str): [x, y, z]} dict per frame in which a pose was detected
keypoint_dict = []

# A preview window opens right away and shows every annotated frame
i = 0
try:
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
        while cap.isOpened():
            success, image = cap.read()
            if not success:
                # Also reached at the regular end of the video
                print("Empty Frame")
                break
            # Mirror horizontally and convert BGR -> RGB
            # NOTE(review): mirroring a pre-recorded video presumably swaps the
            # left/right landmark labels — confirm this is intended.
            image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
            image.flags.writeable = False
            results = pose.process(image)

            if results.pose_landmarks is not None:
                annotated_pose_landmarks = {str(j): [lmk.x, lmk.y, lmk.z] for j, lmk in enumerate(results.pose_landmarks.landmark)}
                keypoint_dict.append(annotated_pose_landmarks)

            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS, landmark_drawing_spec=mp_drawing_style.get_default_pose_landmarks_style())
            frame_file = frames_path + str(i) + ".png"
            cv2.imwrite(frame_file, image)
            annotate_frames.append(frame_file)
            i += 1
            cv2.imshow("Pose KeyPoint Extract: "+video_name, image)
            # ESC stops the extraction early
            if cv2.waitKey(5) & 0xFF == 27:
                break
finally:
    # Free the capture and close the preview window even if processing fails
    cap.release()
    cv2.destroyAllWindows()

# Save the annotated frames as a GIF
frames_to_gif(annotate_frames, video_name)

# Save the keypoints as JSON
with open(output_path+video_name+"_keypoints.json", "w") as fp:
    json.dump(keypoint_dict, fp)

# Takes roughly two minutes, it seems

Empty Frame


  images.append(imageio.imread(frame))


### Pose Estimate

In [42]:
import json
import numpy as np

# Shorthand handles for the MediaPipe solution modules.
mp_drawing = mp.solutions.drawing_utils
# NOTE(review): other cells bind this same module as `mp_drawing_style` (singular).
mp_drawing_styles = mp.solutions.drawing_styles
mp_hands = mp.solutions.hands
mp_pose = mp.solutions.pose

# Mechanism for turning two sets of pose coordinates into one comparable number.
# The L2 norm is reportedly an effective way to compare two poses.
def l2_norm(ground_relative_coords, webcam_relative_coords):
    """Return the L2 norm of the element-wise difference of two coordinate sets.

    Args:
        ground_relative_coords: Array-like of reference coordinates.
        webcam_relative_coords: Array-like of webcam coordinates with a
            matching (or broadcastable) shape.

    Returns:
        ``np.linalg.norm`` of the difference (Frobenius norm for 2-D input).
    """
    # np.asarray lets plain lists be passed; list - list would raise TypeError.
    difference = np.asarray(ground_relative_coords) - np.asarray(webcam_relative_coords)
    return np.linalg.norm(difference)

def print_data(ground_points, webcam_points, translation_factors, w, h):
    """Print the pixel x/y of landmark 11 for the reference and the webcam pose.

    Args:
        ground_points: Mapping whose key "11" holds the reference landmark.
        webcam_points: Sequence whose item 11 holds the webcam landmark.
        translation_factors: Offset subtracted from the reference position only.
        w: Factor applied to the x coordinate.
        h: Factor applied to the y coordinate.
    """
    frame_size = np.array([w, h])

    ground_xy = ground_points[str(11)][0:2] * frame_size - np.array(list(translation_factors))
    print(ground_xy)

    webcam_xy = webcam_points[11][0:2] * frame_size
    print(webcam_xy)

def compare_keypoints(ground_points, webcam_points, w, h, translation_factors):
    """Measure the distance between a reference pose and a webcam pose.

    Args:
        ground_points: Mapping from landmark index (as a string) to
            [x, y, ...] coordinates of the reference pose.
        webcam_points: Sequence indexed by integer landmark index holding
            [x, y, ...] coordinates of the webcam pose.
        w: Factor applied to x coordinates.
        h: Factor applied to y coordinates.
        translation_factors: Offset subtracted from every reference point.

    Returns:
        The value of ``l2_norm`` for the two stacked coordinate arrays.
    """
    frame_size = np.array([w, h])
    offset = np.array(list(translation_factors))

    # Depth is ignored for now; only x and y take part in the comparison.
    point_pairs = [
        (
            np.array(ground_points[str(idx)])[0:2] * frame_size - offset,
            np.array(webcam_points[idx])[0:2] * frame_size,
        )
        for idx in range(len(ground_points))
    ]

    ground_px = np.vstack([ground_xy for ground_xy, _ in point_pairs])
    webcam_px = np.vstack([webcam_xy for _, webcam_xy in point_pairs])
    return l2_norm(ground_px, webcam_px)

def connect_points(points, translation_factors, image, image_shape, scale):
    """Draw lines between connected pose landmarks onto ``image``.

    Args:
        points: Mapping from landmark index (as a string) to [x, y, ...]
            coordinates.
        translation_factors: Offset subtracted from every landmark after it
            has been multiplied by the image size.
        image: Image handed to ``cv2.line``.
        image_shape: ``(height, width)`` pair used to scale the coordinates.
        scale: Factor applied to the translated coordinates before rounding.

    Returns:
        The ``image`` object that was passed in.
    """
    h, w = image_shape
    frame_size = np.array([w, h])
    offset = np.array(list(translation_factors))

    # start landmark -> landmarks it is joined to
    skeleton = {
        1: [2, 0],
        2: [3],
        3: [7],
        4: [0, 5],
        5: [6],
        6: [8],
        9: [10],
        11: [13],
        12: [11, 14],
        13: [15],
        14: [16],
        15: [21],
        16: [20, 14],
        17: [15],
        18: [20, 16],
        19: [17],
        20: [16],
        22: [16],
        23: [11, 25],
        24: [23, 12],
        25: [27],
        26: [24, 28],
        27: [31, 29],
        28: [30, 32],
        29: [31],
        30: [32],
        32: [28],
    }

    def to_pixel(landmark_id):
        # Scale to the image size, shift, then apply the display scale.
        xy = points[str(landmark_id)][0:2]*frame_size - offset
        return (round(xy[0]*scale), round(xy[1]*scale))

    for start_id, end_ids in skeleton.items():
        start_px = to_pixel(start_id)
        for end_id in end_ids:
            # Colour (0, 0, 255) is red for a BGR image.
            cv2.line(image, start_px, to_pixel(end_id), (0, 0, 255), thickness=10)

    return image

def get_translation_factor(gt, person, h, w):
    """Compute the offset between landmark 11 of the reference and the person.

    Args:
        gt: Mapping whose key '11' holds the reference [x, y, ...] landmark.
        person: Container whose item 11 holds the person's [x, y, ...] landmark.
        h: Factor applied to y coordinates.
        w: Factor applied to x coordinates.

    Returns:
        ``(dx, dy)`` with the sign chosen so that ``dx`` is never negative.
        None when neither comparison holds (e.g. a NaN coordinate).
    """
    gt_x, gt_y = gt['11'][0]*w, gt['11'][1]*h
    person_x, person_y = person[11][0]*w, person[11][1]*h

    if person_x >= gt_x:
        return person_x - gt_x, person_y - gt_y
    if person_x <= gt_x:
        return gt_x - person_x, gt_y - person_y
    # Neither branch matched; keep the implicit None result.
    return None


def put_text(image, text, h, w):
    """Render ``text`` in black onto ``image`` and return the result.

    The text origin is fixed at ``(w - 700, 50)``; ``h`` is accepted but not
    used.
    """
    text_origin = (w - 700, 50)
    return cv2.putText(
        img=image,
        text=text,
        org=text_origin,
        fontFace=cv2.FONT_HERSHEY_SIMPLEX,
        fontScale=2,
        color=(0, 0, 0),
        thickness=3,
    )

### User Input Test

In [5]:
mp_drawing = mp.solutions.drawing_utils
mp_drawing_style = mp.solutions.drawing_styles
mp_pose = mp.solutions.pose

video_name = "user_input_test"
# Annotated frames, one PNG per frame
frames_path = "./keypoint_extraction/frames/"
# GIF and keypoint JSON
output_path = "./keypoint_extraction/output/"
# cv2.imwrite reports a missing directory only through its return value, so
# make sure both targets exist before any frame is written.
os.makedirs(frames_path, exist_ok=True)
os.makedirs(output_path, exist_ok=True)

cv2.startWindowThread()
# Open the default webcam
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    cap.release()
    raise RuntimeError("Could not open webcam (device index 0)")

annotate_frames = []
# One {landmark index (str): [x, y, z]} dict per frame in which a pose was detected
keypoint_dict = []

# A preview window opens right away and shows every annotated frame
i = 0
try:
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
        while cap.isOpened():
            success, image = cap.read()
            if not success:
                print("Empty Frame")
                break
            # Mirror horizontally and convert BGR -> RGB
            image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
            image.flags.writeable = False
            results = pose.process(image)

            if results.pose_landmarks is not None:
                annotated_pose_landmarks = {str(j): [lmk.x, lmk.y, lmk.z] for j, lmk in enumerate(results.pose_landmarks.landmark)}
                keypoint_dict.append(annotated_pose_landmarks)

            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS, landmark_drawing_spec=mp_drawing_style.get_default_pose_landmarks_style())
            frame_file = frames_path + str(i) + ".png"
            cv2.imwrite(frame_file, image)
            annotate_frames.append(frame_file)
            i += 1
            cv2.imshow("Pose KeyPoint Extract: "+video_name, image)
            # ESC stops the recording
            if cv2.waitKey(5) & 0xFF == 27:
                break
finally:
    # Free the webcam and close the preview window even if processing fails.
    # The extra waitKey calls give the GUI event loop a chance to close the window.
    cap.release()
    cv2.waitKey(1)
    cv2.destroyAllWindows()
    cv2.waitKey(1)

# Save the annotated frames as a GIF
frames_to_gif(annotate_frames, video_name)

# Save the keypoints as JSON
with open(output_path+video_name+"_keypoints.json", "w") as fp:
    json.dump(keypoint_dict, fp)

1. 원본 영상에서 추출한 키포인트와 사용자 입력(웹캠)에서 추출한 키포인트를 비교하는 방법
2. 추출한 키포인트를 사용자 웹캠 이미지에 덮어 씌우는 방법
3. 1, 2를 합쳐서 연습과 스코어링

사용자 입력을 받는 경우와 영상에서 추출하는 경우 불필요한 부분을 제외할 방법을 고민해봐야 함

In [2]:
mp_drawing = mp.solutions.drawing_utils
mp_holistic = mp.solutions.holistic

output_path = "./"
video_name = "user_input"
video_path = "./"+video_name

# Open WebCam (Suppose Only 2 Maximum WebCam Install)
cv2.startWindowThread()
# cv2.VideoCapture reports a missing device through isOpened() rather than by
# raising, so the fallback to the second device has to test the capture itself.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    cap.release()
    cap = cv2.VideoCapture(1)
if not cap.isOpened():
    cap.release()
    raise RuntimeError("Could not open a webcam (tried device indices 0 and 1)")
# 720p
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)

keypoint_face = []
# keypoint_left_hand = []
# keypoint_right_hand = []
keypoint_pose = []

# Drawing styles are the same for every frame, so build them once.
face_landmark_spec = mp_drawing.DrawingSpec(color=(255, 255, 0), thickness=1, circle_radius=1)
face_connection_spec = mp_drawing.DrawingSpec(color=(255, 255, 204), thickness=1, circle_radius=1)
hand_landmark_spec = mp_drawing.DrawingSpec(color=(255, 51, 51), thickness=2, circle_radius=2)
hand_connection_spec = mp_drawing.DrawingSpec(color=(255, 204, 204), thickness=1, circle_radius=1)
pose_landmark_spec = mp_drawing.DrawingSpec(color=(102, 102, 255), thickness=2, circle_radius=2)
pose_connection_spec = mp_drawing.DrawingSpec(color=(153, 153, 255), thickness=1, circle_radius=1)

try:
    # Init Holistic Model
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                print("Video Error")
                break

            # BGR -> RGB
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image.flags.writeable = False
            # Make Detections
            results = holistic.process(image)

            # Collect Keypoint coord.
            if results.face_landmarks is not None:
                annotated_face_landmarks = {str(idx): [lmk.x, lmk.y, lmk.z] for idx, lmk in enumerate(results.face_landmarks.landmark)}
                keypoint_face.append(annotated_face_landmarks)
            # if results.left_hand_landmarks is not None:
            #     annotated_left_hand_landmarks = {str(idx): [lmk.x, lmk.y, lmk.z] for idx, lmk in enumerate(results.left_hand_landmarks.landmark)}
            # if results.right_hand_landmarks is not None:
            #     annotated_right_hand_landmarks = {str(idx): [lmk.x, lmk.y, lmk.z] for idx, lmk in enumerate(results.right_hand_landmarks.landmark)}
            if results.pose_landmarks is not None:
                annotated_pose_landmarks = {str(idx): [lmk.x, lmk.y, lmk.z] for idx, lmk in enumerate(results.pose_landmarks.landmark)}
                keypoint_pose.append(annotated_pose_landmarks)

            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            # Drawing Landmarks on Realtime
            mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_CONTOURS,
                                      landmark_drawing_spec=face_landmark_spec,
                                      connection_drawing_spec=face_connection_spec)
            mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                                      landmark_drawing_spec=hand_landmark_spec,
                                      connection_drawing_spec=hand_connection_spec)
            mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                                      landmark_drawing_spec=hand_landmark_spec,
                                      connection_drawing_spec=hand_connection_spec)
            mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                                      landmark_drawing_spec=pose_landmark_spec,
                                      connection_drawing_spec=pose_connection_spec)

            # Show a mirrored view so it behaves like a mirror for the user
            cv2.imshow("KeyPoint Extraction", cv2.flip(image, 1))

            if cv2.waitKey(10)&0xFF == ord("q"):
                break
finally:
    # Free the webcam and close the window even if processing fails.
    cap.release()
    cv2.waitKey(1)
    cv2.destroyAllWindows()
    cv2.waitKey(1)

-1

In [36]:
def l2_norm(original_coor, compare_coor):
    """Return the L2 norm of the element-wise difference of two coordinate sets.

    NOTE(review): a function with this name is also defined in the earlier
    "Pose Estimate" cell; whichever cell runs last wins.

    Args:
        original_coor: Array-like of reference coordinates.
        compare_coor: Array-like of coordinates to compare, with a matching
            (or broadcastable) shape.

    Returns:
        ``np.linalg.norm`` of the difference (Frobenius norm for 2-D input).
    """
    # np.asarray lets plain lists be passed; list - list would raise TypeError.
    difference = np.asarray(original_coor) - np.asarray(compare_coor)
    return np.linalg.norm(difference)

def compare_origin_input(original_coor, compare_coor, w, h):
    """Measure the distance between two poses given as landmark mappings.

    Args:
        original_coor: Mapping from landmark index (as a string) to
            [x, y, ...] coordinates of the reference pose.
        compare_coor: Mapping with the same keys for the pose to compare.
        w: Factor applied to x coordinates.
        h: Factor applied to y coordinates.

    Returns:
        The value of ``l2_norm`` for the two stacked x/y arrays.
    """
    frame_size = np.array([w, h])

    # Only the first two components (x, y) of each landmark are used.
    point_pairs = [
        (
            np.array(original_coor[key])[0:2] * frame_size,
            np.array(compare_coor[key])[0:2] * frame_size,
        )
        for key in map(str, range(len(original_coor)))
    ]

    ori_arr = np.vstack([ori_xy for ori_xy, _ in point_pairs])
    com_arr = np.vstack([com_xy for _, com_xy in point_pairs])
    return l2_norm(ori_arr, com_arr)

In [10]:
# Keypoint JSON files of two "LOVE DIVE" performances that are compared below.
# NOTE(review): the "_keypoints.json" suffix matches what the extraction cells write;
# presumably these files were produced there and moved to ../template/ — confirm.
input_1_path = "../template/[주간아 직캠] IVE YUJIN - LOVE DIVE (아이브 유진 - 러브 다이브) l EP556.mp4_keypoints.json"
input_2_path = "../template/220421 아이브 안유진 직캠 LOVE DIVE (IVE YUJIN FanCam)  @MCOUNTDOWN_2022421.mp4_keypoints.json"

In [45]:
# Load both keypoint files into DataFrames, in the same order as the paths.
data1, data2 = map(pd.read_json, (input_1_path, input_2_path))

# Pose landmark index -> readable name. The names are listed in index order so
# that enumerate() numbers them 0..32.
pose_landmark_dict = dict(enumerate((
    # Face ###################################
    "nose",
    # eyes
    "left_eye_inner",
    "left_eye",
    "left_eye_outer",
    "right_eye_inner",
    "right_eye",
    "right_eye_outer",
    # ears
    "left_ear",
    "right_ear",
    # mouth
    "left_mouth",
    "right_mouth",
    # Body ###################################
    # shoulder
    "left_shoulder",
    "right_shoulder",
    # elbow
    "left_elbow",
    "right_elbow",
    # wrist
    "left_wrist",
    "right_wrist",
    # hand
    "left_pinky",
    "right_pinky",
    "left_index",
    "right_index",
    "left_thumb",
    "right_thumb",
    # hip
    "left_hip",
    "right_hip",
    # knee
    "left_knee",
    "right_knee",
    # ankle
    "left_ankle",
    "right_ankle",
    # heel
    "left_heel",
    "right_heel",
    # foot
    "left_foot_index",
    "right_foot_index",
)))

In [27]:
# Relabel the columns of both frames with the landmark names, in place.
# NOTE(review): after this runs the columns are labelled by name, so label-based
# lookups with integer column labels presumably stop working — results of later
# cells then depend on execution order; confirm.
data1.columns = pose_landmark_dict.values()
data2.columns = pose_landmark_dict.values()

In [56]:
# First row, first landmark column of data1. Positional indexing keeps this cell
# working whether or not the columns have been relabelled with landmark names
# (label-based `.loc[0, 0]` raises KeyError once column 0 is named "nose").
data1.iloc[0, 0]

[0.470056474208831, 0.26909977197647, -0.031963795423507003]

In [57]:
# First row, first landmark column of data2. Positional indexing keeps this cell
# working whether or not the columns have been relabelled with landmark names
# (label-based `.loc[0, 0]` raises KeyError once column 0 is named "nose").
data2.iloc[0, 0]

[0.28000330924987704, 0.13502180576324402, -0.007499510888010001]

In [60]:
# Number of rows available in both frames (the shorter of the two).
idx = min(len(data1), len(data2))

In [71]:
# Same first cell of data2, read through the NumPy view of the frame.
data2.to_numpy()[0, 0]

[0.28000330924987704, 0.13502180576324402, -0.007499510888010001]

In [35]:
# Frame rate reported by the dance video capture.
# NOTE(review): `dance_video` is only created in the following cell, so on a fresh
# kernel this cell fails when run in notebook order — run it after that cell.
dance_video.get(cv2.CAP_PROP_FPS)

29.97002997002997

In [4]:
mp_drawing = mp.solutions.drawing_utils
mp_holistic = mp.solutions.holistic

url = "./220421 아이브 안유진 직캠 LOVE DIVE (IVE YUJIN FanCam)  @MCOUNTDOWN_2022421.mp4"

dance_video = cv2.VideoCapture(url)
# Open WebCam (Suppose Only 2 Maximum WebCam Install)
# cv2.VideoCapture reports a missing device through isOpened() rather than by
# raising, so the fallback to the second device has to test the capture itself.
user_video = cv2.VideoCapture(0)
if not user_video.isOpened():
    user_video.release()
    user_video = cv2.VideoCapture(1)
# 720p
# dance_video.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
# dance_video.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
user_video.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
user_video.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)

# Drawing styles are the same for every frame, so build them once.
hand_landmark_spec = mp_drawing.DrawingSpec(color=(255, 51, 51), thickness=2, circle_radius=2)
hand_connection_spec = mp_drawing.DrawingSpec(color=(255, 204, 204), thickness=1, circle_radius=1)
pose_landmark_spec = mp_drawing.DrawingSpec(color=(102, 102, 255), thickness=2, circle_radius=2)
pose_connection_spec = mp_drawing.DrawingSpec(color=(153, 153, 255), thickness=1, circle_radius=1)

# For Mac & Linux err
cv2.startWindowThread()
try:
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while dance_video.isOpened() and user_video.isOpened():
            dance_ret, dance_image = dance_video.read()
            user_ret, user_image = user_video.read()
            # Stop when the dance video ends or the webcam delivers no frame;
            # drawing on a missing webcam frame would fail.
            if not dance_ret or not user_ret:
                break

            # Detect landmarks on an RGB copy; the BGR frame stays as is for display.
            results = holistic.process(cv2.cvtColor(dance_image, cv2.COLOR_BGR2RGB))

            # Overlay the dancer's landmarks on the user's webcam frame.
            # mp_drawing.draw_landmarks(user_image, results.face_landmarks, mp_holistic.FACEMESH_CONTOURS,
            #                           landmark_drawing_spec=mp_drawing.DrawingSpec(color=(255, 255, 0), thickness=1, circle_radius=1),
            #                           connection_drawing_spec=mp_drawing.DrawingSpec(color=(255, 255, 204), thickness=1, circle_radius=1))
            mp_drawing.draw_landmarks(user_image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                                      landmark_drawing_spec=hand_landmark_spec,
                                      connection_drawing_spec=hand_connection_spec)
            mp_drawing.draw_landmarks(user_image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                                      landmark_drawing_spec=hand_landmark_spec,
                                      connection_drawing_spec=hand_connection_spec)
            mp_drawing.draw_landmarks(user_image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                                      landmark_drawing_spec=pose_landmark_spec,
                                      connection_drawing_spec=pose_connection_spec)

            # np.hstack needs both frames to have the same height; rescale the dance
            # frame (keeping its aspect ratio) when the two sources differ.
            if dance_image.shape[0] != user_image.shape[0]:
                target_h = user_image.shape[0]
                target_w = max(1, round(dance_image.shape[1] * target_h / dance_image.shape[0]))
                dance_image = cv2.resize(dance_image, (target_w, target_h))

            h_output = np.hstack((cv2.flip(dance_image, 1), cv2.flip(user_image, 1)))
            cv2.imshow("Just DDance!", h_output)
            # Mask to the low byte like the other cells so the key test works on all platforms
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
finally:
    # Free both captures and close the window even if processing fails.
    dance_video.release()
    user_video.release()
    # For Mac & Linux err
    cv2.destroyAllWindows()
    cv2.waitKey(1)

-1

In [7]:
# Release both captures created by the previous cell.
# NOTE(review): presumably kept as a manual cleanup for runs that were interrupted
# before the previous cell reached its own release calls — confirm.
dance_video.release()
user_video.release()