<a href="https://colab.research.google.com/github/okana2ki/intro-to-AI/blob/main/pose_land_video.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ダンス動画ファイルからランドマーク検出（旧モデル）

In [None]:
!pip install mediapipe opencv-python

## 音声なし版

In [None]:
import cv2
import mediapipe as mp

# Draw MediaPipe Pose landmarks on every frame of the input video and save
# the annotated frames as a new video file (video track only, no audio).

# MediaPipe Pose (legacy "solutions" API) and its drawing helpers
mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils

# Open the input video
input_video_path = '/content/drive/MyDrive/Colab_files/dance-sample.mp4'
cap = cv2.VideoCapture(input_video_path)
if not cap.isOpened():
    # Without this check an unreadable path would skip the loop below and
    # still print the success message for an empty output file.
    raise FileNotFoundError(f'Cannot open input video: {input_video_path}')

# Read the input properties so the output matches the source
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)

# Configure the output video writer
output_video_path = '/content/drive/MyDrive/Colab_files/dance-sample-l.mp4'
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

try:
    if not out.isOpened():
        raise IOError(f'Cannot open output video for writing: {output_video_path}')

    # The context manager closes the Pose graph even if a frame fails.
    with mp_pose.Pose() as pose:
        while True:
            ret, frame = cap.read()
            if not ret:
                break  # end of stream

            # OpenCV decodes to BGR; the frame is converted to RGB for MediaPipe
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Detect pose landmarks
            results = pose.process(rgb_frame)

            # Draw the skeleton on the original (BGR) frame when a pose was found
            if results.pose_landmarks:
                mp_drawing.draw_landmarks(frame, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)

            # Frames without a detection are written unchanged
            out.write(frame)
finally:
    # Always release the capture and finalize the output file
    cap.release()
    out.release()

print(f'Output video saved to {output_video_path}')

## 音声あり版

In [None]:
!pip install mediapipe opencv-python moviepy
!sudo apt-get install ffmpeg

In [None]:
import cv2
import mediapipe as mp
from moviepy.editor import VideoFileClip, AudioFileClip

# Draw MediaPipe Pose landmarks on every frame of the input video, then copy
# the original audio track onto the annotated video with MoviePy.

# MediaPipe Pose (legacy "solutions" API) and its drawing helpers
mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils

# Input video path
input_video_path = '/content/drive/MyDrive/Colab_files/dance-sample.mp4'

# Open the input video with OpenCV for frame-by-frame processing
cap = cv2.VideoCapture(input_video_path)
if not cap.isOpened():
    # Without this check an unreadable path would skip the loop below and
    # leave an empty intermediate video for the muxing step.
    raise FileNotFoundError(f'Cannot open input video: {input_video_path}')

# Read the input properties so the output matches the source
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)

# Intermediate (silent) annotated video
output_video_path = 'output_with_landmarks.mp4'
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

try:
    if not out.isOpened():
        raise IOError(f'Cannot open output video for writing: {output_video_path}')

    # The context manager closes the Pose graph even if a frame fails.
    with mp_pose.Pose() as pose:
        while True:
            ret, frame = cap.read()
            if not ret:
                break  # end of stream

            # OpenCV decodes to BGR; the frame is converted to RGB for MediaPipe
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Detect pose landmarks
            results = pose.process(rgb_frame)

            # Draw the skeleton on the original (BGR) frame when a pose was found
            if results.pose_landmarks:
                mp_drawing.draw_landmarks(frame, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)

            # Frames without a detection are written unchanged
            out.write(frame)
finally:
    # The writer must be released before MoviePy reads the file back
    cap.release()
    out.release()

# Combine the annotated video with the original audio track.
# Both clips stay open until the final file has been written (the audio is
# read from the source clip during encoding) and are then closed.
with VideoFileClip(input_video_path) as clip, VideoFileClip(output_video_path) as annotated_clip:
    audio = clip.audio
    final_clip = annotated_clip.set_audio(audio)
    final_clip.write_videofile("final_output_with_audio.mp4", codec='libx264', audio_codec='aac')

print('Output video saved as final_output_with_audio.mp4')

## ワールド座標のファイル保存を追加

In [None]:
import cv2
import mediapipe as mp
import csv
from moviepy.editor import VideoFileClip

# Draw MediaPipe Pose landmarks on every frame of the input video, save the
# per-frame world coordinates of every landmark to a CSV file, and copy the
# original audio track onto the annotated video.

# MediaPipe Pose (legacy "solutions" API) and its drawing helpers
mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils

# Input video path
input_video_path = '/content/drive/MyDrive/Colab_files/dance-sample.mp4'

# Open the input video with OpenCV for frame-by-frame processing
cap = cv2.VideoCapture(input_video_path)
if not cap.isOpened():
    # Without this check an unreadable path would skip the loop below and
    # leave an empty video and a header-only CSV.
    raise FileNotFoundError(f'Cannot open input video: {input_video_path}')

# Read the input properties so the output matches the source
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)

# Intermediate (silent) annotated video
output_video_path = 'output_with_landmarks.mp4'
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

# CSV output: one row per (frame, landmark)
csv_file_path = 'landmarks_world_coordinates.csv'

frame_index = 0

try:
    if not out.isOpened():
        raise IOError(f'Cannot open output video for writing: {output_video_path}')

    # Context managers flush/close the CSV file and the Pose graph even if
    # processing fails part-way through.
    with open(csv_file_path, mode='w', newline='') as csv_file, \
            mp_pose.Pose(model_complexity=1, enable_segmentation=False,
                         min_detection_confidence=0.5) as pose:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(['frame', 'landmark_index', 'x', 'y', 'z', 'visibility'])

        while True:
            ret, frame = cap.read()
            if not ret:
                break  # end of stream

            # OpenCV decodes to BGR; the frame is converted to RGB for MediaPipe
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Detect pose landmarks
            results = pose.process(rgb_frame)

            if results.pose_landmarks:
                # Draw the skeleton on the original (BGR) frame
                mp_drawing.draw_landmarks(frame, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)

                # Record world coordinates; guarded separately so a missing
                # world-landmark result cannot raise AttributeError on None.
                if results.pose_world_landmarks:
                    csv_writer.writerows(
                        [frame_index, i, lm.x, lm.y, lm.z, lm.visibility]
                        for i, lm in enumerate(results.pose_world_landmarks.landmark)
                    )

            # Frames without a detection are written unchanged and get no CSV rows
            out.write(frame)
            frame_index += 1
finally:
    # The writer must be released before MoviePy reads the file back
    cap.release()
    out.release()

# Combine the annotated video with the original audio track.
# Both clips stay open until the final file has been written and are then closed.
with VideoFileClip(input_video_path) as clip, VideoFileClip(output_video_path) as annotated_clip:
    audio = clip.audio
    final_clip = annotated_clip.set_audio(audio)
    final_clip.write_videofile("final_output_with_audio.mp4", codec='libx264', audio_codec='aac')

print('Output video saved as final_output_with_audio.mp4')
print(f'Landmark world coordinates saved as {csv_file_path}')

## num_poses=2と指定できるよう、変更しようとしてデバッグ中

In [None]:
!pip install mediapipe



In [None]:
import cv2
import mediapipe as mp
from moviepy.editor import VideoFileClip

# MediaPipe Tasks API (PoseLandmarker) so that num_poses can be specified
from mediapipe.tasks.python import vision
from mediapipe.tasks.python.vision import PoseLandmarker, PoseLandmarkerOptions, RunningMode
from mediapipe.framework.formats import landmark_pb2

# Path to the PoseLandmarker model bundle
model_path = '/content/drive/MyDrive/Colab_files/pose_landmarker_full.task'

# Set up the options with num_poses.
# BaseOptions is exported by mediapipe.tasks.python (mp.tasks), not by the
# vision submodule, which is why vision.BaseOptions raised AttributeError.
options = PoseLandmarkerOptions(
    base_options=mp.tasks.BaseOptions(model_asset_path=model_path),
    running_mode=RunningMode.VIDEO,
    num_poses=2  # Set the number of poses to detect
)

# Initialize drawing utilities
mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose

# Input / intermediate output paths
input_video_path = '/content/drive/MyDrive/Colab_files/dance-sample.mp4'
output_video_path = 'output_with_poses.mp4'

# Open the input video with OpenCV
cap = cv2.VideoCapture(input_video_path)
if not cap.isOpened():
    raise FileNotFoundError(f'Cannot open input video: {input_video_path}')

# Use the source frame rate (falling back to 30 when the container does not
# report one) so the annotated video stays in sync with the original audio.
fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video_path, fourcc, fps, frame_size)

frame_index = 0

try:
    # Create the pose landmarker instance
    with PoseLandmarker.create_from_options(options) as landmarker:
        while True:
            ret, frame = cap.read()
            if not ret:
                break  # end of stream

            # Convert BGR (OpenCV) to RGB and wrap it as a MediaPipe image
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

            # detect_for_video expects a monotonically increasing timestamp
            # in milliseconds; derive it from the frame index.
            timestamp_ms = int(frame_index * 1000 / fps)

            # Run pose detection
            pose_landmarker_result = landmarker.detect_for_video(mp_image, timestamp_ms)

            # Draw every detected pose
            for pose_landmarks in pose_landmarker_result.pose_landmarks:
                # The Tasks API returns plain landmark objects, while
                # draw_landmarks needs a protobuf NormalizedLandmarkList,
                # so each landmark is copied into a protobuf message.
                landmark_list = landmark_pb2.NormalizedLandmarkList()
                landmark_list.landmark.extend(
                    landmark_pb2.NormalizedLandmark(x=lm.x, y=lm.y, z=lm.z)
                    for lm in pose_landmarks
                )
                mp_drawing.draw_landmarks(
                    frame,
                    landmark_list,
                    mp_pose.POSE_CONNECTIONS
                )

            # Write the annotated frame.
            # (No cv2.imshow preview: it is disabled in Colab and raises.)
            out.write(frame)
            frame_index += 1
finally:
    # The writer must be released before MoviePy reads the file back
    cap.release()
    out.release()

# Add the original audio track to the annotated video; both clips stay open
# until the final file has been written and are then closed.
with VideoFileClip(input_video_path) as clip, VideoFileClip(output_video_path) as annotated_clip:
    audio = clip.audio
    final_clip = annotated_clip.set_audio(audio)
    final_clip.write_videofile('final_output_with_audio.mp4')

AttributeError: module 'mediapipe.tasks.python.vision' has no attribute 'BaseOptions'

In [None]:
import cv2
import mediapipe as mp
from moviepy.editor import VideoFileClip
# Protobuf landmark messages live in mediapipe.framework.formats; they are not
# reachable through mp.tasks.vision.core, which is what raised AttributeError.
from mediapipe.framework.formats import landmark_pb2

# MediaPipe Tasks API (PoseLandmarker) entry points
BaseOptions = mp.tasks.BaseOptions
PoseLandmarker = mp.tasks.vision.PoseLandmarker
PoseLandmarkerOptions = mp.tasks.vision.PoseLandmarkerOptions
VisionRunningMode = mp.tasks.vision.RunningMode

# Path to the PoseLandmarker model bundle
model_path = '/content/drive/MyDrive/Colab_files/pose_landmarker_full.task'

# Set up the options with num_poses
options = PoseLandmarkerOptions(
    base_options=BaseOptions(model_asset_path=model_path),
    running_mode=VisionRunningMode.VIDEO,
    num_poses=2  # Set the number of poses to detect
)

# Initialize drawing utilities
mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose

# Input / intermediate output paths
input_video_path = '/content/drive/MyDrive/Colab_files/dance-sample.mp4'
output_video_path = 'output_with_poses.mp4'

# Open the input video with OpenCV
cap = cv2.VideoCapture(input_video_path)
if not cap.isOpened():
    raise FileNotFoundError(f'Cannot open input video: {input_video_path}')

# Use the source frame rate (falling back to 30 when the container does not
# report one) so the annotated video stays in sync with the original audio.
fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video_path, fourcc, fps, frame_size)

frame_index = 0

try:
    # Create the pose landmarker instance
    with PoseLandmarker.create_from_options(options) as landmarker:
        while True:
            ret, frame = cap.read()
            if not ret:
                break  # end of stream

            # Convert BGR (OpenCV) to RGB and wrap it as a MediaPipe image
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

            # detect_for_video expects a monotonically increasing timestamp
            # in milliseconds; derive it from the frame index.
            timestamp_ms = int(frame_index * 1000 / fps)

            # Run pose detection
            pose_landmarker_result = landmarker.detect_for_video(mp_image, timestamp_ms)

            # Draw every detected pose
            for pose_landmarks in pose_landmarker_result.pose_landmarks:
                # The Tasks API returns plain landmark objects, while
                # draw_landmarks needs a protobuf NormalizedLandmarkList,
                # so each landmark is copied into a protobuf message.
                landmark_list = landmark_pb2.NormalizedLandmarkList()
                landmark_list.landmark.extend(
                    landmark_pb2.NormalizedLandmark(x=lm.x, y=lm.y, z=lm.z)
                    for lm in pose_landmarks
                )
                mp_drawing.draw_landmarks(
                    frame,
                    landmark_list,
                    mp_pose.POSE_CONNECTIONS
                )

            # Write the annotated frame.
            # (No cv2.imshow preview: it is disabled in Colab and raises.)
            out.write(frame)
            frame_index += 1
finally:
    # The writer must be released before MoviePy reads the file back
    cap.release()
    out.release()

# Add the original audio track to the annotated video; both clips stay open
# until the final file has been written and are then closed.
with VideoFileClip(input_video_path) as clip, VideoFileClip(output_video_path) as annotated_clip:
    audio = clip.audio
    final_clip = annotated_clip.set_audio(audio)
    final_clip.write_videofile('final_output_with_audio.mp4')

AttributeError: module 'mediapipe.tasks.python.vision' has no attribute 'core'

In [None]:
import cv2
import mediapipe as mp
from moviepy.editor import VideoFileClip
# `mp.framework` is not exposed as an attribute of the top-level package
# (that access raised AttributeError), so the module is imported explicitly.
from mediapipe.framework.formats import landmark_pb2

# MediaPipe Tasks API (PoseLandmarker) entry points
BaseOptions = mp.tasks.BaseOptions
PoseLandmarker = mp.tasks.vision.PoseLandmarker
PoseLandmarkerOptions = mp.tasks.vision.PoseLandmarkerOptions
VisionRunningMode = mp.tasks.vision.RunningMode

# Path to the PoseLandmarker model bundle
model_path = '/content/drive/MyDrive/Colab_files/pose_landmarker_full.task'

# Set up the options with num_poses
options = PoseLandmarkerOptions(
    base_options=BaseOptions(model_asset_path=model_path),
    running_mode=VisionRunningMode.VIDEO,
    num_poses=2  # Set the number of poses to detect
)

# Initialize drawing utilities
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# Input / intermediate output paths
input_video_path = '/content/drive/MyDrive/Colab_files/dance-sample.mp4'
output_video_path = 'output_with_poses.mp4'

# Open the input video with OpenCV
cap = cv2.VideoCapture(input_video_path)
if not cap.isOpened():
    raise FileNotFoundError(f'Cannot open input video: {input_video_path}')

# Use the source frame rate (falling back to 30 when the container does not
# report one) so the annotated video stays in sync with the original audio.
fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video_path, fourcc, fps, frame_size)

frame_index = 0

try:
    # Create the pose landmarker instance
    with PoseLandmarker.create_from_options(options) as landmarker:
        while True:
            ret, frame = cap.read()
            if not ret:
                break  # end of stream

            # Convert BGR (OpenCV) to RGB and wrap it as a MediaPipe image
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

            # detect_for_video expects a monotonically increasing timestamp
            # in milliseconds; derive it from the frame index.
            timestamp_ms = int(frame_index * 1000 / fps)

            # Run pose detection
            pose_landmarker_result = landmarker.detect_for_video(mp_image, timestamp_ms)

            # Draw every detected pose
            for pose_landmarks in pose_landmarker_result.pose_landmarks:
                # The Tasks API returns plain landmark objects, while
                # draw_landmarks needs a protobuf NormalizedLandmarkList,
                # so each landmark is copied into a protobuf message.
                landmark_list = landmark_pb2.NormalizedLandmarkList()
                landmark_list.landmark.extend(
                    landmark_pb2.NormalizedLandmark(x=lm.x, y=lm.y, z=lm.z)
                    for lm in pose_landmarks
                )
                mp_drawing.draw_landmarks(
                    frame,
                    landmark_list,
                    mp.solutions.pose.POSE_CONNECTIONS,
                    landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style()
                )

            # Write the annotated frame.
            # (No cv2.imshow preview: it is disabled in Colab and raises.)
            out.write(frame)
            frame_index += 1
finally:
    # The writer must be released before MoviePy reads the file back
    cap.release()
    out.release()

# Add the original audio track to the annotated video; both clips stay open
# until the final file has been written and are then closed.
with VideoFileClip(input_video_path) as clip, VideoFileClip(output_video_path) as annotated_clip:
    audio = clip.audio
    final_clip = annotated_clip.set_audio(audio)
    final_clip.write_videofile('final_output_with_audio.mp4')

AttributeError: module 'mediapipe' has no attribute 'framework'

In [None]:
import cv2
import mediapipe as mp
from moviepy.editor import VideoFileClip
# The Tasks "containers" package has no NormalizedLandmarkList (that access
# raised AttributeError); the protobuf list type used by draw_landmarks is
# defined in mediapipe.framework.formats.landmark_pb2.
from mediapipe.framework.formats import landmark_pb2

# MediaPipe Tasks API (PoseLandmarker) entry points
BaseOptions = mp.tasks.BaseOptions
PoseLandmarker = mp.tasks.vision.PoseLandmarker
PoseLandmarkerOptions = mp.tasks.vision.PoseLandmarkerOptions
VisionRunningMode = mp.tasks.vision.RunningMode

# Path to the PoseLandmarker model bundle
model_path = '/content/drive/MyDrive/Colab_files/pose_landmarker_full.task'

# Set up the options with num_poses
options = PoseLandmarkerOptions(
    base_options=BaseOptions(model_asset_path=model_path),
    running_mode=VisionRunningMode.VIDEO,
    num_poses=2  # Set the number of poses to detect
)

# Initialize drawing utilities
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# Input / intermediate output paths
input_video_path = '/content/drive/MyDrive/Colab_files/dance-sample.mp4'
output_video_path = 'output_with_poses.mp4'

# Open the input video with OpenCV
cap = cv2.VideoCapture(input_video_path)
if not cap.isOpened():
    raise FileNotFoundError(f'Cannot open input video: {input_video_path}')

# Use the source frame rate (falling back to 30 when the container does not
# report one) so the annotated video stays in sync with the original audio.
fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video_path, fourcc, fps, frame_size)

frame_index = 0

try:
    # Create the pose landmarker instance
    with PoseLandmarker.create_from_options(options) as landmarker:
        while True:
            ret, frame = cap.read()
            if not ret:
                break  # end of stream

            # Convert BGR (OpenCV) to RGB and wrap it as a MediaPipe image
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

            # detect_for_video expects a monotonically increasing timestamp
            # in milliseconds; derive it from the frame index.
            timestamp_ms = int(frame_index * 1000 / fps)

            # Run pose detection
            pose_landmarker_result = landmarker.detect_for_video(mp_image, timestamp_ms)

            # Draw every detected pose
            for pose_landmarks in pose_landmarker_result.pose_landmarks:
                # The Tasks API returns plain landmark objects, while
                # draw_landmarks needs a protobuf NormalizedLandmarkList,
                # so each landmark is copied into a protobuf message.
                pose_landmark_list = landmark_pb2.NormalizedLandmarkList()
                pose_landmark_list.landmark.extend(
                    landmark_pb2.NormalizedLandmark(x=lm.x, y=lm.y, z=lm.z)
                    for lm in pose_landmarks
                )
                mp_drawing.draw_landmarks(
                    frame,
                    pose_landmark_list,
                    mp.solutions.pose.POSE_CONNECTIONS,
                    landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style()
                )

            # Write the annotated frame.
            # (No cv2.imshow preview: it is disabled in Colab and raises.)
            out.write(frame)
            frame_index += 1
finally:
    # The writer must be released before MoviePy reads the file back
    cap.release()
    out.release()

# Add the original audio track to the annotated video; both clips stay open
# until the final file has been written and are then closed.
with VideoFileClip(input_video_path) as clip, VideoFileClip(output_video_path) as annotated_clip:
    audio = clip.audio
    final_clip = annotated_clip.set_audio(audio)
    final_clip.write_videofile('final_output_with_audio.mp4')

AttributeError: module 'mediapipe.tasks.python.components.containers' has no attribute 'NormalizedLandmarkList'

## ランドマークを上から見た映像に変換（音声付き）
* 下から見た映像になってるっぽいので、後で上から見た映像になるように修正。
* 右足を上げたタイミングでは、本来は右足首の垂直方向の位置が変わるだけで水平方向の位置は変わらないはずだが、水平方向の位置も変わっているように見える点も要改善。奥行方向の位置推定は難しいということか？



In [None]:
!pip install mediapipe opencv-python moviepy
!sudo apt-get install ffmpeg

In [None]:
import cv2
import mediapipe as mp
from moviepy.editor import VideoFileClip
import numpy as np

# Render the pose world landmarks of each frame as a top-down (bird's-eye)
# skeleton on a black canvas, then add the original audio track.

# MediaPipe Pose (legacy "solutions" API) and its drawing helpers
mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils

# Input video path
input_video_path = '/content/drive/MyDrive/Colab_files/dance-sample.mp4'

# Open the input video with OpenCV for frame-by-frame processing
cap = cv2.VideoCapture(input_video_path)
if not cap.isOpened():
    # Without this check an unreadable path would skip the loop below and
    # leave an empty intermediate video for the muxing step.
    raise FileNotFoundError(f'Cannot open input video: {input_video_path}')

# Input video properties (only fps is needed for the top-view output)
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)

# Size of the top-view canvas in pixels
top_view_width = 800
top_view_height = 800

# Intermediate (silent) top-view video
output_video_path = 'output_with_top_view.mp4'
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video_path, fourcc, fps, (top_view_width, top_view_height))

# Ankle, heel and toe landmarks are highlighted in red
highlight_landmarks = [mp_pose.PoseLandmark.LEFT_ANKLE, mp_pose.PoseLandmark.RIGHT_ANKLE,
                       mp_pose.PoseLandmark.LEFT_HEEL, mp_pose.PoseLandmark.RIGHT_HEEL,
                       mp_pose.PoseLandmark.LEFT_FOOT_INDEX, mp_pose.PoseLandmark.RIGHT_FOOT_INDEX]

try:
    if not out.isOpened():
        raise IOError(f'Cannot open output video for writing: {output_video_path}')

    # The context manager closes the Pose graph even if a frame fails.
    with mp_pose.Pose(model_complexity=1, enable_segmentation=False,
                      min_detection_confidence=0.5) as pose:
        while True:
            ret, frame = cap.read()
            if not ret:
                break  # end of stream

            # OpenCV decodes to BGR; the frame is converted to RGB for MediaPipe
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Detect pose landmarks
            results = pose.process(rgb_frame)

            # Start each output frame from an empty (black) canvas
            top_view_frame = np.zeros((top_view_height, top_view_width, 3), dtype=np.uint8)

            # Only the top view is written, so nothing is drawn on `frame` itself.
            if results.pose_world_landmarks:
                landmarks = results.pose_world_landmarks.landmark

                # Project every world landmark onto the canvas once.
                # Horizontal axis: world x, offset by 0.5 so 0 is centered.
                # Vertical axis: world z, negated so that points closer to
                # the camera (smaller z) appear at the bottom of the image.
                # Mapping z straight to image rows would mirror the scene,
                # i.e. show it as seen from below.
                points = [
                    (int((lm.x + 0.5) * top_view_width),
                     int((0.5 - lm.z) * top_view_height))
                    for lm in landmarks
                ]

                # Draw the landmarks: feet in red, everything else in green (BGR)
                for i, point in enumerate(points):
                    color = (0, 0, 255) if i in highlight_landmarks else (0, 255, 0)
                    cv2.circle(top_view_frame, point, 5, color, -1)

                # Connect the landmarks with white lines
                for start_idx, end_idx in mp_pose.POSE_CONNECTIONS:
                    cv2.line(top_view_frame, points[start_idx], points[end_idx], (255, 255, 255), 2)

            # Frames without a detection are written as a blank canvas
            out.write(top_view_frame)
finally:
    # The writer must be released before MoviePy reads the file back
    cap.release()
    out.release()

# Combine the top-view video with the original audio track.
# Both clips stay open until the final file has been written and are then closed.
with VideoFileClip(input_video_path) as clip, VideoFileClip(output_video_path) as top_view_clip:
    audio = clip.audio
    final_clip = top_view_clip.set_audio(audio)
    final_clip.write_videofile("final_output_with_audio.mp4", codec='libx264', audio_codec='aac')

print('Output video saved as final_output_with_audio.mp4')