In [68]:
from pytube import YouTube
%matplotlib inline
import cv2
import os
import numpy as np
import mediapipe as mp
import matplotlib.pyplot as plt

def download_youtube_video(youtube_url, save_path="."):
    yt = YouTube(youtube_url)
    ys = yt.streams.get_highest_resolution()
    ys.download(save_path)
    return ys.default_filename

# 유튜브 URL로부터 영상 다운로드
video_filename = download_youtube_video('https://www.youtube.com/watch?v=6-E6qrs99-k')




In [73]:

def capture_frames(video_path, frame_interval, prefix, output_dir):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print("Error: Could not open video.")
        return []

    saved_images = [os.path.join(output_dir, f"{prefix}_{i}.png") for i in range(0, int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), frame_interval) if cap.read()[0]]
    cap.release()
    return saved_images

def capture_specific_frames(video_path, frame_list, prefix, output_dir):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print("Error: Could not open video.")
        return []

    saved_images = []
    for i in range(int(cap.get(cv2.CAP_PROP_FRAME_COUNT))):
        ret, frame = cap.read()
        if not ret:
            break
        if i in frame_list:
            img_path = os.path.join(output_dir, f"{prefix}_{i}.png")
            cv2.imwrite(img_path, frame)
            saved_images.append(img_path)

    cap.release()
    return saved_images

def preprocess_video(video_path):
    cap = cv2.VideoCapture(video_path)

    fps = cap.get(cv2.CAP_PROP_FPS)

    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(f'pr_{video_path}', fourcc, fps, (frame_width, frame_height))

    frame_count = 0

    while True:
        ret, frame = cap.read()

        if not ret:
            break

        frame_count += 1
        cv2.putText(frame, f"Frame: {frame_count}", (frame_width - 350, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)

        out.write(frame)

    cap.release()
    out.release()
    
def save_frames(video_path, frame_indices):
    cap = cv2.VideoCapture(video_path)

    frame_count = 0

    frames = []

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        
        frame_count += 1

        if frame_count in frame_indices:
            frames.append(frame)

    cap.release()

    for i, frame in enumerate(frames):
        print(f"Saving frame {i}")
        cv2.imwrite(f"frame_{i}.png", frame)

conso_list = [80, 170, 275, 380, 470, 560, 650, 730, 820, 920, 1020, 1120, 1200, 1290]

save_frames("consonant korean.mp4", conso_list)
# preprocess_video("consonant korean.mp4")

Saving frame 0
Saving frame 1
Saving frame 2
Saving frame 3
Saving frame 4
Saving frame 5
Saving frame 6
Saving frame 7
Saving frame 8
Saving frame 9
Saving frame 10
Saving frame 11
Saving frame 12
Saving frame 13


In [81]:


# Mediapipe utilities
mp_hands = mp.solutions.hands
hands = mp_hands.Hands()
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles


def normalize_landmarks(landmarks):
    """Normalize landmarks to range [0, 1]."""
    normalized = np.array([[landmark.x, landmark.y, landmark.z] for landmark in landmarks.landmark])
    normalized -= np.min(normalized, axis=0)
    normalized /= np.max(normalized, axis=0)
    return normalized


def draw_bounding_box(frame, normalized_landmarks):
    """Draw bounding box around normalized hand landmarks."""
    x_min, y_min, _ = np.min(normalized_landmarks, axis=0)
    x_max, y_max, _ = np.max(normalized_landmarks, axis=0)
    x_range = x_max - x_min
    y_range = y_max - y_min
    x_min -= x_range
    y_min -= y_range
    x_max += x_range
    y_max += y_range
    x_min, y_min = max(0, x_min), max(0, y_min)
    x_max, y_max = min(1, x_max), min(1, y_max)
    cv2.rectangle(frame, 
                  (int(x_min * frame.shape[1]), int(y_min * frame.shape[0])), 
                  (int(x_max * frame.shape[1]), int(y_max * frame.shape[0])), 
                  (0, 255, 0), 2
    )


def plot_3d_landmarks(normalized_landmarks):
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    x, y, z = zip(*normalized_landmarks)
    
    for connection in mp_hands.HAND_CONNECTIONS:
        x_pair = [x[connection[0]], x[connection[1]]]
        y_pair = [y[connection[0]], y[connection[1]]]
        z_pair = [z[connection[0]], z[connection[1]]]
        ax.plot(x_pair, y_pair, z_pair, color='b')

    ax.scatter(x, y, z, s=10, c='r')
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_zlabel('Z')
    ax.view_init(elev=45., azim=-90.)  # Adjust camera angles for a better view
    
    plt.ion()  # Interactive mode ON
    plt.show()

import csv

def process_images_in_directory(input_dir, output_dir):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    for filename in os.listdir(input_dir):
        if filename.endswith(".jpg") or filename.endswith(".jpeg") or filename.endswith(".png"):
            image_path = os.path.join(input_dir, filename)
            image = cv2.imread(image_path)
            image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            results = hands.process(image_rgb)

            if results.multi_hand_landmarks:
                print(f"{results.multi_handedness} hands detected.")
                for i, landmarks in enumerate(results.multi_hand_landmarks):
                    mp_drawing.draw_landmarks(
                        image, landmarks, 
                        mp_hands.HAND_CONNECTIONS,
                        mp_drawing_styles.get_default_hand_landmarks_style(),
                        mp_drawing_styles.get_default_hand_connections_style()
                    )
                    normalized_landmarks = normalize_landmarks(landmarks)

                    csv_filename = os.path.splitext(filename)[0] + ".csv"
                    csv_path = os.path.join(output_dir, 'csv', csv_filename)
                    with open(csv_path, mode='w', newline='') as csv_file:
                        writer = csv.writer(csv_file)
                        writer.writerow(['label', 'x', 'y', 'z'])
                        for j, landmark in enumerate(normalized_landmarks):
                            writer.writerow([f"{os.path.splitext(filename)[0]}", landmark[0], landmark[1], landmark[2]])

            output_path = os.path.join(output_dir, filename)
            cv2.imwrite(output_path, image)
            
def plot_csv_landmarks(csv_dir):
    """Plot hand landmarks from CSV files in directory with subplots for each label."""
    fig = plt.figure(figsize=(12, 12))

    labels = sorted(set([str(os.path.splitext(filename)[0]) for filename in os.listdir(csv_dir) if filename.endswith(".csv")]))
    print(f"Labels: {labels}")
    num_plots = len(labels)
    rows = int(np.sqrt(num_plots))
    cols = int(np.ceil(num_plots / rows))
    plot_num = 1
    for label in labels:
        ax = fig.add_subplot(rows, cols, plot_num, projection='3d')
        ax.set_title(label)
        plot_num += 1

        # Plot hand landmarks for label
        for filename in os.listdir(csv_dir):
            if filename.endswith(".csv") and os.path.splitext(filename)[0] == label:
                csv_path = os.path.join(csv_dir, filename)
                with open(csv_path, mode='r') as csv_file:
                    reader = csv.reader(csv_file)
                    next(reader)  # Skip header row
                    x = []
                    y = []
                    z = []
                    for row in reader:
                        x.append(float(row[1]))
                        y.append(float(row[2]))
                        z.append(float(row[3]))
                    for connection in mp_hands.HAND_CONNECTIONS:
                        x_pair = [x[connection[0]], x[connection[1]]]
                        y_pair = [y[connection[0]], y[connection[1]]]
                        z_pair = [z[connection[0]], z[connection[1]]]
                        ax.plot(x_pair, y_pair, z_pair, color='b')
                    ax.scatter(x, y, z, s=10, c='r')

                    ax.set_xlabel('X')
                    ax.set_ylabel('Y')
                    ax.set_zlabel('Z')
                    ax.view_init(elev=45., azim=-45.)  # Adjust camera angles for a better view
     
    # plt.title("Hand Landmarks")   
    plt.tight_layout()
    plt.show()
    
def process_image(image):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    results = hands.process(image)
    print(results.multi_hand_landmarks)
    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            mp_drawing.draw_landmarks(
                image=image,
                landmark_list=hand_landmarks,
                connections=mp_hands.HAND_CONNECTIONS,
                landmark_drawing_spec=mp_drawing_styles.get_default_hand_landmarks_style(),
                connection_drawing_spec=mp_drawing_styles.get_default_hand_connections_style(),
            )

            normalize_landmarks(hand_landmarks)
            draw_bounding_box(image, normalize_landmarks(hand_landmarks))

    plt.imshow(image)
    plt.show()

plot_csv_landmarks('./dataset/korean/vowels_processed/csv/')

Labels: []


ZeroDivisionError: division by zero

<Figure size 1200x1200 with 0 Axes>

In [83]:
image_path = "23.png"
video_path = 'fingerspelling korean.mp4'

set_path = './dataset/korean/samples/vowels/'
# process_image(cv2.imread(os.path.join(set_path, image_path)))

process_images_in_directory(set_path, './dataset/korean/vowels_processed/')
# plot_csv_landmarks('./dataset/korean/vowels_processed/csv/')

[classification {
  index: 0
  score: 0.9954187
  label: "Left"
}
] hands detected.
[classification {
  index: 0
  score: 0.9984596
  label: "Left"
}
, classification {
  index: 0
  score: 0.9695541
  label: "Left"
}
] hands detected.
[classification {
  index: 0
  score: 0.99731225
  label: "Left"
}
] hands detected.
[classification {
  index: 0
  score: 0.99873877
  label: "Left"
}
] hands detected.
[classification {
  index: 0
  score: 0.99734986
  label: "Left"
}
] hands detected.
[classification {
  index: 0
  score: 0.9962988
  label: "Left"
}
] hands detected.
[classification {
  index: 0
  score: 0.98381436
  label: "Left"
}
] hands detected.
[classification {
  index: 0
  score: 0.9970956
  label: "Left"
}
, classification {
  index: 0
  score: 0.65354013
  label: "Left"
}
] hands detected.
[classification {
  index: 0
  score: 0.9604102
  label: "Left"
}
] hands detected.
[classification {
  index: 0
  score: 0.9914098
  label: "Left"
}
] hands detected.
[classification {
  i