In [21]:
import mediapipe as mp
import cv2
import os
import joblib
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder

In [22]:
def transform_record(record, xs=None, ys=None):
    """Normalize one row of hand-landmark coordinates before classification.

    The steps are applied in this order:

    1. Translate so the first landmark (index 0) sits at the origin.
    2. Scale x and y independently by their own extent (max - min).
    3. Rotate so the landmark at index 9 lies on the positive y axis.
    4. Multiply every x by the sign of ``x[5] - x[9]``.

    Parameters
    ----------
    record : pandas.Series
        Row holding the coordinate columns named in ``xs`` and ``ys``.
        Entries under any other label are returned untouched.
    xs, ys : list of str, optional
        Column names of the x / y coordinates, in landmark order. When
        omitted, the notebook-level ``Xs`` / ``Ys`` lists are used if they
        exist; otherwise the names ``x1..x21`` / ``y1..y21`` are used.

    Returns
    -------
    pandas.Series
        A transformed copy of ``record``. The input is not modified, which
        keeps the function safe to pass to ``DataFrame.apply``.
    """
    if xs is None:
        xs = globals().get("Xs") or [f"x{i}" for i in range(1, 22)]
    if ys is None:
        ys = globals().get("Ys") or [f"y{i}" for i in range(1, 22)]

    # Pixel coordinates arrive as integers; upcast first so the fractional
    # results can be stored without a dtype-incompatible assignment.
    out = record.astype(float) if record.dtype.kind in "iub" else record.copy()
    x = out[xs].to_numpy(dtype=float)
    y = out[ys].to_numpy(dtype=float)

    # Translate
    x = x - x[0]
    y = y - y[0]

    # Scale. A zero extent means every point shares that coordinate; skip the
    # division so NaN does not spread through the rotation into all features.
    x_extent = x.max() - x.min()
    y_extent = y.max() - y.min()
    if x_extent > 0:
        x = x / x_extent
    if y_extent > 0:
        y = y / y_extent

    # Rotate. theta is measured from the +y axis (note the x-then-y argument
    # order), so applying R sends landmark 9 to (0, r) with r >= 0.
    theta = np.arctan2(x[9], y[9])
    cos = np.cos(theta)
    sin = np.sin(theta)
    R = np.array([
        [cos, -sin],
        [sin, cos]
    ])
    x, y = R @ np.vstack((x, y))

    # Adjust Xs sign based on the difference.
    # NOTE(review): np.sign returns 0 when x[5] == x[9], which zeroes every x;
    # left unchanged so the features stay consistent with how the model was
    # presumably trained -- confirm against the training pipeline.
    x = np.sign(x[5] - x[9]) * x

    out[xs] = x
    out[ys] = y
    return out

In [23]:
# Restore the persisted artifacts: the classifier and the encoder that maps
# its numeric predictions back to class labels.
# NOTE(review): joblib.load unpickles the file, so only load artifacts from a
# trusted source.
_MODEL_FILE = 'model_variations/best_xgb_model.pkl'
_TARGET_ENCODER_FILE = 'preprocessing_models/target_encoder.pkl'

loaded_model = joblib.load(_MODEL_FILE)
loaded_target_encoder = joblib.load(_TARGET_ENCODER_FILE)

#### Try on Recorded Video

In [None]:
# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
hands = mp_hands.Hands()
mp_drawing = mp.solutions.drawing_utils

# Feature columns in the interleaved order x1, y1, ..., x21, y21 used to build
# each record; Xs / Ys are also read as globals by transform_record.
mycols = [item for i in range(1, 22) for item in (f"x{i}", f"y{i}")]
Xs = ["x" + str(i) for i in range(1, 22)]
Ys = ["y" + str(i) for i in range(1, 22)]

video_path = 'video_trial/my_video.mp4'
cap = cv2.VideoCapture(video_path)

frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)

# the video writer
fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # You can also use 'XVID' or 'MJPG'
# Every frame is rotated by 90 degrees before it is written, so the output
# width is the source height and vice versa.
out = cv2.VideoWriter('video_trial/output.mp4', fourcc, fps, (frame_height, frame_width))

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Rotate the frame to the left (90 degrees counterclockwise)
        rotated_frame = cv2.rotate(frame, cv2.ROTATE_90_COUNTERCLOCKWISE)

        image = rotated_frame
        image_height, image_width, _ = image.shape

        rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = hands.process(rgb_frame)

        # Reset per frame so a frame without a hand never reuses old landmarks.
        hand_landmarks = []
        if results.multi_hand_landmarks:
            for landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(image, landmarks, mp_hands.HAND_CONNECTIONS)
                # One record is exactly 21 landmarks (42 values). Start over
                # for each hand so two hands are not concatenated into an
                # 84-value row; the last detected hand is the one classified,
                # as in the live-stream and image sections.
                hand_landmarks = []
                for landmark in landmarks.landmark:
                    x_px = int(landmark.x * image_width)
                    y_px = int(landmark.y * image_height)
                    hand_landmarks.extend([x_px, y_px])

        # Only classify when a hand was detected in this frame
        if hand_landmarks:
            record = pd.DataFrame(np.array(hand_landmarks).reshape(1, -1), columns=mycols)
            record = record.apply(transform_record, axis=1)
            y_pred = loaded_model.predict(record)
            label_pred = loaded_target_encoder.inverse_transform(y_pred)[0]

            position = (10, 30)
            font = cv2.FONT_HERSHEY_SIMPLEX
            font_scale = 1
            color = (0, 255, 0)
            thickness = 2
            cv2.putText(image, label_pred, position, font, font_scale, color, thickness, cv2.LINE_AA)

        # The half-size copy is for on-screen preview only.
        resized_image = cv2.resize(image, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_AREA)
        cv2.imshow("Hand Landmark Detection", resized_image)

        # Write the full-size annotated frame: the writer was opened with the
        # full rotated frame size, and frames of any other size are not
        # encoded correctly.
        out.write(image)

        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break
finally:
    # Release resources even if prediction or drawing raised mid-stream, so
    # the output file is finalized and the preview window is closed.
    cap.release()
    out.release()
    hands.close()
    cv2.destroyAllWindows()


#### Live Stream

# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
hands = mp_hands.Hands()
mp_drawing = mp.solutions.drawing_utils

# Feature columns in the interleaved order x1, y1, ..., x21, y21 used to build
# each record; Xs / Ys are also read as globals by transform_record.
mycols = [item for i in range(1, 22) for item in (f"x{i}", f"y{i}")]
Xs=["x"+str(i) for i in range(1,22)]
Ys=["y"+str(i) for i in range(1,22)]

# Device index 0 is passed to VideoCapture to read from a camera.
cap = cv2.VideoCapture(0)

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        image = frame
        image_height, image_width, _ = image.shape

        rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = hands.process(rgb_frame)

        # Reset per frame: without this the name is undefined until the first
        # detection, and afterwards a frame without a hand would be labelled
        # with the previous frame's landmarks.
        hand_landmarks = []
        if results.multi_hand_landmarks:
            for landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(image, landmarks, mp_hands.HAND_CONNECTIONS)
                # Restart for each hand; the last detected hand is classified.
                hand_landmarks = []
                for landmark in landmarks.landmark:
                    x_px = int(landmark.x * image_width)
                    y_px = int(landmark.y * image_height)
                    hand_landmarks.extend([x_px, y_px])

        # Only classify and label the frame when a hand was detected in it
        if hand_landmarks:
            record = pd.DataFrame(np.array(hand_landmarks).reshape(1, -1), columns=mycols)
            record = record.apply(transform_record, axis=1)
            y_pred = loaded_model.predict(record)
            label_pred = loaded_target_encoder.inverse_transform(y_pred)[0]

            position = (10, 30)
            font = cv2.FONT_HERSHEY_SIMPLEX
            font_scale = 1
            color = (0, 255, 0)
            thickness = 2
            cv2.putText(image, label_pred, position, font, font_scale, color, thickness, cv2.LINE_AA)

        resized_image = cv2.resize(image, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_AREA)
        cv2.imshow("Hand Landmark Detection", resized_image)

        # Press 'q' to stop the stream
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break
finally:
    # Free the camera and close the window even if a frame raised.
    cap.release()
    hands.close()
    cv2.destroyAllWindows()

#### Images

In [14]:
# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
# The files in the folder are independent stills, so run detection on every
# image instead of tracking from the previous one as the video mode does.
hands = mp_hands.Hands(static_image_mode=True)
mp_drawing = mp.solutions.drawing_utils

# Feature columns in the interleaved order x1, y1, ..., x21, y21 used to build
# each record; Xs / Ys are also read as globals by transform_record.
mycols = [item for i in range(1, 22) for item in (f"x{i}", f"y{i}")]
Xs=["x"+str(i) for i in range(1,22)]
Ys=["y"+str(i) for i in range(1,22)]

images_folder = 'images_trial'
image_files = [f for f in os.listdir(images_folder) if f.endswith(('.jpg', '.png', '.jpeg'))]

current_index = 0
try:
    while True:
        if current_index >= len(image_files):
            print("No more images to process.")
            break

        image_path = os.path.join(images_folder, image_files[current_index])
        image = cv2.imread(image_path)

        # cv2.imread returns None for an unreadable file, so check before
        # touching .shape, and advance the index so the loop does not retry
        # the same broken file forever.
        if image is None:
            print(f"Error reading image {image_path}")
            current_index += 1
            continue

        image_height, image_width, _ = image.shape

        rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = hands.process(rgb_frame)

        # Reset per image so an image without a hand is neither a NameError
        # nor labelled with the previous image's landmarks.
        hand_landmarks = []
        if results.multi_hand_landmarks:
            for landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(image, landmarks, mp_hands.HAND_CONNECTIONS)
                # Restart for each hand; the last detected hand is classified.
                hand_landmarks = []
                for landmark in landmarks.landmark:
                    x_px = int(landmark.x * image_width)
                    y_px = int(landmark.y * image_height)
                    hand_landmarks.extend([x_px, y_px])

        # Only classify and label the image when a hand was detected
        if hand_landmarks:
            record = pd.DataFrame(np.array(hand_landmarks).reshape(1, -1), columns=mycols)
            record = record.apply(transform_record, axis=1)
            y_pred = loaded_model.predict(record)
            label_pred = loaded_target_encoder.inverse_transform(y_pred)[0]

            # Write Class
            position = (10, 30)
            font = cv2.FONT_HERSHEY_SIMPLEX
            font_scale = 1
            color = (0, 255, 0)
            thickness = 2
            cv2.putText(image, label_pred, position, font, font_scale, color, thickness, cv2.LINE_AA)

        # Show the resized image
        resized_image = cv2.resize(image, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_AREA)
        cv2.imshow("Hand Landmark Detection", resized_image)

        # Wait for key press; any key other than 'n' or 'q' reloads and
        # redisplays the current image.
        key = cv2.waitKey(0) & 0xFF

        if key == ord('n'):  # Go to the next image on pressing 'n'
            current_index += 1
        elif key == ord('q'):  # Quit the loop on pressing 'q'
            break
finally:
    # Close the detector and the window even if an image raised.
    hands.close()
    cv2.destroyAllWindows()