In [None]:
!pip install mediapipe opencv-python

In [None]:
import cv2
import numpy as np
import os
import matplotlib.pyplot as plt
import mediapipe as mp
from google.colab import drive

In [None]:
# Mount Google Drive so the MyDrive paths used further down resolve under
# /content/drive. force_remount=True requests a fresh mount even when the
# drive is already mounted.
drive.mount('/content/drive', force_remount=True)

In [None]:

# Helper functions: grayscale thumbnail preprocessing and MediaPipe-based hand-mask extraction.

def get_grayscale(image, target_size=128):
    """Turn a BGR image into a contrast-enhanced, square grayscale thumbnail.

    Steps: grayscale conversion -> 9x9 Gaussian blur -> histogram
    equalisation -> aspect-preserving resize so the longer side equals
    ``target_size`` -> centred black padding up to
    ``target_size`` x ``target_size``.

    Note:
        The notebook defines ``get_grayscale`` again in a later cell; once
        that cell has run, the later definition is the one in effect.

    Args:
        image: 3-channel image in OpenCV's BGR channel order.
        target_size: Side length in pixels of the square result. Defaults to
            128, the value that was previously hard-coded.

    Returns:
        A single-channel image of shape ``(target_size, target_size)``.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (9, 9), 0)
    equalized = cv2.equalizeHist(blurred)

    # Scale so that the longer side becomes target_size.
    height, width = equalized.shape
    scale = target_size / max(height, width)
    # Clamp to at least 1 px: for very elongated inputs the short side would
    # otherwise truncate to 0 and cv2.resize rejects an empty target size.
    new_width = max(1, int(width * scale))
    new_height = max(1, int(height * scale))
    resized = cv2.resize(equalized, (new_width, new_height), interpolation=cv2.INTER_AREA)

    # Split the missing rows/columns between both sides; any odd pixel goes
    # to the bottom/right border.
    pad_h = target_size - new_height
    pad_w = target_size - new_width
    top = pad_h // 2
    left = pad_w // 2
    return cv2.copyMakeBorder(
        resized, top, pad_h - top, left, pad_w - left,
        cv2.BORDER_CONSTANT, value=0,
    )

def process_image(image, hands=None, show=True):
    """Build a 128x128 hand-silhouette mask from one BGR frame.

    MediaPipe Hands locates a single hand. Its skeleton is drawn in white on
    a black canvas the size of the frame, the palm polygon is filled in, the
    result is dilated, cropped to the bounding box of the largest contour and
    resized to 128x128 (the crop's aspect ratio is not preserved).

    Args:
        image: Frame in OpenCV's BGR channel order.
        hands: Optional, already constructed ``mp.solutions.hands.Hands``
            object to reuse across calls. When ``None`` (default) a detector
            is created and closed inside this call, as before.
        show: When True (default, the previous behaviour) the resulting mask
            is displayed with matplotlib.

    Returns:
        ``[mask, None]`` where ``mask`` is a 128x128 single-channel image, or
        ``[None, None]`` when no hand is detected or no contour can be
        extracted. The second slot is always ``None``.
    """
    mp_hands = mp.solutions.hands
    mp_drawing = mp.solutions.drawing_utils

    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    if hands is None:
        with mp_hands.Hands(static_image_mode=True, max_num_hands=1, min_detection_confidence=0.5) as detector:
            results = detector.process(rgb)
    else:
        results = hands.process(rgb)

    if not results.multi_hand_landmarks:
        return [None, None]

    hand_landmarks = results.multi_hand_landmarks[0]
    frame_h, frame_w = rgb.shape[:2]

    # draw_landmarks needs a 3-channel canvas; everything is drawn in white so
    # the canvas collapses to a binary mask afterwards.
    canvas = np.zeros((frame_h, frame_w, 3), dtype=np.uint8)
    mp_drawing.draw_landmarks(
        canvas, hand_landmarks, mp_hands.HAND_CONNECTIONS,
        mp_drawing.DrawingSpec(color=(255, 255, 255), thickness=4, circle_radius=2),
        mp_drawing.DrawingSpec(color=(255, 255, 255), thickness=22)
    )
    mask = cv2.cvtColor(canvas, cv2.COLOR_BGR2GRAY)

    # Wrist and the base joint of each finger outline the palm.
    palm_indices = [0, 2, 5, 9, 13, 17]
    palm_points = np.array([[
        int(hand_landmarks.landmark[idx].x * frame_w),
        int(hand_landmarks.landmark[idx].y * frame_h)
    ] for idx in palm_indices], dtype=np.int32)
    cv2.fillPoly(mask, [palm_points], 255)

    # Dilation closes the gaps between fingers/palm and thickens the outline.
    mask = cv2.dilate(mask, np.ones((15, 15), np.uint8), iterations=1)

    contours, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        # Previously this path fell through and returned None, which broke
        # callers that unpack the two-element result.
        return [None, None]

    largest = max(contours, key=cv2.contourArea)
    # Separate names so the frame dimensions above are not overwritten.
    box_x, box_y, box_w, box_h = cv2.boundingRect(largest)
    cropped_mask = mask[box_y:box_y + box_h, box_x:box_x + box_w]
    final_img = cv2.resize(cropped_mask, (128, 128))

    if show:
        plt.figure(figsize=(3, 3))
        plt.imshow(final_img, cmap='gray')
        plt.title("Processed Image")
        plt.axis('off')
        plt.show()

    return [final_img, None]

In [None]:
# Convert every training video into per-frame hand-mask PNGs stored on Drive.
training_path = '/content/drive/MyDrive/training_vids'
output_path = '/content/drive/MyDrive/training_v2'

# Create the folders at the location the frames are actually written to
# (under output_path). The folders were previously created relative to the
# working directory, and cv2.imwrite does not create missing directories -
# it just returns False.
frames_dir = os.path.join(output_path, 'frame_hands_v2')
bw_dir = os.path.join(output_path, 'bw_plain_hands_v3')  # second output, currently not written
os.makedirs(frames_dir, exist_ok=True)
os.makedirs(bw_dir, exist_ok=True)

videos = ["closed_left", "closed_right", "open_left", "open_right"]
for vid in videos:
    file_name = f'{training_path}/{vid}.mp4'
    # File name without directory and extension; used in the output names.
    source_video = os.path.splitext(os.path.basename(file_name))[0]

    cap = cv2.VideoCapture(file_name)
    if not cap.isOpened():
        print(f'WARNING: could not open {file_name}, skipping')
        cap.release()
        continue

    frame_number = 0
    try:
        while True:
            print("Frame number:", frame_number)
            ret, frame = cap.read()
            if not ret:
                break

            # Treat a missing return value (None) the same as "no hand found".
            result = process_image(frame)
            processed_image, bw_image = result if result is not None else (None, None)

            if processed_image is not None:
                out_file = f'{frames_dir}/frame_{source_video}_{frame_number}.png'
                # imwrite reports failure through its return value only.
                if cv2.imwrite(out_file, processed_image):
                    print(f'SAVED: {out_file}')
                else:
                    print(f'FAILED to save: {out_file}')

            frame_number += 1
    finally:
        # Release every capture, not only the last one opened by the loop.
        cap.release()

print("Processing complete.")

In [None]:

# Second version of the helpers: process_image here builds the mask on a 256x256 canvas around the hand's square bounding box.

def get_grayscale(image, target_size=128):
    """Turn a BGR image into a contrast-enhanced, square grayscale thumbnail.

    Steps: grayscale conversion -> 9x9 Gaussian blur -> histogram
    equalisation -> aspect-preserving resize so the longer side equals
    ``target_size`` -> centred black padding up to
    ``target_size`` x ``target_size``.

    Note:
        ``get_grayscale`` is also defined in an earlier cell of this
        notebook; this definition replaces it once this cell has run.

    Args:
        image: 3-channel image in OpenCV's BGR channel order.
        target_size: Side length in pixels of the square result. Defaults to
            128, the value that was previously hard-coded.

    Returns:
        A single-channel image of shape ``(target_size, target_size)``.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (9, 9), 0)
    equalized = cv2.equalizeHist(blurred)

    # Scale so that the longer side becomes target_size.
    height, width = equalized.shape
    scale = target_size / max(height, width)
    # Clamp to at least 1 px: for very elongated inputs the short side would
    # otherwise truncate to 0 and cv2.resize rejects an empty target size.
    new_width = max(1, int(width * scale))
    new_height = max(1, int(height * scale))
    resized = cv2.resize(equalized, (new_width, new_height), interpolation=cv2.INTER_AREA)

    # Split the missing rows/columns between both sides; any odd pixel goes
    # to the bottom/right border.
    pad_h = target_size - new_height
    pad_w = target_size - new_width
    top = pad_h // 2
    left = pad_w // 2
    return cv2.copyMakeBorder(
        resized, top, pad_h - top, left, pad_w - left,
        cv2.BORDER_CONSTANT, value=0,
    )

def process_image(image, hands=None, show=True):
    """Build a 128x128 hand-silhouette mask on a square crop around the hand.

    MediaPipe Hands locates a single hand. A square box (5% padding per side)
    is fitted around its landmarks and shifted back towards the frame where
    it would otherwise stick out. The hand skeleton and the filled palm
    polygon are drawn in white on a 256x256 canvas representing that box,
    dilated, binarised and resized to 128x128.

    Note:
        ``process_image`` is also defined in an earlier cell of this
        notebook; this definition replaces it once this cell has run.

    Args:
        image: Frame in OpenCV's BGR channel order.
        hands: Optional, already constructed ``mp.solutions.hands.Hands``
            object to reuse across calls. When ``None`` (default) a detector
            is created and closed inside this call, as before.
        show: When True (default, the previous behaviour) the resulting mask
            is displayed with matplotlib.

    Returns:
        ``[mask, None]`` where ``mask`` is a 128x128 uint8 image containing
        only 0 and 255 before resizing, or ``[None, None]`` when no hand is
        detected. The second slot is always ``None``.
    """
    mp_hands = mp.solutions.hands
    mp_drawing = mp.solutions.drawing_utils
    canvas_size = 256   # working resolution of the square canvas
    output_size = 128   # side length of the returned mask

    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    if hands is None:
        with mp_hands.Hands(static_image_mode=True, max_num_hands=1, min_detection_confidence=0.5) as detector:
            results = detector.process(rgb)
    else:
        results = hands.process(rgb)

    if not results.multi_hand_landmarks:
        return [None, None]

    hand_landmarks = results.multi_hand_landmarks[0]
    h, w = rgb.shape[:2]

    # Pixel-space bounding box of all landmarks (landmarks are normalised to
    # the frame size).
    xs = [lm.x for lm in hand_landmarks.landmark]
    ys = [lm.y for lm in hand_landmarks.landmark]
    x_min, x_max = int(min(xs) * w), int(max(xs) * w)
    y_min, y_max = int(min(ys) * h), int(max(ys) * h)

    # Square side = longer box edge plus 5% padding on each side.
    side_length = max(x_max - x_min, y_max - y_min)
    side_length += 2 * int(side_length * 0.05)
    # Guard against a degenerate (zero-size) box, which would otherwise cause
    # a division by zero below.
    side_length = max(1, side_length)

    # Centre the square on the hand, then shift it back when it would leave
    # the frame on the right/bottom.
    x_center = (x_min + x_max) // 2
    y_center = (y_min + y_max) // 2
    x_min = max(0, x_center - side_length // 2)
    y_min = max(0, y_center - side_length // 2)
    if x_min + side_length > w:
        x_min = w - side_length
    if y_min + side_length > h:
        y_min = h - side_length

    # Landmark positions relative to the square box (0..1 inside the box).
    # No pixels are cropped from the frame: the image content never reached
    # the mask, so only the coordinate mapping is needed.
    relative = [
        ((lm.x * w - x_min) / side_length, (lm.y * h - y_min) / side_length)
        for lm in hand_landmarks.landmark
    ]

    # draw_landmarks interprets landmark coordinates as fractions of the
    # canvas it draws on, so it must be given box-relative landmarks.
    # Passing the frame-relative ones (as before) put the skeleton in a
    # different coordinate system than the palm polygon.
    box_landmarks = type(hand_landmarks)()
    box_landmarks.CopyFrom(hand_landmarks)
    for lm, (rel_x, rel_y) in zip(box_landmarks.landmark, relative):
        lm.x = rel_x
        lm.y = rel_y

    canvas = np.zeros((canvas_size, canvas_size, 3), dtype=np.uint8)
    mp_drawing.draw_landmarks(
        canvas, box_landmarks, mp_hands.HAND_CONNECTIONS,
        mp_drawing.DrawingSpec(color=(255, 255, 255), thickness=1, circle_radius=2),
        mp_drawing.DrawingSpec(color=(255, 255, 255), thickness=4)
    )

    # Wrist and the base joint of each finger outline the palm.
    palm_indices = [0, 2, 5, 9, 13, 17]
    palm_points = np.array(
        [[int(relative[i][0] * canvas_size), int(relative[i][1] * canvas_size)] for i in palm_indices],
        dtype=np.int32,
    )
    cv2.fillPoly(canvas, [palm_points], (255, 255, 255))

    # Collapse to one channel, thicken the drawing and binarise it.
    mask = cv2.cvtColor(canvas, cv2.COLOR_BGR2GRAY)
    mask = cv2.dilate(mask, np.ones((15, 15), np.uint8), iterations=1)
    hand_image = np.where(mask > 0, 255, 0).astype(np.uint8)
    final_img = cv2.resize(hand_image, (output_size, output_size))

    if show:
        plt.figure(figsize=(3, 3))
        plt.imshow(final_img, cmap='gray')  # display as grayscale
        plt.title("Processed Image")
        plt.axis('off')
        plt.show()

    return [final_img, None]