In [None]:
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
import cv2
import numpy as np
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
import cvzone
import os

In [None]:
class HandLandmarker:
    def __init__(self, model_path):
        self.model_path = model_path
        self.base_options = python.BaseOptions(model_asset_path=self.model_path)
        self.options = vision.HandLandmarkerOptions(base_options=self.base_options, num_hands=2)
        self.detector = vision.HandLandmarker.create_from_options(self.options)
        self.saving_path = 'anaconda_projects/mediapipe_hand'
        os.makedirs(self.saving_path, exist_ok=True)
        self.watch_path = 'FREE-Watch-Clipart.png'
        self.watch_image = cv2.imread(self.watch_path, cv2.IMREAD_UNCHANGED)
        if self.watch_image is None:
            print(f"Warning: Unable to load watch image from {self.watch_path}")

        flag = True
        while flag:
            choice = input("""
Enter your choice to proceed:
1: Image
2: Video
3: WebCam
0: Exit
>""")
            if choice == '0':
                print('Good bye!')
                flag = False
            elif choice == '1':
                try:
                    self.process_image()
                except FileNotFoundError:
                    print('Some of the files not found on defined path.')
                except Exception as e:
                    print(f'An error occurred: {e}')

            elif choice == '2':
                try:
                    self.process_video()
                except FileNotFoundError:
                    print('Some of the files not found on defined path.')
                except Exception as e:
                    print(f'An error occurred: {e}')

            elif choice == '3':
                try:
                    self.process_webcam()
                except FileNotFoundError:
                    print('Some of the files not found on defined path.')
                except Exception as e:
                    print(f'An error occurred: {e}')

            else:
                print('Invalid choice! Please enter a valid option.')

    def process_image(self):
        path_choice = input('''
What do you want to provide?
1: Image path (for a single image)
2: Folder path (for multiple images)
0: Exit to main menu
>''')
        if path_choice == '1':
            image_path = input('Enter the image path:\n>')
            image_path_list = [image_path]
            print('Processing image...')
        elif path_choice == '2':
            folder_path = input('Enter the folder path:\n>')
            image_path_list = [os.path.join(folder_path, img) for img in os.listdir(folder_path)]
            print(image_path_list)
            print('Processing images...')
        elif path_choice == '0':
            print('Redirecting to main menu...')
            return
        else:
            print('Invalid choice! Redirecting to main menu...')
            return

        image_count = 1
        for image_path in image_path_list:
            lh_count, rh_count = 0, 0
            lw_coordinates, rw_coordinates = [], []
            rt_coordinates, ri_coordinates, rm_coordinates, rr_coordinates, rl_coordinates = [], [], [], [], []

            image_path = image_path.replace('"', '').replace("'", '')
            self.mp_image = mp.Image.create_from_file(image_path)
            self.result = self.detector.detect(self.mp_image)

            handedness_list = self.result.handedness
            hand_landmarks_list = self.result.hand_landmarks
            image = cv2.imread(image_path)

            if image is None:
                print(f"Failed to read image at {image_path}")
                continue

            if image_path[-4:] == '.png':
                image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)

            height, width, _ = image.shape

            if len(handedness_list) > 0:
                for i in range(len(handedness_list)):
                    if handedness_list[i][0].category_name == 'Left':
                        lh_count += 1
                        keypoints_x = [landmark.x * width for landmark in hand_landmarks_list[i]]
                        keypoints_y = [landmark.y * height for landmark in hand_landmarks_list[i]]
                        wrist_x = keypoints_x[0]
                        wrist_y = keypoints_y[0]
                        lw_coordinates.append((wrist_x, wrist_y))

                        for j in range(len(keypoints_x)):
                            cv2.circle(img=image, center=(int(keypoints_x[j]), int(keypoints_y[j])), radius=4, color=(0, 255, 0), thickness=-2)

                        x_min = min(keypoints_x)
                        y_min = min(keypoints_y)
                        x_max = max(keypoints_x)
                        y_max = max(keypoints_y)
                        cv2.rectangle(image, (int(x_min), int(y_min)), (int(x_max), int(y_max)), (0, 0, 255), 1)
                        cv2.putText(image, "Left", (int(x_min), int(y_min - 10)), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 0, 0), 2)
                    else:
                        rh_count += 1
                        keypoints_x = [landmark.x * width for landmark in hand_landmarks_list[i]]
                        keypoints_y = [landmark.y * height for landmark in hand_landmarks_list[i]]
                        wrist_x = keypoints_x[0]
                        wrist_y = keypoints_y[0]
                        rw_coordinates.append((wrist_x, wrist_y))

                        rt_coordinates.append((keypoints_x[2], keypoints_y[2]))
                        ri_coordinates.append((keypoints_x[5], keypoints_y[5]))
                        rm_coordinates.append((keypoints_x[9], keypoints_y[9]))
                        rr_coordinates.append((keypoints_x[13], keypoints_y[13]))
                        rl_coordinates.append((keypoints_x[17], keypoints_y[17]))

                        rt_rl = rt_coordinates[0][0] - rl_coordinates[0][0]
                        tan_theta = (rm_coordinates[0][1] - rw_coordinates[0][1]) / (rm_coordinates[0][0] - rw_coordinates[0][0]) if (rm_coordinates[0][0] - rw_coordinates[0][0]) != 0 else 0
                        theta = np.arctan(tan_theta)
                        theta_degrees = np.degrees(theta)

                        if rw_coordinates[0][1] > ri_coordinates[0][1] and rw_coordinates[0][1] > rm_coordinates[0][1] and rw_coordinates[0][1] > rr_coordinates[0][1]:
                            hand_orientation = 'straight'
                            hand_view = 'palm' if rt_rl > 0 else 'dorsum'
                        else:
                            hand_orientation = 'rotated'
                            hand_view = 'dorsum' if rt_rl > 0 else 'palm'

                        if hand_view == 'dorsum' and self.watch_image is not None:
                            x_min = min(keypoints_x)
                            y_min = min(keypoints_y)
                            x_max = max(keypoints_x)
                            y_max = max(keypoints_y)
                            watch_resized = cv2.resize(self.watch_image, (int(x_max - x_min) // 2, int(x_max - x_min) // 2))
                            fit_ymin = int(wrist_y - (watch_resized.shape[0] // 2))
                            fit_xmin = int(wrist_x - (watch_resized.shape[1] // 2))
                            fit_ymax = fit_ymin + watch_resized.shape[0]
                            fit_xmax = fit_xmin + watch_resized.shape[1]

                            if fit_xmin >= 0 and fit_ymin >= 0 and fit_xmax <= width and fit_ymax <= height:
                                M = cv2.getRotationMatrix2D((watch_resized.shape[1] // 2, watch_resized.shape[0] // 2), -theta_degrees, 1)
                                watch_rotated = cv2.warpAffine(watch_resized, M, (watch_resized.shape[1], watch_resized.shape[0]))
                                image = cvzone.overlayPNG(image, watch_rotated, (fit_xmin, fit_ymin))
                        else:
                            for k in range(len(keypoints_x)):
                                cv2.circle(img=image, center=(int(keypoints_x[k]), int(keypoints_y[k])), radius=4, color=(0, 255, 0), thickness=-2)
                            x_min = min(keypoints_x)
                            y_min = min(keypoints_y)
                            x_max = max(keypoints_x)
                            y_max = max(keypoints_y)
                            cv2.rectangle(image, (int(x_min), int(y_min)), (int(x_max), int(y_max)), (0, 0, 255), 1)
                            cv2.putText(image, "Right", (int(x_min), int(y_min - 10)), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 0, 0), 2)

                print(f'Total number of hands detected: {len(handedness_list)}')
                print(f'Left Hand count: {lh_count}')
                print(f'Right Hand count: {rh_count}')
                print(f'Left Wrist coordinates: {lw_coordinates}')
                print(f'Right Wrist coordinates: {rw_coordinates}\n')
                output_path = os.path.join(self.saving_path, f'output_image{image_count}.jpg')
                print(f"Saving processed image at '{output_path}' ...")
                cv2.imwrite(output_path, image)
                print(f'Image saved successfully.')
                image_count += 1
            else:
                print('No hands detected in the image.')

    def process_video(self):
        path_choice = input('''
What do you want to provide?
1: Video path (for a single video)
2: Folder path (for multiple videos)
0: Exit to main menu
>''')
        if path_choice == '0':
            return
        elif path_choice not in ['1', '2']:
            print('Invalid choice.')
            return

        video_paths = []
        if path_choice == '1':
            video_path = input('Enter the video path:\n>').strip().replace('"', '').replace("'", '')
            video_paths.append(video_path)
        elif path_choice == '2':
            folder_path = input('Enter the folder path:\n>').strip().replace('"', '').replace("'", '')
            video_extensions = ['.mp4', '.avi', '.mov', '.mkv', '.flv', '.MP4']
            for filename in os.listdir(folder_path):
                if os.path.splitext(filename)[1].lower() in video_extensions:
                    video_paths.append(os.path.join(folder_path, filename))

        if not video_paths:
            print("No valid video files found.")
            return

        for video_path in video_paths:
            cap = cv2.VideoCapture(video_path)
            if not cap.isOpened():
                print(f"Could not open video {video_path}")
                continue

            fps = cap.get(cv2.CAP_PROP_FPS)
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

            output_filename = os.path.basename(video_path)
            output_path = os.path.join(self.saving_path, f'processed_{output_filename}')
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

            frame_count = 0
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)
                result = self.detector.detect(mp_image)

                annotated_frame = frame.copy()
                handedness_list = result.handedness
                hand_landmarks_list = result.hand_landmarks

                if len(handedness_list) > 0:
                    for i in range(len(handedness_list)):
                        handedness = handedness_list[i][0].category_name
                        landmarks = hand_landmarks_list[i]

                        keypoints_x = [lm.x * width for lm in landmarks]
                        keypoints_y = [lm.y * height for lm in landmarks]

                        if handedness == 'Left':
                            for j in range(len(keypoints_x)):
                                cv2.circle(annotated_frame, (int(keypoints_x[j]), int(keypoints_y[j])), 4, (0, 255, 0), -2)
                            x_min = int(min(keypoints_x))
                            y_min = int(min(keypoints_y))
                            x_max = int(max(keypoints_x))
                            y_max = int(max(keypoints_y))
                            cv2.rectangle(annotated_frame, (x_min, y_min), (x_max, y_max), (0, 0, 255), 1)
                            cv2.putText(annotated_frame, "Left", (x_min, y_min - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 0, 0), 2)
                        else:
                            wrist_x = keypoints_x[0]
                            wrist_y = keypoints_y[0]

                            rt_coord = (keypoints_x[2], keypoints_y[2])
                            ri_coord = (keypoints_x[5], keypoints_y[5])
                            rm_coord = (keypoints_x[9], keypoints_y[9])
                            rr_coord = (keypoints_x[13], keypoints_y[13])
                            rl_coord = (keypoints_x[17], keypoints_y[17])

                            rt_rl = rt_coord[0] - rl_coord[0]
                            tan_theta = (rm_coord[1] - wrist_y) / (rm_coord[0] - wrist_x) if (rm_coord[0] - wrist_x) != 0 else 0
                            theta = np.arctan(tan_theta)
                            theta_degrees = np.degrees(theta)

                            if wrist_y > ri_coord[1] and wrist_y > rm_coord[1] and wrist_y > rr_coord[1]:
                                hand_orientation = 'straight'
                                hand_view = 'palm' if rt_rl > 0 else 'dorsum'
                            else:
                                hand_orientation = 'rotated'
                                hand_view = 'dorsum' if rt_rl > 0 else 'palm'

                            if hand_view == 'dorsum' and self.watch_image is not None:
                                x_min = int(min(keypoints_x))
                                y_min = int(min(keypoints_y))
                                x_max = int(max(keypoints_x))
                                y_max = int(max(keypoints_y))

                                watch_resized = cv2.resize(self.watch_image, ((x_max - x_min) // 2, (x_max - x_min) // 2))

                                fit_xmin = int(wrist_x - watch_resized.shape[1] // 2)
                                fit_ymin = int(wrist_y - watch_resized.shape[0] // 2)
                                fit_xmax = fit_xmin + watch_resized.shape[1]
                                fit_ymax = fit_ymin + watch_resized.shape[0]

                                if fit_xmin >= 0 and fit_ymin >= 0 and fit_xmax <= width and fit_ymax <= height:
                                    M = cv2.getRotationMatrix2D((watch_resized.shape[1] // 2, watch_resized.shape[0] // 2), -theta_degrees, 1)
                                    watch_rotated = cv2.warpAffine(watch_resized, M, (watch_resized.shape[1], watch_resized.shape[0]))
                                    annotated_frame = cvzone.overlayPNG(annotated_frame, watch_rotated, (fit_xmin, fit_ymin))
                            else:
                                for k in range(len(keypoints_x)):
                                    cv2.circle(annotated_frame, (int(keypoints_x[k]), int(keypoints_y[k])), 4, (0, 255, 0), -2)
                                x_min = int(min(keypoints_x))
                                y_min = int(min(keypoints_y))
                                x_max = int(max(keypoints_x))
                                y_max = int(max(keypoints_y))
                                cv2.rectangle(annotated_frame, (x_min, y_min), (x_max, y_max), (0, 0, 255), 1)
                                cv2.putText(annotated_frame, "Right", (x_min, y_min - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 0, 0), 2)

                out.write(annotated_frame)
                frame_count += 1
                print(f"Processed frame {frame_count} of {total_frames}")

            cap.release()
            out.release()
            print(f"Video saved to {output_path}")

    def process_webcam(self):
        cap = cv2.VideoCapture(0)
        if not cap.isOpened():
            print("Error: Could not open webcam.")
            return

        print("Webcam started. Press 'q' to quit.")

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                print("Error: Failed to capture frame.")
                break

            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

            result = self.detector.detect(mp_image)

            annotated_frame = frame.copy()
            handedness_list = result.handedness
            hand_landmarks_list = result.hand_landmarks

            if len(handedness_list) > 0:
                for i in range(len(handedness_list)):
                    handedness = handedness_list[i][0].category_name
                    landmarks = hand_landmarks_list[i]

                    height, width, _ = frame.shape

                    keypoints_x = [lm.x * width for lm in landmarks]
                    keypoints_y = [lm.y * height for lm in landmarks]

                    if handedness == 'Left':
                        for j in range(len(keypoints_x)):
                            cv2.circle(annotated_frame, (int(keypoints_x[j]), int(keypoints_y[j])), 4, (0, 255, 0), -2)
                        x_min = int(min(keypoints_x))
                        y_min = int(min(keypoints_y))
                        x_max = int(max(keypoints_x))
                        y_max = int(max(keypoints_y))
                        cv2.rectangle(annotated_frame, (x_min, y_min), (x_max, y_max), (0, 0, 255), 1)
                        cv2.putText(annotated_frame, "Left", (x_min, y_min - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 0, 0), 2)
                    else:
                        wrist_x = keypoints_x[0]
                        wrist_y = keypoints_y[0]

                        rt_coord = (keypoints_x[2], keypoints_y[2])
                        ri_coord = (keypoints_x[5], keypoints_y[5])
                        rm_coord = (keypoints_x[9], keypoints_y[9])
                        rr_coord = (keypoints_x[13], keypoints_y[13])
                        rl_coord = (keypoints_x[17], keypoints_y[17])

                        rt_rl = rt_coord[0] - rl_coord[0]
                        tan_theta = (rm_coord[1] - wrist_y) / (rm_coord[0] - wrist_x) if (rm_coord[0] - wrist_x) != 0 else 0
                        theta = np.arctan(tan_theta)
                        theta_degrees = np.degrees(theta)

                        if wrist_y > ri_coord[1] and wrist_y > rm_coord[1] and wrist_y > rr_coord[1]:
                            hand_orientation = 'straight'
                            hand_view = 'palm' if rt_rl > 0 else 'dorsum'
                        else:
                            hand_orientation = 'rotated'
                            hand_view = 'dorsum' if rt_rl > 0 else 'palm'

                        if hand_view == 'dorsum' and self.watch_image is not None:
                            x_min = int(min(keypoints_x))
                            y_min = int(min(keypoints_y))
                            x_max = int(max(keypoints_x))
                            y_max = int(max(keypoints_y))

                            watch_resized = cv2.resize(self.watch_image, ((x_max - x_min) // 2, (x_max - x_min) // 2))

                            fit_xmin = int(wrist_x - watch_resized.shape[1] // 2)
                            fit_ymin = int(wrist_y - watch_resized.shape[0] // 2)
                            fit_xmax = fit_xmin + watch_resized.shape[1]
                            fit_ymax = fit_ymin + watch_resized.shape[0]

                            if fit_xmin >= 0 and fit_ymin >= 0 and fit_xmax <= width and fit_ymax <= height:
                                M = cv2.getRotationMatrix2D((watch_resized.shape[1] // 2, watch_resized.shape[0] // 2), -theta_degrees, 1)
                                watch_rotated = cv2.warpAffine(watch_resized, M, (watch_resized.shape[1], watch_resized.shape[0]))
                                annotated_frame = cvzone.overlayPNG(annotated_frame, watch_rotated, (fit_xmin, fit_ymin))
                        else:
                            for k in range(len(keypoints_x)):
                                cv2.circle(annotated_frame, (int(keypoints_x[k]), int(keypoints_y[k])), 4, (0, 255, 0), -2)
                            x_min = int(min(keypoints_x))
                            y_min = int(min(keypoints_y))
                            x_max = int(max(keypoints_x))
                            y_max = int(max(keypoints_y))
                            cv2.rectangle(annotated_frame, (x_min, y_min), (x_max, y_max), (0, 0, 255), 1)
                            cv2.putText(annotated_frame, "Right", (x_min, y_min - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 0, 0), 2)

            cv2.imshow("Webcam Feed", annotated_frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        cap.release()
        cv2.destroyAllWindows()
        print("Webcam stopped.")

In [18]:
model_path = 'hand_landmarker.task'
HandLandmarker(model_path)


Enter your choice to proceed:
1: Image
2: Video
3: WebCam
0: Exit
>2

What do you want to provide?
1: Video path (for a single video)
2: Folder path (for multiple videos)
0: Exit to main menu
>1
Enter the video path:
>/content/drive/MyDrive/Colab Notebooks/MediaPipe/videos/Green Screen Hand (20+ Effects 4K _ Free Download Link).mp4
Processed frame 1 of 1814
Processed frame 2 of 1814
Processed frame 3 of 1814
Processed frame 4 of 1814
Processed frame 5 of 1814
Processed frame 6 of 1814
Processed frame 7 of 1814
Processed frame 8 of 1814
Processed frame 9 of 1814
Processed frame 10 of 1814
Processed frame 11 of 1814
Processed frame 12 of 1814
Processed frame 13 of 1814
Processed frame 14 of 1814
Processed frame 15 of 1814
Processed frame 16 of 1814
Processed frame 17 of 1814
Processed frame 18 of 1814
Processed frame 19 of 1814
Processed frame 20 of 1814
Processed frame 21 of 1814
Processed frame 22 of 1814
Processed frame 23 of 1814
Processed frame 24 of 1814
Processed frame 25 of 1814

<__main__.HandLandmarker at 0x78958405e590>