In [1]:
import mediapipe as mp
from mediapipe.framework.formats import landmark_pb2
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from mediapipe import solutions
import os
import pandas as pd
import numpy as np
import cv2

In [2]:
model_path = 'C:/Projects/ISL-to-text/INCLUDE 50/MP_Models/pose_landmarker_heavy.task'
BaseOptions = mp.tasks.BaseOptions
PoseLandmarker = mp.tasks.vision.PoseLandmarker
PoseLandmarkerOptions = mp.tasks.vision.PoseLandmarkerOptions
VisionRunningMode = mp.tasks.vision.RunningMode
options_pose = PoseLandmarkerOptions(base_options = BaseOptions(model_asset_path = model_path),running_mode=VisionRunningMode.IMAGE)
MARGIN = 10 
FONT_SIZE = 1
FONT_THICKNESS = 1
HANDEDNESS_TEXT_COLOR = (88, 205, 54)

In [3]:
HandLandmarker = mp.tasks.vision.HandLandmarker
HandLandmarkerOptions = mp.tasks.vision.HandLandmarkerOptions

options_hand = HandLandmarkerOptions(
    base_options=BaseOptions(model_asset_path="C:\Projects\ISL-to-text\INCLUDE 50\MP_Models\hand_landmarker.task"),
    running_mode=VisionRunningMode.IMAGE)


In [4]:
def draw_landmarks_pose(img, result):
    if len(result.pose_landmarks)>0:
        landmarks = result.pose_landmarks[0]
        image = np.copy(img)
        x = []
        y = []
        for landmark in landmarks:
            x.append(landmark.x)
            y.append(landmark.y)
        lm_proto = landmark_pb2.NormalizedLandmarkList()
        lm_proto.landmark.extend([landmark_pb2.NormalizedLandmark(x= landmark.x,y= landmark.y, z=landmark.z) for landmark in landmarks])
        solutions.drawing_utils.draw_landmarks(image,lm_proto)
        return image,x,y
    else:
        return img,[],[]
    
def draw_landmarks_hand(rgb_image, detection_result):
    hand_landmarks_list = detection_result.hand_landmarks
    handedness_list = detection_result.handedness
    annotated_image = np.copy(rgb_image)
    x_coordinates = []
    y_coordinates = []
    for idx in range(len(hand_landmarks_list)):
        hand_landmarks = hand_landmarks_list[idx]
        handedness = handedness_list[idx]

        hand_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
        hand_landmarks_proto.landmark.extend([
            landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z) for landmark in hand_landmarks
            ])
        solutions.drawing_utils.draw_landmarks(
            annotated_image,
            hand_landmarks_proto,
            solutions.hands.HAND_CONNECTIONS,
            solutions.drawing_styles.get_default_hand_landmarks_style(),
            solutions.drawing_styles.get_default_hand_connections_style())
    
        height, width, _ = annotated_image.shape
        x_coordinates = [landmark.x for landmark in hand_landmarks]
        y_coordinates = [landmark.y for landmark in hand_landmarks]
        text_x = int(min(x_coordinates) * width)
        text_y = int(min(y_coordinates) * height) - MARGIN

        cv2.putText(annotated_image, f"{handedness[0].category_name}",
                    (text_x, text_y), cv2.FONT_HERSHEY_DUPLEX,
                    FONT_SIZE, HANDEDNESS_TEXT_COLOR, FONT_THICKNESS, cv2.LINE_AA)

    return annotated_image,x_coordinates,y_coordinates

In [9]:
vid = cv2.VideoCapture("C:\Projects\ISL Research\INCLUDE50\Places_1of4\Places/19. House\MVI_3350.MOV")

while vid.isOpened():
    suc,frame = vid.read()
    while suc:
        cv2.imwrite('C:/Projects/ISL-to-text/INCLUDE 50/MP_Models/frame.png',frame)
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame)
        h,w,_  = frame.shape
        with PoseLandmarker.create_from_options(options_pose) as landmarker:
            results = landmarker.detect(mp_image)
        frame,_,_ = draw_landmarks_pose(frame,results)
        if results.pose_landmarks:
            landmarks = results.pose_landmarks[0]
            min_x = min([(landmark.x)*w for landmark in landmarks]) -130
            max_x = max([(landmark.x)*w for landmark in landmarks]) +130
            min_y = min([(landmark.y)*h for landmark in landmarks]) -130
            max_y = max([(landmark.y)*h for landmark in landmarks])  +130
        img = frame[int(min_y):int(max_y),int(min_x):int(max_x)]
        cv2.imwrite('C:/Projects/ISL-to-text/INCLUDE 50/MP_Models/crop.png',img)
        image = mp.Image(image_format=mp.ImageFormat.SRGB, data=np.array(img))
        with HandLandmarker.create_from_options(options_hand) as landmarker:
            landmarks = landmarker.detect(image)
        an_img,x,y = draw_landmarks_hand(img,landmarks)
        print(landmarks)
        cv2.imwrite('C:/Projects/ISL-to-text/INCLUDE 50/MP_Models/annotatedframe.png',an_img)
        suc,frame = vid.read()
    vid.release()
cv2.destroyAllWindows()

None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
[landmark {
  x: 0.395729482
  y: 0.388442218
  z: 1.377731e-007
}
landmark {
  x: 0.405753523
  y: 0.378838062
  z: -0.00759254768
}
landmark {
  x: 0.412032545
  y: 0.357417673
  z: -0.0110371886
}
landmark {
  x: 0.415762752
  y: 0.340862662
  z: -0.0128710978
}
landmark {
  x: 0.419211686
  y: 0.326015621
  z: -0.014600995
}
landmark {
  x: 0.401755691
  y: 0.337082148
  z: -0.00983852707
}
landmark {
  x: 0.409330964
  y: 0.3151564
  z: -0.0148387887
}
landmark {
  x: 0.415798038
  y: 0.300329804
  z: -0.0188644882
}
landmark {
  x: 0.420998096
  y: 0.290228903
  z: -0.0212903954
}
landmark {
  x: 0.397450387
  y: 0.336183131
  z: -0.00707287062
}
landmark {
  x: 0.405655265
  y: 0.314469606
  z: -0.0114022885
}
landmark {
  x: 0.412553251
  y: 0.301673263
  z: -0.0142195495
}
landmark {
  x: 0.418042332
  y: 0.2919797
  z: -0.0160362143
}
landmark {
  x: 0.395074576




None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None


In [6]:
print(landmarks)

HandLandmarkerResult(handedness=[], hand_landmarks=[], hand_world_landmarks=[])


Format for getting coordinates from landmarks

landmarks.pose_landmarks[0][index].x

In [5]:
#Load Frames form still image folder
path = 'C:\Projects\ISL-to-text\Word Level ISL' # path to root foldere
# start loop with word as listdir through entire staationary image folder
word = 'angry'
data = pd.DataFrame()
for file in os.listdir(path): # update listdir with word
    dt = []
    frame = cv2.imread(os.path.join(path,file)) # here too
    mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame)
    h,w,_  = frame.shape
    with PoseLandmarker.create_from_options(options_pose) as landmarker:
        results = landmarker.detect(mp_image)
    frame,_,_ = draw_landmarks_pose(frame,results)
    if(len(results.pose_landmarks)!=0):
        X_pose = [results.pose_landmarks[0][i].x for i in range(len(results.pose_landmarks[0]))]
        Y_pose = [results.pose_landmarks[0][i].y for i in range(len(results.pose_landmarks[0]))]
        landmarks = results.pose_landmarks[0]
        min_x = min(X_pose)*w -130
        max_x = max(X_pose)*w +130
        min_y = min(Y_pose)*h -130
        max_y = max(Y_pose)*h +130
        img = frame[int(min_y):int(max_y),int(min_x):int(max_x)]
        cv2.imwrite('C:/Projects/ISL-to-text/INCLUDE 50/MP_Models/crop.png',img)
        image = mp.Image(image_format=mp.ImageFormat.SRGB, data=np.array(img))
        with HandLandmarker.create_from_options(options_hand) as landmarker:
            landmarks = landmarker.detect(image)
        an_img,x,y = draw_landmarks_hand(img,landmarks)
        if(len(landmarks.hand_landmarks)!=0):
            X_left = [0 for i in range(21)]
            X_right = [0 for i in range(21)]
            Y_left = [0 for i in range(21)]
            Y_right = [0 for i in range(21)]
            for j in range(len(landmarks.hand_landmarks)):
                if landmarks.handedness[j][0].display_name == 'Right':
                    X_right = [landmarks.hand_landmarks[j][i].x for i in range(len(landmarks.hand_landmarks[j]))]
                    Y_right = [landmarks.hand_landmarks[j][i].y for i in range(len(landmarks.hand_landmarks[j]))]
                if landmarks.handedness[j][0].display_name == 'Left':
                    X_left = [landmarks.hand_landmarks[j][i].x for i in range(len(landmarks.hand_landmarks[j]))]
                    Y_left = [landmarks.hand_landmarks[j][i].y for i in range(len(landmarks.hand_landmarks[j]))]
            dt = [*X_pose,*Y_pose,*X_left,*Y_left,*X_right,*Y_right,word]
            print(pd.DataFrame(dt).T)
            data = pd.concat([data,pd.DataFrame(dt).T])
            cv2.imwrite('C:/Projects/ISL-to-text/INCLUDE 50/MP_Models/lmed'+file+'.png',an_img)
data.to_csv('C:\Projects\ISL-to-text\INCLUDE 50\Word level data/'+word+'.csv')




        0         1         2         3         4         5         6    \
0  0.534977  0.551095  0.559139  0.564903  0.526483  0.518967  0.510994   

        7         8         9    ...       141       142       143       144  \
0  0.579321  0.505561  0.554191  ...  0.074779  0.157729  0.117867  0.091306   

       145       146       147       148       149    150  
0  0.07216  0.164582  0.135074  0.113023  0.094968  angry  

[1 rows x 151 columns]




        0         1         2         3         4         5         6    \
0  0.499329  0.511712  0.520211  0.527218  0.488838  0.481872  0.474852   

        7         8         9    ... 141 142 143 144 145 146 147 148 149  \
0  0.538835  0.468964  0.517445  ...   0   0   0   0   0   0   0   0   0   

     150  
0  angry  

[1 rows x 151 columns]




        0         1         2         3         4         5         6    \
0  0.536189  0.551367  0.558678  0.564701  0.525808  0.518358  0.510009   

        7         8         9    ...       141       142       143      144  \
0  0.578015  0.502063  0.553199  ...  0.129889  0.167938  0.123677  0.11791   

        145       146       147       148       149    150  
0  0.130466  0.165874  0.129703  0.125295  0.137225  angry  

[1 rows x 151 columns]




In [16]:
print(landmarks.handedness[0][0].display_name)

Right


In [16]:
image = mp.Image(image_format=mp.ImageFormat.SRGB, data=cv2.imread('C:\Projects\ISL-to-text\Word Level ISL\open-hand-on-white-background-260nw-407475856.webp'))
with HandLandmarker.create_from_options(options_hand) as landmarker:
       landmarks = landmarker.detect(image)
print(landmarks.handedness)

RuntimeError: Unable to open file at c:\Users\arpit\AppData\Local\Programs\Python\Python310\lib\site-packages/C:\Projects\ISL-to-text\INCLUDE 50\MP_Models\hand_landmarker.task, errno=22

Check for which hand, pair with check for how many hands

Notes:
1. If one of the hands is undetected/random, that hand can take any position, use to augment data, use atleast 20 random undetected hand positions to crfeate 20x new entries.
2. Match the number and side of hands detected in each category and eleminate the odd ones.
3. Augment the data based on the open and closed palm simulation.
4. augment the data using spacial transformations like the venus paper.
5. fred data into embedding layer (returns 3 vectors of 42,1)
6. Throw into a Transformer Encoder without the positional engoding.
7. Use a transformer decoder with avalible dictionary.

In [35]:
print(file[:-8])

ANGRY
