In [3]:
import cv2
import glob
from google.protobuf.json_format import MessageToDict
import mediapipe as mp
import numpy as np
import pandas as pd
import re
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler


In [4]:
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_face_detection = mp.solutions.face_detection
mp_hands = mp.solutions.hands

In [5]:

files = glob.glob('./leapGestRecog/*/0[13457]*/*') + glob.glob('./leapGestRecog/*/10*/*')

In [6]:


def landmarks_to_df(landmarks, df=None, target=None):
    
   
    landmarks = MessageToDict(landmarks[0])['landmark']
    
    x = np.empty(len(landmarks))
    y = np.empty(len(landmarks))
    z = np.empty(len(landmarks))
    
    for i, v in enumerate(landmarks):
        x[i] = v['x']
        y[i] = v['y']
        z[i] = v['z']
    
    
    scaler = MinMaxScaler()
    x = scaler.fit_transform(x.reshape(-1, 1)).reshape(1, -1)
    y = scaler.fit_transform(y.reshape(-1, 1)).reshape(1, -1)
    z = scaler.fit_transform(z.reshape(-1, 1)).reshape(1, -1)
    
    if target is not None:
        features = np.c_[x, y, z, target]
    else:
        features = np.c_[x, y, z]
    
    if df is not None:
        df = pd.concat((df, pd.DataFrame(features)), axis=0)
    else:
        df = pd.DataFrame(features)
    
    return df

In [7]:


mapping_dict = {'03': 1, '07': 2, '10': 3}
df = pd.DataFrame()

with mp_hands.Hands(max_num_hands=1, static_image_mode=True) as hands:
    
        for idx, file in enumerate(files):
            image = cv2.imread(file)
            results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

            if not results.multi_hand_landmarks:
                continue
            
            target = mapping_dict.get(re.findall('frame_\d{2}_(\d{2})', file)[0], 0)
            df = landmarks_to_df(results.multi_hand_landmarks, df, target)

error: OpenCV(4.8.0) D:\a\opencv-python\opencv-python\opencv\modules\imgproc\src\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'


In [8]:

df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,54,55,56,57,58,59,60,61,62,63
0,0.932551,1.0,0.840035,0.616619,0.396974,0.560857,0.359051,0.326545,0.304288,0.375863,...,0.537024,0.199077,0.376006,0.601129,0.753265,0.197671,0.454545,0.762016,1.0,0.0
0,0.932551,1.0,0.840035,0.616619,0.396974,0.560857,0.359051,0.326545,0.304288,0.375863,...,0.537024,0.199077,0.376006,0.601129,0.753265,0.197671,0.454545,0.762016,1.0,0.0
0,0.543261,0.861219,1.0,0.935152,0.785339,0.610136,0.583174,0.559931,0.520734,0.36689,...,0.546823,0.482218,0.528118,0.642798,0.699108,0.567163,0.658,0.845135,1.0,0.0
0,0.543261,0.861219,1.0,0.935152,0.785339,0.610136,0.583174,0.559931,0.520734,0.36689,...,0.546823,0.482218,0.528118,0.642798,0.699108,0.567163,0.658,0.845135,1.0,0.0
0,0.576677,0.875788,1.0,0.948283,0.789037,0.685325,0.60591,0.556439,0.486062,0.419823,...,0.314369,0.111222,0.012356,0.313299,0.555948,0.147427,0.180956,0.608599,1.0,0.0


In [9]:
df.shape

(4709, 64)

In [10]:
df[63].value_counts()

0.0    2579
2.0    1610
1.0     520
Name: 63, dtype: int64

In [11]:


df.to_csv('./data.csv', index=False)

In [12]:

df = pd.read_csv('./data.csv')
df.columns = df.columns.astype(int)

In [13]:

X_train, X_test, y_train, y_test = train_test_split(df.iloc[:, :-1], df.iloc[:, -1], test_size=0.2, stratify=df[63], random_state=29)

In [14]:

model = LogisticRegression(max_iter=400)
model.fit(X_train, y_train)
preds = model.predict(X_test)

In [15]:

print(classification_report(y_test, preds))

              precision    recall  f1-score   support

         0.0       0.99      0.99      0.99       516
         1.0       0.97      0.97      0.97       104
         2.0       0.99      1.00      1.00       322

    accuracy                           0.99       942
   macro avg       0.99      0.99      0.99       942
weighted avg       0.99      0.99      0.99       942



In [16]:

model.fit(df.iloc[:, :-1], df.iloc[:, -1])

In [17]:

cap = cv2.VideoCapture(0)

with mp_face_detection.FaceDetection() as face_detection:
    with mp_hands.Hands(max_num_hands=1) as hands:
        while cap.isOpened():
            success, image = cap.read()
            if not success:
                print('Ignoring empty camera frame.')
                continue

            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            results = face_detection.process(image)
            
            if results.detections and MessageToDict(results.detections[0])['score'][0] > 0.85:
                
                results = hands.process(image)
                
                if results.multi_hand_landmarks:
                    gesture = model.predict_proba(landmarks_to_df(results.multi_hand_landmarks))[0]
                    
                    if max(gesture) > 0.5:
                        gesture = np.argmax(gesture)
                        
                        match gesture:
                            case 0:
                                text = 'Unknown sign!'
                            case 1:
                                text = 'Hello!'
                            case 2:
                                text = 'OK!'
                            case 3:
                                text = 'Scary!'
                    
                    
                    else:
                        text = 'Is it a sign?'
            
                
                    cv2.putText(image, text, (170, 50),
                                cv2.FONT_HERSHEY_COMPLEX,
                                1.3, (0, 0, 255), 2)
            
            else:
                cv2.putText(image, 'Is anyone here?', (170, 50),
                            cv2.FONT_HERSHEY_COMPLEX,
                            1.3, (0, 0, 255), 2)
            
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            cv2.imshow('MediaPipe Hands', image)
            if cv2.waitKey(5) & 0xFF == 27:
                break

cap.release()
cv2.destroyAllWindows()

KeyboardInterrupt: 

: 