In [2]:
!pip install opencv-python
!pip install mediapipe
!pip install scikit-learn



# 1. Importing libraries and defining dataset

In [1]:
# For capturing hand coordinates
import cv2
import mediapipe as mp

# For processing data
import pandas as pd
import numpy as np

In [2]:
dataset = pd.read_csv('hand_dataset.csv')

# Show dataset first five data
dataset.head()

Unnamed: 0,class,x1,y1,x2,y2,x3,y3,x4,y4,x5,...,x17,y17,x18,y18,x19,y19,x20,y20,x21,y21
0,A,0.530803,0.581163,0.41399,0.501149,0.355407,0.369223,0.345517,0.263534,0.35425,...,0.534037,0.476981,0.665,0.338577,0.605741,0.320499,0.588447,0.400885,0.594126,0.455363
1,A,0.503664,0.616673,0.377512,0.536617,0.317852,0.408111,0.30645,0.296595,0.31667,...,0.494619,0.50849,0.635405,0.349954,0.561715,0.359349,0.546541,0.440563,0.551058,0.498263
2,A,0.267808,0.670679,0.18736,0.610215,0.14114,0.503103,0.133638,0.422246,0.138989,...,0.29892,0.586336,0.368706,0.504098,0.369881,0.461587,0.362212,0.517269,0.349507,0.565708
3,A,0.289091,0.751779,0.167962,0.654705,0.100007,0.499485,0.095107,0.371244,0.122449,...,0.294725,0.624627,0.457846,0.480413,0.393687,0.455553,0.365366,0.547497,0.356335,0.612805
4,A,0.28516,0.762803,0.161197,0.667723,0.094453,0.5121,0.090644,0.383932,0.113567,...,0.292644,0.635424,0.450271,0.489606,0.388296,0.466932,0.361084,0.560643,0.352804,0.623975


In [5]:
# Show dataset overview, should return 1000 for each alphabet (excluding y and z)
dataset['class'].value_counts()

A    1000
B    1000
Y    1000
X    1000
W    1000
V    1000
U    1000
T    1000
S    1000
R    1000
Q    1000
P    1000
O    1000
N    1000
M    1000
L    1000
K    1000
J    1000
I    1000
H    1000
G    1000
F    1000
E    1000
D    1000
C    1000
Z    1000
Name: class, dtype: int64

# 2. Creating Train and Test Data
- We use *train_test_split* since we don't really have test dataset.
- Normalizing dataset can be ignored, since we predict our data directly using raw handlandmark. 

In [6]:
# Defining X and Y from dataset for training and testing

X = dataset.iloc[:, 1:].values
Y = dataset.iloc[:, 0].values

In [7]:
from sklearn.model_selection import train_test_split

# We will take 33% from 1000 for our test data.
# Recommended value 80:20, 67:33, 50:50
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.33)

In [8]:
#Normalize / Standarize dataset

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler().fit(X_train)

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# 3. Creating classifier model for our alphabet recognition.
- *n_neighbors* can be adjusted as we provide graph for mean errors for each *n_neighbors*

In [9]:
from sklearn.neighbors import KNeighborsClassifier
classifier = KNeighborsClassifier(n_neighbors=3)
classifier.fit(X_train, y_train)

KNeighborsClassifier(n_neighbors=3)

# 4. Calculate model accuracy

In [10]:
y_pred = classifier.predict(X_test)

In [11]:
from sklearn.metrics import classification_report, accuracy_score
print(classification_report(y_test, y_pred))
print(accuracy_score(y_test, y_pred))

              precision    recall  f1-score   support

           A       0.98      0.99      0.98       347
           B       0.98      1.00      0.99       324
           C       1.00      1.00      1.00       336
           D       0.99      0.97      0.98       337
           E       1.00      0.97      0.98       327
           F       1.00      0.99      1.00       308
           G       0.98      1.00      0.99       314
           H       1.00      0.99      1.00       356
           I       1.00      0.99      0.99       348
           J       0.98      0.99      0.99       351
           K       0.99      0.99      0.99       319
           L       1.00      1.00      1.00       326
           M       0.94      0.96      0.95       323
           N       0.96      0.93      0.94       325
           O       0.97      0.99      0.98       317
           P       1.00      0.99      0.99       344
           Q       0.99      0.99      0.99       338
           R       0.97    

# 5. Show graph for adjusting number of *n_neighbors*

In [12]:
error = []

# Calculating error for K values between 1 and 40
for i in range(1, 40):
    knn = KNeighborsClassifier(n_neighbors=i)
    knn.fit(X_train, y_train)
    pred_i = knn.predict(X_test)
    error.append(np.mean(pred_i != y_test))

KeyboardInterrupt: 

In [None]:
import matplotlib.pyplot as plt

plt.figure(figsize=(12, 6))
plt.plot(range(1, 40), error, color='red', linestyle='dashed', marker='o',
         markerfacecolor='blue', markersize=10)
plt.title('Error Rate K Value')
plt.xlabel('K Value')
plt.ylabel('Mean Error')

# 6. Intialize Mediapipe Hands for alphabet recognition.

In [13]:
# Initialize mediapipe hand

mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

In [15]:
# Initialize mediapipe hand capture webcam

cap = cv2.VideoCapture(0)
with mp_hands.Hands(
    max_num_hands = 1,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5) as hands:
    while cap.isOpened():
        success, image = cap.read()

        if not success:
            print("Ignoring empty camera frame.")
            # If loading a video, use 'break' instead of 'continue'.
            continue

        # Flip the image horizontally for a later selfie-view display, and convert
        # the BGR image to RGB.
        image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)

        # To improve performance, optionally mark the image as not writeable to
        # pass by reference.
        image.flags.writeable = False
        results = hands.process(image)

        # Draw the hand annotations on the image.
        image.flags.writeable = True
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                coords = hand_landmarks.landmark
                mp_drawing.draw_landmarks(image, hand_landmarks, mp_hands.HAND_CONNECTIONS)
                coords = list(np.array([[landmark.x, landmark.y] for landmark in coords]).flatten())
                coords = scaler.transform([coords])
                
                # Alternative for dataset using z coordinates.
                # Z coordinates is not recommended, since you need to adjust your distance from camera.
#                 coords = list(np.array([[landmark.x, landmark.y, landmark.z] for landmark in coords]).flatten())
                
                predicted = classifier.predict(coords)

            # Get status box
            cv2.rectangle(image, (0,0), (100, 60), (245, 90, 16), -1)

            # Display Class
            cv2.putText(image, 'CLASS'
                        , (20,15), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
            cv2.putText(image, str(predicted[0])
                        , (20,45), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

        cv2.imshow('MediaPipe Hands', image)

        # Press esc to close webcam
        if cv2.waitKey(5) & 0xFF == 27:
            break
cap.release()
cv2.destroyAllWindows()
