In [None]:
!pip install opencv-python
!pip install mediapipe
!pip install scikit-learn

# 1. Importing libraries and defining dataset

In [None]:
# For capturing hand coordinates
import cv2
import mediapipe as mp

# For processing data
import pandas as pd
import numpy as np

#For check file
import os
import time
import webbrowser
from tkinter import messagebox

In [None]:
# CSV file to load. Other recordings that can be swapped in:
#   './Dataset/hand_dataset_1000_24.csv'
#   './Dataset/hand_dataset_3000.csv'
#   './Dataset/hand_dataset_MAI_3000_space_del.csv'
dataset_path = './Dataset/hand_dataset_MAI_3000.csv'
dataset = pd.read_csv(dataset_path)

# Preview the first five rows of the dataset.
dataset.head()

In [None]:
# Show the number of rows per value of the 'class' column.
# The original note expected 1000 rows for each alphabet letter (excluding y and z);
# NOTE(review): the file loaded above is a "*_3000" dataset, so the expected
# per-class count is presumably different here - confirm against the output.
dataset['class'].value_counts()

# 2. Creating Train and Test Data
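- We use *train_test_split* because we don't have a separate test dataset.
- Normalizing the dataset is optional, since predictions can be made directly from the raw hand landmarks. This notebook nevertheless standardizes the features below, so the same scaler must also be applied to the live landmarks at prediction time.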

In [None]:
# Split the table into model inputs and targets:
# column 0 is used as the label, every remaining column as a feature.
features, labels = dataset.iloc[:, 1:], dataset.iloc[:, 0]

X, Y = features.values, labels.values

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the samples as test data (train:test = 67:33).
# Other commonly used train:test ratios are 80:20 and 50:50.
# The previous value, test_size=0.80, put 80% of the data in the TEST set and
# trained on only 20%, which contradicted the intent stated here.
# random_state makes the split reproducible across Restart & Run All;
# stratify keeps the class proportions identical in both subsets.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.33, random_state=42, stratify=Y)

In [None]:
# Normalize / standardize the features.
# The scaler is fitted on the training split only and then applied to both
# splits. The fitted `scaler` object is kept because the live-prediction cell
# transforms the webcam landmarks with it as well.

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(X_train)

X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# 3. Creating classifier model for our alphabet recognition.
- *n_neighbors* can be adjusted; section 5 plots the mean error for each *n_neighbors* value to help choose it.

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Number of neighbours that vote on each prediction; see the error-rate plot
# in section 5 when tuning this value.
n_neighbors = 3
classifier = KNeighborsClassifier(n_neighbors=n_neighbors)
classifier.fit(X_train, y_train)

# 4. Calculate model accuracy

In [None]:
# Predict a label for every sample of the held-out test split.
y_pred = classifier.predict(X_test)

In [None]:
from sklearn.metrics import classification_report, accuracy_score

# Print the per-class metrics report first, then the overall accuracy.
report = classification_report(y_test, y_pred)
print(report)
accuracy = accuracy_score(y_test, y_pred)
print(accuracy)

# 5. Show graph for adjusting number of *n_neighbors*

In [None]:
# Mean test error for every K from 1 through 39 (range(1, 40) excludes 40).
# For each K a fresh classifier is fitted on the training split and the
# fraction of mismatching test predictions is recorded.
error = [
    np.mean(
        KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train).predict(X_test)
        != y_test
    )
    for k in range(1, 40)
]

In [None]:
import matplotlib.pyplot as plt

# One point per K value (1 through 39), in the same order as `error`.
k_values = range(1, 40)
line_style = dict(color='red', linestyle='dashed', marker='o',
                  markerfacecolor='blue', markersize=10)

plt.figure(figsize=(12, 6))
plt.plot(k_values, error, **line_style)
plt.title('Error Rate K Value')
plt.xlabel('K Value')
plt.ylabel('Mean Error')

# 6. Initialize Mediapipe Hands for alphabet recognition.

In [None]:
# Shortcuts to the MediaPipe Hands solution and its drawing helpers.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

# Flags recording whether the letters "M", "A" and "I" have been seen so far;
# they are switched on by checkAlpha() and read by isComplete().
M = A = I = False

In [None]:
def countFile(Ipath):
    """Return the number of regular files located directly inside ``Ipath``.

    Entries that are not files (for example sub-directories) are not counted,
    and sub-directories are not descended into.
    """
    return sum(
        1
        for entry in os.listdir(Ipath)
        if os.path.isfile(os.path.join(Ipath, entry))
    )
def isComplete():
    """Return True once all three flags M, A and I are set, otherwise False.

    Reads the module-level flags ``M``, ``A`` and ``I``; it never modifies them.
    """
    return all(flag == True for flag in (M, A, I))
def checkAlpha(alph):
    """Switch on the flag that matches the recognised letter.

    ``alph`` is converted with ``str()`` and compared against "M", "A" and "I".
    The matching module-level flag is set to True if it is still False; any
    other value leaves all flags unchanged.
    """
    global M, A, I
    letter = str(alph)
    if letter == "M":
        if M == False:
            M = True
    elif letter == "A":
        if A == False:
            A = True
    elif letter == "I":
        if I == False:
            I = True
def DisplayCheckAlpha():
    """Return a three-character progress string for the letters M, A and I.

    Each position shows its letter once the corresponding module-level flag is
    set and "_" otherwise (e.g. "M_I"). The current flag values are also
    printed on every call.
    """
    slots = ["_", "_", "_"]
    for position, (letter, seen) in enumerate((("M", M), ("A", A), ("I", I))):
        if seen == True:
            slots[position] = letter
    status_line = "M : "+str(M)+" A : "+str(A)+" I : "+str(I)
    print(status_line)
    return "".join(slots)
def writeFile(iAlpha,img):
    """Save ``img`` as a numbered JPEG under ``./archive/ceremony/<iAlpha>/``.

    The file name starts at the number of files already in the folder
    (zero-padded to four digits) and is advanced until it does not collide
    with an existing file, so earlier captures are never overwritten even if
    some files were deleted in between.

    Parameters:
        iAlpha: label of the detected letter; used as the sub-folder name.
        img: image to write, passed unchanged to ``cv2.imwrite``.

    Returns:
        The value returned by ``cv2.imwrite`` (its success flag), so callers
        can detect a failed write instead of it being silently ignored.
    """
    BasePath = "./archive/ceremony/"+str(iAlpha)
    if not os.path.exists(BasePath):
        os.makedirs(BasePath)

    # Start from the current file count, then skip any name already taken.
    index = countFile(BasePath)
    target = os.path.join(BasePath, f'{index:04d}' + '.jpg')
    while os.path.exists(target):
        index += 1
        target = os.path.join(BasePath, f'{index:04d}' + '.jpg')

    return cv2.imwrite(target, img)

In [None]:
# Open the default webcam and run live hand tracking + letter recognition.
cap = cv2.VideoCapture(0)

# try/finally guarantees that the camera and the preview window are released
# even when the loop is stopped by a kernel interrupt or a frame raises an
# error; otherwise the webcam stays locked until the kernel is restarted.
try:
    with mp_hands.Hands(
        max_num_hands = 1,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5) as hands:
        # NOTE(review): start_time and DELAY_SECONDS are not read anywhere in
        # this cell; kept in case later cells rely on them.
        start_time = time.time()
        DELAY_SECONDS = 5
        while cap.isOpened():
            success, image = cap.read()

            if not success:
                print("Ignoring empty camera frame.")
                # If loading a video, use 'break' instead of 'continue'.
                continue

            # Flip the image horizontally for a later selfie-view display, and convert
            # the BGR image to RGB.
            image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)

            # To improve performance, optionally mark the image as not writeable to
            # pass by reference.
            image.flags.writeable = False
            results = hands.process(image)

            # Draw the hand annotations on the image.
            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            # Status box showing which of the letters M/A/I were already found.
            cv2.rectangle(image, (0,0), (200, 70), (245, 90, 16), -1)
            cv2.putText(image, DisplayCheckAlpha(), (20,25), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

            # Classify the detected hand and display the predicted class.
            if results.multi_hand_landmarks:
                for hand_landmarks in results.multi_hand_landmarks:
                    mp_drawing.draw_landmarks(image, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                    # Flatten the landmarks into one [x0, y0, x1, y1, ...] feature row.
                    # Alternative for a dataset using z coordinates:
                    #   [[landmark.x, landmark.y, landmark.z] for landmark in ...]
                    # Z coordinates are not recommended, since you need to adjust
                    # your distance from the camera.
                    coords = list(np.array(
                        [[landmark.x, landmark.y] for landmark in hand_landmarks.landmark]
                    ).flatten())

                    # Apply the same scaling that was fitted on the training data.
                    coords = scaler.transform([coords])
                    predicted = classifier.predict(coords)

                cv2.putText(image, "Found : "+str(predicted[0])
                            , (20,55), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
                # Optional: archive the annotated frame with
                #   writeFile(predicted[0],image)
                checkAlpha(predicted[0])

            cv2.imshow('Hand Tracking', image)
            # Optional: stop automatically once M, A and I were all detected:
            #   if(isComplete() == True):
            #       break

            # Press esc to close webcam
            if cv2.waitKey(5) & 0xFF == 27:
                break
finally:
    cap.release()
    cv2.destroyAllWindows()

# Open the video only when all three letters were recognised during the session.
if isComplete():
    webbrowser.open("https://www.youtube.com/watch?v=GaKNGh9JK2k")
