## initial code for our Hand Gesture Recognition

In [117]:
# ML models imports
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import train_test_split
from skimage.feature import hog

# destribution models imports
from scipy.stats import randint, uniform

# Data manipulation imports
import numpy as np
import pandas as pd
import tqdm as tqdm

# visualisation models imports
import matplotlib.pyplot as plt

# image processing imports
import skimage.io as io
import cv2
from skimage.transform import resize

# dealing with files
import os

# visual dataset (to test randomized gridsearch not needed for now)
from sklearn.datasets import make_hastie_10_2  # to test our models

# from utils import prepareData, LoadData, FeatureExtraction, preprocess
import csv

In [118]:
def segment(image):
    blured_image = cv2.GaussianBlur(image, (7, 7), 0)
    ycbcr_image = cv2.cvtColor(blured_image, cv2.COLOR_BGR2YCrCb)
    # Extract the Cr channel
    cr_channel = ycbcr_image[:,:,1]

    # Apply thresholding to obtain a binary image
    _, binary_img = cv2.threshold(cr_channel,0,255,cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    # Define the structuring element for the closing operation
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (10, 10))

    # Perform the closing operation
    closed_img = cv2.morphologyEx(binary_img, cv2.MORPH_CLOSE, kernel)

    # Find the contours in the binary image
    contours, hierarchy = cv2.findContours(closed_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Filling the contours on a copy of the original image
    # img_contours = cv2.cvtColor(cr_channel, cv2.COLOR_GRAY2BGR)
    # cv2.drawContours(img_contours, contours, -1, (0, 0, 0), -1)

    segmented_image = closed_img.copy()
    cv2.drawContours(segmented_image, contours, -1, 255, -1)

    return segmented_image

In [208]:
def FeatureExtraction(image):
        
    # Extract the hog features
    # block_norm uses L2 norm with hysterisis for reducing effect of illuminacity
    # transform_sqrt for applying gamma correction
    preprocessed_image = segment(image)
    
    # resized_image = resize(preprocessed_image,(64,128))   # downscaing from approx 2500x4000 to 500x500
    resized_image = resize(preprocessed_image,(64,128))

    hog_features = hog(resized_image, block_norm='L2-Hys', feature_vector=True, transform_sqrt=True, pixels_per_cell=(12, 12), cells_per_block=(2, 2))

    return hog_features

In [209]:
def LoadData():
    Features=[]
    labels=[]

    i = 0

    
    for gender in ["men","Women"]:
        datadir = r"Dataset\{}".format(gender)
        # loop over gender
        for hand in os.listdir(datadir): 
            # loop over each class [0,1,2,3,4,5]
            for img in os.listdir(datadir+ "/" +str(hand)):
                # ignoring anything except images
                if((img.split('.')[-1]).lower() not in ['jpg','png','jpeg']):
                    continue

                # loading our images
                img_array=io.imread(datadir + "/" + str(hand) + "/" + img )  # approx 2500 * 4000

                # append extracted features to Featurees list   
                Feature = FeatureExtraction(img_array)        
                Features.append(Feature) 

                # append class of image.
                labels.append(hand)  

                print(f'image Number: {i}')
                i+=1
                # # print(f'saving block : {i//100}')
                # # np.save(f'Features/{i}.npy', Feature)
                # # Features = []
                # # labels = []
                # with open("Features.csv", 'a') as csvfile:
                #     csvwriter = csv.writer(csvfile)
                #     csvwriter.writerow(Feature)

    return np.asarray(Features),np.asarray(labels)

In [210]:
Features, labels = LoadData() 

image Number: 0
image Number: 1
image Number: 2
image Number: 3
image Number: 4
image Number: 5
image Number: 6
image Number: 7
image Number: 8
image Number: 9
image Number: 10
image Number: 11
image Number: 12
image Number: 13
image Number: 14
image Number: 15
image Number: 16
image Number: 17
image Number: 18
image Number: 19
image Number: 20
image Number: 21
image Number: 22
image Number: 23
image Number: 24
image Number: 25
image Number: 26
image Number: 27
image Number: 28
image Number: 29
image Number: 30
image Number: 31
image Number: 32
image Number: 33
image Number: 34
image Number: 35
image Number: 36
image Number: 37
image Number: 38
image Number: 39
image Number: 40
image Number: 41
image Number: 42
image Number: 43
image Number: 44
image Number: 45
image Number: 46
image Number: 47
image Number: 48
image Number: 49
image Number: 50
image Number: 51
image Number: 52
image Number: 53
image Number: 54
image Number: 55
image Number: 56
image Number: 57
image Number: 58
image N

In [195]:
# labels

In [196]:
# np.save('labels.npy', labels)
# np.save('features.npy', Features)
# labels=np.load('labels.npy')
# len(labels)

In [197]:
# Use Pandas to read the CSV file in chunks of 1000 rows
# chunk_size = 10
# for chunk in pd.read_csv('Features.csv', chunksize=chunk_size):
#     # Process each chunk of data here
#     print(chunk.head())

In [198]:
# test = np.load('Features/1.npy')
# test.max()

In [199]:
# img = cv2.imread('1_men (2).JPG')
# lower_rgb=np.array([0, 0, 0],dtype="uint8")
# upper_rgb=np.array([230, 230, 230],dtype="uint8")
# skin_region=cv2.inRange(img,lower_rgb,upper_rgb)
# cv2.imwrite("segmented_2.jpg",skin_region)

* # Selecting the best model

- ### Define hyperparameter grids for each model

In [211]:
param_distributions = {
    'RandomForestClassifier': {
        'n_estimators': randint(50, 500),
        'max_depth': randint(2, 20),
        'min_samples_split': randint(2, 10),
        'min_samples_leaf': randint(1, 5),
        'max_features': ['sqrt', 'log2']
    },
    'GradientBoostingClassifier': {
        'learning_rate': uniform(0.01, 0.2),
        'n_estimators': randint(50, 500),
        'max_depth': randint(2, 20),
        'min_samples_split': randint(2, 10),
        'min_samples_leaf': randint(1, 5),
        'max_features': ['sqrt', 'log2']
    },
    'SVC': {
        'C': uniform(0.01, 10),
        'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
        'degree': randint(2, 5),
        'gamma': ['scale', 'auto'] + list(np.arange(0.1, 1, 0.1))
    },
    'LogisticRegression': {
        'C': uniform(0.01, 10),
        'penalty': ['l1', 'l2', 'elasticnet', 'none'],
        'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
        'max_iter': randint(50, 500)
    },
    'DecisionTreeClassifier': {
        'criterion': ['gini', 'entropy'],
        'splitter': ['best', 'random'],
        'max_depth': randint(2, 20),
        'min_samples_split': randint(2, 10),
        'min_samples_leaf': randint(1, 5),
        'max_features': ['sqrt', 'log2']
    },
    'KNeighborsClassifier': {
        'n_neighbors': randint(3, 30),
        'weights': ['uniform', 'distance'],
        'algorithm': ['ball_tree', 'kd_tree', 'brute'],
        'leaf_size': randint(10, 100)
    },
    'GaussianNB': {
        'var_smoothing': uniform(1e-09, 1e-07)
    },
    'MLPClassifier': {
        'hidden_layer_sizes': [(50, 50), (100,), (100, 50)],
        'activation': ['identity', 'logistic', 'tanh', 'relu'],
        'solver': ['lbfgs', 'sgd', 'adam'],
        'alpha': uniform(0.0001, 0.01),
        'max_iter': randint(100, 1000)
    }
}

- ### Create a list of models to train (as example)

In [212]:
models = [
    RandomForestClassifier(),
    GradientBoostingClassifier(),
    SVC(),
    KNeighborsClassifier(),
    GaussianNB()
]

- ### Loop over the models and fit  

In [218]:
# load our dummy data to test the randomizedSearche function
x,y = Features,labels
df = pd.DataFrame(x)
df['Y'] = y

train, test = train_test_split(df, test_size=0.2) # this function shuffles the data points, and splits the data into
                                                  # 80% training set and 20% test set (indicated by test_size=0.2)
X_train, Y_train = train.iloc[:, :-1], train.iloc[:, -1]
X_test, Y_test = test.iloc[:, :-1], test.iloc[:, -1]

In [214]:
# load our dummy data to test the randomizedSearche function
# df= pd.read_csv('Features.csv')
# df['Y'] = labels

# train, test = train_test_split(df, test_size=0.2) # this function shuffles the data points, and splits the data into
#                                                   # 80% training set and 20% test set (indicated by test_size=0.2)
# X_train, Y_train = train.iloc[:, :-1], train.iloc[:, -1]
# X_test, Y_test = test.iloc[:, :-1], test.iloc[:, -1]
# Fit a simple decision tree first

In [220]:
for i, model in enumerate(models):
    print(f'Training Model {i+1}/{len(models)}: {str(model)[:-2]}')
    # Define randomized grid search
    random_search = RandomizedSearchCV(model, param_distributions[str(model)[:-2]], n_iter=10,cv=5, n_jobs=-1) # n_jobs means number of jobs to run in parallel. None means 1,
                                                                                                                # -1 means using all processors 😈.
    # Fit the randomized grid search to the data
    random_search.fit(X_train, Y_train)
    print(f'Best score: {random_search.best_score_:.3f}')
    print(f'Best parameters: {random_search.best_params_}\n')

Training Model 1/5: RandomForestClassifier
Best score: 0.607
Best parameters: {'max_depth': 13, 'max_features': 'log2', 'min_samples_leaf': 1, 'min_samples_split': 6, 'n_estimators': 275}

Training Model 2/5: GradientBoostingClassifier
Best score: 0.587
Best parameters: {'learning_rate': 0.05976068850150411, 'max_depth': 2, 'max_features': 'log2', 'min_samples_leaf': 3, 'min_samples_split': 2, 'n_estimators': 408}

Training Model 3/5: SVC
Best score: 0.607
Best parameters: {'C': 8.291249026598944, 'degree': 2, 'gamma': 'scale', 'kernel': 'rbf'}

Training Model 4/5: KNeighborsClassifier
Best score: 0.522
Best parameters: {'algorithm': 'kd_tree', 'leaf_size': 41, 'n_neighbors': 3, 'weights': 'distance'}

Training Model 5/5: GaussianNB
Best score: 0.504
Best parameters: {'var_smoothing': 2.6525382160701654e-09}

