## initial code for our Hand Gesture Recognition

In [84]:
# ML models imports
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split

# destribution models imports
from scipy.stats import randint, uniform

# Data manipulation imports
import numpy as np
import pandas as pd
import tqdm as tqdm

# visualisation models imports
import matplotlib.pyplot as plt

# image processing imports
import skimage.io as io
import cv2
from skimage.transform import resize

# dealing with files
import os

# visual dataset (to test randomized gridsearch not needed for now)
from sklearn.datasets import make_hastie_10_2  # to test our models

# Data processing


In [85]:
'''
Input:
    images: a list or array-like object containing the input data, where each row represents an image
    labels: a list or array-like object containing the corresponding output labels for each image

Output:
    x: a pandas DataFrame containing the input data (images)
    y: a pandas Series containing the output (predicted) data (labels)

Functionality:
    This function takes two inputs, images and labels, and returns a tuple of input and output data,
    respectively. First, it converts the input data (images) into a pandas DataFrame,
    and then it adds a new column 'Target' to the DataFrame with corresponding output labels.
    The input data is then extracted from the DataFrame by selecting all the columns except the last column,
    which represents the output data. The output data is also extracted by selecting only the last column of the DataFrame.
    Finally, the input and output data are returned as x and y, respectively. this function is used for dealing with images 
    easily and smoothly.
Example:
    x, y = prepareData(images,labels) 

'''
def prepareData(images,labels):
    df=pd.DataFrame(images) 
    df['Target'] = labels
    x = df.iloc[:,:-1]  
    y = df.iloc[:,-1] 
    return np.asarray(x),np.asarray(y)



'''
Input:
    Gender: a list containing  

Output:
    Features: a list containing extracted features from the images
    labels: a list containing extracted labels

Functionality:
    This function is used to process data from their directory and extract features from the images.

Example:
    Features, labels = LoadData() 

Note: 
    [*] Structure of Dataset foulder should be as follows in the project directory:

                       Dataset
                    men       Women
              0 1 2 3 4 5    0 1 2 3 4 5
           
        -> each number is a directory contains the images of that class.

    [*] this structure existed initially in the given dataset i didn't came up with :) 



'''
def LoadData():
    Features=[]
    labels=[]

    for gender in ["men","Women"]:
        datadir = r"Dataset\{}".format(gender)
        # loop over gender
        for hand in os.listdir(datadir): 
            # loop over each class [0,1,2,3,4,5]
            for img in os.listdir(datadir+ "/" +str(hand)):
                # ignoring anything except images
                if((img.split('.')[-1]).lower() not in ['jpg','png','jpeg']):
                    continue

                # loading our images
                img_array=io.imread(datadir + "/" + str(hand) + "/" + img ,as_gray=True)  # approx 2500 * 4000

                # append extracted features to Featurees list           
                Features.append(FeatureExtraction(img_array)) 

                # append class of image.
                labels.append(hand)     

    return np.asarray(Features),np.asarray(labels)


'''
input:
    images: an image of hand.

output:
    Extracted features ready to be used in training.

Functionality: 
    This function is used for Extracting features from images.
Example:
    features = extractFeatures(image)
'''
def FeatureExtraction(image):
    # this written code is an initial code for extracting features
    resized = resize(image,(500,500))   # downscaing from approx 2500x4000 to 500x500

    '''

        TODO: Feature Extraction code should be implemented here.  

    '''

    image = np.array(resized).flatten() # flatten our image to be used as input vector to our model

    return image

    



In [86]:
LoadData()

(array([[1.        , 1.        , 1.        , ..., 0.57579441, 0.57231646,
         0.57919813],
        [0.55088159, 0.56361918, 0.57627312, ..., 0.52987367, 0.53041833,
         0.5319493 ],
        [0.55018642, 0.54455157, 0.53863661, ..., 0.44312991, 0.43867312,
         0.43528299],
        ...,
        [0.70066962, 0.69860664, 0.69564254, ..., 0.74752134, 0.74584222,
         0.74773582],
        [0.86934983, 0.87384208, 0.86957133, ..., 0.64137493, 0.64072342,
         0.64580521],
        [0.86898109, 0.86972548, 0.87227469, ..., 0.85155641, 0.85046445,
         0.851305  ]]),
 array(['0', '0', '0', '0', '0', '1', '1', '1', '1', '1', '2', '2', '2',
        '2', '2', '3', '3', '3', '3', '3', '4', '4', '4', '4', '5', '5',
        '5', '5', '5', '0', '0', '0', '0', '0', '1', '1', '1', '1', '1',
        '2', '2', '2', '2', '2', '3', '3', '3', '3', '3', '4', '4', '4',
        '4', '4', '5', '5', '5', '5', '5'], dtype='<U1'))

In [62]:
#! pip install imageio[pyav] (in case of having error while loading some of images try this one)

* # Selecting the best model

- ### Define hyperparameter grids for each model

In [2]:
param_distributions = {
    'RandomForestClassifier': {
        'n_estimators': randint(50, 500),
        'max_depth': randint(2, 20),
        'min_samples_split': randint(2, 10),
        'min_samples_leaf': randint(1, 5),
        'max_features': ['sqrt', 'log2']
    },
    'GradientBoostingClassifier': {
        'learning_rate': uniform(0.01, 0.2),
        'n_estimators': randint(50, 500),
        'max_depth': randint(2, 20),
        'min_samples_split': randint(2, 10),
        'min_samples_leaf': randint(1, 5),
        'max_features': ['sqrt', 'log2']
    },
    'SVC': {
        'C': uniform(0.01, 10),
        'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
        'degree': randint(2, 5),
        'gamma': ['scale', 'auto'] + list(np.arange(0.1, 1, 0.1))
    },
    'LogisticRegression': {
        'C': uniform(0.01, 10),
        'penalty': ['l1', 'l2', 'elasticnet', 'none'],
        'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
        'max_iter': randint(50, 500)
    },
    'DecisionTreeClassifier': {
        'criterion': ['gini', 'entropy'],
        'splitter': ['best', 'random'],
        'max_depth': randint(2, 20),
        'min_samples_split': randint(2, 10),
        'min_samples_leaf': randint(1, 5),
        'max_features': ['sqrt', 'log2']
    },
    'KNeighborsClassifier': {
        'n_neighbors': randint(3, 30),
        'weights': ['uniform', 'distance'],
        'algorithm': ['ball_tree', 'kd_tree', 'brute'],
        'leaf_size': randint(10, 100)
    },
    'GaussianNB': {
        'var_smoothing': uniform(1e-09, 1e-07)
    },
    'MLPClassifier': {
        'hidden_layer_sizes': [(50, 50), (100,), (100, 50)],
        'activation': ['identity', 'logistic', 'tanh', 'relu'],
        'solver': ['lbfgs', 'sgd', 'adam'],
        'alpha': uniform(0.0001, 0.01),
        'max_iter': randint(100, 1000)
    }
}

- ### Create a list of models to train (as example)

In [3]:
models = [
    RandomForestClassifier(),
    GradientBoostingClassifier(),
    SVC(),
    LogisticRegression(),
    DecisionTreeClassifier(),
    KNeighborsClassifier(),
    GaussianNB(),
    MLPClassifier()
]

- ### Loop over the models and fit  

In [4]:
# load our dummy data to test the randomizedSearche function
x,y = make_hastie_10_2()
df = pd.DataFrame(x)
df['Y'] = y

train, test = train_test_split(df, test_size=0.2) # this function shuffles the data points, and splits the data into
                                                  # 80% training set and 20% test set (indicated by test_size=0.2)
X_train, Y_train = train.iloc[:, :-1], train.iloc[:, -1]
X_test, Y_test = test.iloc[:, :-1], test.iloc[:, -1]
# Fit a simple decision tree first

In [21]:
for i, model in enumerate(models):
    print(f'Training Model {i+1}/{len(models)}: {str(model)[:-2]}')
    # Define randomized grid search
    random_search = RandomizedSearchCV(model, param_distributions[str(model)[:-2]], n_iter=10,cv=5, n_jobs=-1) # n_jobs means number of jobs to run in parallel. None means 1,
                                                                                                                # -1 means using all processors 😈.
    # Fit the randomized grid search to the data
    random_search.fit(X_train, Y_train)
    print(f'Best score: {random_search.best_score_:.3f}')
    print(f'Best parameters: {random_search.best_params_}\n')

Training Model 1/8: RandomForestClassifier
Best score: 0.882
Best parameters: {'max_depth': 18, 'max_features': 'sqrt', 'min_samples_leaf': 4, 'min_samples_split': 7, 'n_estimators': 302}

Training Model 2/8: GradientBoostingClassifier
Best score: 0.941
Best parameters: {'learning_rate': 0.18591736381178015, 'max_depth': 5, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 251}

Training Model 3/8: SVC
Best score: 0.994
Best parameters: {'C': 8.004579877520495, 'degree': 2, 'gamma': 0.7000000000000001, 'kernel': 'poly'}

Training Model 4/8: LogisticRegression
Best score: 0.481
Best parameters: {'C': 1.1575614261989053, 'max_iter': 227, 'penalty': 'l2', 'solver': 'lbfgs'}

Training Model 5/8: DecisionTreeClassifier


20 fits failed out of a total of 50.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\Khaled Hesham\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\Khaled Hesham\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\linear_model\_logistic.py", line 1162, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "c:\Users\Khaled Hesham\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\linear_model\_logistic.py", line 54, in _check_solver
    raise ValueError

Best score: 0.762
Best parameters: {'criterion': 'entropy', 'max_depth': 16, 'max_features': 'sqrt', 'min_samples_leaf': 2, 'min_samples_split': 5, 'splitter': 'best'}

Training Model 6/8: KNeighborsClassifier
Best score: 0.712
Best parameters: {'algorithm': 'ball_tree', 'leaf_size': 13, 'n_neighbors': 5, 'weights': 'distance'}

Training Model 7/8: GaussianNB
Best score: 0.973
Best parameters: {'var_smoothing': 6.553132429912167e-08}

Training Model 8/8: MLPClassifier
Best score: 0.957
Best parameters: {'activation': 'relu', 'alpha': 0.008916689358333677, 'hidden_layer_sizes': (100,), 'max_iter': 503, 'solver': 'sgd'}



