<a href="https://colab.research.google.com/github/swedaa/DS-NLP/blob/main/Gesture_Recognition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive; the dataset
# path used by the data-loading cells lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Importing required libraries

In [None]:
# DATA PREPARATION

import os
import warnings

import numpy as np
import pandas as pd

from scipy.stats import norm, kurtosis
from scipy.stats import skew

warnings.filterwarnings("ignore",category=DeprecationWarning)

# MODEL BUILDING

# Splitting data into train and test
from sklearn.model_selection import train_test_split

# Standardising the data
from sklearn.preprocessing import StandardScaler

# Visualization
import matplotlib.pyplot as plt

# Accuracy score
from sklearn.metrics import accuracy_score

# DT and RF
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier

# Importing library for KNN
from sklearn.neighbors import KNeighborsClassifier

# Importing library for logistic regression
from sklearn.linear_model import LogisticRegression

# Importing library for SVM
from sklearn import svm
from sklearn.svm import SVC

# Importing libraries for Grid Search CV
from sklearn.model_selection import GridSearchCV

## Data Preparation

#### Defining a function to calculate the statistical features

In [None]:
# Defining root mean square-(RMS) value
def rms_value(arr, n):
  """Return the root mean square of the first ``n`` values of ``arr``.

  Elements are read by position, so a pandas Series whose index labels do not
  start at 0 is treated exactly like a plain list or NumPy array.

  Args:
    arr: One-dimensional sequence of numbers (list, NumPy array or pandas
      Series).
    n: Number of leading elements to include in the calculation.

  Returns:
    The square root of the mean of the squared first ``n`` values, as a
    Python float. ``nan`` is returned when ``n`` is not positive, because the
    RMS of an empty selection is undefined.

  Raises:
    IndexError: If ``n`` is larger than the number of elements in ``arr``.
  """
  values = np.asarray(arr, dtype=float)

  if n > values.size:
    raise IndexError(
        "n (%d) exceeds the number of available values (%d)" % (n, values.size))

  # Nothing to average: report "undefined" instead of dividing by zero.
  if n <= 0:
    return float('nan')

  # Square, average, then take the root, all vectorised over the first n values.
  return float(np.sqrt(np.mean(np.square(values[:n]))))

In [None]:
def statistical_features(data):
  """Compute seven summary statistics for every column of ``data``.

  For each column, in column order, the following values are appended to the
  result: mean, median, variance, standard deviation, skewness, kurtosis and
  the root mean square (via ``rms_value``).

  Args:
    data: pandas DataFrame whose columns are numeric signals.

  Returns:
    A flat list holding the seven statistics of the first column, followed by
    those of the second column, and so on.
  """
  feature = []
  for column_name in data.columns:
    signal = data[column_name]
    feature.extend([
        signal.mean(),
        signal.median(),
        signal.var(),
        signal.std(),
        signal.skew(),
        signal.kurtosis(),
        rms_value(signal, len(signal)),
    ])
  return feature

#### Sampling and Flattening the data

In [None]:
def sample_points(data, n=10, random_state=None):
  """Draw ``n`` random rows from ``data`` and flatten them column by column.

  The same sampled rows are used for every column, so position ``i`` within
  each column's slice of the result refers to the same original row.

  Args:
    data: pandas DataFrame to sample from.
    n: Number of rows to draw without replacement. Defaults to 10.
    random_state: Optional seed (or random state object) forwarded to
      ``DataFrame.sample`` to make the draw repeatable. With the default
      ``None`` the draw is not seeded by this function.

  Returns:
    A flat list of ``n * len(data.columns)`` values: the ``n`` sampled values
    of the first column, then those of the second column, and so on.

  Raises:
    ValueError: Raised by pandas when ``data`` has fewer than ``n`` rows.
  """
  sampled = data.sample(n=n, random_state=random_state)

  # Column-major flattening: all sampled values of one column, then the next.
  return [value for column in data.columns for value in sampled[column].tolist()]

In [None]:
def flatten(value):
  """Concatenate the items of every sub-iterable of ``value`` into one list.

  Only one level of nesting is removed; the items keep their original order.
  """
  return [item for sublist in value for item in sublist]

### Creating a dataframe

In [None]:
# Build the full feature table: one row per recording file, holding the
# statistical features, the flattened sampled points and, in the last column
# (F52), the name of the second-level folder the file was found in.
home_path = '/content/drive/MyDrive/Gesture Project /Dataset /gestures-dataset'
feature_columns = ['F'+str(x) for x in range(1,53)]

# The row sampling done per file draws from NumPy's global random generator
# when no explicit seed is given, so seed it once to make the table repeatable.
np.random.seed(12)

# Collect plain rows and build the DataFrame once at the end; concatenating
# inside the loop copies the whole table for every file.
rows = []
# os.listdir returns entries in arbitrary order, so sort for a stable row order.
for root in sorted(os.listdir(home_path)):
  path1 = os.path.join(home_path, root)
  for gesture in sorted(os.listdir(path1)):
    path2 = os.path.join(path1, gesture)
    for file in sorted(os.listdir(path2)):
      path3 = os.path.join(path2, file)
      data = pd.read_csv(path3, names = ['T1','T2','T3','x','y','z'], sep=' ')
      # Keep only the x, y, z columns; the first three columns are dropped.
      data = data.drop(data.columns[[0,1,2]],axis=1)
      rows.append(statistical_features(data) + sample_points(data) + [gesture])

df = pd.DataFrame(rows, columns=feature_columns)

In [None]:
# Display the assembled feature table as the cell output.
df

In [None]:
# reset_index returns a new frame instead of modifying df, so assign the
# result back; otherwise the renumbered index is only shown in the output and
# df itself keeps its previous index.
df = df.reset_index(drop=True)
df

Unnamed: 0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,F14,F15,F16,F17,F18,F19,F20,F21,F22,F23,F24,F25,F26,F27,F28,F29,F30,F31,F32,F33,F34,F35,F36,F37,F38,F39,F40,F41,F42,F43,F44,F45,F46,F47,F48,F49,F50,F51,F52
0,4.423208,1.379060,31.458192,5.608760,0.864222,-0.854975,6.994694,-1.716164,-0.919373,6.688860,2.586283,-0.450919,-0.508174,3.031197,8.693187,9.959879,29.353796,5.417914,-0.526808,0.121832,10.147333,0.766145,-0.153229,9.653421,1.379060,12.258313,-0.153229,0.000000,4.443638,1.991976,15.629349,0.153229,-0.919373,-3.677494,-2.298434,0.153229,-0.766145,-1.072602,-6.742072,1.225831,-4.903325,9.959879,10.419566,17.008410,1.838747,12.871228,9.653421,10.113108,-2.604892,1.379060,16.548723,14
1,5.342581,1.072602,42.295474,6.503497,0.929039,-0.576110,8.247360,-0.663992,-0.766145,3.158498,1.777216,-0.900687,0.578621,1.840874,8.478666,9.959879,28.744007,5.361344,0.134772,0.058926,9.935569,2.145205,0.000000,14.403518,-0.306458,0.919373,0.459687,6.282385,1.072602,10.266337,0.306458,-3.677494,0.766145,-4.596867,0.612916,0.459687,-0.766145,-0.766145,-1.225831,-1.072602,1.838747,-0.919373,10.266337,13.177687,6.895301,9.959879,9.959879,1.379060,5.363012,5.975927,2.145205,14
2,5.254850,1.532289,34.601631,5.882315,0.815900,-1.062613,7.757557,-3.019511,-2.145205,8.049706,2.837200,-0.525408,-1.138799,4.085785,8.283375,10.113108,27.222462,5.217515,-0.218214,0.969899,9.707494,10.266337,0.459687,2.145205,0.766145,16.855181,10.572795,1.225831,12.258313,-0.153229,0.766145,-8.274362,-0.919373,-6.895301,0.153229,-2.604892,-6.129157,-4.596867,-5.975927,-2.145205,-2.758120,10.419566,6.742072,7.201759,10.266337,15.935806,6.129157,-3.371036,3.983952,8.274362,10.419566,14
3,5.430049,1.532289,38.533798,6.207560,0.836362,-0.932119,8.100054,-1.551443,-1.072602,4.197671,2.048822,0.011910,-0.716398,2.518391,8.331822,10.036493,24.261344,4.925581,-0.789664,0.530319,9.600222,1.379060,0.766145,13.330915,11.645397,0.306458,0.612916,18.234240,1.072602,0.919373,9.346964,-0.306458,-2.451663,-4.750096,-3.217807,-0.306458,0.153229,-4.750096,-0.919373,-1.225831,-3.677494,10.879252,5.669470,10.419566,4.596867,10.572795,11.032481,15.169662,10.266337,9.806650,3.677494,14
4,3.614399,-0.306458,30.038746,5.480761,0.962918,-0.515235,6.429280,-1.027535,-0.459687,5.578530,2.361891,-0.695383,0.472503,2.511217,9.094586,9.959879,40.398550,6.355985,-0.207786,2.397879,10.987887,-0.459687,11.338940,-0.612916,-0.459687,15.476120,8.121132,-0.306458,-0.459687,6.129157,9.653421,-0.459687,-6.588843,1.991976,0.459687,-3.371036,-2.145205,0.306458,-0.459687,-0.919373,-3.217807,8.887277,2.451663,9.959879,9.959879,23.750481,14.556746,9.959879,7.048530,3.677494,15.169662,14
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3246,-3.938885,-6.129157,28.464783,5.335240,0.515652,-1.515724,6.504244,-0.333498,0.000000,18.126554,4.257529,-0.022102,1.650570,4.143852,7.382028,5.975927,20.321848,4.507976,1.902445,3.845342,8.580255,-3.371036,-9.346964,2.758120,3.217807,-7.508216,4.596867,2.145205,-0.306458,-6.129157,-8.274362,8.734048,-3.064578,-4.443638,-2.758120,1.225831,-1.225831,-3.217807,6.435614,0.000000,0.306458,7.201759,4.903325,15.935806,9.040505,5.363012,20.226215,2.758120,1.991976,6.129157,5.516241,09
3247,-3.200782,-6.282386,34.455962,5.869920,0.524949,-1.649808,6.541158,-0.570352,-0.153229,6.965849,2.639290,0.114860,0.644103,2.627577,7.295399,5.975927,16.797606,4.098488,2.217426,4.797241,8.311874,6.282385,-4.903325,-8.427590,-0.459687,4.290410,-7.967903,2.604892,3.983952,-9.346964,-8.274362,-0.306458,-0.459687,0.153229,2.911349,-3.524265,0.766145,-1.379060,-3.217807,0.612916,0.000000,19.613300,5.669470,5.975927,3.677494,8.887277,5.975927,3.830723,8.427590,5.669470,5.209783,09
3248,-1.889823,-4.750096,28.583732,5.346376,0.439050,-1.693516,5.528759,-0.689530,-0.459686,14.958258,3.867591,0.514198,0.252048,3.821347,8.248823,7.738060,16.712285,4.088066,1.907963,5.819150,9.155704,-6.895301,3.983952,1.225831,4.903325,-6.895301,-6.282385,-6.588843,4.903325,-3.371036,3.983952,1.225831,-3.677494,3.371036,-1.685518,0.306458,1.072602,1.225831,-5.056554,-1.225831,-4.443638,6.895301,7.967903,2.145205,14.403518,7.048530,7.508216,8.121132,2.758120,8.121132,8.274362,09
3249,-2.136692,-5.746085,30.416405,5.515107,0.487121,-1.653827,5.769927,-0.468199,-0.383072,14.431282,3.798853,0.615253,0.743042,3.721392,8.385026,7.661446,14.233320,3.772707,1.679970,6.413624,9.151571,4.290410,4.443638,1.685518,-1.532289,-7.508216,4.750096,-6.435614,-6.895301,2.604892,-5.669470,-1.838747,-2.604892,-1.072602,8.274362,-5.975927,-2.758120,0.919373,1.225831,6.435614,0.612916,20.379444,8.734048,0.919373,9.653421,7.354988,8.427590,7.661446,7.661446,5.975927,8.887277,09


### Model Building

#### Splitting the data into train and test

In [None]:
#Splitting data as test and train
#Creating a list
data_columns_list = list(df.columns)

#Separating the dependent variable
# Filter the list instead of using a set difference: set iteration order for
# strings depends on the per-process hash seed, so the feature column order
# (and with it the seeded model results) could change between kernel sessions.
features = [column for column in data_columns_list if column != 'F52']

#Setting x and y
x= df[features].values
y= df['F52'].values

#Splitting train and test with train 70% and test 30%
train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=0.30, random_state=12)

In [None]:
# Standardising the data
# StandardScaler is imported in the import cell at the top of the notebook.
# The scaler is fitted on the training split only and then applied unchanged
# to the test split, so no test-set statistics influence the transformation.
scalar = StandardScaler()
train_x = scalar.fit_transform(train_x)
test_x = scalar.transform(test_x)

#### Random forest

In [None]:
# Random forest baseline: 100 trees with a fixed seed, fitted on the training split
rf = RandomForestClassifier(n_estimators=100, random_state=0).fit(train_x, train_y)

# Predicted labels for the test split
pred_rf = rf.predict(test_x)

# Share of test samples whose predicted label matches the true label
print(accuracy_score(y_true=test_y, y_pred=pred_rf))

0.8340163934426229


### Support Vector Machine

In [None]:
# RBF-kernel support vector classifier (C=1, gamma='scale'), fitted on the training split
Model_Svm = svm.SVC(C=1, kernel='rbf', gamma='scale', random_state=12).fit(train_x, train_y)

# Mean accuracy on the test split; shown as the cell output
Model_Svm.score(test_x, test_y)

0.6670081967213115

### k-Nearest Neighbors

In [None]:
# 3-nearest-neighbours classifier, fitted on the training split
knn_classifier = KNeighborsClassifier(n_neighbors=3).fit(train_x, train_y)

# Predicted labels for the test split
pred_knn = knn_classifier.predict(test_x)

# Share of test samples whose predicted label matches the true label
print(accuracy_score(y_true=test_y, y_pred=pred_knn))

0.46209016393442626


#### Hyperparameter tuning and choosing the best model

In [None]:
# Search space for the grid search: one top-level entry per candidate model,
# each with the estimator under 'model' and its parameter grid under 'params'.
# Every model must sit at the top level of this dict; the grid-search loops
# iterate over model_params.items() and only read 'model' and 'params', so an
# entry nested inside another model's dict would be skipped silently.
model_params = {
    'svm': {
        'model': svm.SVC(gamma='scale'),
        'params': {
            'C':[10,20],
            'kernel':['rbf','linear'],
        }
    },
    'random_forest':{
        # Fixed seed so repeated searches score the same forests.
        'model': RandomForestClassifier(random_state=0),
        'params': {
            'n_estimators':[200,300,400]
        }
    },
    'knn':{
        'model': KNeighborsClassifier(),
        'params': {
            'n_neighbors':[3,5,7]
        }
    }
}

In [None]:
# Best cross-validated score and parameters per model, one dict per model
scores_grid_1 = []

# 5-fold grid search over each model's parameter grid on the training split
for model_name, mp in model_params.items():
  clf_1 = GridSearchCV(
      estimator=mp['model'],
      param_grid=mp['params'],
      cv=5,
      return_train_score=False,
  ).fit(train_x, train_y)
  scores_grid_1.append(dict(
      model=model_name,
      best_score=clf_1.best_score_,
      best_params=clf_1.best_params_,
  ))


In [None]:
# Tabulate the grid-search results (one row per model) and display the table.
df_grid_1 = pd.DataFrame(scores_grid_1,columns=['model','best_score','best_params'])
df_grid_1

Unnamed: 0,model,best_score,best_params
0,svm,0.694945,"{'C': 10, 'kernel': 'rbf'}"
1,random_forest,0.826813,{'n_estimators': 200}


### Without sampling (statistical features only)

In [None]:
# Build the statistics-only feature table: one row per recording file, holding
# the statistical features and, in the last column (F22), the name of the
# second-level folder the file was found in.
home_path = '/content/drive/MyDrive/Gesture Project /Dataset /gestures-dataset'
stat_columns = ['F'+str(x) for x in range(1,23)]

# Collect plain rows and build the DataFrame once at the end; concatenating
# inside the loop copies the whole table for every file.
stat_rows = []
# os.listdir returns entries in arbitrary order, so sort for a stable row order.
for root in sorted(os.listdir(home_path)):
  path1 = os.path.join(home_path, root)
  for gesture in sorted(os.listdir(path1)):
    path2 = os.path.join(path1, gesture)
    for file in sorted(os.listdir(path2)):
      path3 = os.path.join(path2, file)
      data = pd.read_csv(path3, names = ['T1','T2','T3','x','y','z'], sep=' ')
      # Keep only the x, y, z columns; the first three columns are dropped.
      data = data.drop(data.columns[[0,1,2]],axis=1)
      stat_rows.append(statistical_features(data) + [gesture])

df_stat = pd.DataFrame(stat_rows, columns=stat_columns)

In [None]:
# reset_index returns a new frame instead of modifying df_stat, so assign the
# result back; otherwise the renumbered index is only shown in the output and
# df_stat itself keeps its previous index.
df_stat = df_stat.reset_index(drop=True)
df_stat

Unnamed: 0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,F14,F15,F16,F17,F18,F19,F20,F21,F22
0,4.423208,1.379060,31.458192,5.608760,0.864222,-0.854975,6.994694,-1.716164,-0.919373,6.688860,2.586283,-0.450919,-0.508174,3.031197,8.693187,9.959879,29.353796,5.417914,-0.526808,0.121832,10.147333,14
1,5.342581,1.072602,42.295474,6.503497,0.929039,-0.576110,8.247360,-0.663992,-0.766145,3.158498,1.777216,-0.900687,0.578621,1.840874,8.478666,9.959879,28.744007,5.361344,0.134772,0.058926,9.935569,14
2,5.254850,1.532289,34.601631,5.882315,0.815900,-1.062613,7.757557,-3.019511,-2.145205,8.049706,2.837200,-0.525408,-1.138799,4.085785,8.283375,10.113108,27.222462,5.217515,-0.218214,0.969899,9.707494,14
3,5.430049,1.532289,38.533798,6.207560,0.836362,-0.932119,8.100054,-1.551443,-1.072602,4.197671,2.048822,0.011910,-0.716398,2.518391,8.331822,10.036493,24.261344,4.925581,-0.789664,0.530319,9.600222,14
4,3.614399,-0.306458,30.038746,5.480761,0.962918,-0.515235,6.429280,-1.027535,-0.459687,5.578530,2.361891,-0.695383,0.472503,2.511217,9.094586,9.959879,40.398550,6.355985,-0.207786,2.397879,10.987887,14
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3246,-3.938885,-6.129157,28.464783,5.335240,0.515652,-1.515724,6.504244,-0.333498,0.000000,18.126554,4.257529,-0.022102,1.650570,4.143852,7.382028,5.975927,20.321848,4.507976,1.902445,3.845342,8.580255,09
3247,-3.200782,-6.282386,34.455962,5.869920,0.524949,-1.649808,6.541158,-0.570352,-0.153229,6.965849,2.639290,0.114860,0.644103,2.627577,7.295399,5.975927,16.797606,4.098488,2.217426,4.797241,8.311874,09
3248,-1.889823,-4.750096,28.583732,5.346376,0.439050,-1.693516,5.528759,-0.689530,-0.459686,14.958258,3.867591,0.514198,0.252048,3.821347,8.248823,7.738060,16.712285,4.088066,1.907963,5.819150,9.155704,09
3249,-2.136692,-5.746085,30.416405,5.515107,0.487121,-1.653827,5.769927,-0.468199,-0.383072,14.431282,3.798853,0.615253,0.743042,3.721392,8.385026,7.661446,14.233320,3.772707,1.679970,6.413624,9.151571,09


In [None]:
#Splitting data as test and train
# Column names of the statistics-only table, as a plain list
data_columns = df_stat.columns.tolist()

In [None]:
#Separating the dependent variable
# Filter the list instead of using a set difference: set iteration order for
# strings depends on the per-process hash seed, so the feature column order
# (and with it the seeded model results) could change between kernel sessions.
features_1 = [column for column in data_columns if column != 'F22']

In [None]:
# Feature matrix (all feature columns) and label vector (column F22) as arrays
X = df_stat.loc[:, features_1].values
Y = df_stat.loc[:, 'F22'].values

In [None]:
# 70% / 30% train-test split with a fixed seed so the partition is repeatable
Train_X, Test_X, Train_Y, Test_Y = train_test_split(
    X,
    Y,
    test_size=0.30,
    random_state=12,
)

In [None]:
# Standardising the data
# StandardScaler is imported in the import cell at the top of the notebook.
# The scaler is fitted on the training split only and then applied unchanged
# to the test split, so no test-set statistics influence the transformation.
scalar = StandardScaler()
Train_X = scalar.fit_transform(Train_X)
Test_X = scalar.transform(Test_X)

### Model Building

#### Random forest

In [None]:
# Random forest on the statistics-only features: 100 trees with a fixed seed
rf_1 = RandomForestClassifier(n_estimators=100, random_state=15).fit(Train_X, Train_Y)

# Predicted labels for the test split
rf_pred_1 = rf_1.predict(Test_X)

# Share of test samples whose predicted label matches the true label
print(accuracy_score(y_true=Test_Y, y_pred=rf_pred_1))

0.8493852459016393


#### SVM

In [None]:
# RBF-kernel support vector classifier (C=1, gamma='scale'), fitted on the training split
Model_Svm = svm.SVC(C=1, kernel='rbf', gamma='scale', random_state=12).fit(Train_X, Train_Y)

# Mean accuracy on the test split; shown as the cell output
Model_Svm.score(Test_X, Test_Y)

0.826844262295082

#### KNN

In [None]:
# 3-nearest-neighbours classifier, fitted on the training split
knn_1 = KNeighborsClassifier(n_neighbors=3).fit(Train_X, Train_Y)

# Predicted labels for the test split
knn_pred_1 = knn_1.predict(Test_X)

# Share of test samples whose predicted label matches the true label
print(accuracy_score(y_true=Test_Y, y_pred=knn_pred_1))

0.8473360655737705


#### Hyperparameter tuning and choosing the best model

In [None]:
# Best cross-validated score and parameters per model, one dict per model
Scores_Grid_2 = []

# 5-fold grid search over each model's parameter grid on the training split
for model_name, mp in model_params.items():
  Clf_1 = GridSearchCV(
      estimator=mp['model'],
      param_grid=mp['params'],
      cv=5,
      return_train_score=False,
  ).fit(Train_X, Train_Y)
  Scores_Grid_2.append(dict(
      model=model_name,
      best_score=Clf_1.best_score_,
      best_params=Clf_1.best_params_,
  ))

In [None]:
# Tabulate the grid-search results (one row per model) and display the table.
DF_Grid_2 = pd.DataFrame(Scores_Grid_2,columns=['model','best_score','best_params'])
DF_Grid_2

Unnamed: 0,model,best_score,best_params
0,svm,0.865934,"{'C': 20, 'kernel': 'rbf'}"
1,random_forest,0.854945,{'n_estimators': 400}
