In [43]:
#standard libraries
import pandas as pd, numpy as np

import glob
import os
import time

#visualization libraries
import seaborn as sns
import matplotlib as plt
from matplotlib import pyplot as plt

#encoding
from sklearn.preprocessing import LabelEncoder

#model learning libraries
from sklearn import linear_model
from sklearn import ensemble
from sklearn import svm
from sklearn import metrics
from sklearn import preprocessing
from sklearn import model_selection
from sklearn import feature_selection
from sklearn.metrics import classification_report

#hyperparameters
from sklearn.model_selection import cross_val_score
import optuna
from optuna import Trial, study, samplers

#pre-build functions
from pre_build_functions import *

### **Data**

#### Meta File

In [44]:
data_meta = pd.read_csv('./additional_data/meta_information.csv', index_col=0)
data_meta.head(3)

Unnamed: 0,montage,pilote_id,last_train_idx,len(train),len(test)
0,2023-05-15_16-16-08.palm,1,23337,23337,5810
1,2023-05-15_17-12-24.palm,1,23336,23336,5803
2,2023-06-05_16-12-38.palm,1,17939,17939,4431


##### Palm File 1

In [45]:
#assign the value of the generator
length_of_file_number(length=len(data_meta['montage']))
n = length_of_file_number(length=len(data_meta['montage']))

In [46]:
gestures, current_file, palm_file = read_pilot(data=data_meta,
                                               file_number=next(n))

#### Protocol File

In [47]:
gestures_protocol = pd.read_csv(f'{"./data_csv/" + current_file}.protocol.csv', index_col=0)

#### Encoding

In [48]:
#encode the features
le = LabelEncoder()

In [49]:
#FIT
le.fit(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

#TRANSFORM
gestures_protocol['gesture'] = le.transform(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

In [50]:
#set the gestures
MAIN_GESTURES = ['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']
MAIN_GESTURES

['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']

#### Predictive Feature

In [51]:
#lock the predictive feature
y_cmd = np.array([gestures_protocol['gesture'].loc[s] for s in gestures['SYNC'].values])

#### Shift Function

In [52]:
#apply the time interval shift function
y, summary = get_naive_centering(gestures, y_cmd)

In [53]:
#display
summary

['Оптимальные свдиги для концевых выборок:   1 и 1\n',
 'Accuracy/correlation на концевых выборках: 0.602; 0.68\n',
 'Размер оптимального сдвига (как среднего): 1']

### **Train-Test Split**

In [54]:
#select the features
X = gestures[gestures.columns].values

In [55]:
#use stratified samplifing to split the data evenly
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, 
    stratify=y, 
    random_state=0
)

#find the dimensions
print(f'train: {X_train.shape, y_train.shape} \n test: {X_test.shape, y_test.shape}')

train: ((21858, 65), (21858,)) 
 test: ((7287, 65), (7287,))


### **Model Learning**

In [56]:
from collections import OrderedDict

#create an ordered dict to store model results
review = OrderedDict([
    ('palm_file', []),
    ('model_name', []),
    ('learning_time', []),
    ('hyperparameters', []),
    ('F1-Score, train', []), 
    ('F1-Score, test', []),
    ('F1-Score: Neutral', []),
    ('F1-Score: Open', []),
    ('F1-Score: Pistol', []),
    ('F1-Score: Thumb', []),
    ('F1-Score: OK', []),
    ('F1-Score: Grab', [])
    ]
)

### Random Forest with Optuna

In [57]:
#use the Optuna method to look for best hyper parameters
def optuna_rf(trial):
  #set hyperparameters
  n_estimators = trial.suggest_categorical('n_estimators', [100, 125, 150, 175, 200])
  max_depth = trial.suggest_categorical('max_depth', [10, 15, 20])
  min_samples_leaf = trial.suggest_categorical('min_samples_leaf', [3, 4, 5])
  criterion = trial.suggest_categorical('criterion', ['gini', 'entropy'])
  max_features = trial.suggest_categorical('max_features', ['sqrt', 'log2', None])
  
  #use the combinations for model build
  model = ensemble.RandomForestClassifier(n_estimators=n_estimators,
                                          max_depth=max_depth,
                                          min_samples_leaf=min_samples_leaf,
                                          criterion=criterion,
                                          max_features=max_features,
                                          random_state=42)
  
  #model learning through cross-validation
  score = model_selection.cross_val_score(
    model,
    X=X_train,
    y=y_train,
    cv=5,
    scoring='f1_micro', 
    n_jobs=-1).mean()

  return score

In [58]:
%%time
#begin hyperparameters selection
#create review object
study_optuna_rf = optuna.create_study(study_name='RandomForest',
                                       direction='maximize')

#search for the best combination
study_optuna_rf.optimize(optuna_rf,
                         n_trials=5)

[I 2024-10-08 00:13:36,111] A new study created in memory with name: RandomForest
[I 2024-10-08 00:14:30,563] Trial 0 finished with value: 0.972733214400912 and parameters: {'n_estimators': 125, 'max_depth': 20, 'min_samples_leaf': 3, 'criterion': 'gini', 'max_features': None}. Best is trial 0 with value: 0.972733214400912.
[I 2024-10-08 00:14:36,690] Trial 1 finished with value: 0.8205698667274515 and parameters: {'n_estimators': 200, 'max_depth': 10, 'min_samples_leaf': 3, 'criterion': 'entropy', 'max_features': 'log2'}. Best is trial 0 with value: 0.972733214400912.
[I 2024-10-08 00:14:42,732] Trial 2 finished with value: 0.9792754499578546 and parameters: {'n_estimators': 125, 'max_depth': 20, 'min_samples_leaf': 5, 'criterion': 'entropy', 'max_features': 'sqrt'}. Best is trial 2 with value: 0.9792754499578546.
[I 2024-10-08 00:14:45,393] Trial 3 finished with value: 0.7881790341104965 and parameters: {'n_estimators': 100, 'max_depth': 10, 'min_samples_leaf': 5, 'criterion': 'gini'

CPU times: user 94.2 ms, sys: 115 ms, total: 210 ms
Wall time: 1min 13s


In [59]:
#find the best hyperparameters
print(f'Random Forest Hyperparameters: {study_optuna_rf.best_params}')

Random Forest Hyperparameters: {'n_estimators': 125, 'max_depth': 20, 'min_samples_leaf': 5, 'criterion': 'entropy', 'max_features': 'sqrt'}


In [60]:
#find the metrics for test data
model_opt_rf = ensemble.RandomForestClassifier(**study_optuna_rf.best_params,
                                               random_state=42,
                                               )

#set the start_time
start_time = time.time()

#model learning
model_opt_rf.fit(X_train, y_train)
model_opt_rf.fit(X_test, y_test)

#calculate the model learning time
model_opt_rf_time = round(time.time() - start_time, 2)

#make a prediction
y_train_pred_rf = model_opt_rf.predict(X_train)
y_test_pred_rf = model_opt_rf.predict(X_test)

In [61]:
#display the metrics
print('train data:')
print(f'f1_score: {metrics.f1_score(y_train, y_train_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_train, y_train):.3f}')
print()
print('test data:')
print(f'f1_score: {metrics.f1_score(y_test, y_test_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_test, y_test):.3f}')

train data:
f1_score: 0.947
accuracy: 0.947

test data:
f1_score: 0.987
accuracy: 0.987


In [62]:
print(classification_report(y_train,
                            y_train_pred_rf,
                            target_names=MAIN_GESTURES))

print(classification_report(y_test,
                            y_test_pred_rf,
                            target_names=MAIN_GESTURES))

              precision    recall  f1-score   support

     Neutral       0.96      0.98      0.97     14173
        Open       0.95      0.83      0.89      1538
      Pistol       0.91      0.84      0.88      1535
       Thumb       0.89      0.95      0.92      1537
          OK       0.93      0.91      0.92      1540
        Grab       0.92      0.85      0.88      1535

    accuracy                           0.95     21858
   macro avg       0.93      0.90      0.91     21858
weighted avg       0.95      0.95      0.95     21858

              precision    recall  f1-score   support

     Neutral       0.99      0.99      0.99      4725
        Open       0.99      0.97      0.98       513
      Pistol       0.99      0.98      0.98       511
       Thumb       0.96      0.98      0.97       512
          OK       0.99      0.98      0.98       514
        Grab       0.99      0.97      0.98       512

    accuracy                           0.99      7287
   macro avg       0.98

In [63]:
#export the classification report into a dataframe
report_rf = classification_report(y_test,
                                  y_test_pred_rf,
                                  target_names=MAIN_GESTURES,
                                  output_dict=True)

test_rf = pd.DataFrame(report_rf)

In [64]:
#add model results to the ordered dict
review['palm_file'].append(current_file)
review['model_name'].append(model_opt_rf.__class__.__name__)
review['learning_time'].append(model_opt_rf_time)
review['hyperparameters'].append(study_optuna_rf.best_params)
review['F1-Score, train'].append(round(metrics.f1_score(y_train, y_train_pred_rf, average="micro"), 3))
review['F1-Score, test'].append(round(metrics.f1_score(y_test, y_test_pred_rf, average="micro"), 3))

review['F1-Score: Neutral'].append(test_rf.loc['f1-score'].iloc[0].round(3))
review['F1-Score: Open'].append(test_rf.loc['f1-score'].iloc[1].round(3))
review['F1-Score: Pistol'].append(test_rf.loc['f1-score'].iloc[2].round(3))
review['F1-Score: Thumb'].append(test_rf.loc['f1-score'].iloc[3].round(3))
review['F1-Score: OK'].append(test_rf.loc['f1-score'].iloc[4].round(3))
review['F1-Score: Grab'].append(test_rf.loc['f1-score'].iloc[5].round(3))

##### Palm File 2

In [65]:
gestures, current_file, palm_file = read_pilot(data=data_meta,
                                               file_number=next(n))

#### Protocol File

In [66]:
gestures_protocol = pd.read_csv(f'{"./data_csv/" + current_file}.protocol.csv', index_col=0)

#### Encoding

In [67]:
#FIT
le.fit(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

#TRANSFORM
gestures_protocol['gesture'] = le.transform(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

In [68]:
#set the gestures
MAIN_GESTURES = ['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']
MAIN_GESTURES

['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']

#### Predictive Feature

In [69]:
#lock the predictive feature
y_cmd = np.array([gestures_protocol['gesture'].loc[s] for s in gestures['SYNC'].values])

#### Shift Function

In [70]:
#apply the time interval shift function
y, summary = get_naive_centering(gestures, y_cmd)

In [71]:
#display
summary

['Оптимальные свдиги для концевых выборок:   20 и 1\n',
 'Accuracy/correlation на концевых выборках: 0.62; 0.683\n',
 'Размер оптимального сдвига (как среднего): 10']

### **Train-Test Split**

In [72]:
#select the features
X = gestures[gestures.columns].values

In [73]:
#use stratified samplifing to split the data evenly
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, 
    stratify=y, 
    random_state=0
)

#find the dimensions
print(f'train: {X_train.shape, y_train.shape} \n test: {X_test.shape, y_test.shape}')

train: ((21852, 65), (21852,)) 
 test: ((7285, 65), (7285,))


### **Model Learning**

### Random Forest with Optuna

In [74]:
%%time
#begin hyperparameters selection
#create review object
study_optuna_rf = optuna.create_study(study_name='RandomForest',
                                       direction='maximize')

#search for the best combination
study_optuna_rf.optimize(optuna_rf,
                         n_trials=5)

[I 2024-10-08 00:15:05,320] A new study created in memory with name: RandomForest
[I 2024-10-08 00:16:00,660] Trial 0 finished with value: 0.881291683746683 and parameters: {'n_estimators': 150, 'max_depth': 20, 'min_samples_leaf': 4, 'criterion': 'entropy', 'max_features': None}. Best is trial 0 with value: 0.881291683746683.
[I 2024-10-08 00:16:46,784] Trial 1 finished with value: 0.785374281395949 and parameters: {'n_estimators': 200, 'max_depth': 10, 'min_samples_leaf': 5, 'criterion': 'gini', 'max_features': None}. Best is trial 0 with value: 0.881291683746683.
[I 2024-10-08 00:17:25,476] Trial 2 finished with value: 0.8558480038238295 and parameters: {'n_estimators': 100, 'max_depth': 20, 'min_samples_leaf': 5, 'criterion': 'gini', 'max_features': None}. Best is trial 0 with value: 0.881291683746683.
[I 2024-10-08 00:17:28,103] Trial 3 finished with value: 0.729177819066481 and parameters: {'n_estimators': 100, 'max_depth': 10, 'min_samples_leaf': 3, 'criterion': 'gini', 'max_fea

CPU times: user 131 ms, sys: 88.6 ms, total: 219 ms
Wall time: 2min 27s


In [75]:
#find the best hyperparameters
print(f'Random Forest Hyperparameters: {study_optuna_rf.best_params}')

Random Forest Hyperparameters: {'n_estimators': 150, 'max_depth': 20, 'min_samples_leaf': 4, 'criterion': 'entropy', 'max_features': None}


In [76]:
#find the metrics for test data
model_opt_rf = ensemble.RandomForestClassifier(**study_optuna_rf.best_params,
                                               random_state=42,
                                               )

#set the start_time
start_time = time.time()

#model learning
model_opt_rf.fit(X_train, y_train)
model_opt_rf.fit(X_test, y_test)

#calculate the model learning time
model_opt_rf_time = round(time.time() - start_time, 2)

#make a prediction
y_train_pred_rf = model_opt_rf.predict(X_train)
y_test_pred_rf = model_opt_rf.predict(X_test)

In [77]:
#display the metrics
print('train data:')
print(f'f1_score: {metrics.f1_score(y_train, y_train_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_train, y_train):.3f}')
print()
print('test data:')
print(f'f1_score: {metrics.f1_score(y_test, y_test_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_test, y_test):.3f}')

train data:
f1_score: 0.827
accuracy: 0.827

test data:
f1_score: 0.966
accuracy: 0.966


In [78]:
print(classification_report(y_train,
                            y_train_pred_rf,
                            target_names=MAIN_GESTURES))

print(classification_report(y_test,
                            y_test_pred_rf,
                            target_names=MAIN_GESTURES))

              precision    recall  f1-score   support

     Neutral       0.85      0.92      0.88     14146
        Open       0.89      0.70      0.79      1540
      Pistol       0.69      0.60      0.64      1540
       Thumb       0.67      0.68      0.67      1541
          OK       0.82      0.66      0.73      1542
        Grab       0.86      0.62      0.72      1543

    accuracy                           0.83     21852
   macro avg       0.79      0.70      0.74     21852
weighted avg       0.83      0.83      0.82     21852

              precision    recall  f1-score   support

     Neutral       0.95      1.00      0.98      4716
        Open       1.00      0.97      0.98       514
      Pistol       1.00      0.90      0.94       513
       Thumb       0.97      0.86      0.91       514
          OK       1.00      0.89      0.94       514
        Grab       0.99      0.92      0.96       514

    accuracy                           0.97      7285
   macro avg       0.98

In [79]:
#export the classification report into a dataframe
report_rf = classification_report(y_test,
                                  y_test_pred_rf,
                                  target_names=MAIN_GESTURES,
                                  output_dict=True)

test_rf = pd.DataFrame(report_rf)

In [80]:
#add model results to the ordered dict
review['palm_file'].append(current_file)
review['model_name'].append(model_opt_rf.__class__.__name__)
review['learning_time'].append(model_opt_rf_time)
review['hyperparameters'].append(study_optuna_rf.best_params)
review['F1-Score, train'].append(round(metrics.f1_score(y_train, y_train_pred_rf, average="micro"), 3))
review['F1-Score, test'].append(round(metrics.f1_score(y_test, y_test_pred_rf, average="micro"), 3))

review['F1-Score: Neutral'].append(test_rf.loc['f1-score'].iloc[0].round(3))
review['F1-Score: Open'].append(test_rf.loc['f1-score'].iloc[1].round(3))
review['F1-Score: Pistol'].append(test_rf.loc['f1-score'].iloc[2].round(3))
review['F1-Score: Thumb'].append(test_rf.loc['f1-score'].iloc[3].round(3))
review['F1-Score: OK'].append(test_rf.loc['f1-score'].iloc[4].round(3))
review['F1-Score: Grab'].append(test_rf.loc['f1-score'].iloc[5].round(3))

##### Palm File 3

In [81]:
gestures, current_file, palm_file = read_pilot(data=data_meta,
                                               file_number=next(n))

#### Protocol File

In [82]:
gestures_protocol = pd.read_csv(f'{"./data_csv/" + current_file}.protocol.csv', index_col=0)

#### Encoding

In [83]:
#FIT
le.fit(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

#TRANSFORM
gestures_protocol['gesture'] = le.transform(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

In [84]:
#set the gestures
MAIN_GESTURES = ['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']
MAIN_GESTURES

['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']

#### Predictive Feature

In [85]:
#lock the predictive feature
y_cmd = np.array([gestures_protocol['gesture'].loc[s] for s in gestures['SYNC'].values])

#### Shift Function

In [86]:
#apply the time interval shift function
y, summary = get_naive_centering(gestures, y_cmd)

In [87]:
#display
summary

['Оптимальные свдиги для концевых выборок:   1 и 1\n',
 'Accuracy/correlation на концевых выборках: 0.61; 0.58\n',
 'Размер оптимального сдвига (как среднего): 1']

### **Train-Test Split**

In [88]:
#select the features
X = gestures[gestures.columns].values

In [89]:
#use stratified samplifing to split the data evenly
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, 
    stratify=y, 
    random_state=0
)

#find the dimensions
print(f'train: {X_train.shape, y_train.shape} \n test: {X_test.shape, y_test.shape}')

train: ((16776, 65), (16776,)) 
 test: ((5592, 65), (5592,))


### **Model Learning**

### Random Forest with Optuna

In [90]:
%%time
#begin hyperparameters selection
#create review object
study_optuna_rf = optuna.create_study(study_name='RandomForest',
                                       direction='maximize')

#search for the best combination
study_optuna_rf.optimize(optuna_rf,
                         n_trials=5)

[I 2024-10-08 00:19:09,750] A new study created in memory with name: RandomForest
[I 2024-10-08 00:19:14,889] Trial 0 finished with value: 0.9617907557965004 and parameters: {'n_estimators': 125, 'max_depth': 20, 'min_samples_leaf': 3, 'criterion': 'gini', 'max_features': 'sqrt'}. Best is trial 0 with value: 0.9617907557965004.
[I 2024-10-08 00:19:17,735] Trial 1 finished with value: 0.7872565629723839 and parameters: {'n_estimators': 125, 'max_depth': 10, 'min_samples_leaf': 5, 'criterion': 'entropy', 'max_features': 'log2'}. Best is trial 0 with value: 0.9617907557965004.
[I 2024-10-08 00:20:10,656] Trial 2 finished with value: 0.9076063158006924 and parameters: {'n_estimators': 200, 'max_depth': 15, 'min_samples_leaf': 5, 'criterion': 'gini', 'max_features': None}. Best is trial 0 with value: 0.9617907557965004.
[I 2024-10-08 00:20:15,937] Trial 3 finished with value: 0.8586667471921189 and parameters: {'n_estimators': 150, 'max_depth': 10, 'min_samples_leaf': 4, 'criterion': 'entro

CPU times: user 107 ms, sys: 57.8 ms, total: 165 ms
Wall time: 1min 12s


In [91]:
#find the best hyperparameters
print(f'Random Forest Hyperparameters: {study_optuna_rf.best_params}')

Random Forest Hyperparameters: {'n_estimators': 125, 'max_depth': 20, 'min_samples_leaf': 3, 'criterion': 'gini', 'max_features': 'sqrt'}


In [92]:
#find the metrics for test data
model_opt_rf = ensemble.RandomForestClassifier(**study_optuna_rf.best_params,
                                               random_state=42,
                                               )

#set the start_time
start_time = time.time()

#model learning
model_opt_rf.fit(X_train, y_train)
model_opt_rf.fit(X_test, y_test)

#calculate the model learning time
model_opt_rf_time = round(time.time() - start_time, 2)

#make a prediction
y_train_pred_rf = model_opt_rf.predict(X_train)
y_test_pred_rf = model_opt_rf.predict(X_test)

In [93]:
#display the metrics
print('train data:')
print(f'f1_score: {metrics.f1_score(y_train, y_train_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_train, y_train):.3f}')
print()
print('test data:')
print(f'f1_score: {metrics.f1_score(y_test, y_test_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_test, y_test):.3f}')

train data:
f1_score: 0.929
accuracy: 0.929

test data:
f1_score: 0.983
accuracy: 0.983


In [94]:
print(classification_report(y_train,
                            y_train_pred_rf,
                            target_names=MAIN_GESTURES))

print(classification_report(y_test,
                            y_test_pred_rf,
                            target_names=MAIN_GESTURES))

              precision    recall  f1-score   support

     Neutral       0.95      0.97      0.96      9970
        Open       0.93      0.89      0.91      1363
      Pistol       0.84      0.86      0.85      1355
       Thumb       0.88      0.94      0.91      1362
          OK       0.91      0.86      0.89      1363
        Grab       0.94      0.82      0.87      1363

    accuracy                           0.93     16776
   macro avg       0.91      0.89      0.90     16776
weighted avg       0.93      0.93      0.93     16776

              precision    recall  f1-score   support

     Neutral       0.98      0.99      0.99      3324
        Open       0.98      0.99      0.98       455
      Pistol       0.99      0.96      0.98       451
       Thumb       0.97      0.97      0.97       454
          OK       0.98      0.97      0.98       454
        Grab       0.99      0.96      0.97       454

    accuracy                           0.98      5592
   macro avg       0.98

In [95]:
#export the classification report into a dataframe
report_rf = classification_report(y_test,
                                  y_test_pred_rf,
                                  target_names=MAIN_GESTURES,
                                  output_dict=True)

test_rf = pd.DataFrame(report_rf)

In [96]:
#add model results to the ordered dict
review['palm_file'].append(current_file)
review['model_name'].append(model_opt_rf.__class__.__name__)
review['learning_time'].append(model_opt_rf_time)
review['hyperparameters'].append(study_optuna_rf.best_params)
review['F1-Score, train'].append(round(metrics.f1_score(y_train, y_train_pred_rf, average="micro"), 3))
review['F1-Score, test'].append(round(metrics.f1_score(y_test, y_test_pred_rf, average="micro"), 3))

review['F1-Score: Neutral'].append(test_rf.loc['f1-score'].iloc[0].round(3))
review['F1-Score: Open'].append(test_rf.loc['f1-score'].iloc[1].round(3))
review['F1-Score: Pistol'].append(test_rf.loc['f1-score'].iloc[2].round(3))
review['F1-Score: Thumb'].append(test_rf.loc['f1-score'].iloc[3].round(3))
review['F1-Score: OK'].append(test_rf.loc['f1-score'].iloc[4].round(3))
review['F1-Score: Grab'].append(test_rf.loc['f1-score'].iloc[5].round(3))

##### Palm File 4

In [97]:
gestures, current_file, palm_file = read_pilot(data=data_meta,
                                               file_number=next(n))

#### Protocol File

In [98]:
gestures_protocol = pd.read_csv(f'{"./data_csv/" + current_file}.protocol.csv', index_col=0)

#### Encoding

In [99]:
#FIT
le.fit(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

#TRANSFORM
gestures_protocol['gesture'] = le.transform(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

In [100]:
#set the gestures
MAIN_GESTURES = ['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']
MAIN_GESTURES

['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']

#### Predictive Feature

In [101]:
#lock the predictive feature
y_cmd = np.array([gestures_protocol['gesture'].loc[s] for s in gestures['SYNC'].values])

#### Shift Function

In [102]:
#apply the time interval shift function
y, summary = get_naive_centering(gestures, y_cmd)

In [103]:
#display
summary

['Оптимальные свдиги для концевых выборок:   1 и 1\n',
 'Accuracy/correlation на концевых выборках: 0.574; 0.576\n',
 'Размер оптимального сдвига (как среднего): 1']

### **Train-Test Split**

In [104]:
#select the features
X = gestures[gestures.columns].values

In [105]:
#use stratified samplifing to split the data evenly
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, 
    stratify=y, 
    random_state=0
)

#find the dimensions
print(f'train: {X_train.shape, y_train.shape} \n test: {X_test.shape, y_test.shape}')

train: ((16653, 65), (16653,)) 
 test: ((5551, 65), (5551,))


### **Model Learning**

### Random Forest with Optuna

In [106]:
%%time
#begin hyperparameters selection
#create review object
study_optuna_rf = optuna.create_study(study_name='RandomForest',
                                       direction='maximize')

#search for the best combination
study_optuna_rf.optimize(optuna_rf,
                         n_trials=5)

[I 2024-10-08 00:20:37,250] A new study created in memory with name: RandomForest
[I 2024-10-08 00:20:41,720] Trial 0 finished with value: 0.8574418489338935 and parameters: {'n_estimators': 150, 'max_depth': 10, 'min_samples_leaf': 3, 'criterion': 'entropy', 'max_features': 'sqrt'}. Best is trial 0 with value: 0.8574418489338935.
[I 2024-10-08 00:20:44,730] Trial 1 finished with value: 0.7618446426011722 and parameters: {'n_estimators': 150, 'max_depth': 10, 'min_samples_leaf': 4, 'criterion': 'gini', 'max_features': 'log2'}. Best is trial 0 with value: 0.8574418489338935.
[I 2024-10-08 00:21:23,437] Trial 2 finished with value: 0.9535817414532515 and parameters: {'n_estimators': 150, 'max_depth': 20, 'min_samples_leaf': 5, 'criterion': 'entropy', 'max_features': None}. Best is trial 2 with value: 0.9535817414532515.
[I 2024-10-08 00:21:26,586] Trial 3 finished with value: 0.8145083720766699 and parameters: {'n_estimators': 125, 'max_depth': 10, 'min_samples_leaf': 3, 'criterion': 'en

CPU times: user 134 ms, sys: 56.3 ms, total: 191 ms
Wall time: 53.2 s


In [107]:
#find the best hyperparameters
print(f'Random Forest Hyperparameters: {study_optuna_rf.best_params}')

Random Forest Hyperparameters: {'n_estimators': 100, 'max_depth': 15, 'min_samples_leaf': 5, 'criterion': 'entropy', 'max_features': 'sqrt'}


In [108]:
#find the metrics for test data
model_opt_rf = ensemble.RandomForestClassifier(**study_optuna_rf.best_params,
                                               random_state=42,
                                               )

#set the start_time
start_time = time.time()

#model learning
model_opt_rf.fit(X_train, y_train)
model_opt_rf.fit(X_test, y_test)

#calculate the model learning time
model_opt_rf_time = round(time.time() - start_time, 2)

#make a prediction
y_train_pred_rf = model_opt_rf.predict(X_train)
y_test_pred_rf = model_opt_rf.predict(X_test)

In [109]:
#display the metrics
print('train data:')
print(f'f1_score: {metrics.f1_score(y_train, y_train_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_train, y_train):.3f}')
print()
print('test data:')
print(f'f1_score: {metrics.f1_score(y_test, y_test_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_test, y_test):.3f}')

train data:
f1_score: 0.919
accuracy: 0.919

test data:
f1_score: 0.974
accuracy: 0.974


In [110]:
print(classification_report(y_train,
                            y_train_pred_rf,
                            target_names=MAIN_GESTURES))

print(classification_report(y_test,
                            y_test_pred_rf,
                            target_names=MAIN_GESTURES))

              precision    recall  f1-score   support

     Neutral       0.94      0.97      0.95      9797
        Open       0.94      0.81      0.87      1369
      Pistol       0.89      0.82      0.86      1379
       Thumb       0.85      0.92      0.88      1367
          OK       0.88      0.90      0.89      1366
        Grab       0.92      0.80      0.85      1375

    accuracy                           0.92     16653
   macro avg       0.90      0.87      0.88     16653
weighted avg       0.92      0.92      0.92     16653

              precision    recall  f1-score   support

     Neutral       0.98      0.98      0.98      3265
        Open       0.99      0.95      0.97       456
      Pistol       0.97      0.96      0.96       460
       Thumb       0.96      0.98      0.97       456
          OK       0.93      0.98      0.95       456
        Grab       0.98      0.95      0.96       458

    accuracy                           0.97      5551
   macro avg       0.97

In [111]:
#export the classification report into a dataframe
report_rf = classification_report(y_test,
                                  y_test_pred_rf,
                                  target_names=MAIN_GESTURES,
                                  output_dict=True)

test_rf = pd.DataFrame(report_rf)

In [112]:
#add model results to the ordered dict
review['palm_file'].append(current_file)
review['model_name'].append(model_opt_rf.__class__.__name__)
review['learning_time'].append(model_opt_rf_time)
review['hyperparameters'].append(study_optuna_rf.best_params)
review['F1-Score, train'].append(round(metrics.f1_score(y_train, y_train_pred_rf, average="micro"), 3))
review['F1-Score, test'].append(round(metrics.f1_score(y_test, y_test_pred_rf, average="micro"), 3))

review['F1-Score: Neutral'].append(test_rf.loc['f1-score'].iloc[0].round(3))
review['F1-Score: Open'].append(test_rf.loc['f1-score'].iloc[1].round(3))
review['F1-Score: Pistol'].append(test_rf.loc['f1-score'].iloc[2].round(3))
review['F1-Score: Thumb'].append(test_rf.loc['f1-score'].iloc[3].round(3))
review['F1-Score: OK'].append(test_rf.loc['f1-score'].iloc[4].round(3))
review['F1-Score: Grab'].append(test_rf.loc['f1-score'].iloc[5].round(3))

##### Palm File 5

In [113]:
gestures, current_file, palm_file = read_pilot(data=data_meta,
                                               file_number=next(n))

#### Protocol File

In [114]:
gestures_protocol = pd.read_csv(f'{"./data_csv/" + current_file}.protocol.csv', index_col=0)

#### Encoding

In [115]:
#FIT
le.fit(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

#TRANSFORM
gestures_protocol['gesture'] = le.transform(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

In [116]:
#set the gestures
MAIN_GESTURES = ['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']
MAIN_GESTURES

['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']

#### Predictive Feature

In [117]:
#lock the predictive feature
y_cmd = np.array([gestures_protocol['gesture'].loc[s] for s in gestures['SYNC'].values])

#### Shift Function

In [118]:
#apply the time interval shift function
y, summary = get_naive_centering(gestures, y_cmd)

In [119]:
#display
summary

['Оптимальные свдиги для концевых выборок:   1 и 1\n',
 'Accuracy/correlation на концевых выборках: 0.612; 0.588\n',
 'Размер оптимального сдвига (как среднего): 1']

### **Train-Test Split**

In [120]:
#select the features
X = gestures[gestures.columns].values

In [121]:
#use stratified samplifing to split the data evenly
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, 
    stratify=y, 
    random_state=0
)

#find the dimensions
print(f'train: {X_train.shape, y_train.shape} \n test: {X_test.shape, y_test.shape}')

train: ((16781, 65), (16781,)) 
 test: ((5594, 65), (5594,))


### **Model Learning**

### Random Forest with Optuna

In [122]:
%%time
#begin hyperparameters selection
#create review object
study_optuna_rf = optuna.create_study(study_name='RandomForest',
                                       direction='maximize')

#search for the best combination
study_optuna_rf.optimize(optuna_rf,
                         n_trials=5)

[I 2024-10-08 00:21:42,851] A new study created in memory with name: RandomForest
[I 2024-10-08 00:22:36,281] Trial 0 finished with value: 0.9221141279513784 and parameters: {'n_estimators': 200, 'max_depth': 15, 'min_samples_leaf': 5, 'criterion': 'gini', 'max_features': None}. Best is trial 0 with value: 0.9221141279513784.
[I 2024-10-08 00:22:41,703] Trial 1 finished with value: 0.8767653060173839 and parameters: {'n_estimators': 175, 'max_depth': 10, 'min_samples_leaf': 4, 'criterion': 'entropy', 'max_features': 'sqrt'}. Best is trial 0 with value: 0.9221141279513784.
[I 2024-10-08 00:22:47,986] Trial 2 finished with value: 0.945771612729596 and parameters: {'n_estimators': 175, 'max_depth': 15, 'min_samples_leaf': 3, 'criterion': 'gini', 'max_features': 'sqrt'}. Best is trial 2 with value: 0.945771612729596.
[I 2024-10-08 00:23:25,300] Trial 3 finished with value: 0.9428518247498777 and parameters: {'n_estimators': 150, 'max_depth': 15, 'min_samples_leaf': 5, 'criterion': 'entropy

CPU times: user 133 ms, sys: 79.5 ms, total: 212 ms
Wall time: 1min 46s


In [123]:
#find the best hyperparameters
print(f'Random Forest Hyperparameters: {study_optuna_rf.best_params}')

Random Forest Hyperparameters: {'n_estimators': 175, 'max_depth': 15, 'min_samples_leaf': 3, 'criterion': 'gini', 'max_features': 'sqrt'}


In [124]:
#find the metrics for test data
model_opt_rf = ensemble.RandomForestClassifier(**study_optuna_rf.best_params,
                                               random_state=42,
                                               )

#set the start_time
start_time = time.time()

#model learning
model_opt_rf.fit(X_train, y_train)
model_opt_rf.fit(X_test, y_test)

#calculate the model learning time
model_opt_rf_time = round(time.time() - start_time, 2)

#make a prediction
y_train_pred_rf = model_opt_rf.predict(X_train)
y_test_pred_rf = model_opt_rf.predict(X_test)

In [125]:
#display the metrics
print('train data:')
print(f'f1_score: {metrics.f1_score(y_train, y_train_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_train, y_train):.3f}')
print()
print('test data:')
print(f'f1_score: {metrics.f1_score(y_test, y_test_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_test, y_test):.3f}')

train data:
f1_score: 0.920
accuracy: 0.920

test data:
f1_score: 0.971
accuracy: 0.971


In [126]:
print(classification_report(y_train,
                            y_train_pred_rf,
                            target_names=MAIN_GESTURES))

print(classification_report(y_test,
                            y_test_pred_rf,
                            target_names=MAIN_GESTURES))

              precision    recall  f1-score   support

     Neutral       0.95      0.97      0.96      9937
        Open       0.95      0.79      0.86      1376
      Pistol       0.83      0.86      0.84      1367
       Thumb       0.90      0.92      0.91      1367
          OK       0.84      0.87      0.86      1363
        Grab       0.90      0.80      0.85      1371

    accuracy                           0.92     16781
   macro avg       0.89      0.87      0.88     16781
weighted avg       0.92      0.92      0.92     16781

              precision    recall  f1-score   support

     Neutral       0.97      0.98      0.98      3313
        Open       0.99      0.95      0.97       459
      Pistol       0.94      0.95      0.95       456
       Thumb       0.96      0.96      0.96       455
          OK       0.96      0.94      0.95       454
        Grab       0.98      0.95      0.96       457

    accuracy                           0.97      5594
   macro avg       0.97

In [127]:
#export the classification report into a dataframe
report_rf = classification_report(y_test,
                                  y_test_pred_rf,
                                  target_names=MAIN_GESTURES,
                                  output_dict=True)

test_rf = pd.DataFrame(report_rf)

In [128]:
#add model results to the ordered dict
review['palm_file'].append(current_file)
review['model_name'].append(model_opt_rf.__class__.__name__)
review['learning_time'].append(model_opt_rf_time)
review['hyperparameters'].append(study_optuna_rf.best_params)
review['F1-Score, train'].append(round(metrics.f1_score(y_train, y_train_pred_rf, average="micro"), 3))
review['F1-Score, test'].append(round(metrics.f1_score(y_test, y_test_pred_rf, average="micro"), 3))

review['F1-Score: Neutral'].append(test_rf.loc['f1-score'].iloc[0].round(3))
review['F1-Score: Open'].append(test_rf.loc['f1-score'].iloc[1].round(3))
review['F1-Score: Pistol'].append(test_rf.loc['f1-score'].iloc[2].round(3))
review['F1-Score: Thumb'].append(test_rf.loc['f1-score'].iloc[3].round(3))
review['F1-Score: OK'].append(test_rf.loc['f1-score'].iloc[4].round(3))
review['F1-Score: Grab'].append(test_rf.loc['f1-score'].iloc[5].round(3))

##### Palm File 6

In [129]:
gestures, current_file, palm_file = read_pilot(data=data_meta,
                                               file_number=next(n))

#### Protocol File

In [130]:
gestures_protocol = pd.read_csv(f'{"./data_csv/" + current_file}.protocol.csv', index_col=0)

#### Encoding

In [131]:
#FIT
le.fit(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

#TRANSFORM
gestures_protocol['gesture'] = le.transform(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

In [132]:
#set the gestures
MAIN_GESTURES = ['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']
MAIN_GESTURES

['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']

#### Predictive Feature

In [133]:
#lock the predictive feature
y_cmd = np.array([gestures_protocol['gesture'].loc[s] for s in gestures['SYNC'].values])

#### Shift Function

In [134]:
#apply the time interval shift function
y, summary = get_naive_centering(gestures, y_cmd)

In [135]:
#display
summary

['Оптимальные свдиги для концевых выборок:   1 и 1\n',
 'Accuracy/correlation на концевых выборках: 0.602; 0.578\n',
 'Размер оптимального сдвига (как среднего): 1']

### **Train-Test Split**

In [136]:
#select the features
X = gestures[gestures.columns].values

In [137]:
#use stratified samplifing to split the data evenly
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, 
    stratify=y, 
    random_state=0
)

#find the dimensions
print(f'train: {X_train.shape, y_train.shape} \n test: {X_test.shape, y_test.shape}')

train: ((16770, 65), (16770,)) 
 test: ((5591, 65), (5591,))


### **Model Learning**

### Random Forest with Optuna

In [138]:
%%time
#begin hyperparameters selection
#create review object
study_optuna_rf = optuna.create_study(study_name='RandomForest',
                                       direction='maximize')

#search for the best combination
study_optuna_rf.optimize(optuna_rf,
                         n_trials=5)

[I 2024-10-08 00:23:46,486] A new study created in memory with name: RandomForest
[I 2024-10-08 00:23:52,178] Trial 0 finished with value: 0.9587954680977937 and parameters: {'n_estimators': 200, 'max_depth': 15, 'min_samples_leaf': 4, 'criterion': 'entropy', 'max_features': 'log2'}. Best is trial 0 with value: 0.9587954680977937.
[I 2024-10-08 00:24:36,558] Trial 1 finished with value: 0.95855694692904 and parameters: {'n_estimators': 200, 'max_depth': 20, 'min_samples_leaf': 5, 'criterion': 'entropy', 'max_features': None}. Best is trial 0 with value: 0.9587954680977937.
[I 2024-10-08 00:25:36,274] Trial 2 finished with value: 0.9547406082289804 and parameters: {'n_estimators': 200, 'max_depth': 20, 'min_samples_leaf': 4, 'criterion': 'gini', 'max_features': None}. Best is trial 0 with value: 0.9587954680977937.
[I 2024-10-08 00:25:40,537] Trial 3 finished with value: 0.9236135957066189 and parameters: {'n_estimators': 150, 'max_depth': 10, 'min_samples_leaf': 5, 'criterion': 'entrop

CPU times: user 133 ms, sys: 80.9 ms, total: 214 ms
Wall time: 1min 59s


In [139]:
#find the best hyperparameters
print(f'Random Forest Hyperparameters: {study_optuna_rf.best_params}')

Random Forest Hyperparameters: {'n_estimators': 175, 'max_depth': 20, 'min_samples_leaf': 3, 'criterion': 'gini', 'max_features': 'log2'}


In [140]:
#find the metrics for test data
model_opt_rf = ensemble.RandomForestClassifier(**study_optuna_rf.best_params,
                                               random_state=42,
                                               )

#set the start_time
start_time = time.time()

#model learning
model_opt_rf.fit(X_train, y_train)
model_opt_rf.fit(X_test, y_test)

#calculate the model learning time
model_opt_rf_time = round(time.time() - start_time, 2)

#make a prediction
y_train_pred_rf = model_opt_rf.predict(X_train)
y_test_pred_rf = model_opt_rf.predict(X_test)

In [141]:
#display the metrics
print('train data:')
print(f'f1_score: {metrics.f1_score(y_train, y_train_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_train, y_train):.3f}')
print()
print('test data:')
print(f'f1_score: {metrics.f1_score(y_test, y_test_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_test, y_test):.3f}')

train data:
f1_score: 0.932
accuracy: 0.932

test data:
f1_score: 0.985
accuracy: 0.985


In [142]:
print(classification_report(y_train,
                            y_train_pred_rf,
                            target_names=MAIN_GESTURES))

print(classification_report(y_test,
                            y_test_pred_rf,
                            target_names=MAIN_GESTURES))

              precision    recall  f1-score   support

     Neutral       0.94      0.98      0.96      9881
        Open       0.96      0.85      0.90      1385
      Pistol       0.92      0.85      0.88      1378
       Thumb       0.90      0.93      0.92      1381
          OK       0.93      0.89      0.91      1375
        Grab       0.93      0.83      0.87      1370

    accuracy                           0.93     16770
   macro avg       0.93      0.89      0.91     16770
weighted avg       0.93      0.93      0.93     16770

              precision    recall  f1-score   support

     Neutral       0.98      0.99      0.99      3294
        Open       1.00      0.98      0.99       462
      Pistol       0.99      0.97      0.98       459
       Thumb       0.99      0.96      0.97       461
          OK       0.99      0.97      0.98       458
        Grab       0.98      0.98      0.98       457

    accuracy                           0.98      5591
   macro avg       0.99

In [143]:
#export the classification report into a dataframe
report_rf = classification_report(y_test,
                                  y_test_pred_rf,
                                  target_names=MAIN_GESTURES,
                                  output_dict=True)

test_rf = pd.DataFrame(report_rf)

In [144]:
#add model results to the ordered dict
review['palm_file'].append(current_file)
review['model_name'].append(model_opt_rf.__class__.__name__)
review['learning_time'].append(model_opt_rf_time)
review['hyperparameters'].append(study_optuna_rf.best_params)
review['F1-Score, train'].append(round(metrics.f1_score(y_train, y_train_pred_rf, average="micro"), 3))
review['F1-Score, test'].append(round(metrics.f1_score(y_test, y_test_pred_rf, average="micro"), 3))

review['F1-Score: Neutral'].append(test_rf.loc['f1-score'].iloc[0].round(3))
review['F1-Score: Open'].append(test_rf.loc['f1-score'].iloc[1].round(3))
review['F1-Score: Pistol'].append(test_rf.loc['f1-score'].iloc[2].round(3))
review['F1-Score: Thumb'].append(test_rf.loc['f1-score'].iloc[3].round(3))
review['F1-Score: OK'].append(test_rf.loc['f1-score'].iloc[4].round(3))
review['F1-Score: Grab'].append(test_rf.loc['f1-score'].iloc[5].round(3))

##### Palm File 7

In [145]:
gestures, current_file, palm_file = read_pilot(data=data_meta,
                                               file_number=next(n))

#### Protocol File

In [146]:
gestures_protocol = pd.read_csv(f'{"./data_csv/" + current_file}.protocol.csv', index_col=0)

#### Encoding

In [147]:
#FIT
le.fit(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

#TRANSFORM
gestures_protocol['gesture'] = le.transform(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

In [148]:
#set the gestures
MAIN_GESTURES = ['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']
MAIN_GESTURES

['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']

#### Predictive Feature

In [149]:
#lock the predictive feature
y_cmd = np.array([gestures_protocol['gesture'].loc[s] for s in gestures['SYNC'].values])

#### Shift Function

In [150]:
#apply the time interval shift function
y, summary = get_naive_centering(gestures, y_cmd)

In [151]:
#display
summary

['Оптимальные свдиги для концевых выборок:   1 и 1\n',
 'Accuracy/correlation на концевых выборках: 0.57; 0.594\n',
 'Размер оптимального сдвига (как среднего): 1']

### **Train-Test Split**

In [152]:
#select the features
X = gestures[gestures.columns].values

In [153]:
#use stratified samplifing to split the data evenly
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, 
    stratify=y, 
    random_state=0
)

#find the dimensions
print(f'train: {X_train.shape, y_train.shape} \n test: {X_test.shape, y_test.shape}')

train: ((16650, 65), (16650,)) 
 test: ((5550, 65), (5550,))


### **Model Learning**

### Random Forest with Optuna

In [154]:
%%time
#begin hyperparameters selection
#create review object
study_optuna_rf = optuna.create_study(study_name='RandomForest',
                                       direction='maximize')

#search for the best combination
study_optuna_rf.optimize(optuna_rf,
                         n_trials=5)

[I 2024-10-08 00:26:00,159] A new study created in memory with name: RandomForest
[I 2024-10-08 00:26:33,377] Trial 0 finished with value: 0.9448048048048048 and parameters: {'n_estimators': 150, 'max_depth': 15, 'min_samples_leaf': 5, 'criterion': 'entropy', 'max_features': None}. Best is trial 0 with value: 0.9448048048048048.
[I 2024-10-08 00:26:40,610] Trial 1 finished with value: 0.9606006006006007 and parameters: {'n_estimators': 200, 'max_depth': 20, 'min_samples_leaf': 4, 'criterion': 'gini', 'max_features': 'sqrt'}. Best is trial 1 with value: 0.9606006006006007.
[I 2024-10-08 00:27:08,650] Trial 2 finished with value: 0.8701501501501502 and parameters: {'n_estimators': 150, 'max_depth': 10, 'min_samples_leaf': 5, 'criterion': 'entropy', 'max_features': None}. Best is trial 1 with value: 0.9606006006006007.
[I 2024-10-08 00:27:12,094] Trial 3 finished with value: 0.8183183183183182 and parameters: {'n_estimators': 150, 'max_depth': 10, 'min_samples_leaf': 4, 'criterion': 'entr

CPU times: user 122 ms, sys: 73.6 ms, total: 196 ms
Wall time: 1min 52s


In [155]:
#find the best hyperparameters
print(f'Random Forest Hyperparameters: {study_optuna_rf.best_params}')

Random Forest Hyperparameters: {'n_estimators': 200, 'max_depth': 20, 'min_samples_leaf': 4, 'criterion': 'gini', 'max_features': 'sqrt'}


In [156]:
#find the metrics for test data
model_opt_rf = ensemble.RandomForestClassifier(**study_optuna_rf.best_params,
                                               random_state=42,
                                               )

#set the start_time
start_time = time.time()

#model learning
model_opt_rf.fit(X_train, y_train)
model_opt_rf.fit(X_test, y_test)

#calculate the model learning time
model_opt_rf_time = round(time.time() - start_time, 2)

#make a prediction
y_train_pred_rf = model_opt_rf.predict(X_train)
y_test_pred_rf = model_opt_rf.predict(X_test)

In [157]:
#display the metrics
print('train data:')
print(f'f1_score: {metrics.f1_score(y_train, y_train_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_train, y_train):.3f}')
print()
print('test data:')
print(f'f1_score: {metrics.f1_score(y_test, y_test_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_test, y_test):.3f}')

train data:
f1_score: 0.927
accuracy: 0.927

test data:
f1_score: 0.979
accuracy: 0.979


In [158]:
print(classification_report(y_train,
                            y_train_pred_rf,
                            target_names=MAIN_GESTURES))

print(classification_report(y_test,
                            y_test_pred_rf,
                            target_names=MAIN_GESTURES))

              precision    recall  f1-score   support

     Neutral       0.95      0.97      0.96      9780
        Open       0.94      0.82      0.88      1377
      Pistol       0.89      0.84      0.87      1370
       Thumb       0.85      0.91      0.88      1366
          OK       0.89      0.90      0.89      1379
        Grab       0.92      0.83      0.87      1378

    accuracy                           0.93     16650
   macro avg       0.91      0.88      0.89     16650
weighted avg       0.93      0.93      0.93     16650

              precision    recall  f1-score   support

     Neutral       0.98      0.99      0.98      3260
        Open       0.99      0.97      0.98       459
      Pistol       0.98      0.96      0.97       457
       Thumb       0.97      0.97      0.97       455
          OK       0.98      0.96      0.97       460
        Grab       0.98      0.97      0.97       459

    accuracy                           0.98      5550
   macro avg       0.98

In [159]:
#export the classification report into a dataframe
report_rf = classification_report(y_test,
                                  y_test_pred_rf,
                                  target_names=MAIN_GESTURES,
                                  output_dict=True)

test_rf = pd.DataFrame(report_rf)

In [160]:
#add model results to the ordered dict
review['palm_file'].append(current_file)
review['model_name'].append(model_opt_rf.__class__.__name__)
review['learning_time'].append(model_opt_rf_time)
review['hyperparameters'].append(study_optuna_rf.best_params)
review['F1-Score, train'].append(round(metrics.f1_score(y_train, y_train_pred_rf, average="micro"), 3))
review['F1-Score, test'].append(round(metrics.f1_score(y_test, y_test_pred_rf, average="micro"), 3))

review['F1-Score: Neutral'].append(test_rf.loc['f1-score'].iloc[0].round(3))
review['F1-Score: Open'].append(test_rf.loc['f1-score'].iloc[1].round(3))
review['F1-Score: Pistol'].append(test_rf.loc['f1-score'].iloc[2].round(3))
review['F1-Score: Thumb'].append(test_rf.loc['f1-score'].iloc[3].round(3))
review['F1-Score: OK'].append(test_rf.loc['f1-score'].iloc[4].round(3))
review['F1-Score: Grab'].append(test_rf.loc['f1-score'].iloc[5].round(3))

##### Palm File 8

In [161]:
gestures, current_file, palm_file = read_pilot(data=data_meta,
                                               file_number=next(n))

#### Protocol File

In [162]:
gestures_protocol = pd.read_csv(f'{"./data_csv/" + current_file}.protocol.csv', index_col=0)

#### Encoding

In [163]:
#FIT
le.fit(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

#TRANSFORM
gestures_protocol['gesture'] = le.transform(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

In [164]:
#set the gestures
MAIN_GESTURES = ['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']
MAIN_GESTURES

['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']

#### Predictive Feature

In [165]:
#lock the predictive feature
y_cmd = np.array([gestures_protocol['gesture'].loc[s] for s in gestures['SYNC'].values])

#### Shift Function

In [166]:
#apply the time interval shift function
y, summary = get_naive_centering(gestures, y_cmd)

In [167]:
#display
summary

['Оптимальные свдиги для концевых выборок:   1 и 1\n',
 'Accuracy/correlation на концевых выборках: 0.599; 0.58\n',
 'Размер оптимального сдвига (как среднего): 1']

### **Train-Test Split**

In [168]:
#select the features
X = gestures[gestures.columns].values

In [169]:
#use stratified samplifing to split the data evenly
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, 
    stratify=y, 
    random_state=0
)

#find the dimensions
print(f'train: {X_train.shape, y_train.shape} \n test: {X_test.shape, y_test.shape}')

train: ((8400, 65), (8400,)) 
 test: ((2800, 65), (2800,))


### **Model Learning**

### Random Forest with Optuna

In [170]:
%%time
#begin hyperparameters selection
#create review object
study_optuna_rf = optuna.create_study(study_name='RandomForest',
                                       direction='maximize')

#search for the best combination
study_optuna_rf.optimize(optuna_rf,
                         n_trials=5)

[I 2024-10-08 00:28:10,372] A new study created in memory with name: RandomForest
[I 2024-10-08 00:28:12,785] Trial 0 finished with value: 0.9620238095238095 and parameters: {'n_estimators': 125, 'max_depth': 15, 'min_samples_leaf': 5, 'criterion': 'gini', 'max_features': 'sqrt'}. Best is trial 0 with value: 0.9620238095238095.
[I 2024-10-08 00:28:16,717] Trial 1 finished with value: 0.968095238095238 and parameters: {'n_estimators': 200, 'max_depth': 20, 'min_samples_leaf': 3, 'criterion': 'gini', 'max_features': 'sqrt'}. Best is trial 1 with value: 0.968095238095238.
[I 2024-10-08 00:28:31,614] Trial 2 finished with value: 0.9564285714285715 and parameters: {'n_estimators': 150, 'max_depth': 10, 'min_samples_leaf': 5, 'criterion': 'entropy', 'max_features': None}. Best is trial 1 with value: 0.968095238095238.
[I 2024-10-08 00:28:49,262] Trial 3 finished with value: 0.9577380952380953 and parameters: {'n_estimators': 175, 'max_depth': 10, 'min_samples_leaf': 4, 'criterion': 'entropy'

CPU times: user 82 ms, sys: 47.6 ms, total: 130 ms
Wall time: 57 s


In [171]:
#find the best hyperparameters
print(f'Random Forest Hyperparameters: {study_optuna_rf.best_params}')

Random Forest Hyperparameters: {'n_estimators': 200, 'max_depth': 20, 'min_samples_leaf': 3, 'criterion': 'gini', 'max_features': 'sqrt'}


In [172]:
#find the metrics for test data
model_opt_rf = ensemble.RandomForestClassifier(**study_optuna_rf.best_params,
                                               random_state=42,
                                               )

#set the start_time
start_time = time.time()

#model learning
model_opt_rf.fit(X_train, y_train)
model_opt_rf.fit(X_test, y_test)

#calculate the model learning time
model_opt_rf_time = round(time.time() - start_time, 2)

#make a prediction
y_train_pred_rf = model_opt_rf.predict(X_train)
y_test_pred_rf = model_opt_rf.predict(X_test)

In [173]:
#display the metrics
print('train data:')
print(f'f1_score: {metrics.f1_score(y_train, y_train_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_train, y_train):.3f}')
print()
print('test data:')
print(f'f1_score: {metrics.f1_score(y_test, y_test_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_test, y_test):.3f}')

train data:
f1_score: 0.938
accuracy: 0.938

test data:
f1_score: 0.986
accuracy: 0.986


In [174]:
print(classification_report(y_train,
                            y_train_pred_rf,
                            target_names=MAIN_GESTURES))

print(classification_report(y_test,
                            y_test_pred_rf,
                            target_names=MAIN_GESTURES))

              precision    recall  f1-score   support

     Neutral       0.95      0.98      0.96      4966
        Open       0.92      0.94      0.93       688
      Pistol       0.93      0.83      0.88       687
       Thumb       0.92      0.89      0.90       685
          OK       0.94      0.85      0.89       688
        Grab       0.91      0.91      0.91       686

    accuracy                           0.94      8400
   macro avg       0.93      0.90      0.91      8400
weighted avg       0.94      0.94      0.94      8400

              precision    recall  f1-score   support

     Neutral       0.98      1.00      0.99      1655
        Open       0.99      0.98      0.98       229
      Pistol       1.00      0.94      0.97       229
       Thumb       0.99      0.97      0.98       229
          OK       1.00      0.98      0.99       229
        Grab       1.00      0.98      0.99       229

    accuracy                           0.99      2800
   macro avg       0.99

In [175]:
#export the classification report into a dataframe
report_rf = classification_report(y_test,
                                  y_test_pred_rf,
                                  target_names=MAIN_GESTURES,
                                  output_dict=True)

test_rf = pd.DataFrame(report_rf)

In [176]:
#add model results to the ordered dict
review['palm_file'].append(current_file)
review['model_name'].append(model_opt_rf.__class__.__name__)
review['learning_time'].append(model_opt_rf_time)
review['hyperparameters'].append(study_optuna_rf.best_params)
review['F1-Score, train'].append(round(metrics.f1_score(y_train, y_train_pred_rf, average="micro"), 3))
review['F1-Score, test'].append(round(metrics.f1_score(y_test, y_test_pred_rf, average="micro"), 3))

review['F1-Score: Neutral'].append(test_rf.loc['f1-score'].iloc[0].round(3))
review['F1-Score: Open'].append(test_rf.loc['f1-score'].iloc[1].round(3))
review['F1-Score: Pistol'].append(test_rf.loc['f1-score'].iloc[2].round(3))
review['F1-Score: Thumb'].append(test_rf.loc['f1-score'].iloc[3].round(3))
review['F1-Score: OK'].append(test_rf.loc['f1-score'].iloc[4].round(3))
review['F1-Score: Grab'].append(test_rf.loc['f1-score'].iloc[5].round(3))

##### Palm File 9

In [177]:
gestures, current_file, palm_file = read_pilot(data=data_meta,
                                               file_number=next(n))

#### Protocol File

In [178]:
gestures_protocol = pd.read_csv(f'{"./data_csv/" + current_file}.protocol.csv', index_col=0)

#### Encoding

In [179]:
#FIT
le.fit(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

#TRANSFORM
gestures_protocol['gesture'] = le.transform(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

In [180]:
#set the gestures
MAIN_GESTURES = ['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']
MAIN_GESTURES

['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']

#### Predictive Feature

In [181]:
#lock the predictive feature
y_cmd = np.array([gestures_protocol['gesture'].loc[s] for s in gestures['SYNC'].values])

#### Shift Function

In [182]:
#apply the time interval shift function
y, summary = get_naive_centering(gestures, y_cmd)

In [183]:
#display
summary

['Оптимальные свдиги для концевых выборок:   1 и 1\n',
 'Accuracy/correlation на концевых выборках: 0.602; 0.579\n',
 'Размер оптимального сдвига (как среднего): 1']

### **Train-Test Split**

In [184]:
#select the features
X = gestures[gestures.columns].values

In [185]:
#use stratified samplifing to split the data evenly
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, 
    stratify=y, 
    random_state=0
)

#find the dimensions
print(f'train: {X_train.shape, y_train.shape} \n test: {X_test.shape, y_test.shape}')

train: ((8402, 65), (8402,)) 
 test: ((2801, 65), (2801,))


### **Model Learning**

### Random Forest with Optuna

In [186]:
%%time
#begin hyperparameters selection
#create review object
study_optuna_rf = optuna.create_study(study_name='RandomForest',
                                       direction='maximize')

#search for the best combination
study_optuna_rf.optimize(optuna_rf,
                         n_trials=5)

[I 2024-10-08 00:29:19,810] A new study created in memory with name: RandomForest
[I 2024-10-08 00:29:21,441] Trial 0 finished with value: 0.9538210673918586 and parameters: {'n_estimators': 100, 'max_depth': 15, 'min_samples_leaf': 4, 'criterion': 'gini', 'max_features': 'log2'}. Best is trial 0 with value: 0.9538210673918586.
[I 2024-10-08 00:29:23,974] Trial 1 finished with value: 0.9682215801252088 and parameters: {'n_estimators': 125, 'max_depth': 20, 'min_samples_leaf': 4, 'criterion': 'gini', 'max_features': 'sqrt'}. Best is trial 1 with value: 0.9682215801252088.
[I 2024-10-08 00:29:34,834] Trial 2 finished with value: 0.9634611625732982 and parameters: {'n_estimators': 100, 'max_depth': 20, 'min_samples_leaf': 5, 'criterion': 'entropy', 'max_features': None}. Best is trial 1 with value: 0.9682215801252088.
[I 2024-10-08 00:29:36,941] Trial 3 finished with value: 0.9646509305685391 and parameters: {'n_estimators': 125, 'max_depth': 15, 'min_samples_leaf': 3, 'criterion': 'entro

CPU times: user 58.6 ms, sys: 32.6 ms, total: 91.3 ms
Wall time: 19 s


In [187]:
#find the best hyperparameters
print(f'Random Forest Hyperparameters: {study_optuna_rf.best_params}')

Random Forest Hyperparameters: {'n_estimators': 125, 'max_depth': 20, 'min_samples_leaf': 4, 'criterion': 'gini', 'max_features': 'sqrt'}


In [188]:
#find the metrics for test data
model_opt_rf = ensemble.RandomForestClassifier(**study_optuna_rf.best_params,
                                               random_state=42,
                                               )

#set the start_time
start_time = time.time()

#model learning
model_opt_rf.fit(X_train, y_train)
model_opt_rf.fit(X_test, y_test)

#calculate the model learning time
model_opt_rf_time = round(time.time() - start_time, 2)

#make a prediction
y_train_pred_rf = model_opt_rf.predict(X_train)
y_test_pred_rf = model_opt_rf.predict(X_test)

In [189]:
#display the metrics
print('train data:')
print(f'f1_score: {metrics.f1_score(y_train, y_train_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_train, y_train):.3f}')
print()
print('test data:')
print(f'f1_score: {metrics.f1_score(y_test, y_test_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_test, y_test):.3f}')

train data:
f1_score: 0.931
accuracy: 0.931

test data:
f1_score: 0.979
accuracy: 0.979


In [190]:
print(classification_report(y_train,
                            y_train_pred_rf,
                            target_names=MAIN_GESTURES))

print(classification_report(y_test,
                            y_test_pred_rf,
                            target_names=MAIN_GESTURES))

              precision    recall  f1-score   support

     Neutral       0.92      0.98      0.95      4971
        Open       0.93      0.91      0.92       685
      Pistol       0.96      0.76      0.85       686
       Thumb       0.95      0.90      0.93       688
          OK       0.94      0.84      0.88       686
        Grab       0.96      0.88      0.92       686

    accuracy                           0.93      8402
   macro avg       0.94      0.88      0.91      8402
weighted avg       0.93      0.93      0.93      8402

              precision    recall  f1-score   support

     Neutral       0.97      0.99      0.98      1657
        Open       0.98      0.96      0.97       229
      Pistol       0.99      0.93      0.96       228
       Thumb       1.00      0.97      0.98       230
          OK       0.99      0.96      0.98       228
        Grab       0.98      0.96      0.97       229

    accuracy                           0.98      2801
   macro avg       0.99

In [191]:
#export the classification report into a dataframe
report_rf = classification_report(y_test,
                                  y_test_pred_rf,
                                  target_names=MAIN_GESTURES,
                                  output_dict=True)

test_rf = pd.DataFrame(report_rf)

In [192]:
#add model results to the ordered dict
review['palm_file'].append(current_file)
review['model_name'].append(model_opt_rf.__class__.__name__)
review['learning_time'].append(model_opt_rf_time)
review['hyperparameters'].append(study_optuna_rf.best_params)
review['F1-Score, train'].append(round(metrics.f1_score(y_train, y_train_pred_rf, average="micro"), 3))
review['F1-Score, test'].append(round(metrics.f1_score(y_test, y_test_pred_rf, average="micro"), 3))

review['F1-Score: Neutral'].append(test_rf.loc['f1-score'].iloc[0].round(3))
review['F1-Score: Open'].append(test_rf.loc['f1-score'].iloc[1].round(3))
review['F1-Score: Pistol'].append(test_rf.loc['f1-score'].iloc[2].round(3))
review['F1-Score: Thumb'].append(test_rf.loc['f1-score'].iloc[3].round(3))
review['F1-Score: OK'].append(test_rf.loc['f1-score'].iloc[4].round(3))
review['F1-Score: Grab'].append(test_rf.loc['f1-score'].iloc[5].round(3))

##### Palm File 10

In [193]:
gestures, current_file, palm_file = read_pilot(data=data_meta,
                                               file_number=next(n))

#### Protocol File

In [194]:
gestures_protocol = pd.read_csv(f'{"./data_csv/" + current_file}.protocol.csv', index_col=0)

#### Encoding

In [195]:
#FIT
le.fit(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

#TRANSFORM
gestures_protocol['gesture'] = le.transform(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

In [196]:
#set the gestures
MAIN_GESTURES = ['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']
MAIN_GESTURES

['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']

#### Predictive Feature

In [197]:
#lock the predictive feature
y_cmd = np.array([gestures_protocol['gesture'].loc[s] for s in gestures['SYNC'].values])

#### Shift Function

In [198]:
#apply the time interval shift function
y, summary = get_naive_centering(gestures, y_cmd)

In [199]:
#display
summary

['Оптимальные свдиги для концевых выборок:   1 и 1\n',
 'Accuracy/correlation на концевых выборках: 0.601; 0.578\n',
 'Размер оптимального сдвига (как среднего): 1']

### **Train-Test Split**

In [200]:
#select the features
X = gestures[gestures.columns].values

In [201]:
#use stratified samplifing to split the data evenly
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, 
    stratify=y, 
    random_state=0
)

#find the dimensions
print(f'train: {X_train.shape, y_train.shape} \n test: {X_test.shape, y_test.shape}')

train: ((8396, 65), (8396,)) 
 test: ((2799, 65), (2799,))


### **Model Learning**

### Random Forest with Optuna

In [202]:
%%time
#begin hyperparameters selection
#create review object
study_optuna_rf = optuna.create_study(study_name='RandomForest',
                                       direction='maximize')

#search for the best combination
study_optuna_rf.optimize(optuna_rf,
                         n_trials=5)

[I 2024-10-08 00:29:48,679] A new study created in memory with name: RandomForest
[I 2024-10-08 00:30:10,034] Trial 0 finished with value: 0.9655794265293969 and parameters: {'n_estimators': 200, 'max_depth': 15, 'min_samples_leaf': 3, 'criterion': 'entropy', 'max_features': None}. Best is trial 0 with value: 0.9655794265293969.
[I 2024-10-08 00:30:12,620] Trial 1 finished with value: 0.9610532771774583 and parameters: {'n_estimators': 175, 'max_depth': 20, 'min_samples_leaf': 5, 'criterion': 'gini', 'max_features': 'log2'}. Best is trial 0 with value: 0.9655794265293969.
[I 2024-10-08 00:30:33,117] Trial 2 finished with value: 0.9642693354888113 and parameters: {'n_estimators': 200, 'max_depth': 20, 'min_samples_leaf': 4, 'criterion': 'entropy', 'max_features': None}. Best is trial 0 with value: 0.9655794265293969.
[I 2024-10-08 00:30:51,011] Trial 3 finished with value: 0.9590282622876428 and parameters: {'n_estimators': 150, 'max_depth': 15, 'min_samples_leaf': 4, 'criterion': 'gini

CPU times: user 95.1 ms, sys: 51.3 ms, total: 146 ms
Wall time: 1min 4s


In [203]:
#find the best hyperparameters
print(f'Random Forest Hyperparameters: {study_optuna_rf.best_params}')

Random Forest Hyperparameters: {'n_estimators': 100, 'max_depth': 20, 'min_samples_leaf': 4, 'criterion': 'entropy', 'max_features': 'sqrt'}


In [204]:
#find the metrics for test data
model_opt_rf = ensemble.RandomForestClassifier(**study_optuna_rf.best_params,
                                               random_state=42,
                                               )

#set the start_time
start_time = time.time()

#model learning
model_opt_rf.fit(X_train, y_train)
model_opt_rf.fit(X_test, y_test)

#calculate the model learning time
model_opt_rf_time = round(time.time() - start_time, 2)

#make a prediction
y_train_pred_rf = model_opt_rf.predict(X_train)
y_test_pred_rf = model_opt_rf.predict(X_test)

In [205]:
#display the metrics
print('train data:')
print(f'f1_score: {metrics.f1_score(y_train, y_train_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_train, y_train):.3f}')
print()
print('test data:')
print(f'f1_score: {metrics.f1_score(y_test, y_test_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_test, y_test):.3f}')

train data:
f1_score: 0.929
accuracy: 0.929

test data:
f1_score: 0.982
accuracy: 0.982


In [206]:
print(classification_report(y_train,
                            y_train_pred_rf,
                            target_names=MAIN_GESTURES))

print(classification_report(y_test,
                            y_test_pred_rf,
                            target_names=MAIN_GESTURES))

              precision    recall  f1-score   support

     Neutral       0.91      0.98      0.95      4965
        Open       0.97      0.85      0.91       685
      Pistol       0.96      0.82      0.88       688
       Thumb       0.94      0.86      0.90       687
          OK       0.95      0.88      0.91       686
        Grab       0.96      0.87      0.91       685

    accuracy                           0.93      8396
   macro avg       0.95      0.88      0.91      8396
weighted avg       0.93      0.93      0.93      8396

              precision    recall  f1-score   support

     Neutral       0.97      1.00      0.99      1655
        Open       0.99      0.97      0.98       229
      Pistol       1.00      0.94      0.97       229
       Thumb       1.00      0.97      0.98       229
          OK       1.00      0.97      0.98       229
        Grab       0.99      0.97      0.98       228

    accuracy                           0.98      2799
   macro avg       0.99

In [207]:
#export the classification report into a dataframe
report_rf = classification_report(y_test,
                                  y_test_pred_rf,
                                  target_names=MAIN_GESTURES,
                                  output_dict=True)

test_rf = pd.DataFrame(report_rf)

In [208]:
#add model results to the ordered dict
review['palm_file'].append(current_file)
review['model_name'].append(model_opt_rf.__class__.__name__)
review['learning_time'].append(model_opt_rf_time)
review['hyperparameters'].append(study_optuna_rf.best_params)
review['F1-Score, train'].append(round(metrics.f1_score(y_train, y_train_pred_rf, average="micro"), 3))
review['F1-Score, test'].append(round(metrics.f1_score(y_test, y_test_pred_rf, average="micro"), 3))

review['F1-Score: Neutral'].append(test_rf.loc['f1-score'].iloc[0].round(3))
review['F1-Score: Open'].append(test_rf.loc['f1-score'].iloc[1].round(3))
review['F1-Score: Pistol'].append(test_rf.loc['f1-score'].iloc[2].round(3))
review['F1-Score: Thumb'].append(test_rf.loc['f1-score'].iloc[3].round(3))
review['F1-Score: OK'].append(test_rf.loc['f1-score'].iloc[4].round(3))
review['F1-Score: Grab'].append(test_rf.loc['f1-score'].iloc[5].round(3))

##### Palm File 11

In [209]:
gestures, current_file, palm_file = read_pilot(data=data_meta,
                                               file_number=next(n))

#### Protocol File

In [210]:
gestures_protocol = pd.read_csv(f'{"./data_csv/" + current_file}.protocol.csv', index_col=0)

#### Encoding

In [211]:
#FIT
le.fit(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

#TRANSFORM
gestures_protocol['gesture'] = le.transform(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

In [212]:
#set the gestures
MAIN_GESTURES = ['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']
MAIN_GESTURES

['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']

#### Predictive Feature

In [213]:
#lock the predictive feature
y_cmd = np.array([gestures_protocol['gesture'].loc[s] for s in gestures['SYNC'].values])

#### Shift Function

In [214]:
#apply the time interval shift function
y, summary = get_naive_centering(gestures, y_cmd)

In [215]:
#display
summary

['Оптимальные свдиги для концевых выборок:   1 и 1\n',
 'Accuracy/correlation на концевых выборках: 0.599; 0.583\n',
 'Размер оптимального сдвига (как среднего): 1']

### **Train-Test Split**

In [216]:
#select the features
X = gestures[gestures.columns].values

In [217]:
#use stratified samplifing to split the data evenly
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, 
    stratify=y, 
    random_state=0
)

#find the dimensions
print(f'train: {X_train.shape, y_train.shape} \n test: {X_test.shape, y_test.shape}')

train: ((4288, 65), (4288,)) 
 test: ((1430, 65), (1430,))


### **Model Learning**

### Random Forest with Optuna

In [218]:
%%time
#begin hyperparameters selection
#create review object
study_optuna_rf = optuna.create_study(study_name='RandomForest',
                                       direction='maximize')

#search for the best combination
study_optuna_rf.optimize(optuna_rf,
                         n_trials=5)

[I 2024-10-08 00:31:01,266] A new study created in memory with name: RandomForest
[I 2024-10-08 00:31:03,008] Trial 0 finished with value: 0.9542916826464085 and parameters: {'n_estimators': 150, 'max_depth': 20, 'min_samples_leaf': 4, 'criterion': 'entropy', 'max_features': 'sqrt'}. Best is trial 0 with value: 0.9542916826464085.
[I 2024-10-08 00:31:04,529] Trial 1 finished with value: 0.9158119204793651 and parameters: {'n_estimators': 175, 'max_depth': 10, 'min_samples_leaf': 3, 'criterion': 'entropy', 'max_features': 'log2'}. Best is trial 0 with value: 0.9542916826464085.
[I 2024-10-08 00:31:05,292] Trial 2 finished with value: 0.8526123817839103 and parameters: {'n_estimators': 100, 'max_depth': 10, 'min_samples_leaf': 5, 'criterion': 'gini', 'max_features': 'log2'}. Best is trial 0 with value: 0.9542916826464085.
[I 2024-10-08 00:31:06,706] Trial 3 finished with value: 0.947994440409843 and parameters: {'n_estimators': 125, 'max_depth': 15, 'min_samples_leaf': 5, 'criterion': 'e

CPU times: user 39.5 ms, sys: 18.4 ms, total: 57.9 ms
Wall time: 6.93 s


In [219]:
#find the best hyperparameters
print(f'Random Forest Hyperparameters: {study_optuna_rf.best_params}')

Random Forest Hyperparameters: {'n_estimators': 125, 'max_depth': 20, 'min_samples_leaf': 4, 'criterion': 'entropy', 'max_features': 'sqrt'}


In [220]:
#find the metrics for test data
model_opt_rf = ensemble.RandomForestClassifier(**study_optuna_rf.best_params,
                                               random_state=42,
                                               )

#set the start_time
start_time = time.time()

#model learning
model_opt_rf.fit(X_train, y_train)
model_opt_rf.fit(X_test, y_test)

#calculate the model learning time
model_opt_rf_time = round(time.time() - start_time, 2)

#make a prediction
y_train_pred_rf = model_opt_rf.predict(X_train)
y_test_pred_rf = model_opt_rf.predict(X_test)

In [221]:
#display the metrics
print('train data:')
print(f'f1_score: {metrics.f1_score(y_train, y_train_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_train, y_train):.3f}')
print()
print('test data:')
print(f'f1_score: {metrics.f1_score(y_test, y_test_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_test, y_test):.3f}')

train data:
f1_score: 0.910
accuracy: 0.910

test data:
f1_score: 0.988
accuracy: 0.988


In [222]:
print(classification_report(y_train,
                            y_train_pred_rf,
                            target_names=MAIN_GESTURES))

print(classification_report(y_test,
                            y_test_pred_rf,
                            target_names=MAIN_GESTURES))

              precision    recall  f1-score   support

     Neutral       0.88      0.98      0.93      2568
        Open       0.96      0.81      0.87       342
      Pistol       0.97      0.84      0.90       344
       Thumb       0.98      0.72      0.83       345
          OK       0.96      0.86      0.91       343
        Grab       0.96      0.77      0.86       346

    accuracy                           0.91      4288
   macro avg       0.95      0.83      0.88      4288
weighted avg       0.92      0.91      0.91      4288

              precision    recall  f1-score   support

     Neutral       0.98      1.00      0.99       857
        Open       1.00      0.97      0.99       114
      Pistol       1.00      0.97      0.99       115
       Thumb       1.00      0.97      0.98       115
          OK       0.99      0.97      0.98       114
        Grab       1.00      0.97      0.99       115

    accuracy                           0.99      1430
   macro avg       1.00

In [223]:
#export the classification report into a dataframe
report_rf = classification_report(y_test,
                                  y_test_pred_rf,
                                  target_names=MAIN_GESTURES,
                                  output_dict=True)

test_rf = pd.DataFrame(report_rf)

In [224]:
#add model results to the ordered dict
review['palm_file'].append(current_file)
review['model_name'].append(model_opt_rf.__class__.__name__)
review['learning_time'].append(model_opt_rf_time)
review['hyperparameters'].append(study_optuna_rf.best_params)
review['F1-Score, train'].append(round(metrics.f1_score(y_train, y_train_pred_rf, average="micro"), 3))
review['F1-Score, test'].append(round(metrics.f1_score(y_test, y_test_pred_rf, average="micro"), 3))

review['F1-Score: Neutral'].append(test_rf.loc['f1-score'].iloc[0].round(3))
review['F1-Score: Open'].append(test_rf.loc['f1-score'].iloc[1].round(3))
review['F1-Score: Pistol'].append(test_rf.loc['f1-score'].iloc[2].round(3))
review['F1-Score: Thumb'].append(test_rf.loc['f1-score'].iloc[3].round(3))
review['F1-Score: OK'].append(test_rf.loc['f1-score'].iloc[4].round(3))
review['F1-Score: Grab'].append(test_rf.loc['f1-score'].iloc[5].round(3))

##### Palm File 12

In [225]:
gestures, current_file, palm_file = read_pilot(data=data_meta,
                                               file_number=next(n))

#### Protocol File

In [226]:
gestures_protocol = pd.read_csv(f'{"./data_csv/" + current_file}.protocol.csv', index_col=0)

#### Encoding

In [227]:
#FIT
le.fit(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

#TRANSFORM
gestures_protocol['gesture'] = le.transform(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

In [228]:
#set the gestures
MAIN_GESTURES = ['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']
MAIN_GESTURES

['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']

#### Predictive Feature

In [229]:
#lock the predictive feature
y_cmd = np.array([gestures_protocol['gesture'].loc[s] for s in gestures['SYNC'].values])

#### Shift Function

In [230]:
#apply the time interval shift function
y, summary = get_naive_centering(gestures, y_cmd)

In [231]:
#display
summary

['Оптимальные свдиги для концевых выборок:   1 и 1\n',
 'Accuracy/correlation на концевых выборках: 0.604; 0.581\n',
 'Размер оптимального сдвига (как среднего): 1']

### **Train-Test Split**

In [232]:
#select the features
X = gestures[gestures.columns].values

In [233]:
#use stratified samplifing to split the data evenly
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, 
    stratify=y, 
    random_state=0
)

#find the dimensions
print(f'train: {X_train.shape, y_train.shape} \n test: {X_test.shape, y_test.shape}')

train: ((4275, 65), (4275,)) 
 test: ((1426, 65), (1426,))


### **Model Learning**

### Random Forest with Optuna

In [234]:
%%time
#begin hyperparameters selection
#create review object
study_optuna_rf = optuna.create_study(study_name='RandomForest',
                                       direction='maximize')

#search for the best combination
study_optuna_rf.optimize(optuna_rf,
                         n_trials=5)

[I 2024-10-08 00:31:15,831] A new study created in memory with name: RandomForest
[I 2024-10-08 00:31:17,900] Trial 0 finished with value: 0.9604678362573098 and parameters: {'n_estimators': 200, 'max_depth': 20, 'min_samples_leaf': 5, 'criterion': 'gini', 'max_features': 'sqrt'}. Best is trial 0 with value: 0.9604678362573098.
[I 2024-10-08 00:31:18,988] Trial 1 finished with value: 0.888421052631579 and parameters: {'n_estimators': 150, 'max_depth': 10, 'min_samples_leaf': 5, 'criterion': 'gini', 'max_features': 'log2'}. Best is trial 0 with value: 0.9604678362573098.
[I 2024-10-08 00:31:20,634] Trial 2 finished with value: 0.9574269005847954 and parameters: {'n_estimators': 175, 'max_depth': 15, 'min_samples_leaf': 3, 'criterion': 'entropy', 'max_features': 'log2'}. Best is trial 0 with value: 0.9604678362573098.
[I 2024-10-08 00:31:30,876] Trial 3 finished with value: 0.9560233918128656 and parameters: {'n_estimators': 175, 'max_depth': 15, 'min_samples_leaf': 4, 'criterion': 'entr

CPU times: user 52 ms, sys: 25.6 ms, total: 77.6 ms
Wall time: 21.9 s


In [235]:
#find the best hyperparameters
print(f'Random Forest Hyperparameters: {study_optuna_rf.best_params}')

Random Forest Hyperparameters: {'n_estimators': 200, 'max_depth': 20, 'min_samples_leaf': 5, 'criterion': 'gini', 'max_features': 'sqrt'}


In [236]:
#find the metrics for test data
model_opt_rf = ensemble.RandomForestClassifier(**study_optuna_rf.best_params,
                                               random_state=42,
                                               )

#set the start_time
start_time = time.time()

#model learning
model_opt_rf.fit(X_train, y_train)
model_opt_rf.fit(X_test, y_test)

#calculate the model learning time
model_opt_rf_time = round(time.time() - start_time, 2)

#make a prediction
y_train_pred_rf = model_opt_rf.predict(X_train)
y_test_pred_rf = model_opt_rf.predict(X_test)

In [237]:
#display the metrics
print('train data:')
print(f'f1_score: {metrics.f1_score(y_train, y_train_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_train, y_train):.3f}')
print()
print('test data:')
print(f'f1_score: {metrics.f1_score(y_test, y_test_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_test, y_test):.3f}')

train data:
f1_score: 0.928
accuracy: 0.928

test data:
f1_score: 0.989
accuracy: 0.989


In [238]:
print(classification_report(y_train,
                            y_train_pred_rf,
                            target_names=MAIN_GESTURES))

print(classification_report(y_test,
                            y_test_pred_rf,
                            target_names=MAIN_GESTURES))

              precision    recall  f1-score   support

     Neutral       0.91      0.98      0.95      2555
        Open       0.97      0.83      0.89       345
      Pistol       0.93      0.84      0.88       343
       Thumb       0.97      0.85      0.91       344
          OK       0.96      0.89      0.92       343
        Grab       0.94      0.86      0.90       345

    accuracy                           0.93      4275
   macro avg       0.95      0.87      0.91      4275
weighted avg       0.93      0.93      0.93      4275

              precision    recall  f1-score   support

     Neutral       0.99      0.99      0.99       852
        Open       0.99      0.99      0.99       115
      Pistol       0.98      0.99      0.99       114
       Thumb       0.97      0.99      0.98       115
          OK       0.98      0.97      0.98       115
        Grab       0.99      0.99      0.99       115

    accuracy                           0.99      1426
   macro avg       0.99

In [239]:
#export the classification report into a dataframe
report_rf = classification_report(y_test,
                                  y_test_pred_rf,
                                  target_names=MAIN_GESTURES,
                                  output_dict=True)

test_rf = pd.DataFrame(report_rf)

In [240]:
#add model results to the ordered dict
review['palm_file'].append(current_file)
review['model_name'].append(model_opt_rf.__class__.__name__)
review['learning_time'].append(model_opt_rf_time)
review['hyperparameters'].append(study_optuna_rf.best_params)
review['F1-Score, train'].append(round(metrics.f1_score(y_train, y_train_pred_rf, average="micro"), 3))
review['F1-Score, test'].append(round(metrics.f1_score(y_test, y_test_pred_rf, average="micro"), 3))

review['F1-Score: Neutral'].append(test_rf.loc['f1-score'].iloc[0].round(3))
review['F1-Score: Open'].append(test_rf.loc['f1-score'].iloc[1].round(3))
review['F1-Score: Pistol'].append(test_rf.loc['f1-score'].iloc[2].round(3))
review['F1-Score: Thumb'].append(test_rf.loc['f1-score'].iloc[3].round(3))
review['F1-Score: OK'].append(test_rf.loc['f1-score'].iloc[4].round(3))
review['F1-Score: Grab'].append(test_rf.loc['f1-score'].iloc[5].round(3))

##### Palm File 13

In [241]:
gestures, current_file, palm_file = read_pilot(data=data_meta,
                                               file_number=next(n))

#### Protocol File

In [242]:
gestures_protocol = pd.read_csv(f'{"./data_csv/" + current_file}.protocol.csv', index_col=0)

#### Encoding

In [243]:
#FIT
le.fit(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

#TRANSFORM
gestures_protocol['gesture'] = le.transform(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

In [244]:
#set the gestures
MAIN_GESTURES = ['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']
MAIN_GESTURES

['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']

#### Predictive Feature

In [245]:
#lock the predictive feature
y_cmd = np.array([gestures_protocol['gesture'].loc[s] for s in gestures['SYNC'].values])

#### Shift Function

In [246]:
#apply the time interval shift function
y, summary = get_naive_centering(gestures, y_cmd)

In [247]:
#display
summary

['Оптимальные свдиги для концевых выборок:   1 и 1\n',
 'Accuracy/correlation на концевых выборках: 0.605; 0.579\n',
 'Размер оптимального сдвига (как среднего): 1']

### **Train-Test Split**

In [248]:
#select the features
X = gestures[gestures.columns].values

In [249]:
#use stratified samplifing to split the data evenly
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, 
    stratify=y, 
    random_state=0
)

#find the dimensions
print(f'train: {X_train.shape, y_train.shape} \n test: {X_test.shape, y_test.shape}')

train: ((4276, 65), (4276,)) 
 test: ((1426, 65), (1426,))


### **Model Learning**

### Random Forest with Optuna

In [250]:
%%time
#begin hyperparameters selection
#create review object
study_optuna_rf = optuna.create_study(study_name='RandomForest',
                                       direction='maximize')

#search for the best combination
study_optuna_rf.optimize(optuna_rf,
                         n_trials=5)

[I 2024-10-08 00:31:46,548] A new study created in memory with name: RandomForest
[I 2024-10-08 00:31:51,408] Trial 0 finished with value: 0.9611783352462151 and parameters: {'n_estimators': 100, 'max_depth': 20, 'min_samples_leaf': 5, 'criterion': 'entropy', 'max_features': None}. Best is trial 0 with value: 0.9611783352462151.
[I 2024-10-08 00:31:52,699] Trial 1 finished with value: 0.9529947532382359 and parameters: {'n_estimators': 150, 'max_depth': 10, 'min_samples_leaf': 3, 'criterion': 'gini', 'max_features': 'sqrt'}. Best is trial 0 with value: 0.9611783352462151.
[I 2024-10-08 00:31:53,729] Trial 2 finished with value: 0.9600071049898891 and parameters: {'n_estimators': 125, 'max_depth': 20, 'min_samples_leaf': 5, 'criterion': 'entropy', 'max_features': 'log2'}. Best is trial 0 with value: 0.9611783352462151.
[I 2024-10-08 00:32:00,425] Trial 3 finished with value: 0.9637508881237361 and parameters: {'n_estimators': 100, 'max_depth': 15, 'min_samples_leaf': 3, 'criterion': 'gi

CPU times: user 50 ms, sys: 23.8 ms, total: 73.8 ms
Wall time: 15.5 s


In [251]:
#find the best hyperparameters
print(f'Random Forest Hyperparameters: {study_optuna_rf.best_params}')

Random Forest Hyperparameters: {'n_estimators': 100, 'max_depth': 15, 'min_samples_leaf': 3, 'criterion': 'gini', 'max_features': None}


In [252]:
#find the metrics for test data
model_opt_rf = ensemble.RandomForestClassifier(**study_optuna_rf.best_params,
                                               random_state=42,
                                               )

#set the start_time
start_time = time.time()

#model learning
model_opt_rf.fit(X_train, y_train)
model_opt_rf.fit(X_test, y_test)

#calculate the model learning time
model_opt_rf_time = round(time.time() - start_time, 2)

#make a prediction
y_train_pred_rf = model_opt_rf.predict(X_train)
y_test_pred_rf = model_opt_rf.predict(X_test)

In [253]:
#display the metrics
print('train data:')
print(f'f1_score: {metrics.f1_score(y_train, y_train_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_train, y_train):.3f}')
print()
print('test data:')
print(f'f1_score: {metrics.f1_score(y_test, y_test_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_test, y_test):.3f}')

train data:
f1_score: 0.962
accuracy: 0.962

test data:
f1_score: 0.978
accuracy: 0.978


In [254]:
print(classification_report(y_train,
                            y_train_pred_rf,
                            target_names=MAIN_GESTURES))

print(classification_report(y_test,
                            y_test_pred_rf,
                            target_names=MAIN_GESTURES))

              precision    recall  f1-score   support

     Neutral       0.98      0.97      0.97      2563
        Open       0.97      0.97      0.97       342
      Pistol       0.93      0.94      0.93       343
       Thumb       0.90      0.96      0.93       343
          OK       0.94      0.94      0.94       342
        Grab       0.97      0.94      0.96       343

    accuracy                           0.96      4276
   macro avg       0.95      0.95      0.95      4276
weighted avg       0.96      0.96      0.96      4276

              precision    recall  f1-score   support

     Neutral       0.97      0.99      0.98       855
        Open       0.98      0.96      0.97       114
      Pistol       0.97      0.98      0.98       115
       Thumb       0.99      0.95      0.97       114
          OK       0.99      0.93      0.96       114
        Grab       0.98      0.96      0.97       114

    accuracy                           0.98      1426
   macro avg       0.98

In [255]:
#export the classification report into a dataframe
report_rf = classification_report(y_test,
                                  y_test_pred_rf,
                                  target_names=MAIN_GESTURES,
                                  output_dict=True)

test_rf = pd.DataFrame(report_rf)

In [256]:
#add model results to the ordered dict
review['palm_file'].append(current_file)
review['model_name'].append(model_opt_rf.__class__.__name__)
review['learning_time'].append(model_opt_rf_time)
review['hyperparameters'].append(study_optuna_rf.best_params)
review['F1-Score, train'].append(round(metrics.f1_score(y_train, y_train_pred_rf, average="micro"), 3))
review['F1-Score, test'].append(round(metrics.f1_score(y_test, y_test_pred_rf, average="micro"), 3))

review['F1-Score: Neutral'].append(test_rf.loc['f1-score'].iloc[0].round(3))
review['F1-Score: Open'].append(test_rf.loc['f1-score'].iloc[1].round(3))
review['F1-Score: Pistol'].append(test_rf.loc['f1-score'].iloc[2].round(3))
review['F1-Score: Thumb'].append(test_rf.loc['f1-score'].iloc[3].round(3))
review['F1-Score: OK'].append(test_rf.loc['f1-score'].iloc[4].round(3))
review['F1-Score: Grab'].append(test_rf.loc['f1-score'].iloc[5].round(3))

##### Palm File 14

In [257]:
gestures, current_file, palm_file = read_pilot(data=data_meta,
                                               file_number=next(n))

#### Protocol File

In [258]:
gestures_protocol = pd.read_csv(f'{"./data_csv/" + current_file}.protocol.csv', index_col=0)

#### Encoding

In [259]:
#FIT
le.fit(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

#TRANSFORM
gestures_protocol['gesture'] = le.transform(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

In [260]:
#set the gestures
MAIN_GESTURES = ['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']
MAIN_GESTURES

['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']

#### Predictive Feature

In [261]:
#lock the predictive feature
y_cmd = np.array([gestures_protocol['gesture'].loc[s] for s in gestures['SYNC'].values])

#### Shift Function

In [262]:
#apply the time interval shift function
y, summary = get_naive_centering(gestures, y_cmd)

In [263]:
#display
summary

['Оптимальные свдиги для концевых выборок:   20 и 1\n',
 'Accuracy/correlation на концевых выборках: 0.627; 0.678\n',
 'Размер оптимального сдвига (как среднего): 10']

### **Train-Test Split**

In [264]:
#select the features
X = gestures[gestures.columns].values

In [265]:
#use stratified samplifing to split the data evenly
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, 
    stratify=y, 
    random_state=0
)

#find the dimensions
print(f'train: {X_train.shape, y_train.shape} \n test: {X_test.shape, y_test.shape}')

train: ((14676, 65), (14676,)) 
 test: ((4892, 65), (4892,))


### **Model Learning**

### Random Forest with Optuna

In [266]:
%%time
#begin hyperparameters selection
#create review object
study_optuna_rf = optuna.create_study(study_name='RandomForest',
                                       direction='maximize')

#search for the best combination
study_optuna_rf.optimize(optuna_rf,
                         n_trials=5)

[I 2024-10-08 00:32:16,849] A new study created in memory with name: RandomForest
[I 2024-10-08 00:32:20,526] Trial 0 finished with value: 0.8965660844176039 and parameters: {'n_estimators': 125, 'max_depth': 15, 'min_samples_leaf': 3, 'criterion': 'gini', 'max_features': 'sqrt'}. Best is trial 0 with value: 0.8965660844176039.
[I 2024-10-08 00:32:54,705] Trial 1 finished with value: 0.9200056166997015 and parameters: {'n_estimators': 175, 'max_depth': 20, 'min_samples_leaf': 5, 'criterion': 'entropy', 'max_features': None}. Best is trial 1 with value: 0.9200056166997015.
[I 2024-10-08 00:32:57,732] Trial 2 finished with value: 0.8712866187931987 and parameters: {'n_estimators': 150, 'max_depth': 10, 'min_samples_leaf': 4, 'criterion': 'entropy', 'max_features': 'log2'}. Best is trial 1 with value: 0.9200056166997015.
[I 2024-10-08 00:33:01,383] Trial 3 finished with value: 0.8965660844176039 and parameters: {'n_estimators': 125, 'max_depth': 15, 'min_samples_leaf': 3, 'criterion': 'gi

CPU times: user 84.5 ms, sys: 48.4 ms, total: 133 ms
Wall time: 48 s


In [267]:
#find the best hyperparameters
print(f'Random Forest Hyperparameters: {study_optuna_rf.best_params}')

Random Forest Hyperparameters: {'n_estimators': 175, 'max_depth': 20, 'min_samples_leaf': 5, 'criterion': 'entropy', 'max_features': None}


In [268]:
#find the metrics for test data
model_opt_rf = ensemble.RandomForestClassifier(**study_optuna_rf.best_params,
                                               random_state=42,
                                               )

#set the start_time
start_time = time.time()

#model learning
model_opt_rf.fit(X_train, y_train)
model_opt_rf.fit(X_test, y_test)

#calculate the model learning time
model_opt_rf_time = round(time.time() - start_time, 2)

#make a prediction
y_train_pred_rf = model_opt_rf.predict(X_train)
y_test_pred_rf = model_opt_rf.predict(X_test)

In [269]:
#display the metrics
print('train data:')
print(f'f1_score: {metrics.f1_score(y_train, y_train_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_train, y_train):.3f}')
print()
print('test data:')
print(f'f1_score: {metrics.f1_score(y_test, y_test_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_test, y_test):.3f}')

train data:
f1_score: 0.887
accuracy: 0.887

test data:
f1_score: 0.969
accuracy: 0.969


In [270]:
print(classification_report(y_train,
                            y_train_pred_rf,
                            target_names=MAIN_GESTURES))

print(classification_report(y_test,
                            y_test_pred_rf,
                            target_names=MAIN_GESTURES))

              precision    recall  f1-score   support

     Neutral       0.88      0.97      0.92      9494
        Open       0.94      0.78      0.86      1035
      Pistol       0.93      0.74      0.82      1031
       Thumb       0.90      0.70      0.79      1035
          OK       0.84      0.70      0.77      1035
        Grab       0.89      0.79      0.84      1046

    accuracy                           0.89     14676
   macro avg       0.90      0.78      0.83     14676
weighted avg       0.89      0.89      0.88     14676

              precision    recall  f1-score   support

     Neutral       0.96      0.99      0.98      3165
        Open       0.97      0.92      0.95       345
      Pistol       0.99      0.92      0.95       343
       Thumb       0.99      0.92      0.95       345
          OK       0.98      0.92      0.95       345
        Grab       0.99      0.93      0.96       349

    accuracy                           0.97      4892
   macro avg       0.98

In [271]:
#export the classification report into a dataframe
report_rf = classification_report(y_test,
                                  y_test_pred_rf,
                                  target_names=MAIN_GESTURES,
                                  output_dict=True)

test_rf = pd.DataFrame(report_rf)

In [272]:
#add model results to the ordered dict
review['palm_file'].append(current_file)
review['model_name'].append(model_opt_rf.__class__.__name__)
review['learning_time'].append(model_opt_rf_time)
review['hyperparameters'].append(study_optuna_rf.best_params)
review['F1-Score, train'].append(round(metrics.f1_score(y_train, y_train_pred_rf, average="micro"), 3))
review['F1-Score, test'].append(round(metrics.f1_score(y_test, y_test_pred_rf, average="micro"), 3))

review['F1-Score: Neutral'].append(test_rf.loc['f1-score'].iloc[0].round(3))
review['F1-Score: Open'].append(test_rf.loc['f1-score'].iloc[1].round(3))
review['F1-Score: Pistol'].append(test_rf.loc['f1-score'].iloc[2].round(3))
review['F1-Score: Thumb'].append(test_rf.loc['f1-score'].iloc[3].round(3))
review['F1-Score: OK'].append(test_rf.loc['f1-score'].iloc[4].round(3))
review['F1-Score: Grab'].append(test_rf.loc['f1-score'].iloc[5].round(3))

##### Palm File 15

In [273]:
gestures, current_file, palm_file = read_pilot(data=data_meta,
                                               file_number=next(n))

#### Protocol File

In [274]:
gestures_protocol = pd.read_csv(f'{"./data_csv/" + current_file}.protocol.csv', index_col=0)

#### Encoding

In [275]:
#FIT
le.fit(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

#TRANSFORM
gestures_protocol['gesture'] = le.transform(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

In [276]:
#set the gestures
MAIN_GESTURES = ['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']
MAIN_GESTURES

['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']

#### Predictive Feature

In [277]:
#lock the predictive feature
y_cmd = np.array([gestures_protocol['gesture'].loc[s] for s in gestures['SYNC'].values])

#### Shift Function

In [278]:
#apply the time interval shift function
y, summary = get_naive_centering(gestures, y_cmd)

In [279]:
#display
summary

['Оптимальные свдиги для концевых выборок:   20 и 3\n',
 'Accuracy/correlation на концевых выборках: 0.619; 0.677\n',
 'Размер оптимального сдвига (как среднего): 12']

### **Train-Test Split**

In [280]:
#select the features
X = gestures[gestures.columns].values

In [281]:
#use stratified samplifing to split the data evenly
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, 
    stratify=y, 
    random_state=0
)

#find the dimensions
print(f'train: {X_train.shape, y_train.shape} \n test: {X_test.shape, y_test.shape}')

train: ((14674, 65), (14674,)) 
 test: ((4892, 65), (4892,))


### **Model Learning**

### Random Forest with Optuna

In [282]:
%%time
#begin hyperparameters selection
#create review object
study_optuna_rf = optuna.create_study(study_name='RandomForest',
                                       direction='maximize')

#search for the best combination
study_optuna_rf.optimize(optuna_rf,
                         n_trials=5)

[I 2024-10-08 00:34:05,749] A new study created in memory with name: RandomForest
[I 2024-10-08 00:34:10,246] Trial 0 finished with value: 0.9291945341522583 and parameters: {'n_estimators': 175, 'max_depth': 20, 'min_samples_leaf': 5, 'criterion': 'entropy', 'max_features': 'log2'}. Best is trial 0 with value: 0.9291945341522583.
[I 2024-10-08 00:34:13,122] Trial 1 finished with value: 0.9176774676035763 and parameters: {'n_estimators': 125, 'max_depth': 15, 'min_samples_leaf': 3, 'criterion': 'gini', 'max_features': 'log2'}. Best is trial 0 with value: 0.9291945341522583.
[I 2024-10-08 00:34:57,172] Trial 2 finished with value: 0.945754677870563 and parameters: {'n_estimators': 200, 'max_depth': 20, 'min_samples_leaf': 5, 'criterion': 'entropy', 'max_features': None}. Best is trial 2 with value: 0.945754677870563.
[I 2024-10-08 00:35:42,565] Trial 3 finished with value: 0.9488212567455051 and parameters: {'n_estimators': 200, 'max_depth': 20, 'min_samples_leaf': 3, 'criterion': 'entr

CPU times: user 126 ms, sys: 74.4 ms, total: 201 ms
Wall time: 1min 39s


In [283]:
#find the best hyperparameters
print(f'Random Forest Hyperparameters: {study_optuna_rf.best_params}')

Random Forest Hyperparameters: {'n_estimators': 200, 'max_depth': 20, 'min_samples_leaf': 3, 'criterion': 'entropy', 'max_features': None}


In [284]:
#find the metrics for test data
model_opt_rf = ensemble.RandomForestClassifier(**study_optuna_rf.best_params,
                                               random_state=42,
                                               )

#set the start_time
start_time = time.time()

#model learning
model_opt_rf.fit(X_train, y_train)
model_opt_rf.fit(X_test, y_test)

#calculate the model learning time
model_opt_rf_time = round(time.time() - start_time, 2)

#make a prediction
y_train_pred_rf = model_opt_rf.predict(X_train)
y_test_pred_rf = model_opt_rf.predict(X_test)

In [285]:
#display the metrics
print('train data:')
print(f'f1_score: {metrics.f1_score(y_train, y_train_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_train, y_train):.3f}')
print()
print('test data:')
print(f'f1_score: {metrics.f1_score(y_test, y_test_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_test, y_test):.3f}')

train data:
f1_score: 0.929
accuracy: 0.929

test data:
f1_score: 0.990
accuracy: 0.990


In [286]:
print(classification_report(y_train,
                            y_train_pred_rf,
                            target_names=MAIN_GESTURES))

print(classification_report(y_test,
                            y_test_pred_rf,
                            target_names=MAIN_GESTURES))

              precision    recall  f1-score   support

     Neutral       0.92      0.98      0.95      9492
        Open       0.99      0.88      0.93      1035
      Pistol       0.96      0.83      0.89      1041
       Thumb       0.93      0.81      0.86      1024
          OK       0.94      0.78      0.85      1043
        Grab       0.95      0.85      0.90      1039

    accuracy                           0.93     14674
   macro avg       0.95      0.86      0.90     14674
weighted avg       0.93      0.93      0.93     14674

              precision    recall  f1-score   support

     Neutral       0.99      1.00      0.99      3164
        Open       1.00      0.97      0.99       345
      Pistol       1.00      0.97      0.98       347
       Thumb       1.00      0.98      0.99       342
          OK       0.99      0.98      0.99       348
        Grab       1.00      0.97      0.98       346

    accuracy                           0.99      4892
   macro avg       0.99

In [287]:
#export the classification report into a dataframe
report_rf = classification_report(y_test,
                                  y_test_pred_rf,
                                  target_names=MAIN_GESTURES,
                                  output_dict=True)

test_rf = pd.DataFrame(report_rf)

In [288]:
#add model results to the ordered dict
review['palm_file'].append(current_file)
review['model_name'].append(model_opt_rf.__class__.__name__)
review['learning_time'].append(model_opt_rf_time)
review['hyperparameters'].append(study_optuna_rf.best_params)
review['F1-Score, train'].append(round(metrics.f1_score(y_train, y_train_pred_rf, average="micro"), 3))
review['F1-Score, test'].append(round(metrics.f1_score(y_test, y_test_pred_rf, average="micro"), 3))

review['F1-Score: Neutral'].append(test_rf.loc['f1-score'].iloc[0].round(3))
review['F1-Score: Open'].append(test_rf.loc['f1-score'].iloc[1].round(3))
review['F1-Score: Pistol'].append(test_rf.loc['f1-score'].iloc[2].round(3))
review['F1-Score: Thumb'].append(test_rf.loc['f1-score'].iloc[3].round(3))
review['F1-Score: OK'].append(test_rf.loc['f1-score'].iloc[4].round(3))
review['F1-Score: Grab'].append(test_rf.loc['f1-score'].iloc[5].round(3))

##### Palm File 16

In [289]:
gestures, current_file, palm_file = read_pilot(data=data_meta,
                                               file_number=next(n))

#### Protocol File

In [290]:
gestures_protocol = pd.read_csv(f'{"./data_csv/" + current_file}.protocol.csv', index_col=0)

#### Encoding

In [291]:
#FIT
le.fit(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

#TRANSFORM
gestures_protocol['gesture'] = le.transform(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

In [292]:
#set the gestures
MAIN_GESTURES = ['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']
MAIN_GESTURES

['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']

#### Predictive Feature

In [293]:
#lock the predictive feature
y_cmd = np.array([gestures_protocol['gesture'].loc[s] for s in gestures['SYNC'].values])

#### Shift Function

In [294]:
#apply the time interval shift function
y, summary = get_naive_centering(gestures, y_cmd)

In [295]:
#display
summary

['Оптимальные свдиги для концевых выборок:   20 и 1\n',
 'Accuracy/correlation на концевых выборках: 0.616; 0.679\n',
 'Размер оптимального сдвига (как среднего): 10']

### **Train-Test Split**

In [296]:
#select the features
X = gestures[gestures.columns].values

In [297]:
#use stratified samplifing to split the data evenly
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, 
    stratify=y, 
    random_state=0
)

#find the dimensions
print(f'train: {X_train.shape, y_train.shape} \n test: {X_test.shape, y_test.shape}')

train: ((14604, 65), (14604,)) 
 test: ((4868, 65), (4868,))


### **Model Learning**

### Random Forest with Optuna

In [298]:
%%time
#begin hyperparameters selection
#create review object
study_optuna_rf = optuna.create_study(study_name='RandomForest',
                                       direction='maximize')

#search for the best combination
study_optuna_rf.optimize(optuna_rf,
                         n_trials=5)

[I 2024-10-08 00:37:05,277] A new study created in memory with name: RandomForest
[I 2024-10-08 00:37:08,172] Trial 0 finished with value: 0.9176942593313419 and parameters: {'n_estimators': 100, 'max_depth': 20, 'min_samples_leaf': 4, 'criterion': 'entropy', 'max_features': 'log2'}. Best is trial 0 with value: 0.9176942593313419.
[I 2024-10-08 00:37:11,928] Trial 1 finished with value: 0.9236515454924895 and parameters: {'n_estimators': 100, 'max_depth': 20, 'min_samples_leaf': 3, 'criterion': 'entropy', 'max_features': 'sqrt'}. Best is trial 1 with value: 0.9236515454924895.
[I 2024-10-08 00:37:15,143] Trial 2 finished with value: 0.9207071841600503 and parameters: {'n_estimators': 100, 'max_depth': 20, 'min_samples_leaf': 3, 'criterion': 'entropy', 'max_features': 'log2'}. Best is trial 1 with value: 0.9236515454924895.
[I 2024-10-08 00:37:21,610] Trial 3 finished with value: 0.9215289378285725 and parameters: {'n_estimators': 200, 'max_depth': 15, 'min_samples_leaf': 3, 'criterion'

CPU times: user 64.9 ms, sys: 33.7 ms, total: 98.7 ms
Wall time: 20.6 s


In [299]:
#find the best hyperparameters
print(f'Random Forest Hyperparameters: {study_optuna_rf.best_params}')

Random Forest Hyperparameters: {'n_estimators': 100, 'max_depth': 20, 'min_samples_leaf': 3, 'criterion': 'entropy', 'max_features': 'sqrt'}


In [300]:
#find the metrics for test data
model_opt_rf = ensemble.RandomForestClassifier(**study_optuna_rf.best_params,
                                               random_state=42,
                                               )

#set the start_time
start_time = time.time()

#model learning
model_opt_rf.fit(X_train, y_train)
model_opt_rf.fit(X_test, y_test)

#calculate the model learning time
model_opt_rf_time = round(time.time() - start_time, 2)

#make a prediction
y_train_pred_rf = model_opt_rf.predict(X_train)
y_test_pred_rf = model_opt_rf.predict(X_test)

In [301]:
#display the metrics
print('train data:')
print(f'f1_score: {metrics.f1_score(y_train, y_train_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_train, y_train):.3f}')
print()
print('test data:')
print(f'f1_score: {metrics.f1_score(y_test, y_test_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_test, y_test):.3f}')

train data:
f1_score: 0.918
accuracy: 0.918

test data:
f1_score: 0.978
accuracy: 0.978


In [302]:
print(classification_report(y_train,
                            y_train_pred_rf,
                            target_names=MAIN_GESTURES))

print(classification_report(y_test,
                            y_test_pred_rf,
                            target_names=MAIN_GESTURES))

              precision    recall  f1-score   support

     Neutral       0.90      0.98      0.94      9455
        Open       0.98      0.80      0.88      1032
      Pistol       0.98      0.81      0.89      1031
       Thumb       0.96      0.86      0.90      1029
          OK       0.94      0.74      0.83      1028
        Grab       0.96      0.79      0.86      1029

    accuracy                           0.92     14604
   macro avg       0.95      0.83      0.88     14604
weighted avg       0.92      0.92      0.92     14604

              precision    recall  f1-score   support

     Neutral       0.97      1.00      0.98      3152
        Open       1.00      0.92      0.95       344
      Pistol       0.99      0.93      0.96       343
       Thumb       1.00      0.94      0.97       343
          OK       0.99      0.97      0.98       343
        Grab       1.00      0.95      0.97       343

    accuracy                           0.98      4868
   macro avg       0.99

In [303]:
#export the classification report into a dataframe
report_rf = classification_report(y_test,
                                  y_test_pred_rf,
                                  target_names=MAIN_GESTURES,
                                  output_dict=True)

test_rf = pd.DataFrame(report_rf)

In [304]:
#add model results to the ordered dict
review['palm_file'].append(current_file)
review['model_name'].append(model_opt_rf.__class__.__name__)
review['learning_time'].append(model_opt_rf_time)
review['hyperparameters'].append(study_optuna_rf.best_params)
review['F1-Score, train'].append(round(metrics.f1_score(y_train, y_train_pred_rf, average="micro"), 3))
review['F1-Score, test'].append(round(metrics.f1_score(y_test, y_test_pred_rf, average="micro"), 3))

review['F1-Score: Neutral'].append(test_rf.loc['f1-score'].iloc[0].round(3))
review['F1-Score: Open'].append(test_rf.loc['f1-score'].iloc[1].round(3))
review['F1-Score: Pistol'].append(test_rf.loc['f1-score'].iloc[2].round(3))
review['F1-Score: Thumb'].append(test_rf.loc['f1-score'].iloc[3].round(3))
review['F1-Score: OK'].append(test_rf.loc['f1-score'].iloc[4].round(3))
review['F1-Score: Grab'].append(test_rf.loc['f1-score'].iloc[5].round(3))

##### Palm File 17

In [305]:
gestures, current_file, palm_file = read_pilot(data=data_meta,
                                               file_number=next(n))

#### Protocol File

In [306]:
gestures_protocol = pd.read_csv(f'{"./data_csv/" + current_file}.protocol.csv', index_col=0)

#### Encoding

In [307]:
#FIT
le.fit(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

#TRANSFORM
gestures_protocol['gesture'] = le.transform(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

In [308]:
#set the gestures
MAIN_GESTURES = ['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']
MAIN_GESTURES

['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']

#### Predictive Feature

In [309]:
#lock the predictive feature
y_cmd = np.array([gestures_protocol['gesture'].loc[s] for s in gestures['SYNC'].values])

#### Shift Function

In [310]:
#apply the time interval shift function
y, summary = get_naive_centering(gestures, y_cmd)

In [311]:
#display
summary

['Оптимальные свдиги для концевых выборок:   20 и 1\n',
 'Accuracy/correlation на концевых выборках: 0.616; 0.68\n',
 'Размер оптимального сдвига (как среднего): 10']

### **Train-Test Split**

In [312]:
#select the features
X = gestures[gestures.columns].values

In [313]:
#use stratified samplifing to split the data evenly
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, 
    stratify=y, 
    random_state=0
)

#find the dimensions
print(f'train: {X_train.shape, y_train.shape} \n test: {X_test.shape, y_test.shape}')

train: ((21812, 65), (21812,)) 
 test: ((7271, 65), (7271,))


### **Model Learning**

### Random Forest with Optuna

In [314]:
%%time
#begin hyperparameters selection
#create review object
study_optuna_rf = optuna.create_study(study_name='RandomForest',
                                       direction='maximize')

#search for the best combination
study_optuna_rf.optimize(optuna_rf,
                         n_trials=5)

[I 2024-10-08 00:37:36,957] A new study created in memory with name: RandomForest
[I 2024-10-08 00:37:48,320] Trial 0 finished with value: 0.9289839857338968 and parameters: {'n_estimators': 200, 'max_depth': 20, 'min_samples_leaf': 3, 'criterion': 'entropy', 'max_features': 'sqrt'}. Best is trial 0 with value: 0.9289839857338968.
[I 2024-10-08 00:37:52,861] Trial 1 finished with value: 0.8963871613058962 and parameters: {'n_estimators': 150, 'max_depth': 10, 'min_samples_leaf': 3, 'criterion': 'gini', 'max_features': 'log2'}. Best is trial 0 with value: 0.9289839857338968.
[I 2024-10-08 00:38:44,340] Trial 2 finished with value: 0.9362278646149422 and parameters: {'n_estimators': 150, 'max_depth': 20, 'min_samples_leaf': 4, 'criterion': 'entropy', 'max_features': None}. Best is trial 2 with value: 0.9362278646149422.
[I 2024-10-08 00:39:51,456] Trial 3 finished with value: 0.9300384322629658 and parameters: {'n_estimators': 150, 'max_depth': 20, 'min_samples_leaf': 4, 'criterion': 'gi

CPU times: user 179 ms, sys: 108 ms, total: 287 ms
Wall time: 2min 52s


In [315]:
#find the best hyperparameters
print(f'Random Forest Hyperparameters: {study_optuna_rf.best_params}')

Random Forest Hyperparameters: {'n_estimators': 150, 'max_depth': 20, 'min_samples_leaf': 4, 'criterion': 'entropy', 'max_features': None}


In [316]:
#find the metrics for test data
model_opt_rf = ensemble.RandomForestClassifier(**study_optuna_rf.best_params,
                                               random_state=42,
                                               )

#set the start_time
start_time = time.time()

#model learning
model_opt_rf.fit(X_train, y_train)
model_opt_rf.fit(X_test, y_test)

#calculate the model learning time
model_opt_rf_time = round(time.time() - start_time, 2)

#make a prediction
y_train_pred_rf = model_opt_rf.predict(X_train)
y_test_pred_rf = model_opt_rf.predict(X_test)

In [317]:
#display the metrics
print('train data:')
print(f'f1_score: {metrics.f1_score(y_train, y_train_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_train, y_train):.3f}')
print()
print('test data:')
print(f'f1_score: {metrics.f1_score(y_test, y_test_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_test, y_test):.3f}')

train data:
f1_score: 0.918
accuracy: 0.918

test data:
f1_score: 0.987
accuracy: 0.987


In [318]:
print(classification_report(y_train,
                            y_train_pred_rf,
                            target_names=MAIN_GESTURES))

print(classification_report(y_test,
                            y_test_pred_rf,
                            target_names=MAIN_GESTURES))

              precision    recall  f1-score   support

     Neutral       0.91      0.97      0.94     14092
        Open       0.95      0.85      0.89      1546
      Pistol       0.92      0.81      0.86      1546
       Thumb       0.95      0.89      0.92      1543
          OK       0.90      0.75      0.82      1541
        Grab       0.92      0.81      0.86      1544

    accuracy                           0.92     21812
   macro avg       0.93      0.85      0.88     21812
weighted avg       0.92      0.92      0.92     21812

              precision    recall  f1-score   support

     Neutral       0.99      1.00      0.99      4698
        Open       1.00      0.97      0.99       515
      Pistol       0.99      0.96      0.98       516
       Thumb       0.99      0.97      0.98       514
          OK       0.99      0.97      0.98       514
        Grab       0.99      0.98      0.98       514

    accuracy                           0.99      7271
   macro avg       0.99

In [319]:
#export the classification report into a dataframe
report_rf = classification_report(y_test,
                                  y_test_pred_rf,
                                  target_names=MAIN_GESTURES,
                                  output_dict=True)

test_rf = pd.DataFrame(report_rf)

In [320]:
#add model results to the ordered dict
review['palm_file'].append(current_file)
review['model_name'].append(model_opt_rf.__class__.__name__)
review['learning_time'].append(model_opt_rf_time)
review['hyperparameters'].append(study_optuna_rf.best_params)
review['F1-Score, train'].append(round(metrics.f1_score(y_train, y_train_pred_rf, average="micro"), 3))
review['F1-Score, test'].append(round(metrics.f1_score(y_test, y_test_pred_rf, average="micro"), 3))

review['F1-Score: Neutral'].append(test_rf.loc['f1-score'].iloc[0].round(3))
review['F1-Score: Open'].append(test_rf.loc['f1-score'].iloc[1].round(3))
review['F1-Score: Pistol'].append(test_rf.loc['f1-score'].iloc[2].round(3))
review['F1-Score: Thumb'].append(test_rf.loc['f1-score'].iloc[3].round(3))
review['F1-Score: OK'].append(test_rf.loc['f1-score'].iloc[4].round(3))
review['F1-Score: Grab'].append(test_rf.loc['f1-score'].iloc[5].round(3))

##### Palm File 18

In [321]:
gestures, current_file, palm_file = read_pilot(data=data_meta,
                                               file_number=next(n))

#### Protocol File

In [322]:
gestures_protocol = pd.read_csv(f'{"./data_csv/" + current_file}.protocol.csv', index_col=0)

#### Encoding

In [323]:
#FIT
le.fit(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

#TRANSFORM
gestures_protocol['gesture'] = le.transform(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

In [324]:
#set the gestures
MAIN_GESTURES = ['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']
MAIN_GESTURES

['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']

#### Predictive Feature

In [325]:
#lock the predictive feature
y_cmd = np.array([gestures_protocol['gesture'].loc[s] for s in gestures['SYNC'].values])

#### Shift Function

In [326]:
#apply the time interval shift function
y, summary = get_naive_centering(gestures, y_cmd)

In [327]:
#display
summary

['Оптимальные свдиги для концевых выборок:   20 и 1\n',
 'Accuracy/correlation на концевых выборках: 0.617; 0.68\n',
 'Размер оптимального сдвига (как среднего): 10']

### **Train-Test Split**

In [328]:
#select the features
X = gestures[gestures.columns].values

In [329]:
#use stratified samplifing to split the data evenly
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, 
    stratify=y, 
    random_state=0
)

#find the dimensions
print(f'train: {X_train.shape, y_train.shape} \n test: {X_test.shape, y_test.shape}')

train: ((21817, 65), (21817,)) 
 test: ((7273, 65), (7273,))


### **Model Learning**

### Random Forest with Optuna

In [330]:
%%time
#begin hyperparameters selection
#create review object
study_optuna_rf = optuna.create_study(study_name='RandomForest',
                                       direction='maximize')

#search for the best combination
study_optuna_rf.optimize(optuna_rf,
                         n_trials=5)

[I 2024-10-08 00:41:59,674] A new study created in memory with name: RandomForest
[I 2024-10-08 00:42:07,980] Trial 0 finished with value: 0.9179082161825349 and parameters: {'n_estimators': 150, 'max_depth': 15, 'min_samples_leaf': 4, 'criterion': 'entropy', 'max_features': 'sqrt'}. Best is trial 0 with value: 0.9179082161825349.
[I 2024-10-08 00:42:54,924] Trial 1 finished with value: 0.91268282173674 and parameters: {'n_estimators': 150, 'max_depth': 10, 'min_samples_leaf': 4, 'criterion': 'entropy', 'max_features': None}. Best is trial 0 with value: 0.9179082161825349.
[I 2024-10-08 00:43:01,692] Trial 2 finished with value: 0.904524243844528 and parameters: {'n_estimators': 150, 'max_depth': 10, 'min_samples_leaf': 5, 'criterion': 'entropy', 'max_features': 'sqrt'}. Best is trial 0 with value: 0.9179082161825349.
[I 2024-10-08 00:43:06,070] Trial 3 finished with value: 0.9135996221034602 and parameters: {'n_estimators': 100, 'max_depth': 15, 'min_samples_leaf': 4, 'criterion': 'en

CPU times: user 116 ms, sys: 62.1 ms, total: 178 ms
Wall time: 1min 13s


In [331]:
#find the best hyperparameters
print(f'Random Forest Hyperparameters: {study_optuna_rf.best_params}')

Random Forest Hyperparameters: {'n_estimators': 150, 'max_depth': 15, 'min_samples_leaf': 4, 'criterion': 'entropy', 'max_features': 'sqrt'}


In [332]:
#find the metrics for test data
model_opt_rf = ensemble.RandomForestClassifier(**study_optuna_rf.best_params,
                                               random_state=42,
                                               )

#set the start_time
start_time = time.time()

#model learning
model_opt_rf.fit(X_train, y_train)
model_opt_rf.fit(X_test, y_test)

#calculate the model learning time
model_opt_rf_time = round(time.time() - start_time, 2)

#make a prediction
y_train_pred_rf = model_opt_rf.predict(X_train)
y_test_pred_rf = model_opt_rf.predict(X_test)

In [333]:
#display the metrics
print('train data:')
print(f'f1_score: {metrics.f1_score(y_train, y_train_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_train, y_train):.3f}')
print()
print('test data:')
print(f'f1_score: {metrics.f1_score(y_test, y_test_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_test, y_test):.3f}')

train data:
f1_score: 0.906
accuracy: 0.906

test data:
f1_score: 0.946
accuracy: 0.946


In [334]:
print(classification_report(y_train,
                            y_train_pred_rf,
                            target_names=MAIN_GESTURES))

print(classification_report(y_test,
                            y_test_pred_rf,
                            target_names=MAIN_GESTURES))

              precision    recall  f1-score   support

     Neutral       0.89      0.98      0.93     14093
        Open       0.98      0.76      0.86      1543
      Pistol       0.94      0.80      0.86      1544
       Thumb       0.97      0.82      0.89      1548
          OK       0.89      0.71      0.79      1545
        Grab       0.94      0.77      0.84      1544

    accuracy                           0.91     21817
   macro avg       0.93      0.81      0.86     21817
weighted avg       0.91      0.91      0.90     21817

              precision    recall  f1-score   support

     Neutral       0.93      0.99      0.96      4698
        Open       1.00      0.88      0.93       514
      Pistol       0.99      0.83      0.90       515
       Thumb       0.99      0.89      0.94       516
          OK       0.97      0.86      0.91       515
        Grab       0.98      0.83      0.90       515

    accuracy                           0.95      7273
   macro avg       0.98

In [335]:
#export the classification report into a dataframe
report_rf = classification_report(y_test,
                                  y_test_pred_rf,
                                  target_names=MAIN_GESTURES,
                                  output_dict=True)

test_rf = pd.DataFrame(report_rf)

In [336]:
#add model results to the ordered dict
review['palm_file'].append(current_file)
review['model_name'].append(model_opt_rf.__class__.__name__)
review['learning_time'].append(model_opt_rf_time)
review['hyperparameters'].append(study_optuna_rf.best_params)
review['F1-Score, train'].append(round(metrics.f1_score(y_train, y_train_pred_rf, average="micro"), 3))
review['F1-Score, test'].append(round(metrics.f1_score(y_test, y_test_pred_rf, average="micro"), 3))

review['F1-Score: Neutral'].append(test_rf.loc['f1-score'].iloc[0].round(3))
review['F1-Score: Open'].append(test_rf.loc['f1-score'].iloc[1].round(3))
review['F1-Score: Pistol'].append(test_rf.loc['f1-score'].iloc[2].round(3))
review['F1-Score: Thumb'].append(test_rf.loc['f1-score'].iloc[3].round(3))
review['F1-Score: OK'].append(test_rf.loc['f1-score'].iloc[4].round(3))
review['F1-Score: Grab'].append(test_rf.loc['f1-score'].iloc[5].round(3))

##### Palm File 19

In [337]:
gestures, current_file, palm_file = read_pilot(data=data_meta,
                                               file_number=next(n))

#### Protocol File

In [338]:
gestures_protocol = pd.read_csv(f'{"./data_csv/" + current_file}.protocol.csv', index_col=0)

#### Encoding

In [339]:
#FIT
le.fit(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

#TRANSFORM
gestures_protocol['gesture'] = le.transform(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

In [340]:
#set the gestures
MAIN_GESTURES = ['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']
MAIN_GESTURES

['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']

#### Predictive Feature

In [341]:
#lock the predictive feature
y_cmd = np.array([gestures_protocol['gesture'].loc[s] for s in gestures['SYNC'].values])

#### Shift Function

In [342]:
#apply the time interval shift function
y, summary = get_naive_centering(gestures, y_cmd)

In [343]:
#display
summary

['Оптимальные свдиги для концевых выборок:   1 и 1\n',
 'Accuracy/correlation на концевых выборках: 0.576; 0.62\n',
 'Размер оптимального сдвига (как среднего): 1']

### **Train-Test Split**

In [344]:
#select the features
X = gestures[gestures.columns].values

In [345]:
#use stratified samplifing to split the data evenly
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, 
    stratify=y, 
    random_state=0
)

#find the dimensions
print(f'train: {X_train.shape, y_train.shape} \n test: {X_test.shape, y_test.shape}')

train: ((28206, 65), (28206,)) 
 test: ((9402, 65), (9402,))


### **Model Learning**

### Random Forest with Optuna

In [346]:
%%time
#begin hyperparameters selection
#create review object
study_optuna_rf = optuna.create_study(study_name='RandomForest',
                                       direction='maximize')

#search for the best combination
study_optuna_rf.optimize(optuna_rf,
                         n_trials=5)

[I 2024-10-08 00:43:33,475] A new study created in memory with name: RandomForest
[I 2024-10-08 00:43:40,858] Trial 0 finished with value: 0.9621355484586094 and parameters: {'n_estimators': 150, 'max_depth': 20, 'min_samples_leaf': 4, 'criterion': 'gini', 'max_features': 'log2'}. Best is trial 0 with value: 0.9621355484586094.
[I 2024-10-08 00:43:51,807] Trial 1 finished with value: 0.9758561303054101 and parameters: {'n_estimators': 175, 'max_depth': 20, 'min_samples_leaf': 3, 'criterion': 'entropy', 'max_features': 'sqrt'}. Best is trial 1 with value: 0.9758561303054101.
[I 2024-10-08 00:43:57,441] Trial 2 finished with value: 0.869850246282016 and parameters: {'n_estimators': 125, 'max_depth': 10, 'min_samples_leaf': 3, 'criterion': 'gini', 'max_features': 'sqrt'}. Best is trial 1 with value: 0.9758561303054101.
[I 2024-10-08 00:44:04,560] Trial 3 finished with value: 0.9365380169407137 and parameters: {'n_estimators': 150, 'max_depth': 15, 'min_samples_leaf': 5, 'criterion': 'entr

CPU times: user 86.2 ms, sys: 54.8 ms, total: 141 ms
Wall time: 38.9 s


In [347]:
#find the best hyperparameters
print(f'Random Forest Hyperparameters: {study_optuna_rf.best_params}')

Random Forest Hyperparameters: {'n_estimators': 175, 'max_depth': 20, 'min_samples_leaf': 3, 'criterion': 'entropy', 'max_features': 'sqrt'}


In [348]:
#find the metrics for test data
model_opt_rf = ensemble.RandomForestClassifier(**study_optuna_rf.best_params,
                                               random_state=42,
                                               )

#set the start_time
start_time = time.time()

#model learning
model_opt_rf.fit(X_train, y_train)
model_opt_rf.fit(X_test, y_test)

#calculate the model learning time
model_opt_rf_time = round(time.time() - start_time, 2)

#make a prediction
y_train_pred_rf = model_opt_rf.predict(X_train)
y_test_pred_rf = model_opt_rf.predict(X_test)

In [349]:
#display the metrics
print('train data:')
print(f'f1_score: {metrics.f1_score(y_train, y_train_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_train, y_train):.3f}')
print()
print('test data:')
print(f'f1_score: {metrics.f1_score(y_test, y_test_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_test, y_test):.3f}')

train data:
f1_score: 0.947
accuracy: 0.947

test data:
f1_score: 0.995
accuracy: 0.995


In [350]:
print(classification_report(y_train,
                            y_train_pred_rf,
                            target_names=MAIN_GESTURES))

print(classification_report(y_test,
                            y_test_pred_rf,
                            target_names=MAIN_GESTURES))

              precision    recall  f1-score   support

     Neutral       0.94      0.99      0.96     16608
        Open       0.96      0.90      0.93      2315
      Pistol       0.95      0.89      0.92      2321
       Thumb       0.94      0.89      0.91      2319
          OK       0.96      0.94      0.95      2321
        Grab       0.98      0.82      0.89      2322

    accuracy                           0.95     28206
   macro avg       0.95      0.90      0.93     28206
weighted avg       0.95      0.95      0.95     28206

              precision    recall  f1-score   support

     Neutral       0.99      1.00      1.00      5536
        Open       1.00      0.99      0.99       772
      Pistol       1.00      0.99      0.99       773
       Thumb       1.00      0.98      0.99       773
          OK       0.99      0.99      0.99       774
        Grab       1.00      0.99      1.00       774

    accuracy                           0.99      9402
   macro avg       1.00

In [351]:
#export the classification report into a dataframe
report_rf = classification_report(y_test,
                                  y_test_pred_rf,
                                  target_names=MAIN_GESTURES,
                                  output_dict=True)

test_rf = pd.DataFrame(report_rf)

In [352]:
#add model results to the ordered dict
review['palm_file'].append(current_file)
review['model_name'].append(model_opt_rf.__class__.__name__)
review['learning_time'].append(model_opt_rf_time)
review['hyperparameters'].append(study_optuna_rf.best_params)
review['F1-Score, train'].append(round(metrics.f1_score(y_train, y_train_pred_rf, average="micro"), 3))
review['F1-Score, test'].append(round(metrics.f1_score(y_test, y_test_pred_rf, average="micro"), 3))

review['F1-Score: Neutral'].append(test_rf.loc['f1-score'].iloc[0].round(3))
review['F1-Score: Open'].append(test_rf.loc['f1-score'].iloc[1].round(3))
review['F1-Score: Pistol'].append(test_rf.loc['f1-score'].iloc[2].round(3))
review['F1-Score: Thumb'].append(test_rf.loc['f1-score'].iloc[3].round(3))
review['F1-Score: OK'].append(test_rf.loc['f1-score'].iloc[4].round(3))
review['F1-Score: Grab'].append(test_rf.loc['f1-score'].iloc[5].round(3))

##### Palm File 20

In [353]:
gestures, current_file, palm_file = read_pilot(data=data_meta,
                                               file_number=next(n))

#### Protocol File

In [354]:
gestures_protocol = pd.read_csv(f'{"./data_csv/" + current_file}.protocol.csv', index_col=0)

#### Encoding

In [355]:
#FIT
le.fit(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

#TRANSFORM
gestures_protocol['gesture'] = le.transform(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

In [356]:
#set the gestures
MAIN_GESTURES = ['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']
MAIN_GESTURES

['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']

#### Predictive Feature

In [357]:
#lock the predictive feature
y_cmd = np.array([gestures_protocol['gesture'].loc[s] for s in gestures['SYNC'].values])

#### Shift Function

In [358]:
#apply the time interval shift function
y, summary = get_naive_centering(gestures, y_cmd)

In [359]:
#display
summary

['Оптимальные свдиги для концевых выборок:   1 и 1\n',
 'Accuracy/correlation на концевых выборках: 0.611; 0.601\n',
 'Размер оптимального сдвига (как среднего): 1']

### **Train-Test Split**

In [360]:
#select the features
X = gestures[gestures.columns].values

In [361]:
#use stratified samplifing to split the data evenly
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, 
    stratify=y, 
    random_state=0
)

#find the dimensions
print(f'train: {X_train.shape, y_train.shape} \n test: {X_test.shape, y_test.shape}')

train: ((8485, 65), (8485,)) 
 test: ((2829, 65), (2829,))


### **Model Learning**

### Random Forest with Optuna

In [362]:
%%time
#begin hyperparameters selection
#create review object
study_optuna_rf = optuna.create_study(study_name='RandomForest',
                                       direction='maximize')

#search for the best combination
study_optuna_rf.optimize(optuna_rf,
                         n_trials=5)

[I 2024-10-08 00:44:36,730] A new study created in memory with name: RandomForest
[I 2024-10-08 00:44:59,703] Trial 0 finished with value: 0.8816735415439011 and parameters: {'n_estimators': 200, 'max_depth': 10, 'min_samples_leaf': 4, 'criterion': 'entropy', 'max_features': None}. Best is trial 0 with value: 0.8816735415439011.
[I 2024-10-08 00:45:01,862] Trial 1 finished with value: 0.8214496169711255 and parameters: {'n_estimators': 125, 'max_depth': 10, 'min_samples_leaf': 4, 'criterion': 'gini', 'max_features': 'sqrt'}. Best is trial 0 with value: 0.8816735415439011.
[I 2024-10-08 00:45:14,968] Trial 2 finished with value: 0.9255156157925752 and parameters: {'n_estimators': 100, 'max_depth': 15, 'min_samples_leaf': 4, 'criterion': 'entropy', 'max_features': None}. Best is trial 2 with value: 0.9255156157925752.
[I 2024-10-08 00:45:18,160] Trial 3 finished with value: 0.8555097230406601 and parameters: {'n_estimators': 200, 'max_depth': 15, 'min_samples_leaf': 5, 'criterion': 'gini

CPU times: user 66.3 ms, sys: 42 ms, total: 108 ms
Wall time: 53.4 s


In [363]:
#find the best hyperparameters
print(f'Random Forest Hyperparameters: {study_optuna_rf.best_params}')

Random Forest Hyperparameters: {'n_estimators': 100, 'max_depth': 15, 'min_samples_leaf': 4, 'criterion': 'entropy', 'max_features': None}


In [364]:
#find the metrics for test data
model_opt_rf = ensemble.RandomForestClassifier(**study_optuna_rf.best_params,
                                               random_state=42,
                                               )

#set the start_time
start_time = time.time()

#model learning
model_opt_rf.fit(X_train, y_train)
model_opt_rf.fit(X_test, y_test)

#calculate the model learning time
model_opt_rf_time = round(time.time() - start_time, 2)

#make a prediction
y_train_pred_rf = model_opt_rf.predict(X_train)
y_test_pred_rf = model_opt_rf.predict(X_test)

In [365]:
#display the metrics
print('train data:')
print(f'f1_score: {metrics.f1_score(y_train, y_train_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_train, y_train):.3f}')
print()
print('test data:')
print(f'f1_score: {metrics.f1_score(y_test, y_test_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_test, y_test):.3f}')

train data:
f1_score: 0.901
accuracy: 0.901

test data:
f1_score: 0.990
accuracy: 0.990


In [366]:
print(classification_report(y_train,
                            y_train_pred_rf,
                            target_names=MAIN_GESTURES))

print(classification_report(y_test,
                            y_test_pred_rf,
                            target_names=MAIN_GESTURES))

              precision    recall  f1-score   support

     Neutral       0.94      0.97      0.96      5150
        Open       0.82      0.82      0.82       668
      Pistol       0.83      0.63      0.71       665
       Thumb       0.84      0.88      0.86       665
          OK       0.81      0.88      0.84       670
        Grab       0.86      0.74      0.80       667

    accuracy                           0.90      8485
   macro avg       0.85      0.82      0.83      8485
weighted avg       0.90      0.90      0.90      8485

              precision    recall  f1-score   support

     Neutral       0.99      1.00      0.99      1717
        Open       1.00      0.98      0.99       223
      Pistol       1.00      0.97      0.99       222
       Thumb       0.99      0.97      0.98       222
          OK       0.98      0.99      0.98       223
        Grab       0.99      0.98      0.98       222

    accuracy                           0.99      2829
   macro avg       0.99

In [367]:
#export the classification report into a dataframe
report_rf = classification_report(y_test,
                                  y_test_pred_rf,
                                  target_names=MAIN_GESTURES,
                                  output_dict=True)

test_rf = pd.DataFrame(report_rf)

In [368]:
#add model results to the ordered dict
review['palm_file'].append(current_file)
review['model_name'].append(model_opt_rf.__class__.__name__)
review['learning_time'].append(model_opt_rf_time)
review['hyperparameters'].append(study_optuna_rf.best_params)
review['F1-Score, train'].append(round(metrics.f1_score(y_train, y_train_pred_rf, average="micro"), 3))
review['F1-Score, test'].append(round(metrics.f1_score(y_test, y_test_pred_rf, average="micro"), 3))

review['F1-Score: Neutral'].append(test_rf.loc['f1-score'].iloc[0].round(3))
review['F1-Score: Open'].append(test_rf.loc['f1-score'].iloc[1].round(3))
review['F1-Score: Pistol'].append(test_rf.loc['f1-score'].iloc[2].round(3))
review['F1-Score: Thumb'].append(test_rf.loc['f1-score'].iloc[3].round(3))
review['F1-Score: OK'].append(test_rf.loc['f1-score'].iloc[4].round(3))
review['F1-Score: Grab'].append(test_rf.loc['f1-score'].iloc[5].round(3))

##### Palm File 21

In [369]:
gestures, current_file, palm_file = read_pilot(data=data_meta,
                                               file_number=next(n))

#### Protocol File

In [370]:
gestures_protocol = pd.read_csv(f'{"./data_csv/" + current_file}.protocol.csv', index_col=0)

#### Encoding

In [371]:
#FIT
le.fit(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

#TRANSFORM
gestures_protocol['gesture'] = le.transform(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

In [372]:
#set the gestures
MAIN_GESTURES = ['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']
MAIN_GESTURES

['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']

#### Predictive Feature

In [373]:
#lock the predictive feature
y_cmd = np.array([gestures_protocol['gesture'].loc[s] for s in gestures['SYNC'].values])

#### Shift Function

In [374]:
#apply the time interval shift function
y, summary = get_naive_centering(gestures, y_cmd)

In [375]:
#display
summary

['Оптимальные свдиги для концевых выборок:   1 и 1\n',
 'Accuracy/correlation на концевых выборках: 0.577; 0.628\n',
 'Размер оптимального сдвига (как среднего): 1']

### **Train-Test Split**

In [376]:
#select the features
X = gestures[gestures.columns].values

In [377]:
#use stratified samplifing to split the data evenly
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, 
    stratify=y, 
    random_state=0
)

#find the dimensions
print(f'train: {X_train.shape, y_train.shape} \n test: {X_test.shape, y_test.shape}')

train: ((8432, 65), (8432,)) 
 test: ((2811, 65), (2811,))


### **Model Learning**

### Random Forest with Optuna

In [378]:
%%time
#begin hyperparameters selection
#create review object
study_optuna_rf = optuna.create_study(study_name='RandomForest',
                                       direction='maximize')

#search for the best combination
study_optuna_rf.optimize(optuna_rf,
                         n_trials=5)

[I 2024-10-08 00:45:56,750] A new study created in memory with name: RandomForest
[I 2024-10-08 00:45:58,445] Trial 0 finished with value: 0.8815237026427056 and parameters: {'n_estimators': 125, 'max_depth': 10, 'min_samples_leaf': 3, 'criterion': 'gini', 'max_features': 'log2'}. Best is trial 0 with value: 0.8815237026427056.
[I 2024-10-08 00:46:21,512] Trial 1 finished with value: 0.9621685894717894 and parameters: {'n_estimators': 200, 'max_depth': 20, 'min_samples_leaf': 4, 'criterion': 'gini', 'max_features': None}. Best is trial 1 with value: 0.9621685894717894.
[I 2024-10-08 00:46:34,291] Trial 2 finished with value: 0.9143735396138639 and parameters: {'n_estimators': 125, 'max_depth': 10, 'min_samples_leaf': 4, 'criterion': 'gini', 'max_features': None}. Best is trial 1 with value: 0.9621685894717894.
[I 2024-10-08 00:46:37,146] Trial 3 finished with value: 0.9141367135888775 and parameters: {'n_estimators': 175, 'max_depth': 15, 'min_samples_leaf': 5, 'criterion': 'entropy', 

CPU times: user 72 ms, sys: 50 ms, total: 122 ms
Wall time: 59.4 s


In [379]:
#find the best hyperparameters
print(f'Random Forest Hyperparameters: {study_optuna_rf.best_params}')

Random Forest Hyperparameters: {'n_estimators': 200, 'max_depth': 20, 'min_samples_leaf': 4, 'criterion': 'gini', 'max_features': None}


In [380]:
#find the metrics for test data
model_opt_rf = ensemble.RandomForestClassifier(**study_optuna_rf.best_params,
                                               random_state=42,
                                               )

#set the start_time
start_time = time.time()

#model learning
model_opt_rf.fit(X_train, y_train)
model_opt_rf.fit(X_test, y_test)

#calculate the model learning time
model_opt_rf_time = round(time.time() - start_time, 2)

#make a prediction
y_train_pred_rf = model_opt_rf.predict(X_train)
y_test_pred_rf = model_opt_rf.predict(X_test)

In [381]:
#display the metrics
print('train data:')
print(f'f1_score: {metrics.f1_score(y_train, y_train_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_train, y_train):.3f}')
print()
print('test data:')
print(f'f1_score: {metrics.f1_score(y_test, y_test_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_test, y_test):.3f}')

train data:
f1_score: 0.939
accuracy: 0.939

test data:
f1_score: 0.988
accuracy: 0.988


In [382]:
print(classification_report(y_train,
                            y_train_pred_rf,
                            target_names=MAIN_GESTURES))

print(classification_report(y_test,
                            y_test_pred_rf,
                            target_names=MAIN_GESTURES))

              precision    recall  f1-score   support

     Neutral       0.96      0.98      0.97      5008
        Open       0.96      0.90      0.93       682
      Pistol       0.87      0.81      0.84       683
       Thumb       0.90      0.91      0.90       686
          OK       0.88      0.92      0.90       688
        Grab       0.92      0.84      0.88       685

    accuracy                           0.94      8432
   macro avg       0.92      0.89      0.90      8432
weighted avg       0.94      0.94      0.94      8432

              precision    recall  f1-score   support

     Neutral       0.99      1.00      0.99      1669
        Open       1.00      0.98      0.99       228
      Pistol       0.99      0.98      0.98       227
       Thumb       0.98      0.97      0.98       229
          OK       0.98      0.97      0.98       230
        Grab       0.99      0.97      0.98       228

    accuracy                           0.99      2811
   macro avg       0.99

In [383]:
#export the classification report into a dataframe
report_rf = classification_report(y_test,
                                  y_test_pred_rf,
                                  target_names=MAIN_GESTURES,
                                  output_dict=True)

test_rf = pd.DataFrame(report_rf)

In [384]:
#add model results to the ordered dict
review['palm_file'].append(current_file)
review['model_name'].append(model_opt_rf.__class__.__name__)
review['learning_time'].append(model_opt_rf_time)
review['hyperparameters'].append(study_optuna_rf.best_params)
review['F1-Score, train'].append(round(metrics.f1_score(y_train, y_train_pred_rf, average="micro"), 3))
review['F1-Score, test'].append(round(metrics.f1_score(y_test, y_test_pred_rf, average="micro"), 3))

review['F1-Score: Neutral'].append(test_rf.loc['f1-score'].iloc[0].round(3))
review['F1-Score: Open'].append(test_rf.loc['f1-score'].iloc[1].round(3))
review['F1-Score: Pistol'].append(test_rf.loc['f1-score'].iloc[2].round(3))
review['F1-Score: Thumb'].append(test_rf.loc['f1-score'].iloc[3].round(3))
review['F1-Score: OK'].append(test_rf.loc['f1-score'].iloc[4].round(3))
review['F1-Score: Grab'].append(test_rf.loc['f1-score'].iloc[5].round(3))

##### Palm File 22

In [385]:
gestures, current_file, palm_file = read_pilot(data=data_meta,
                                               file_number=next(n))

#### Protocol File

In [386]:
gestures_protocol = pd.read_csv(f'{"./data_csv/" + current_file}.protocol.csv', index_col=0)

#### Encoding

In [387]:
#FIT
le.fit(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

#TRANSFORM
gestures_protocol['gesture'] = le.transform(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

In [388]:
#set the gestures
MAIN_GESTURES = ['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']
MAIN_GESTURES

['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']

#### Predictive Feature

In [389]:
#lock the predictive feature
y_cmd = np.array([gestures_protocol['gesture'].loc[s] for s in gestures['SYNC'].values])

#### Shift Function

In [390]:
#apply the time interval shift function
y, summary = get_naive_centering(gestures, y_cmd)

In [391]:
#display
summary

['Оптимальные свдиги для концевых выборок:   1 и 1\n',
 'Accuracy/correlation на концевых выборках: 0.606; 0.584\n',
 'Размер оптимального сдвига (как среднего): 1']

### **Train-Test Split**

In [392]:
#select the features
X = gestures[gestures.columns].values

In [393]:
#use stratified samplifing to split the data evenly
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, 
    stratify=y, 
    random_state=0
)

#find the dimensions
print(f'train: {X_train.shape, y_train.shape} \n test: {X_test.shape, y_test.shape}')

train: ((12627, 65), (12627,)) 
 test: ((4210, 65), (4210,))


### **Model Learning**

### Random Forest with Optuna

In [394]:
%%time
#begin hyperparameters selection
#create review object
study_optuna_rf = optuna.create_study(study_name='RandomForest',
                                       direction='maximize')

#search for the best combination
study_optuna_rf.optimize(optuna_rf,
                         n_trials=5)

[I 2024-10-08 00:47:39,509] A new study created in memory with name: RandomForest
[I 2024-10-08 00:47:54,786] Trial 0 finished with value: 0.9086874720726229 and parameters: {'n_estimators': 125, 'max_depth': 10, 'min_samples_leaf': 3, 'criterion': 'gini', 'max_features': None}. Best is trial 0 with value: 0.9086874720726229.
[I 2024-10-08 00:48:20,655] Trial 1 finished with value: 0.9308627736882951 and parameters: {'n_estimators': 200, 'max_depth': 10, 'min_samples_leaf': 5, 'criterion': 'entropy', 'max_features': None}. Best is trial 1 with value: 0.9308627736882951.
[I 2024-10-08 00:48:23,020] Trial 2 finished with value: 0.9156563580348533 and parameters: {'n_estimators': 100, 'max_depth': 15, 'min_samples_leaf': 5, 'criterion': 'gini', 'max_features': 'sqrt'}. Best is trial 1 with value: 0.9308627736882951.
[I 2024-10-08 00:48:55,753] Trial 3 finished with value: 0.9461470802662214 and parameters: {'n_estimators': 175, 'max_depth': 20, 'min_samples_leaf': 5, 'criterion': 'gini', 

CPU times: user 102 ms, sys: 71.3 ms, total: 173 ms
Wall time: 1min 30s


In [395]:
#find the best hyperparameters
print(f'Random Forest Hyperparameters: {study_optuna_rf.best_params}')

Random Forest Hyperparameters: {'n_estimators': 100, 'max_depth': 15, 'min_samples_leaf': 3, 'criterion': 'entropy', 'max_features': None}


In [396]:
#find the metrics for test data
model_opt_rf = ensemble.RandomForestClassifier(**study_optuna_rf.best_params,
                                               random_state=42,
                                               )

#set the start_time
start_time = time.time()

#model learning
model_opt_rf.fit(X_train, y_train)
model_opt_rf.fit(X_test, y_test)

#calculate the model learning time
model_opt_rf_time = round(time.time() - start_time, 2)

#make a prediction
y_train_pred_rf = model_opt_rf.predict(X_train)
y_test_pred_rf = model_opt_rf.predict(X_test)

In [397]:
#display the metrics
print('train data:')
print(f'f1_score: {metrics.f1_score(y_train, y_train_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_train, y_train):.3f}')
print()
print('test data:')
print(f'f1_score: {metrics.f1_score(y_test, y_test_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_test, y_test):.3f}')

train data:
f1_score: 0.920
accuracy: 0.920

test data:
f1_score: 0.987
accuracy: 0.987


In [398]:
print(classification_report(y_train,
                            y_train_pred_rf,
                            target_names=MAIN_GESTURES))

print(classification_report(y_test,
                            y_test_pred_rf,
                            target_names=MAIN_GESTURES))

              precision    recall  f1-score   support

     Neutral       0.95      0.97      0.96      7521
        Open       0.91      0.90      0.91      1017
      Pistol       0.86      0.82      0.84      1026
       Thumb       0.87      0.88      0.87      1021
          OK       0.82      0.83      0.82      1017
        Grab       0.91      0.84      0.87      1025

    accuracy                           0.92     12627
   macro avg       0.89      0.87      0.88     12627
weighted avg       0.92      0.92      0.92     12627

              precision    recall  f1-score   support

     Neutral       0.98      1.00      0.99      2508
        Open       0.99      0.97      0.98       339
      Pistol       0.99      0.98      0.99       342
       Thumb       0.98      0.96      0.97       340
          OK       1.00      0.97      0.99       339
        Grab       0.99      0.98      0.99       342

    accuracy                           0.99      4210
   macro avg       0.99

In [399]:
#export the classification report into a dataframe
report_rf = classification_report(y_test,
                                  y_test_pred_rf,
                                  target_names=MAIN_GESTURES,
                                  output_dict=True)

test_rf = pd.DataFrame(report_rf)

In [400]:
#add model results to the ordered dict
review['palm_file'].append(current_file)
review['model_name'].append(model_opt_rf.__class__.__name__)
review['learning_time'].append(model_opt_rf_time)
review['hyperparameters'].append(study_optuna_rf.best_params)
review['F1-Score, train'].append(round(metrics.f1_score(y_train, y_train_pred_rf, average="micro"), 3))
review['F1-Score, test'].append(round(metrics.f1_score(y_test, y_test_pred_rf, average="micro"), 3))

review['F1-Score: Neutral'].append(test_rf.loc['f1-score'].iloc[0].round(3))
review['F1-Score: Open'].append(test_rf.loc['f1-score'].iloc[1].round(3))
review['F1-Score: Pistol'].append(test_rf.loc['f1-score'].iloc[2].round(3))
review['F1-Score: Thumb'].append(test_rf.loc['f1-score'].iloc[3].round(3))
review['F1-Score: OK'].append(test_rf.loc['f1-score'].iloc[4].round(3))
review['F1-Score: Grab'].append(test_rf.loc['f1-score'].iloc[5].round(3))

##### Palm File 23

In [401]:
gestures, current_file, palm_file = read_pilot(data=data_meta,
                                               file_number=next(n))

#### Protocol File

In [402]:
gestures_protocol = pd.read_csv(f'{"./data_csv/" + current_file}.protocol.csv', index_col=0)

#### Encoding

In [403]:
#FIT
le.fit(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

#TRANSFORM
gestures_protocol['gesture'] = le.transform(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

In [404]:
#set the gestures
MAIN_GESTURES = ['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']
MAIN_GESTURES

['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']

#### Predictive Feature

In [405]:
#lock the predictive feature
y_cmd = np.array([gestures_protocol['gesture'].loc[s] for s in gestures['SYNC'].values])

#### Shift Function

In [406]:
#apply the time interval shift function
y, summary = get_naive_centering(gestures, y_cmd)

In [407]:
#display
summary

['Оптимальные свдиги для концевых выборок:   1 и 1\n',
 'Accuracy/correlation на концевых выборках: 0.576; 0.632\n',
 'Размер оптимального сдвига (как среднего): 1']

### **Train-Test Split**

In [408]:
#select the features
X = gestures[gestures.columns].values

In [409]:
#use stratified samplifing to split the data evenly
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, 
    stratify=y, 
    random_state=0
)

#find the dimensions
print(f'train: {X_train.shape, y_train.shape} \n test: {X_test.shape, y_test.shape}')

train: ((19986, 65), (19986,)) 
 test: ((6662, 65), (6662,))


### **Model Learning**

### Random Forest with Optuna

In [410]:
%%time
#begin hyperparameters selection
#create review object
study_optuna_rf = optuna.create_study(study_name='RandomForest',
                                       direction='maximize')

#search for the best combination
study_optuna_rf.optimize(optuna_rf,
                         n_trials=5)

[I 2024-10-08 00:49:39,952] A new study created in memory with name: RandomForest
[I 2024-10-08 00:49:58,019] Trial 0 finished with value: 0.9438108846767641 and parameters: {'n_estimators': 100, 'max_depth': 10, 'min_samples_leaf': 3, 'criterion': 'entropy', 'max_features': None}. Best is trial 0 with value: 0.9438108846767641.
[I 2024-10-08 00:50:36,527] Trial 1 finished with value: 0.9703791475422475 and parameters: {'n_estimators': 150, 'max_depth': 15, 'min_samples_leaf': 5, 'criterion': 'gini', 'max_features': None}. Best is trial 1 with value: 0.9703791475422475.
[I 2024-10-08 00:51:00,624] Trial 2 finished with value: 0.9794855521330843 and parameters: {'n_estimators': 125, 'max_depth': 20, 'min_samples_leaf': 3, 'criterion': 'entropy', 'max_features': None}. Best is trial 2 with value: 0.9794855521330843.
[I 2024-10-08 00:51:05,044] Trial 3 finished with value: 0.9312017655062208 and parameters: {'n_estimators': 150, 'max_depth': 15, 'min_samples_leaf': 3, 'criterion': 'gini',

CPU times: user 100 ms, sys: 68.4 ms, total: 169 ms
Wall time: 1min 29s


In [411]:
#find the best hyperparameters
print(f'Random Forest Hyperparameters: {study_optuna_rf.best_params}')

Random Forest Hyperparameters: {'n_estimators': 125, 'max_depth': 20, 'min_samples_leaf': 3, 'criterion': 'entropy', 'max_features': None}


In [412]:
#find the metrics for test data
model_opt_rf = ensemble.RandomForestClassifier(**study_optuna_rf.best_params,
                                               random_state=42,
                                               )

#set the start_time
start_time = time.time()

#model learning
model_opt_rf.fit(X_train, y_train)
model_opt_rf.fit(X_test, y_test)

#calculate the model learning time
model_opt_rf_time = round(time.time() - start_time, 2)

#make a prediction
y_train_pred_rf = model_opt_rf.predict(X_train)
y_test_pred_rf = model_opt_rf.predict(X_test)

In [413]:
#display the metrics
print('train data:')
print(f'f1_score: {metrics.f1_score(y_train, y_train_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_train, y_train):.3f}')
print()
print('test data:')
print(f'f1_score: {metrics.f1_score(y_test, y_test_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_test, y_test):.3f}')

train data:
f1_score: 0.964
accuracy: 0.964

test data:
f1_score: 0.993
accuracy: 0.993


In [414]:
print(classification_report(y_train,
                            y_train_pred_rf,
                            target_names=MAIN_GESTURES))

print(classification_report(y_test,
                            y_test_pred_rf,
                            target_names=MAIN_GESTURES))

              precision    recall  f1-score   support

     Neutral       0.97      0.99      0.98     11827
        Open       0.97      0.92      0.95      1635
      Pistol       0.93      0.91      0.92      1634
       Thumb       0.96      0.94      0.95      1633
          OK       0.94      0.96      0.95      1627
        Grab       0.95      0.93      0.94      1630

    accuracy                           0.96     19986
   macro avg       0.95      0.94      0.95     19986
weighted avg       0.96      0.96      0.96     19986

              precision    recall  f1-score   support

     Neutral       0.99      1.00      0.99      3943
        Open       0.99      0.99      0.99       545
      Pistol       0.99      0.99      0.99       544
       Thumb       1.00      0.99      0.99       544
          OK       0.98      0.99      0.99       543
        Grab       1.00      0.99      1.00       543

    accuracy                           0.99      6662
   macro avg       0.99

In [415]:
#export the classification report into a dataframe
report_rf = classification_report(y_test,
                                  y_test_pred_rf,
                                  target_names=MAIN_GESTURES,
                                  output_dict=True)

test_rf = pd.DataFrame(report_rf)

In [416]:
#add model results to the ordered dict
review['palm_file'].append(current_file)
review['model_name'].append(model_opt_rf.__class__.__name__)
review['learning_time'].append(model_opt_rf_time)
review['hyperparameters'].append(study_optuna_rf.best_params)
review['F1-Score, train'].append(round(metrics.f1_score(y_train, y_train_pred_rf, average="micro"), 3))
review['F1-Score, test'].append(round(metrics.f1_score(y_test, y_test_pred_rf, average="micro"), 3))

review['F1-Score: Neutral'].append(test_rf.loc['f1-score'].iloc[0].round(3))
review['F1-Score: Open'].append(test_rf.loc['f1-score'].iloc[1].round(3))
review['F1-Score: Pistol'].append(test_rf.loc['f1-score'].iloc[2].round(3))
review['F1-Score: Thumb'].append(test_rf.loc['f1-score'].iloc[3].round(3))
review['F1-Score: OK'].append(test_rf.loc['f1-score'].iloc[4].round(3))
review['F1-Score: Grab'].append(test_rf.loc['f1-score'].iloc[5].round(3))

##### Palm File 24

In [417]:
gestures, current_file, palm_file = read_pilot(data=data_meta,
                                               file_number=next(n))

#### Protocol File

In [418]:
gestures_protocol = pd.read_csv(f'{"./data_csv/" + current_file}.protocol.csv', index_col=0)

#### Encoding

In [419]:
#FIT
le.fit(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

#TRANSFORM
gestures_protocol['gesture'] = le.transform(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

In [420]:
#set the gestures
MAIN_GESTURES = ['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']
MAIN_GESTURES

['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']

#### Predictive Feature

In [421]:
#lock the predictive feature
y_cmd = np.array([gestures_protocol['gesture'].loc[s] for s in gestures['SYNC'].values])

#### Shift Function

In [422]:
#apply the time interval shift function
y, summary = get_naive_centering(gestures, y_cmd)

In [423]:
#display
summary

['Оптимальные свдиги для концевых выборок:   20 и 1\n',
 'Accuracy/correlation на концевых выборках: 0.596; 0.582\n',
 'Размер оптимального сдвига (как среднего): 10']

### **Train-Test Split**

In [424]:
#select the features
X = gestures[gestures.columns].values

In [425]:
#use stratified samplifing to split the data evenly
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, 
    stratify=y, 
    random_state=0
)

#find the dimensions
print(f'train: {X_train.shape, y_train.shape} \n test: {X_test.shape, y_test.shape}')

train: ((5223, 65), (5223,)) 
 test: ((1741, 65), (1741,))


### **Model Learning**

### Random Forest with Optuna

In [426]:
%%time
#begin hyperparameters selection
#create review object
study_optuna_rf = optuna.create_study(study_name='RandomForest',
                                       direction='maximize')

#search for the best combination
study_optuna_rf.optimize(optuna_rf,
                         n_trials=5)

[I 2024-10-08 00:51:55,473] A new study created in memory with name: RandomForest
[I 2024-10-08 00:52:05,303] Trial 0 finished with value: 0.8022218555793874 and parameters: {'n_estimators': 200, 'max_depth': 15, 'min_samples_leaf': 3, 'criterion': 'entropy', 'max_features': None}. Best is trial 0 with value: 0.8022218555793874.
[I 2024-10-08 00:52:11,399] Trial 1 finished with value: 0.6613057984564337 and parameters: {'n_estimators': 175, 'max_depth': 10, 'min_samples_leaf': 5, 'criterion': 'entropy', 'max_features': None}. Best is trial 0 with value: 0.8022218555793874.
[I 2024-10-08 00:52:12,115] Trial 2 finished with value: 0.6626473445892683 and parameters: {'n_estimators': 100, 'max_depth': 15, 'min_samples_leaf': 4, 'criterion': 'entropy', 'max_features': 'log2'}. Best is trial 0 with value: 0.8022218555793874.
[I 2024-10-08 00:52:13,235] Trial 3 finished with value: 0.707066124035271 and parameters: {'n_estimators': 150, 'max_depth': 20, 'min_samples_leaf': 3, 'criterion': 'en

CPU times: user 43.9 ms, sys: 24.2 ms, total: 68.1 ms
Wall time: 19 s


In [427]:
#find the best hyperparameters
print(f'Random Forest Hyperparameters: {study_optuna_rf.best_params}')

Random Forest Hyperparameters: {'n_estimators': 200, 'max_depth': 15, 'min_samples_leaf': 3, 'criterion': 'entropy', 'max_features': None}


In [428]:
#find the metrics for test data
model_opt_rf = ensemble.RandomForestClassifier(**study_optuna_rf.best_params,
                                               random_state=42,
                                               )

#set the start_time
start_time = time.time()

#model learning
model_opt_rf.fit(X_train, y_train)
model_opt_rf.fit(X_test, y_test)

#calculate the model learning time
model_opt_rf_time = round(time.time() - start_time, 2)

#make a prediction
y_train_pred_rf = model_opt_rf.predict(X_train)
y_test_pred_rf = model_opt_rf.predict(X_test)

In [429]:
#display the metrics
print('train data:')
print(f'f1_score: {metrics.f1_score(y_train, y_train_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_train, y_train):.3f}')
print()
print('test data:')
print(f'f1_score: {metrics.f1_score(y_test, y_test_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_test, y_test):.3f}')

train data:
f1_score: 0.702
accuracy: 0.702

test data:
f1_score: 0.953
accuracy: 0.953


In [430]:
print(classification_report(y_train,
                            y_train_pred_rf,
                            target_names=MAIN_GESTURES))

print(classification_report(y_test,
                            y_test_pred_rf,
                            target_names=MAIN_GESTURES))

              precision    recall  f1-score   support

     Neutral       0.68      0.96      0.80      3138
        Open       0.83      0.26      0.40       417
      Pistol       0.82      0.38      0.52       414
       Thumb       0.84      0.38      0.52       418
          OK       0.81      0.28      0.42       419
        Grab       0.81      0.28      0.42       417

    accuracy                           0.70      5223
   macro avg       0.80      0.42      0.51      5223
weighted avg       0.74      0.70      0.66      5223

              precision    recall  f1-score   support

     Neutral       0.93      1.00      0.96      1046
        Open       1.00      0.89      0.94       139
      Pistol       1.00      0.90      0.95       138
       Thumb       1.00      0.85      0.92       140
          OK       1.00      0.88      0.94       139
        Grab       0.99      0.88      0.94       139

    accuracy                           0.95      1741
   macro avg       0.99

In [431]:
#export the classification report into a dataframe
report_rf = classification_report(y_test,
                                  y_test_pred_rf,
                                  target_names=MAIN_GESTURES,
                                  output_dict=True)

test_rf = pd.DataFrame(report_rf)

In [432]:
#add model results to the ordered dict
review['palm_file'].append(current_file)
review['model_name'].append(model_opt_rf.__class__.__name__)
review['learning_time'].append(model_opt_rf_time)
review['hyperparameters'].append(study_optuna_rf.best_params)
review['F1-Score, train'].append(round(metrics.f1_score(y_train, y_train_pred_rf, average="micro"), 3))
review['F1-Score, test'].append(round(metrics.f1_score(y_test, y_test_pred_rf, average="micro"), 3))

review['F1-Score: Neutral'].append(test_rf.loc['f1-score'].iloc[0].round(3))
review['F1-Score: Open'].append(test_rf.loc['f1-score'].iloc[1].round(3))
review['F1-Score: Pistol'].append(test_rf.loc['f1-score'].iloc[2].round(3))
review['F1-Score: Thumb'].append(test_rf.loc['f1-score'].iloc[3].round(3))
review['F1-Score: OK'].append(test_rf.loc['f1-score'].iloc[4].round(3))
review['F1-Score: Grab'].append(test_rf.loc['f1-score'].iloc[5].round(3))

##### Palm File 25

In [433]:
gestures, current_file, palm_file = read_pilot(data=data_meta,
                                               file_number=next(n))

#### Protocol File

In [434]:
gestures_protocol = pd.read_csv(f'{"./data_csv/" + current_file}.protocol.csv', index_col=0)

#### Encoding

In [435]:
#FIT
le.fit(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

#TRANSFORM
gestures_protocol['gesture'] = le.transform(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

In [436]:
#set the gestures
MAIN_GESTURES = ['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']
MAIN_GESTURES

['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']

#### Predictive Feature

In [437]:
#lock the predictive feature
y_cmd = np.array([gestures_protocol['gesture'].loc[s] for s in gestures['SYNC'].values])

#### Shift Function

In [438]:
#apply the time interval shift function
y, summary = get_naive_centering(gestures, y_cmd)

In [439]:
#display
summary

['Оптимальные свдиги для концевых выборок:   12 и 1\n',
 'Accuracy/correlation на концевых выборках: 0.653; 0.579\n',
 'Размер оптимального сдвига (как среднего): 6']

### **Train-Test Split**

In [440]:
#select the features
X = gestures[gestures.columns].values

In [441]:
#use stratified samplifing to split the data evenly
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, 
    stratify=y, 
    random_state=0
)

#find the dimensions
print(f'train: {X_train.shape, y_train.shape} \n test: {X_test.shape, y_test.shape}')

train: ((8376, 65), (8376,)) 
 test: ((2793, 65), (2793,))


### **Model Learning**

### Random Forest with Optuna

In [442]:
%%time
#begin hyperparameters selection
#create review object
study_optuna_rf = optuna.create_study(study_name='RandomForest',
                                       direction='maximize')

#search for the best combination
study_optuna_rf.optimize(optuna_rf,
                         n_trials=5)

[I 2024-10-08 00:52:35,779] A new study created in memory with name: RandomForest
[I 2024-10-08 00:52:47,527] Trial 0 finished with value: 0.8724927866633421 and parameters: {'n_estimators': 200, 'max_depth': 20, 'min_samples_leaf': 5, 'criterion': 'entropy', 'max_features': None}. Best is trial 0 with value: 0.8724927866633421.
[I 2024-10-08 00:52:56,460] Trial 1 finished with value: 0.8206783742385924 and parameters: {'n_estimators': 175, 'max_depth': 10, 'min_samples_leaf': 3, 'criterion': 'entropy', 'max_features': None}. Best is trial 0 with value: 0.8724927866633421.
[I 2024-10-08 00:53:05,213] Trial 2 finished with value: 0.8678361414882627 and parameters: {'n_estimators': 150, 'max_depth': 15, 'min_samples_leaf': 5, 'criterion': 'entropy', 'max_features': None}. Best is trial 0 with value: 0.8724927866633421.
[I 2024-10-08 00:53:06,529] Trial 3 finished with value: 0.789278951305525 and parameters: {'n_estimators': 200, 'max_depth': 10, 'min_samples_leaf': 5, 'criterion': 'gini

CPU times: user 49.8 ms, sys: 30 ms, total: 79.8 ms
Wall time: 32.8 s


In [443]:
#find the best hyperparameters
print(f'Random Forest Hyperparameters: {study_optuna_rf.best_params}')

Random Forest Hyperparameters: {'n_estimators': 200, 'max_depth': 20, 'min_samples_leaf': 5, 'criterion': 'entropy', 'max_features': None}


In [444]:
#find the metrics for test data
model_opt_rf = ensemble.RandomForestClassifier(**study_optuna_rf.best_params,
                                               random_state=42,
                                               )

#set the start_time
start_time = time.time()

#model learning
model_opt_rf.fit(X_train, y_train)
model_opt_rf.fit(X_test, y_test)

#calculate the model learning time
model_opt_rf_time = round(time.time() - start_time, 2)

#make a prediction
y_train_pred_rf = model_opt_rf.predict(X_train)
y_test_pred_rf = model_opt_rf.predict(X_test)

In [445]:
#display the metrics
print('train data:')
print(f'f1_score: {metrics.f1_score(y_train, y_train_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_train, y_train):.3f}')
print()
print('test data:')
print(f'f1_score: {metrics.f1_score(y_test, y_test_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_test, y_test):.3f}')

train data:
f1_score: 0.820
accuracy: 0.820

test data:
f1_score: 0.952
accuracy: 0.952


In [446]:
print(classification_report(y_train,
                            y_train_pred_rf,
                            target_names=MAIN_GESTURES))

print(classification_report(y_test,
                            y_test_pred_rf,
                            target_names=MAIN_GESTURES))

              precision    recall  f1-score   support

     Neutral       0.83      0.90      0.86      4950
        Open       0.87      0.74      0.80       685
      Pistol       0.77      0.60      0.67       685
       Thumb       0.75      0.73      0.74       685
          OK       0.80      0.76      0.78       686
        Grab       0.81      0.71      0.76       685

    accuracy                           0.82      8376
   macro avg       0.81      0.74      0.77      8376
weighted avg       0.82      0.82      0.82      8376

              precision    recall  f1-score   support

     Neutral       0.95      0.97      0.96      1650
        Open       0.96      0.93      0.95       228
      Pistol       0.95      0.92      0.93       229
       Thumb       0.93      0.92      0.93       229
          OK       0.96      0.92      0.94       228
        Grab       0.97      0.91      0.94       229

    accuracy                           0.95      2793
   macro avg       0.95

In [447]:
#export the classification report into a dataframe
report_rf = classification_report(y_test,
                                  y_test_pred_rf,
                                  target_names=MAIN_GESTURES,
                                  output_dict=True)

test_rf = pd.DataFrame(report_rf)

In [448]:
#add model results to the ordered dict
review['palm_file'].append(current_file)
review['model_name'].append(model_opt_rf.__class__.__name__)
review['learning_time'].append(model_opt_rf_time)
review['hyperparameters'].append(study_optuna_rf.best_params)
review['F1-Score, train'].append(round(metrics.f1_score(y_train, y_train_pred_rf, average="micro"), 3))
review['F1-Score, test'].append(round(metrics.f1_score(y_test, y_test_pred_rf, average="micro"), 3))

review['F1-Score: Neutral'].append(test_rf.loc['f1-score'].iloc[0].round(3))
review['F1-Score: Open'].append(test_rf.loc['f1-score'].iloc[1].round(3))
review['F1-Score: Pistol'].append(test_rf.loc['f1-score'].iloc[2].round(3))
review['F1-Score: Thumb'].append(test_rf.loc['f1-score'].iloc[3].round(3))
review['F1-Score: OK'].append(test_rf.loc['f1-score'].iloc[4].round(3))
review['F1-Score: Grab'].append(test_rf.loc['f1-score'].iloc[5].round(3))

##### Palm File 26

In [449]:
gestures, current_file, palm_file = read_pilot(data=data_meta,
                                               file_number=next(n))

#### Protocol File

In [450]:
gestures_protocol = pd.read_csv(f'{"./data_csv/" + current_file}.protocol.csv', index_col=0)

#### Encoding

In [451]:
#FIT
le.fit(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

#TRANSFORM
gestures_protocol['gesture'] = le.transform(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

In [452]:
#set the gestures
MAIN_GESTURES = ['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']
MAIN_GESTURES

['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']

#### Predictive Feature

In [453]:
#lock the predictive feature
y_cmd = np.array([gestures_protocol['gesture'].loc[s] for s in gestures['SYNC'].values])

#### Shift Function

In [454]:
#apply the time interval shift function
y, summary = get_naive_centering(gestures, y_cmd)

In [455]:
#display
summary

['Оптимальные свдиги для концевых выборок:   1 и 1\n',
 'Accuracy/correlation на концевых выборках: 0.604; 0.578\n',
 'Размер оптимального сдвига (как среднего): 1']

### **Train-Test Split**

In [456]:
#select the features
X = gestures[gestures.columns].values

In [457]:
#use stratified samplifing to split the data evenly
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, 
    stratify=y, 
    random_state=0
)

#find the dimensions
print(f'train: {X_train.shape, y_train.shape} \n test: {X_test.shape, y_test.shape}')

train: ((8376, 65), (8376,)) 
 test: ((2792, 65), (2792,))


### **Model Learning**

### Random Forest with Optuna

In [458]:
%%time
#begin hyperparameters selection
#create review object
study_optuna_rf = optuna.create_study(study_name='RandomForest',
                                       direction='maximize')

#search for the best combination
study_optuna_rf.optimize(optuna_rf,
                         n_trials=5)

[I 2024-10-08 00:53:33,033] A new study created in memory with name: RandomForest
[I 2024-10-08 00:53:34,243] Trial 0 finished with value: 0.7919053182773482 and parameters: {'n_estimators': 175, 'max_depth': 10, 'min_samples_leaf': 4, 'criterion': 'entropy', 'max_features': 'log2'}. Best is trial 0 with value: 0.7919053182773482.
[I 2024-10-08 00:53:35,671] Trial 1 finished with value: 0.8809683325615361 and parameters: {'n_estimators': 200, 'max_depth': 15, 'min_samples_leaf': 5, 'criterion': 'gini', 'max_features': 'log2'}. Best is trial 1 with value: 0.8809683325615361.
[I 2024-10-08 00:53:40,965] Trial 2 finished with value: 0.9547523955401989 and parameters: {'n_estimators': 100, 'max_depth': 20, 'min_samples_leaf': 5, 'criterion': 'gini', 'max_features': None}. Best is trial 2 with value: 0.9547523955401989.
[I 2024-10-08 00:53:51,880] Trial 3 finished with value: 0.9567818188294803 and parameters: {'n_estimators': 200, 'max_depth': 15, 'min_samples_leaf': 3, 'criterion': 'gini'

CPU times: user 46.6 ms, sys: 26.3 ms, total: 72.9 ms
Wall time: 24.1 s


In [459]:
#find the best hyperparameters
print(f'Random Forest Hyperparameters: {study_optuna_rf.best_params}')

Random Forest Hyperparameters: {'n_estimators': 200, 'max_depth': 15, 'min_samples_leaf': 3, 'criterion': 'gini', 'max_features': None}


In [460]:
#find the metrics for test data
model_opt_rf = ensemble.RandomForestClassifier(**study_optuna_rf.best_params,
                                               random_state=42,
                                               )

#set the start_time
start_time = time.time()

#model learning
model_opt_rf.fit(X_train, y_train)
model_opt_rf.fit(X_test, y_test)

#calculate the model learning time
model_opt_rf_time = round(time.time() - start_time, 2)

#make a prediction
y_train_pred_rf = model_opt_rf.predict(X_train)
y_test_pred_rf = model_opt_rf.predict(X_test)

In [461]:
#display the metrics
print('train data:')
print(f'f1_score: {metrics.f1_score(y_train, y_train_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_train, y_train):.3f}')
print()
print('test data:')
print(f'f1_score: {metrics.f1_score(y_test, y_test_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_test, y_test):.3f}')

train data:
f1_score: 0.935
accuracy: 0.935

test data:
f1_score: 0.977
accuracy: 0.977


In [462]:
print(classification_report(y_train,
                            y_train_pred_rf,
                            target_names=MAIN_GESTURES))

print(classification_report(y_test,
                            y_test_pred_rf,
                            target_names=MAIN_GESTURES))

              precision    recall  f1-score   support

     Neutral       0.95      0.97      0.96      4953
        Open       0.97      0.88      0.92       686
      Pistol       0.90      0.86      0.88       685
       Thumb       0.91      0.87      0.89       684
          OK       0.89      0.88      0.89       685
        Grab       0.90      0.92      0.91       683

    accuracy                           0.94      8376
   macro avg       0.92      0.90      0.91      8376
weighted avg       0.94      0.94      0.93      8376

              precision    recall  f1-score   support

     Neutral       0.97      0.99      0.98      1651
        Open       0.99      0.97      0.98       229
      Pistol       0.98      0.93      0.96       229
       Thumb       0.99      0.94      0.96       228
          OK       1.00      0.97      0.98       228
        Grab       0.97      0.98      0.97       227

    accuracy                           0.98      2792
   macro avg       0.98

In [463]:
#export the classification report into a dataframe
report_rf = classification_report(y_test,
                                  y_test_pred_rf,
                                  target_names=MAIN_GESTURES,
                                  output_dict=True)

test_rf = pd.DataFrame(report_rf)

In [464]:
#add model results to the ordered dict
review['palm_file'].append(current_file)
review['model_name'].append(model_opt_rf.__class__.__name__)
review['learning_time'].append(model_opt_rf_time)
review['hyperparameters'].append(study_optuna_rf.best_params)
review['F1-Score, train'].append(round(metrics.f1_score(y_train, y_train_pred_rf, average="micro"), 3))
review['F1-Score, test'].append(round(metrics.f1_score(y_test, y_test_pred_rf, average="micro"), 3))

review['F1-Score: Neutral'].append(test_rf.loc['f1-score'].iloc[0].round(3))
review['F1-Score: Open'].append(test_rf.loc['f1-score'].iloc[1].round(3))
review['F1-Score: Pistol'].append(test_rf.loc['f1-score'].iloc[2].round(3))
review['F1-Score: Thumb'].append(test_rf.loc['f1-score'].iloc[3].round(3))
review['F1-Score: OK'].append(test_rf.loc['f1-score'].iloc[4].round(3))
review['F1-Score: Grab'].append(test_rf.loc['f1-score'].iloc[5].round(3))

##### Palm File 27

In [465]:
gestures, current_file, palm_file = read_pilot(data=data_meta,
                                               file_number=next(n))

#### Protocol File

In [466]:
gestures_protocol = pd.read_csv(f'{"./data_csv/" + current_file}.protocol.csv', index_col=0)

#### Encoding

In [467]:
#FIT
le.fit(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

#TRANSFORM
gestures_protocol['gesture'] = le.transform(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

In [468]:
#set the gestures
MAIN_GESTURES = ['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']
MAIN_GESTURES

['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']

#### Predictive Feature

In [469]:
#lock the predictive feature
y_cmd = np.array([gestures_protocol['gesture'].loc[s] for s in gestures['SYNC'].values])

#### Shift Function

In [470]:
#apply the time interval shift function
y, summary = get_naive_centering(gestures, y_cmd)

In [471]:
#display
summary

['Оптимальные свдиги для концевых выборок:   1 и 1\n',
 'Accuracy/correlation на концевых выборках: 0.604; 0.579\n',
 'Размер оптимального сдвига (как среднего): 1']

### **Train-Test Split**

In [472]:
#select the features
X = gestures[gestures.columns].values

In [473]:
#use stratified samplifing to split the data evenly
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, 
    stratify=y, 
    random_state=0
)

#find the dimensions
print(f'train: {X_train.shape, y_train.shape} \n test: {X_test.shape, y_test.shape}')

train: ((8376, 65), (8376,)) 
 test: ((2792, 65), (2792,))


### **Model Learning**

### Random Forest with Optuna

In [474]:
%%time
#begin hyperparameters selection
#create review object
study_optuna_rf = optuna.create_study(study_name='RandomForest',
                                       direction='maximize')

#search for the best combination
study_optuna_rf.optimize(optuna_rf,
                         n_trials=5)

[I 2024-10-08 00:54:19,968] A new study created in memory with name: RandomForest
[I 2024-10-08 00:54:21,153] Trial 0 finished with value: 0.8223493036013252 and parameters: {'n_estimators': 150, 'max_depth': 10, 'min_samples_leaf': 4, 'criterion': 'gini', 'max_features': 'sqrt'}. Best is trial 0 with value: 0.8223493036013252.
[I 2024-10-08 00:54:22,766] Trial 1 finished with value: 0.9361271684536744 and parameters: {'n_estimators': 175, 'max_depth': 15, 'min_samples_leaf': 3, 'criterion': 'gini', 'max_features': 'sqrt'}. Best is trial 1 with value: 0.9361271684536744.
[I 2024-10-08 00:54:27,979] Trial 2 finished with value: 0.878701171944573 and parameters: {'n_estimators': 125, 'max_depth': 10, 'min_samples_leaf': 5, 'criterion': 'entropy', 'max_features': None}. Best is trial 1 with value: 0.9361271684536744.
[I 2024-10-08 00:54:29,914] Trial 3 finished with value: 0.9509309300751612 and parameters: {'n_estimators': 200, 'max_depth': 15, 'min_samples_leaf': 5, 'criterion': 'entrop

CPU times: user 47 ms, sys: 30.4 ms, total: 77.4 ms
Wall time: 18.4 s


In [475]:
#find the best hyperparameters
print(f'Random Forest Hyperparameters: {study_optuna_rf.best_params}')

Random Forest Hyperparameters: {'n_estimators': 175, 'max_depth': 15, 'min_samples_leaf': 4, 'criterion': 'entropy', 'max_features': None}


In [476]:
#find the metrics for test data
model_opt_rf = ensemble.RandomForestClassifier(**study_optuna_rf.best_params,
                                               random_state=42,
                                               )

#set the start_time
start_time = time.time()

#model learning
model_opt_rf.fit(X_train, y_train)
model_opt_rf.fit(X_test, y_test)

#calculate the model learning time
model_opt_rf_time = round(time.time() - start_time, 2)

#make a prediction
y_train_pred_rf = model_opt_rf.predict(X_train)
y_test_pred_rf = model_opt_rf.predict(X_test)

In [477]:
#display the metrics
print('train data:')
print(f'f1_score: {metrics.f1_score(y_train, y_train_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_train, y_train):.3f}')
print()
print('test data:')
print(f'f1_score: {metrics.f1_score(y_test, y_test_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_test, y_test):.3f}')

train data:
f1_score: 0.928
accuracy: 0.928

test data:
f1_score: 0.975
accuracy: 0.975


In [478]:
print(classification_report(y_train,
                            y_train_pred_rf,
                            target_names=MAIN_GESTURES))

print(classification_report(y_test,
                            y_test_pred_rf,
                            target_names=MAIN_GESTURES))

              precision    recall  f1-score   support

     Neutral       0.95      0.97      0.96      4952
        Open       0.93      0.91      0.92       685
      Pistol       0.90      0.82      0.86       686
       Thumb       0.91      0.86      0.88       683
          OK       0.85      0.92      0.89       684
        Grab       0.92      0.83      0.87       686

    accuracy                           0.93      8376
   macro avg       0.91      0.89      0.90      8376
weighted avg       0.93      0.93      0.93      8376

              precision    recall  f1-score   support

     Neutral       0.98      0.98      0.98      1651
        Open       0.98      0.98      0.98       228
      Pistol       0.97      0.96      0.96       229
       Thumb       0.97      0.93      0.95       228
          OK       0.98      0.99      0.98       228
        Grab       0.97      0.96      0.96       228

    accuracy                           0.97      2792
   macro avg       0.97

In [479]:
#export the classification report into a dataframe
report_rf = classification_report(y_test,
                                  y_test_pred_rf,
                                  target_names=MAIN_GESTURES,
                                  output_dict=True)

test_rf = pd.DataFrame(report_rf)

In [480]:
#add model results to the ordered dict
review['palm_file'].append(current_file)
review['model_name'].append(model_opt_rf.__class__.__name__)
review['learning_time'].append(model_opt_rf_time)
review['hyperparameters'].append(study_optuna_rf.best_params)
review['F1-Score, train'].append(round(metrics.f1_score(y_train, y_train_pred_rf, average="micro"), 3))
review['F1-Score, test'].append(round(metrics.f1_score(y_test, y_test_pred_rf, average="micro"), 3))

review['F1-Score: Neutral'].append(test_rf.loc['f1-score'].iloc[0].round(3))
review['F1-Score: Open'].append(test_rf.loc['f1-score'].iloc[1].round(3))
review['F1-Score: Pistol'].append(test_rf.loc['f1-score'].iloc[2].round(3))
review['F1-Score: Thumb'].append(test_rf.loc['f1-score'].iloc[3].round(3))
review['F1-Score: OK'].append(test_rf.loc['f1-score'].iloc[4].round(3))
review['F1-Score: Grab'].append(test_rf.loc['f1-score'].iloc[5].round(3))

##### Palm File 28

In [481]:
gestures, current_file, palm_file = read_pilot(data=data_meta,
                                               file_number=next(n))

#### Protocol File

In [482]:
gestures_protocol = pd.read_csv(f'{"./data_csv/" + current_file}.protocol.csv', index_col=0)

#### Encoding

In [483]:
#FIT
le.fit(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

#TRANSFORM
gestures_protocol['gesture'] = le.transform(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

In [484]:
#set the gestures
MAIN_GESTURES = ['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']
MAIN_GESTURES

['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']

#### Predictive Feature

In [485]:
#lock the predictive feature
y_cmd = np.array([gestures_protocol['gesture'].loc[s] for s in gestures['SYNC'].values])

#### Shift Function

In [486]:
#apply the time interval shift function
y, summary = get_naive_centering(gestures, y_cmd)

In [487]:
#display
summary

['Оптимальные свдиги для концевых выборок:   1 и 1\n',
 'Accuracy/correlation на концевых выборках: 0.605; 0.579\n',
 'Размер оптимального сдвига (как среднего): 1']

### **Train-Test Split**

In [488]:
#select the features
X = gestures[gestures.columns].values

In [489]:
#use stratified samplifing to split the data evenly
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, 
    stratify=y, 
    random_state=0
)

#find the dimensions
print(f'train: {X_train.shape, y_train.shape} \n test: {X_test.shape, y_test.shape}')

train: ((8376, 65), (8376,)) 
 test: ((2793, 65), (2793,))


### **Model Learning**

### Random Forest with Optuna

In [490]:
%%time
#begin hyperparameters selection
#create review object
study_optuna_rf = optuna.create_study(study_name='RandomForest',
                                       direction='maximize')

#search for the best combination
study_optuna_rf.optimize(optuna_rf,
                         n_trials=5)

[I 2024-10-08 00:54:57,381] A new study created in memory with name: RandomForest
[I 2024-10-08 00:55:01,801] Trial 0 finished with value: 0.9582137997364015 and parameters: {'n_estimators': 100, 'max_depth': 15, 'min_samples_leaf': 5, 'criterion': 'entropy', 'max_features': None}. Best is trial 0 with value: 0.9582137997364015.
[I 2024-10-08 00:55:02,882] Trial 1 finished with value: 0.8431228582623873 and parameters: {'n_estimators': 150, 'max_depth': 10, 'min_samples_leaf': 5, 'criterion': 'entropy', 'max_features': 'log2'}. Best is trial 0 with value: 0.9582137997364015.
[I 2024-10-08 00:55:03,767] Trial 2 finished with value: 0.9389915577245039 and parameters: {'n_estimators': 100, 'max_depth': 15, 'min_samples_leaf': 4, 'criterion': 'entropy', 'max_features': 'log2'}. Best is trial 0 with value: 0.9582137997364015.
[I 2024-10-08 00:55:05,017] Trial 3 finished with value: 0.8464657143874896 and parameters: {'n_estimators': 175, 'max_depth': 10, 'min_samples_leaf': 4, 'criterion': 

CPU times: user 44.4 ms, sys: 25.2 ms, total: 69.5 ms
Wall time: 17.9 s


In [491]:
#find the best hyperparameters
print(f'Random Forest Hyperparameters: {study_optuna_rf.best_params}')

Random Forest Hyperparameters: {'n_estimators': 200, 'max_depth': 15, 'min_samples_leaf': 3, 'criterion': 'gini', 'max_features': None}


In [492]:
#find the metrics for test data
model_opt_rf = ensemble.RandomForestClassifier(**study_optuna_rf.best_params,
                                               random_state=42,
                                               )

#set the start_time
start_time = time.time()

#model learning
model_opt_rf.fit(X_train, y_train)
model_opt_rf.fit(X_test, y_test)

#calculate the model learning time
model_opt_rf_time = round(time.time() - start_time, 2)

#make a prediction
y_train_pred_rf = model_opt_rf.predict(X_train)
y_test_pred_rf = model_opt_rf.predict(X_test)

In [493]:
#display the metrics
print('train data:')
print(f'f1_score: {metrics.f1_score(y_train, y_train_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_train, y_train):.3f}')
print()
print('test data:')
print(f'f1_score: {metrics.f1_score(y_test, y_test_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_test, y_test):.3f}')

train data:
f1_score: 0.938
accuracy: 0.938

test data:
f1_score: 0.977
accuracy: 0.977


In [494]:
print(classification_report(y_train,
                            y_train_pred_rf,
                            target_names=MAIN_GESTURES))

print(classification_report(y_test,
                            y_test_pred_rf,
                            target_names=MAIN_GESTURES))

              precision    recall  f1-score   support

     Neutral       0.95      0.97      0.96      4949
        Open       0.94      0.92      0.93       685
      Pistol       0.90      0.87      0.88       685
       Thumb       0.93      0.83      0.88       685
          OK       0.89      0.93      0.91       687
        Grab       0.95      0.88      0.91       685

    accuracy                           0.94      8376
   macro avg       0.93      0.90      0.91      8376
weighted avg       0.94      0.94      0.94      8376

              precision    recall  f1-score   support

     Neutral       0.98      0.99      0.98      1651
        Open       0.97      0.97      0.97       228
      Pistol       0.99      0.96      0.98       228
       Thumb       0.98      0.96      0.97       228
          OK       0.96      0.96      0.96       229
        Grab       0.97      0.97      0.97       229

    accuracy                           0.98      2793
   macro avg       0.98

In [495]:
#export the classification report into a dataframe
report_rf = classification_report(y_test,
                                  y_test_pred_rf,
                                  target_names=MAIN_GESTURES,
                                  output_dict=True)

test_rf = pd.DataFrame(report_rf)

In [496]:
#add model results to the ordered dict
review['palm_file'].append(current_file)
review['model_name'].append(model_opt_rf.__class__.__name__)
review['learning_time'].append(model_opt_rf_time)
review['hyperparameters'].append(study_optuna_rf.best_params)
review['F1-Score, train'].append(round(metrics.f1_score(y_train, y_train_pred_rf, average="micro"), 3))
review['F1-Score, test'].append(round(metrics.f1_score(y_test, y_test_pred_rf, average="micro"), 3))

review['F1-Score: Neutral'].append(test_rf.loc['f1-score'].iloc[0].round(3))
review['F1-Score: Open'].append(test_rf.loc['f1-score'].iloc[1].round(3))
review['F1-Score: Pistol'].append(test_rf.loc['f1-score'].iloc[2].round(3))
review['F1-Score: Thumb'].append(test_rf.loc['f1-score'].iloc[3].round(3))
review['F1-Score: OK'].append(test_rf.loc['f1-score'].iloc[4].round(3))
review['F1-Score: Grab'].append(test_rf.loc['f1-score'].iloc[5].round(3))

##### Palm File 29

In [497]:
gestures, current_file, palm_file = read_pilot(data=data_meta,
                                               file_number=next(n))

#### Protocol File

In [498]:
gestures_protocol = pd.read_csv(f'{"./data_csv/" + current_file}.protocol.csv', index_col=0)

#### Encoding

In [499]:
#FIT
le.fit(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

#TRANSFORM
gestures_protocol['gesture'] = le.transform(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

In [500]:
#set the gestures
MAIN_GESTURES = ['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']
MAIN_GESTURES

['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']

#### Predictive Feature

In [501]:
#lock the predictive feature
y_cmd = np.array([gestures_protocol['gesture'].loc[s] for s in gestures['SYNC'].values])

#### Shift Function

In [502]:
#apply the time interval shift function
y, summary = get_naive_centering(gestures, y_cmd)

In [503]:
#display
summary

['Оптимальные свдиги для концевых выборок:   1 и 1\n',
 'Accuracy/correlation на концевых выборках: 0.604; 0.58\n',
 'Размер оптимального сдвига (как среднего): 1']

### **Train-Test Split**

In [504]:
#select the features
X = gestures[gestures.columns].values

In [505]:
#use stratified samplifing to split the data evenly
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, 
    stratify=y, 
    random_state=0
)

#find the dimensions
print(f'train: {X_train.shape, y_train.shape} \n test: {X_test.shape, y_test.shape}')

train: ((8379, 65), (8379,)) 
 test: ((2794, 65), (2794,))


### **Model Learning**

### Random Forest with Optuna

In [506]:
%%time
#begin hyperparameters selection
#create review object
study_optuna_rf = optuna.create_study(study_name='RandomForest',
                                       direction='maximize')

#search for the best combination
study_optuna_rf.optimize(optuna_rf,
                         n_trials=5)

[I 2024-10-08 00:55:37,671] A new study created in memory with name: RandomForest
[I 2024-10-08 00:55:43,967] Trial 0 finished with value: 0.9585863285006946 and parameters: {'n_estimators': 100, 'max_depth': 15, 'min_samples_leaf': 5, 'criterion': 'gini', 'max_features': None}. Best is trial 0 with value: 0.9585863285006946.
[I 2024-10-08 00:55:45,241] Trial 1 finished with value: 0.8672854344031631 and parameters: {'n_estimators': 150, 'max_depth': 10, 'min_samples_leaf': 4, 'criterion': 'gini', 'max_features': 'log2'}. Best is trial 0 with value: 0.9585863285006946.
[I 2024-10-08 00:55:47,428] Trial 2 finished with value: 0.9657474441634312 and parameters: {'n_estimators': 175, 'max_depth': 20, 'min_samples_leaf': 4, 'criterion': 'entropy', 'max_features': 'sqrt'}. Best is trial 2 with value: 0.9657474441634312.
[I 2024-10-08 00:55:48,637] Trial 3 finished with value: 0.9601377123926904 and parameters: {'n_estimators': 125, 'max_depth': 20, 'min_samples_leaf': 4, 'criterion': 'entro

CPU times: user 56 ms, sys: 29.6 ms, total: 85.6 ms
Wall time: 18 s


In [507]:
#find the best hyperparameters
print(f'Random Forest Hyperparameters: {study_optuna_rf.best_params}')

Random Forest Hyperparameters: {'n_estimators': 175, 'max_depth': 20, 'min_samples_leaf': 4, 'criterion': 'entropy', 'max_features': 'sqrt'}


In [508]:
#find the metrics for test data
model_opt_rf = ensemble.RandomForestClassifier(**study_optuna_rf.best_params,
                                               random_state=42,
                                               )

#set the start_time
start_time = time.time()

#model learning
model_opt_rf.fit(X_train, y_train)
model_opt_rf.fit(X_test, y_test)

#calculate the model learning time
model_opt_rf_time = round(time.time() - start_time, 2)

#make a prediction
y_train_pred_rf = model_opt_rf.predict(X_train)
y_test_pred_rf = model_opt_rf.predict(X_test)

In [509]:
#display the metrics
print('train data:')
print(f'f1_score: {metrics.f1_score(y_train, y_train_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_train, y_train):.3f}')
print()
print('test data:')
print(f'f1_score: {metrics.f1_score(y_test, y_test_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_test, y_test):.3f}')

train data:
f1_score: 0.936
accuracy: 0.936

test data:
f1_score: 0.975
accuracy: 0.975


In [510]:
print(classification_report(y_train,
                            y_train_pred_rf,
                            target_names=MAIN_GESTURES))

print(classification_report(y_test,
                            y_test_pred_rf,
                            target_names=MAIN_GESTURES))

              precision    recall  f1-score   support

     Neutral       0.93      0.98      0.95      4950
        Open       0.97      0.79      0.87       687
      Pistol       0.94      0.93      0.94       686
       Thumb       0.96      0.82      0.88       684
          OK       0.96      0.93      0.94       685
        Grab       0.95      0.90      0.92       687

    accuracy                           0.94      8379
   macro avg       0.95      0.89      0.92      8379
weighted avg       0.94      0.94      0.94      8379

              precision    recall  f1-score   support

     Neutral       0.97      0.99      0.98      1651
        Open       1.00      0.95      0.98       229
      Pistol       0.96      0.96      0.96       228
       Thumb       0.99      0.93      0.96       228
          OK       0.97      0.97      0.97       229
        Grab       0.98      0.96      0.97       229

    accuracy                           0.97      2794
   macro avg       0.98

In [511]:
#export the classification report into a dataframe
report_rf = classification_report(y_test,
                                  y_test_pred_rf,
                                  target_names=MAIN_GESTURES,
                                  output_dict=True)

test_rf = pd.DataFrame(report_rf)

In [512]:
#add model results to the ordered dict
review['palm_file'].append(current_file)
review['model_name'].append(model_opt_rf.__class__.__name__)
review['learning_time'].append(model_opt_rf_time)
review['hyperparameters'].append(study_optuna_rf.best_params)
review['F1-Score, train'].append(round(metrics.f1_score(y_train, y_train_pred_rf, average="micro"), 3))
review['F1-Score, test'].append(round(metrics.f1_score(y_test, y_test_pred_rf, average="micro"), 3))

review['F1-Score: Neutral'].append(test_rf.loc['f1-score'].iloc[0].round(3))
review['F1-Score: Open'].append(test_rf.loc['f1-score'].iloc[1].round(3))
review['F1-Score: Pistol'].append(test_rf.loc['f1-score'].iloc[2].round(3))
review['F1-Score: Thumb'].append(test_rf.loc['f1-score'].iloc[3].round(3))
review['F1-Score: OK'].append(test_rf.loc['f1-score'].iloc[4].round(3))
review['F1-Score: Grab'].append(test_rf.loc['f1-score'].iloc[5].round(3))

##### Palm File 30

In [513]:
gestures, current_file, palm_file = read_pilot(data=data_meta,
                                               file_number=next(n))

#### Protocol File

In [514]:
gestures_protocol = pd.read_csv(f'{"./data_csv/" + current_file}.protocol.csv', index_col=0)

#### Encoding

In [515]:
#FIT
le.fit(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

#TRANSFORM
gestures_protocol['gesture'] = le.transform(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

In [516]:
#set the gestures
MAIN_GESTURES = ['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']
MAIN_GESTURES

['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']

#### Predictive Feature

In [517]:
#lock the predictive feature
y_cmd = np.array([gestures_protocol['gesture'].loc[s] for s in gestures['SYNC'].values])

#### Shift Function

In [518]:
#apply the time interval shift function
y, summary = get_naive_centering(gestures, y_cmd)

In [519]:
#display
summary

['Оптимальные свдиги для концевых выборок:   1 и 1\n',
 'Accuracy/correlation на концевых выборках: 0.603; 0.586\n',
 'Размер оптимального сдвига (как среднего): 1']

### **Train-Test Split**

In [520]:
#select the features
X = gestures[gestures.columns].values

In [521]:
#use stratified samplifing to split the data evenly
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, 
    stratify=y, 
    random_state=0
)

#find the dimensions
print(f'train: {X_train.shape, y_train.shape} \n test: {X_test.shape, y_test.shape}')

train: ((8386, 65), (8386,)) 
 test: ((2796, 65), (2796,))


### **Model Learning**

### Random Forest with Optuna

In [522]:
%%time
#begin hyperparameters selection
#create review object
study_optuna_rf = optuna.create_study(study_name='RandomForest',
                                       direction='maximize')

#search for the best combination
study_optuna_rf.optimize(optuna_rf,
                         n_trials=5)

[I 2024-10-08 00:56:03,936] A new study created in memory with name: RandomForest
[I 2024-10-08 00:56:09,504] Trial 0 finished with value: 0.9255915587955391 and parameters: {'n_estimators': 125, 'max_depth': 10, 'min_samples_leaf': 5, 'criterion': 'gini', 'max_features': None}. Best is trial 0 with value: 0.9255915587955391.
[I 2024-10-08 00:56:11,372] Trial 1 finished with value: 0.9585022917506217 and parameters: {'n_estimators': 200, 'max_depth': 15, 'min_samples_leaf': 5, 'criterion': 'gini', 'max_features': 'sqrt'}. Best is trial 1 with value: 0.9585022917506217.
[I 2024-10-08 00:56:17,234] Trial 2 finished with value: 0.9577870836096298 and parameters: {'n_estimators': 100, 'max_depth': 15, 'min_samples_leaf': 3, 'criterion': 'gini', 'max_features': None}. Best is trial 1 with value: 0.9585022917506217.
[I 2024-10-08 00:56:23,264] Trial 3 finished with value: 0.9425241452932227 and parameters: {'n_estimators': 125, 'max_depth': 10, 'min_samples_leaf': 5, 'criterion': 'entropy', 

CPU times: user 46.4 ms, sys: 26.1 ms, total: 72.5 ms
Wall time: 20.2 s


In [523]:
#find the best hyperparameters
print(f'Random Forest Hyperparameters: {study_optuna_rf.best_params}')

Random Forest Hyperparameters: {'n_estimators': 200, 'max_depth': 15, 'min_samples_leaf': 5, 'criterion': 'gini', 'max_features': 'sqrt'}


In [524]:
#find the metrics for test data
model_opt_rf = ensemble.RandomForestClassifier(**study_optuna_rf.best_params,
                                               random_state=42,
                                               )

#set the start_time
start_time = time.time()

#model learning
model_opt_rf.fit(X_train, y_train)
model_opt_rf.fit(X_test, y_test)

#calculate the model learning time
model_opt_rf_time = round(time.time() - start_time, 2)

#make a prediction
y_train_pred_rf = model_opt_rf.predict(X_train)
y_test_pred_rf = model_opt_rf.predict(X_test)

In [525]:
#display the metrics
print('train data:')
print(f'f1_score: {metrics.f1_score(y_train, y_train_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_train, y_train):.3f}')
print()
print('test data:')
print(f'f1_score: {metrics.f1_score(y_test, y_test_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_test, y_test):.3f}')

train data:
f1_score: 0.909
accuracy: 0.909

test data:
f1_score: 0.970
accuracy: 0.970


In [526]:
print(classification_report(y_train,
                            y_train_pred_rf,
                            target_names=MAIN_GESTURES))

print(classification_report(y_test,
                            y_test_pred_rf,
                            target_names=MAIN_GESTURES))

              precision    recall  f1-score   support

     Neutral       0.90      0.98      0.94      4955
        Open       0.96      0.73      0.83       685
      Pistol       0.88      0.73      0.80       686
       Thumb       0.93      0.83      0.88       687
          OK       0.90      0.94      0.92       687
        Grab       0.93      0.83      0.88       686

    accuracy                           0.91      8386
   macro avg       0.92      0.84      0.87      8386
weighted avg       0.91      0.91      0.91      8386

              precision    recall  f1-score   support

     Neutral       0.97      0.99      0.98      1652
        Open       1.00      0.92      0.96       228
      Pistol       0.97      0.94      0.96       229
       Thumb       0.95      0.96      0.95       229
          OK       0.97      0.98      0.97       229
        Grab       0.98      0.92      0.95       229

    accuracy                           0.97      2796
   macro avg       0.97

In [527]:
#export the classification report into a dataframe
report_rf = classification_report(y_test,
                                  y_test_pred_rf,
                                  target_names=MAIN_GESTURES,
                                  output_dict=True)

test_rf = pd.DataFrame(report_rf)

In [528]:
#add model results to the ordered dict
review['palm_file'].append(current_file)
review['model_name'].append(model_opt_rf.__class__.__name__)
review['learning_time'].append(model_opt_rf_time)
review['hyperparameters'].append(study_optuna_rf.best_params)
review['F1-Score, train'].append(round(metrics.f1_score(y_train, y_train_pred_rf, average="micro"), 3))
review['F1-Score, test'].append(round(metrics.f1_score(y_test, y_test_pred_rf, average="micro"), 3))

review['F1-Score: Neutral'].append(test_rf.loc['f1-score'].iloc[0].round(3))
review['F1-Score: Open'].append(test_rf.loc['f1-score'].iloc[1].round(3))
review['F1-Score: Pistol'].append(test_rf.loc['f1-score'].iloc[2].round(3))
review['F1-Score: Thumb'].append(test_rf.loc['f1-score'].iloc[3].round(3))
review['F1-Score: OK'].append(test_rf.loc['f1-score'].iloc[4].round(3))
review['F1-Score: Grab'].append(test_rf.loc['f1-score'].iloc[5].round(3))

##### Palm File 31

In [529]:
gestures, current_file, palm_file = read_pilot(data=data_meta,
                                               file_number=next(n))

#### Protocol File

In [530]:
gestures_protocol = pd.read_csv(f'{"./data_csv/" + current_file}.protocol.csv', index_col=0)

#### Encoding

In [531]:
#FIT
le.fit(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

#TRANSFORM
gestures_protocol['gesture'] = le.transform(
    gestures_protocol[[
        "Thumb", "Index", "Middle", "Ring", "Pinky",
        'Thumb_stretch', 'Index_stretch', 'Middle_stretch', 'Ring_stretch', 'Pinky_stretch'
    ]]
    .apply(lambda row: str(tuple(row)), axis=1)
)

In [532]:
#set the gestures
MAIN_GESTURES = ['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']
MAIN_GESTURES

['Neutral', 'Open', 'Pistol', 'Thumb', 'OK', 'Grab']

#### Predictive Feature

In [533]:
#lock the predictive feature
y_cmd = np.array([gestures_protocol['gesture'].loc[s] for s in gestures['SYNC'].values])

#### Shift Function

In [534]:
#apply the time interval shift function
y, summary = get_naive_centering(gestures, y_cmd)

In [535]:
#display
summary

['Оптимальные свдиги для концевых выборок:   1 и 1\n',
 'Accuracy/correlation на концевых выборках: 0.603; 0.581\n',
 'Размер оптимального сдвига (как среднего): 1']

### **Train-Test Split**

In [536]:
#select the features
X = gestures[gestures.columns].values

In [537]:
#use stratified samplifing to split the data evenly
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, 
    stratify=y, 
    random_state=0
)

#find the dimensions
print(f'train: {X_train.shape, y_train.shape} \n test: {X_test.shape, y_test.shape}')

train: ((8380, 65), (8380,)) 
 test: ((2794, 65), (2794,))


### **Model Learning**

### Random Forest with Optuna

In [538]:
%%time
#begin hyperparameters selection
#create review object
study_optuna_rf = optuna.create_study(study_name='RandomForest',
                                       direction='maximize')

#search for the best combination
study_optuna_rf.optimize(optuna_rf,
                         n_trials=5)

[I 2024-10-08 00:56:33,423] A new study created in memory with name: RandomForest
[I 2024-10-08 00:56:35,129] Trial 0 finished with value: 0.95381861575179 and parameters: {'n_estimators': 175, 'max_depth': 20, 'min_samples_leaf': 5, 'criterion': 'gini', 'max_features': 'log2'}. Best is trial 0 with value: 0.95381861575179.
[I 2024-10-08 00:56:37,273] Trial 1 finished with value: 0.966109785202864 and parameters: {'n_estimators': 175, 'max_depth': 20, 'min_samples_leaf': 3, 'criterion': 'gini', 'max_features': 'sqrt'}. Best is trial 1 with value: 0.966109785202864.
[I 2024-10-08 00:56:45,752] Trial 2 finished with value: 0.9594272076372314 and parameters: {'n_estimators': 150, 'max_depth': 20, 'min_samples_leaf': 5, 'criterion': 'entropy', 'max_features': None}. Best is trial 1 with value: 0.966109785202864.
[I 2024-10-08 00:56:52,945] Trial 3 finished with value: 0.9113365155131264 and parameters: {'n_estimators': 150, 'max_depth': 10, 'min_samples_leaf': 3, 'criterion': 'entropy', 'm

CPU times: user 57 ms, sys: 31.9 ms, total: 88.9 ms
Wall time: 20.4 s


In [539]:
#find the best hyperparameters
print(f'Random Forest Hyperparameters: {study_optuna_rf.best_params}')

Random Forest Hyperparameters: {'n_estimators': 175, 'max_depth': 20, 'min_samples_leaf': 3, 'criterion': 'gini', 'max_features': 'sqrt'}


In [540]:
#find the metrics for test data
model_opt_rf = ensemble.RandomForestClassifier(**study_optuna_rf.best_params,
                                               random_state=42,
                                               )

#set the start_time
start_time = time.time()

#model learning
model_opt_rf.fit(X_train, y_train)
model_opt_rf.fit(X_test, y_test)

#calculate the model learning time
model_opt_rf_time = round(time.time() - start_time, 2)

#make a prediction
y_train_pred_rf = model_opt_rf.predict(X_train)
y_test_pred_rf = model_opt_rf.predict(X_test)

In [541]:
#display the metrics
print('train data:')
print(f'f1_score: {metrics.f1_score(y_train, y_train_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_train, y_train):.3f}')
print()
print('test data:')
print(f'f1_score: {metrics.f1_score(y_test, y_test_pred_rf, average="micro"):.3f}')
print(f'accuracy: {model_opt_rf.score(X_test, y_test):.3f}')

train data:
f1_score: 0.931
accuracy: 0.931

test data:
f1_score: 0.977
accuracy: 0.977


In [542]:
print(classification_report(y_train,
                            y_train_pred_rf,
                            target_names=MAIN_GESTURES))

print(classification_report(y_test,
                            y_test_pred_rf,
                            target_names=MAIN_GESTURES))

              precision    recall  f1-score   support

     Neutral       0.92      0.98      0.95      4953
        Open       0.97      0.86      0.91       686
      Pistol       0.95      0.83      0.89       685
       Thumb       0.93      0.82      0.87       684
          OK       0.95      0.89      0.92       686
        Grab       0.95      0.90      0.93       686

    accuracy                           0.93      8380
   macro avg       0.95      0.88      0.91      8380
weighted avg       0.93      0.93      0.93      8380

              precision    recall  f1-score   support

     Neutral       0.97      0.99      0.98      1651
        Open       0.99      0.96      0.97       229
      Pistol       0.98      0.95      0.97       229
       Thumb       0.99      0.96      0.98       228
          OK       0.97      0.97      0.97       228
        Grab       0.98      0.97      0.97       229

    accuracy                           0.98      2794
   macro avg       0.98

In [543]:
#export the classification report into a dataframe
report_rf = classification_report(y_test,
                                  y_test_pred_rf,
                                  target_names=MAIN_GESTURES,
                                  output_dict=True)

test_rf = pd.DataFrame(report_rf)

In [544]:
#add model results to the ordered dict
review['palm_file'].append(current_file)
review['model_name'].append(model_opt_rf.__class__.__name__)
review['learning_time'].append(model_opt_rf_time)
review['hyperparameters'].append(study_optuna_rf.best_params)
review['F1-Score, train'].append(round(metrics.f1_score(y_train, y_train_pred_rf, average="micro"), 3))
review['F1-Score, test'].append(round(metrics.f1_score(y_test, y_test_pred_rf, average="micro"), 3))

review['F1-Score: Neutral'].append(test_rf.loc['f1-score'].iloc[0].round(3))
review['F1-Score: Open'].append(test_rf.loc['f1-score'].iloc[1].round(3))
review['F1-Score: Pistol'].append(test_rf.loc['f1-score'].iloc[2].round(3))
review['F1-Score: Thumb'].append(test_rf.loc['f1-score'].iloc[3].round(3))
review['F1-Score: OK'].append(test_rf.loc['f1-score'].iloc[4].round(3))
review['F1-Score: Grab'].append(test_rf.loc['f1-score'].iloc[5].round(3))

### SUMMARY

In [545]:
#write the results to a dataframe
review_summary = pd.DataFrame(review)
review_summary

Unnamed: 0,palm_file,model_name,learning_time,hyperparameters,"F1-Score, train","F1-Score, test",F1-Score: Neutral,F1-Score: Open,F1-Score: Pistol,F1-Score: Thumb,F1-Score: OK,F1-Score: Grab
0,2023-05-15_16-16-08.palm,RandomForestClassifier,9.69,"{'n_estimators': 125, 'max_depth': 20, 'min_sa...",0.947,0.987,0.992,0.98,0.983,0.971,0.984,0.979
1,2023-05-15_17-12-24.palm,RandomForestClassifier,90.39,"{'n_estimators': 150, 'max_depth': 20, 'min_sa...",0.827,0.966,0.975,0.983,0.945,0.915,0.938,0.958
2,2023-06-05_16-12-38.palm,RandomForestClassifier,8.59,"{'n_estimators': 125, 'max_depth': 20, 'min_sa...",0.929,0.983,0.987,0.985,0.977,0.972,0.976,0.974
3,2023-06-05_17-53-01.palm,RandomForestClassifier,5.87,"{'n_estimators': 100, 'max_depth': 15, 'min_sa...",0.919,0.974,0.981,0.971,0.963,0.97,0.954,0.962
4,2023-06-20_14-43-11.palm,RandomForestClassifier,10.16,"{'n_estimators': 175, 'max_depth': 15, 'min_sa...",0.92,0.971,0.98,0.969,0.946,0.959,0.948,0.963
5,2023-06-20_13-30-15.palm,RandomForestClassifier,8.15,"{'n_estimators': 175, 'max_depth': 20, 'min_sa...",0.932,0.985,0.987,0.991,0.98,0.972,0.981,0.98
6,2023-06-20_12-34-17.palm,RandomForestClassifier,11.83,"{'n_estimators': 200, 'max_depth': 20, 'min_sa...",0.927,0.979,0.984,0.983,0.967,0.971,0.971,0.975
7,2023-09-30_08-06-44.palm,RandomForestClassifier,6.21,"{'n_estimators': 200, 'max_depth': 20, 'min_sa...",0.938,0.986,0.988,0.985,0.971,0.98,0.987,0.987
8,2023-09-29_11-03-50.palm,RandomForestClassifier,3.84,"{'n_estimators': 125, 'max_depth': 20, 'min_sa...",0.931,0.979,0.983,0.971,0.957,0.985,0.976,0.971
9,2023-09-29_09-20-47.palm,RandomForestClassifier,2.92,"{'n_estimators': 100, 'max_depth': 20, 'min_sa...",0.929,0.982,0.985,0.978,0.969,0.98,0.984,0.978


In [546]:
#create a file name
xlx_name = 'pilots_random_forest_results.xlsx'

#save to excel
review_summary.to_excel('./model_results/' + xlx_name, sheet_name='random_forest')