In [1]:
import pandas as pd
from data_preprocessing import *
from train_model_helper import *
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
import xgboost
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import StackingClassifier
from mlflow_helper import*

In [2]:
# Load the raw landmark CSV and pass it through preprocess_data (provided by one of the
# star imports in the first cell) to obtain the feature matrix and the label column.
raw_df = pd.read_csv('hand_landmarks_data.csv')
X, y = preprocess_data(raw_df)

# Swap the labels for integer class ids. The fitted encoder `le` stays in the namespace
# because later cells pass it to the logging helpers and store it as an artifact.
le = LabelEncoder()
y = le.fit_transform(y)

# Keep 20% of the rows as a test set, stratified on the encoded labels, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [3]:
# MLflow setup
import os
from pathlib import Path

import joblib
# Imported explicitly because this cell calls `mlflow.*` directly; the name should not
# depend on whatever the star imports in the first cell happen to re-export.
import mlflow


# Store runs in a local SQLite file and group them under one experiment.
mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("hand_gestures_experiment")

# Log the dataset
with mlflow.start_run():
    # Register the raw frame as a run input; the source is the CSV's absolute path
    # expressed as a file:// URI.
    dataset = mlflow.data.from_pandas(
        raw_df,
        source=Path(os.path.abspath('hand_landmarks_data.csv')).as_uri(),
        name="hand_gestures",
    )
    mlflow.log_input(dataset)

    # Serialize the fitted label encoder to the working directory and attach that file
    # to the same run as an artifact.
    joblib.dump(le, "label_encoder.pkl")
    mlflow.log_artifact("label_encoder.pkl")


2026/02/27 16:08:17 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2026/02/27 16:08:17 INFO mlflow.store.db.utils: Updating database tables
2026/02/27 16:08:19 INFO mlflow.tracking.fluent: Experiment with name 'hand_gestures_experiment' does not exist. Creating a new experiment.
  return _dataset_source_registry.resolve(


In [4]:
# K-nearest neighbours: search over neighbour count, vote weighting and distance metric
# (4 x 2 x 2 = 16 candidates), then hand the fitted search to log_model_with_grid as 'knn'.
knn = KNeighborsClassifier()
knn_parameters = dict(
    n_neighbors=[3, 5, 7, 9],
    weights=['uniform', 'distance'],
    metric=['euclidean', 'manhattan'],
)
knn_grid = train_model(knn, knn_parameters, X_train, y_train)
log_model_with_grid(knn_grid, 'knn', X_test, y_test, le)

Fitting 3 folds for each of 16 candidates, totalling 48 fits
[CV] END ...metric=euclidean, n_neighbors=3, weights=uniform; total time=   2.9s
[CV] END ...metric=euclidean, n_neighbors=3, weights=uniform; total time=   0.1s
[CV] END ...metric=euclidean, n_neighbors=3, weights=uniform; total time=   0.0s
[CV] END ..metric=euclidean, n_neighbors=3, weights=distance; total time=   0.1s
[CV] END ..metric=euclidean, n_neighbors=3, weights=distance; total time=   0.0s
[CV] END ..metric=euclidean, n_neighbors=3, weights=distance; total time=   0.0s
[CV] END ...metric=euclidean, n_neighbors=5, weights=uniform; total time=   0.0s
[CV] END ...metric=euclidean, n_neighbors=5, weights=uniform; total time=   0.0s
[CV] END ...metric=euclidean, n_neighbors=5, weights=uniform; total time=   0.1s
[CV] END ..metric=euclidean, n_neighbors=5, weights=distance; total time=   0.1s
[CV] END ..metric=euclidean, n_neighbors=5, weights=distance; total time=   0.1s
[CV] END ..metric=euclidean, n_neighbors=5, weig



In [5]:
# Logistic regression: only the regularisation strength C is searched; max_iter is a
# single-value entry, so every candidate runs with the same iteration cap of 10000.
log_reg = LogisticRegression(random_state=42)
param_grid = dict(
    C=[0.01, 0.1, 1, 10, 100],
    max_iter=[10000],
)
log_reg_grid = train_model(log_reg, param_grid, X_train, y_train)
log_model_with_grid(log_reg_grid, 'logistic_regression', X_test, y_test, le)

Fitting 3 folds for each of 5 candidates, totalling 15 fits
[CV] END .............................C=0.01, max_iter=10000; total time=   0.7s
[CV] END .............................C=0.01, max_iter=10000; total time=   0.7s
[CV] END .............................C=0.01, max_iter=10000; total time=   0.6s
[CV] END ..............................C=0.1, max_iter=10000; total time=   1.4s
[CV] END ..............................C=0.1, max_iter=10000; total time=   1.5s
[CV] END ..............................C=0.1, max_iter=10000; total time=   1.7s
[CV] END ................................C=1, max_iter=10000; total time=   2.9s
[CV] END ................................C=1, max_iter=10000; total time=   2.9s
[CV] END ................................C=1, max_iter=10000; total time=   5.8s
[CV] END ...............................C=10, max_iter=10000; total time=   7.1s
[CV] END ...............................C=10, max_iter=10000; total time=   3.8s
[CV] END ...............................C=10, max



In [6]:
# RBF-kernel SVM searched over C and gamma (4 x 4 = 16 candidates).
# probability=True makes the fitted model expose predict_proba, which the soft-voting
# ensemble built later from svm_grid.best_estimator_ relies on.
svm = SVC(probability=True, random_state=42)
svm_parameters = dict(
    kernel=['rbf'],
    C=[1, 10, 100, 150],
    gamma=[0.01, 0.05, 0.1, 0.5],
)
svm_grid = train_model(svm, svm_parameters, X_train, y_train)
log_model_with_grid(svm_grid, 'svm', X_test, y_test, le)

Fitting 3 folds for each of 16 candidates, totalling 48 fits
[CV] END ........................C=1, gamma=0.01, kernel=rbf; total time=  29.1s
[CV] END ........................C=1, gamma=0.01, kernel=rbf; total time=  28.9s
[CV] END ........................C=1, gamma=0.01, kernel=rbf; total time=  28.7s
[CV] END ........................C=1, gamma=0.05, kernel=rbf; total time=  16.8s
[CV] END ........................C=1, gamma=0.05, kernel=rbf; total time=  32.5s
[CV] END ........................C=1, gamma=0.05, kernel=rbf; total time=  27.4s
[CV] END .........................C=1, gamma=0.1, kernel=rbf; total time=  24.9s
[CV] END .........................C=1, gamma=0.1, kernel=rbf; total time=  20.6s
[CV] END .........................C=1, gamma=0.1, kernel=rbf; total time=  20.5s
[CV] END .........................C=1, gamma=0.5, kernel=rbf; total time=  14.9s
[CV] END .........................C=1, gamma=0.5, kernel=rbf; total time=  14.8s
[CV] END .........................C=1, gamma=0.5



In [7]:
# Decision tree: search over split criterion, depth limit and the two minimum-sample
# constraints (2 x 4 x 3 x 3 = 72 candidates), then log the result as 'decision_tree'.
dt = DecisionTreeClassifier(random_state=42)
dt_parameters = dict(
    criterion=['gini', 'entropy'],
    max_depth=[None, 10, 20, 30],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)
dt_grid = train_model(dt, dt_parameters, X_train, y_train)
log_model_with_grid(dt_grid, 'decision_tree', X_test, y_test, le)

Fitting 3 folds for each of 72 candidates, totalling 216 fits
[CV] END criterion=gini, max_depth=None, min_samples_leaf=1, min_samples_split=2; total time=   0.9s
[CV] END criterion=gini, max_depth=None, min_samples_leaf=1, min_samples_split=2; total time=   1.0s
[CV] END criterion=gini, max_depth=None, min_samples_leaf=1, min_samples_split=2; total time=   1.0s
[CV] END criterion=gini, max_depth=None, min_samples_leaf=1, min_samples_split=5; total time=   0.9s
[CV] END criterion=gini, max_depth=None, min_samples_leaf=1, min_samples_split=5; total time=   0.9s
[CV] END criterion=gini, max_depth=None, min_samples_leaf=1, min_samples_split=5; total time=   0.9s
[CV] END criterion=gini, max_depth=None, min_samples_leaf=1, min_samples_split=10; total time=   0.9s
[CV] END criterion=gini, max_depth=None, min_samples_leaf=1, min_samples_split=10; total time=   0.9s
[CV] END criterion=gini, max_depth=None, min_samples_leaf=1, min_samples_split=10; total time=   0.9s
[CV] END criterion=gini, m



In [8]:
# Random forest: search over forest size and depth limit (3 x 3 = 9 candidates),
# then log the result as 'random_forest'.
rf = RandomForestClassifier(random_state=42)
rf_parameters = dict(
    n_estimators=[100, 200, 300],
    max_depth=[None, 10, 20],
)
rf_grid = train_model(rf, rf_parameters, X_train, y_train)
log_model_with_grid(rf_grid, 'random_forest', X_test, y_test, le)


Fitting 3 folds for each of 9 candidates, totalling 27 fits
[CV] END ...................max_depth=None, n_estimators=100; total time=   7.5s
[CV] END ...................max_depth=None, n_estimators=100; total time=   7.3s
[CV] END ...................max_depth=None, n_estimators=100; total time=   7.5s
[CV] END ...................max_depth=None, n_estimators=200; total time=  15.2s
[CV] END ...................max_depth=None, n_estimators=200; total time=  14.7s
[CV] END ...................max_depth=None, n_estimators=200; total time=  14.8s
[CV] END ...................max_depth=None, n_estimators=300; total time=  22.5s
[CV] END ...................max_depth=None, n_estimators=300; total time=  22.2s
[CV] END ...................max_depth=None, n_estimators=300; total time=  22.5s
[CV] END .....................max_depth=10, n_estimators=100; total time=   5.8s
[CV] END .....................max_depth=10, n_estimators=100; total time=   5.9s
[CV] END .....................max_depth=10, n_est



In [9]:
# XGBoost: search over number of boosting rounds, learning rate and tree depth
# (2 x 3 x 2 = 12 candidates), then log the result as 'xgboost'.
# The classifier itself is pinned to a single thread via n_jobs=1.
xgb = xgboost.XGBClassifier(n_jobs=1, random_state=42)
xgb_parameters = dict(
    n_estimators=[300, 400],
    learning_rate=[0.1, 0.2, 0.3],
    max_depth=[3, 4],
)
xgb_grid = train_model(xgb, xgb_parameters, X_train, y_train)
log_model_with_grid(xgb_grid, 'xgboost', X_test, y_test, le)


Fitting 3 folds for each of 12 candidates, totalling 36 fits
[CV] END ...learning_rate=0.1, max_depth=3, n_estimators=300; total time=  15.1s
[CV] END ...learning_rate=0.1, max_depth=3, n_estimators=300; total time=  14.8s
[CV] END ...learning_rate=0.1, max_depth=3, n_estimators=300; total time=  14.8s
[CV] END ...learning_rate=0.1, max_depth=3, n_estimators=400; total time=  19.0s
[CV] END ...learning_rate=0.1, max_depth=3, n_estimators=400; total time=  18.9s
[CV] END ...learning_rate=0.1, max_depth=3, n_estimators=400; total time=  19.7s
[CV] END ...learning_rate=0.1, max_depth=4, n_estimators=300; total time=  17.0s
[CV] END ...learning_rate=0.1, max_depth=4, n_estimators=300; total time=  17.0s
[CV] END ...learning_rate=0.1, max_depth=4, n_estimators=300; total time=  17.2s
[CV] END ...learning_rate=0.1, max_depth=4, n_estimators=400; total time=  21.9s
[CV] END ...learning_rate=0.1, max_depth=4, n_estimators=400; total time=  21.3s
[CV] END ...learning_rate=0.1, max_depth=4, n_es



In [10]:
# Soft-voting ensemble over the best estimator found by each of four earlier searches.
# Member order is svm, xgb, rf, knn.
voting_clf = VotingClassifier(
    voting='soft',
    estimators=[
        (label, search.best_estimator_)
        for label, search in (
            ('svm', svm_grid),
            ('xgb', xgb_grid),
            ('rf', rf_grid),
            ('knn', knn_grid),
        )
    ],
)

# Fit on the training split, then pass the fitted ensemble to log_model together with
# the test split and the label encoder.
voting_clf.fit(X_train, y_train)
log_model(voting_clf, 'voting_classifier', X_test, y_test, le)


                 precision    recall  f1-score   support

           call      0.993     0.990     0.992       301
        dislike      1.000     0.996     0.998       259
           fist      0.995     0.995     0.995       189
           four      0.982     0.988     0.985       327
           like      0.993     0.997     0.995       287
           mute      0.977     0.968     0.972       217
             ok      0.994     0.991     0.992       318
            one      0.958     0.980     0.969       253
           palm      0.988     0.988     0.988       330
          peace      0.993     0.979     0.986       288
 peace_inverted      0.990     0.990     0.990       299
           rock      0.997     0.993     0.995       292
           stop      0.961     0.986     0.973       296
  stop_inverted      0.990     0.994     0.992       314
          three      0.993     0.969     0.981       291
         three2      0.994     0.991     0.992       331
         two_up      0.982    



In [11]:
# Stacking ensemble: the best estimator from each of four earlier searches acts as a base
# learner, and a logistic regression combines their outputs as the final estimator.
# StackingClassifier and LogisticRegression are both imported in the notebook's first
# cell, so this cell does not import anything itself.
estimators = [
    ('svm', svm_grid.best_estimator_),
    ('xgb', xgb_grid.best_estimator_),
    ('rf', rf_grid.best_estimator_),
    ('knn', knn_grid.best_estimator_),
]
stacking_clf = StackingClassifier(
    estimators=estimators,
    final_estimator=LogisticRegression(max_iter=10000, random_state=42),
)

# Fit on the training split, then pass the fitted ensemble to log_model together with
# the test split and the label encoder.
stacking_clf.fit(X_train, y_train)
log_model(stacking_clf, 'stacking_classifier', X_test, y_test, le)

                 precision    recall  f1-score   support

           call      0.993     0.990     0.992       301
        dislike      1.000     0.996     0.998       259
           fist      0.995     0.995     0.995       189
           four      0.988     0.991     0.989       327
           like      0.993     0.997     0.995       287
           mute      0.972     0.968     0.970       217
             ok      0.991     0.991     0.991       318
            one      0.961     0.976     0.969       253
           palm      0.988     0.994     0.991       330
          peace      0.993     0.993     0.993       288
 peace_inverted      0.990     0.990     0.990       299
           rock      0.997     0.997     0.997       292
           stop      0.980     0.980     0.980       296
  stop_inverted      0.987     0.997     0.992       314
          three      0.993     0.976     0.984       291
         three2      0.997     0.991     0.994       331
         two_up      0.993    

