## Setup

### Modules

In [None]:
import os
import zipfile

from tqdm import tqdm
import itertools

import pandas as pd
import numpy as np

import plotly.figure_factory as ff
import plotly.io as pio

from sklearn.preprocessing import MinMaxScaler, Normalizer
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import cross_val_predict, cross_val_score, RandomizedSearchCV, GridSearchCV, RepeatedKFold
from sklearn.linear_model import RidgeClassifier, LogisticRegression, SGDClassifier, Perceptron, PassiveAggressiveClassifier
from sklearn.kernel_ridge import KernelRidge
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier, export_graphviz, plot_tree
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, ExtraTreesClassifier, AdaBoostClassifier, GradientBoostingClassifier, VotingClassifier, StackingClassifier
from sklearn.neural_network import MLPClassifier

from lightgbm import LGBMClassifier
from xgboost import XGBClassifier

from skopt.space import Real, Categorical, Integer
from skopt import BayesSearchCV

import joblib

import warnings
warnings.filterwarnings('ignore')

### Random Seed

In [None]:
# Seed NumPy's legacy global random generator so code that draws from it is repeatable.
np.random.seed(0)

## Dataset Reading

In [None]:
# If the dataset is provided as a zip file (Google Colab only), extract it first:

# with zipfile.ZipFile('dataset.zip', 'r') as zip_ref:
#   zip_ref.extractall('dataset')
#   instances = zip_ref.namelist()

In [None]:
def load_dataset(person, base_dir='../dataset', n_gestures=10):
  """Load and stack every gesture CSV recorded for one person.

  Reads ``{base_dir}/{person}/gesto{i}.csv`` for ``i`` in ``0 .. n_gestures - 1``
  and concatenates the frames in that order.

  Parameters
  ----------
  person : str
    Name of the person's sub-directory (e.g. ``'P0'``).
  base_dir : str, default '../dataset'
    Directory that contains one sub-directory per person.
  n_gestures : int, default 10
    Number of consecutive gesture files to read, starting at ``gesto0.csv``.

  Returns
  -------
  pandas.DataFrame
    All rows of the gesture files, in file order, with a fresh 0..n-1 index.

  Raises
  ------
  ValueError
    If ``n_gestures`` is smaller than 1.
  FileNotFoundError
    Propagated from ``pd.read_csv`` when a gesture file is missing.
  """
  if n_gestures < 1:
    # pd.concat([]) would fail with an unrelated "No objects to concatenate" message.
    raise ValueError(f'n_gestures must be at least 1, got {n_gestures}')

  gesture_frames = [
    pd.read_csv(f'{base_dir}/{person}/gesto{gesture}.csv')
    for gesture in range(n_gestures)
  ]

  return pd.concat(gesture_frames, ignore_index=True)

In [None]:
# Load persons P0..P9 in order. The per-person names are bound in a single
# unpacking assignment instead of ten separate statements.
(P0_data, P1_data, P2_data, P3_data, P4_data,
 P5_data, P6_data, P7_data, P8_data, P9_data) = (
  load_dataset(f'P{person_index}') for person_index in range(10)
)

data_list = [
  P0_data, P1_data, P2_data, P3_data, P4_data,
  P5_data, P6_data, P7_data, P8_data, P9_data,
]

# The number of cross-validation folds used below equals the number of persons loaded.
folds_number = len(data_list)

# Single frame with every person's rows stacked in loading order.
df = pd.concat(data_list, ignore_index=True)

In [None]:
# Summary statistics of the combined frame, shown as the cell output.
df.describe()

### Preprocessing

In [None]:
# Features: the columns at positions 1 to 5 of the combined frame.
X = df.iloc[:, 1:6]
# Target: the column at position 0, kept as a one-column DataFrame (list indexer);
# this is why other cells flatten it with np.ravel(y).
# Presumably this column holds the gesture label — TODO confirm against the CSV header.
y = df.iloc[:, [0]]

In [None]:
# Normalization options (currently disabled: we are not using normalization)

# normalizer = Normalizer(norm='l2')
# X = normalizer.transform(X) or
# X = X.apply(lambda x: x / x.sum(), axis=1)
# X = X.apply(lambda x: x / np.linalg.norm(x), axis=1)

## Models Training

### Classifiers

In [None]:
# Classifiers with default hyperparameters, keyed by the display name used in
# the printed reports. Every estimator that accepts `random_state` is seeded with 0.
classifier_models = dict([
  # linear models
  ('Ridge', RidgeClassifier(random_state=0)),
  ('Logistic Regression', LogisticRegression(random_state=0)),
  ('SGD', SGDClassifier(random_state=0)),
  ('Perceptron', Perceptron(random_state=0)),
  ('Passive Aggressive', PassiveAggressiveClassifier(random_state=0)),
  # kernel, neighbour, tree and neural-network models
  ('SVM', SVC(random_state=0)),
  ('KNN', KNeighborsClassifier()),
  ('Decision Tree', DecisionTreeClassifier(random_state=0)),
  ('Multi Layer Perceptron', MLPClassifier(random_state=0)),
  # ensembles
  ('Random Forest', RandomForestClassifier(random_state=0)),
  ('Gradient Boosting', GradientBoostingClassifier(random_state=0)),
  ('Extra Trees', ExtraTreesClassifier(random_state=0)),
  ('Bagging', BaggingClassifier(random_state=0)),
  ('LGBM', LGBMClassifier(random_state=0)),
  ('XGB', XGBClassifier(random_state=0)),
])

### Testing Raw Models

In [None]:
# Cross-validate every untuned classifier and print per-fold accuracy together
# with its mean, standard deviation and coefficient of variation (std / mean,
# labelled "CV" in the output).
for model_name, model in classifier_models.items():
  scores = cross_val_score(model, X, np.ravel(y), cv=folds_number, scoring='accuracy')
  mean_accuracy = np.mean(scores)
  accuracy_std = np.std(scores)
  report_lines = [
    f'Model: {model_name}',
    f'Scores: {scores}',
    f'Mean Accuracy: {mean_accuracy}',
    f'Standard Deviation: {accuracy_std}',
    f'CV: {accuracy_std/mean_accuracy}',
  ]
  print('\n'.join(report_lines), end='\n\n')

Model: Ridge
Scores: [0.9668 0.7    0.8052 0.9928 0.9976 1.     0.6    0.9996]
Mean Accuracy: 0.8827499999999999
Standard Deviation: 0.14963561574705403
CV: 0.16951075134189073

Model: Logistic Regression
Scores: [0.9952 0.7832 0.9    0.9924 0.998  0.9    0.8044 0.9992]
Mean Accuracy: 0.92155
Standard Deviation: 0.08374949253577599
CV: 0.09087894583666213

Model: SGD
Scores: [0.9992 0.7012 0.8    0.9928 0.9976 0.9124 0.7632 0.9996]
Mean Accuracy: 0.89575
Standard Deviation: 0.11518644668536311
CV: 0.1285921816191606

Model: Perceptron
Scores: [0.994  0.83   0.9    0.9936 0.8984 0.7    0.756  0.8996]
Mean Accuracy: 0.87145
Standard Deviation: 0.09792526487071661
CV: 0.11237049156086593

Model: Passive Aggressive
Scores: [0.9912 0.8984 0.9    0.9916 0.998  0.9    0.8488 0.9992]
Mean Accuracy: 0.9409000000000001
Standard Deviation: 0.056340660273021294
CV: 0.05987954115529949

Model: SVM
Scores: [0.992  0.8988 0.8996 0.9992 0.9984 1.     0.7572 0.8992]
Mean Accuracy: 0.93055
Standard Devi

### Experiments Records (with old dataset)

* Experiment with 4 people, including person 2's data, without normalization

```
Model: Ridge
Scores: [0.848  0.6748 0.7    0.9   ]
Mean Accuracy: 0.7807
Standard Deviation: 0.09551057533069313
CV: 0.1223396635464239

Model: Logistic Regression
Scores: [0.8836 0.7736 0.7    0.9   ]
Mean Accuracy: 0.8142999999999999
Standard Deviation: 0.08195724495125496
CV: 0.10064748244044575

Model: SGD
Scores: [0.9736 0.5996 0.7    0.9   ]
Mean Accuracy: 0.7933
Standard Deviation: 0.15009793469598443
CV: 0.18920702722297292

Model: Perceptron
Scores: [0.6576 0.3996 0.8    0.9   ]
Mean Accuracy: 0.6893
Standard Deviation: 0.18813556282638327
CV: 0.2729371287195463

Model: Passive Aggressive
Scores: [0.788  0.6096 0.702  0.9   ]
Mean Accuracy: 0.7499
Standard Deviation: 0.10719155750337804
CV: 0.14294113548923595

Model: SVM
Scores: [0.9524 0.7336 0.7008 0.8   ]
Mean Accuracy: 0.7967
Standard Deviation: 0.0967364977658381
CV: 0.12142148583637268

Model: KNN
Scores: [0.926  0.6972 0.746  0.9   ]
Mean Accuracy: 0.8173
Standard Deviation: 0.09767635333078319
CV: 0.11951101594369654

Model: Decision Tree
Scores: [0.8024 0.4992 0.7252 0.6556]
Mean Accuracy: 0.6706000000000001
Standard Deviation: 0.11175347869305904
CV: 0.16664700073525057

Model: Multi Layer Perceptron
Scores: [0.8076 0.7084 0.702  0.9   ]
Mean Accuracy: 0.7795
Standard Deviation: 0.08119624375548416
CV: 0.10416452053301369

Model: Random Forest
Scores: [0.8484 0.4992 0.702  0.804 ]
Mean Accuracy: 0.7134
Standard Deviation: 0.13457800711854817
CV: 0.188643127443998

Model: Gradient Boosting
Scores: [0.6732 0.4116 0.5972 0.7144]
Mean Accuracy: 0.5991
Standard Deviation: 0.11613005640229407
CV: 0.1938408552867536

Model: Extra Trees
Scores: [0.934  0.5952 0.702  0.8004]
Mean Accuracy: 0.7579
Standard Deviation: 0.12491353009181995
CV: 0.164815318764771

Model: Bagging
Scores: [0.8064 0.4004 0.6992 0.6752]
Mean Accuracy: 0.6453
Standard Deviation: 0.14977352903634208
CV: 0.23209906870655833

Model: LGBM
Scores: [0.6352 0.36   0.6008 0.848 ]
Mean Accuracy: 0.611
Standard Deviation: 0.17310378389856185
CV: 0.2833122486064842

Model: XGB
Scores: [0.534  0.4012 0.6856 0.814 ]
Mean Accuracy: 0.6087
Standard Deviation: 0.15548154231290603
CV: 0.2554321378559324
```

* Experiment with 4 people, including person 2's data, with normalization ```X = X.apply(lambda x: x / x.sum(), axis=1)```

```
Model: Ridge
Scores: [0.666  0.3708 0.6988 0.7   ]
Mean Accuracy: 0.6089
Standard Deviation: 0.13814235411342893
CV: 0.22687198901860556

Model: Logistic Regression
Scores: [0.88   0.7    0.7992 0.8   ]
Mean Accuracy: 0.7948
Standard Deviation: 0.06382099967878914
CV: 0.08029818781931196

Model: SGD
Scores: [0.6948 0.6    0.6984 0.8   ]
Mean Accuracy: 0.6982999999999999
Standard Deviation: 0.07074256144641643
CV: 0.10130683294632169

Model: Perceptron
Scores: [0.7532 0.698  0.6    0.7004]
Mean Accuracy: 0.6879000000000001
Standard Deviation: 0.05533705810756477
CV: 0.08044346286897044

Model: Passive Aggressive
Scores: [0.5772 0.59   0.6    0.7   ]
Mean Accuracy: 0.6168
Standard Deviation: 0.0487105738007673
CV: 0.07897304442407149

Model: SVM
Scores: [0.872  0.6996 0.702  0.8076]
Mean Accuracy: 0.7703
Standard Deviation: 0.07313952419861644
CV: 0.09494940178971367

Model: KNN
Scores: [0.8788 0.6996 0.7044 0.8   ]
Mean Accuracy: 0.7706999999999999
Standard Deviation: 0.07415355689378629
CV: 0.09621585168520344

Model: Decision Tree
Scores: [0.7352 0.5536 0.702  0.8   ]
Mean Accuracy: 0.6977
Standard Deviation: 0.0903532511866618
CV: 0.12950157830967723

Model: Multi Layer Perceptron
Scores: [0.8712 0.6996 0.7    0.75  ]
Mean Accuracy: 0.7552
Standard Deviation: 0.07003827525003739
CV: 0.09274136023574867

Model: Random Forest
Scores: [0.8628 0.4784 0.8    0.8096]
Mean Accuracy: 0.7377
Standard Deviation: 0.15160590357898338
CV: 0.20551159492881033

Model: Gradient Boosting
Scores: [0.7804 0.4916 0.7984 0.894 ]
Mean Accuracy: 0.7411000000000001
Standard Deviation: 0.1503798856230447
CV: 0.20291443209154592

Model: Extra Trees
Scores: [0.8592 0.6996 0.808  0.8196]
Mean Accuracy: 0.7966
Standard Deviation: 0.059132732052561206
CV: 0.07423139850936632

Model: Bagging
Scores: [0.8052 0.538  0.7996 0.8   ]
Mean Accuracy: 0.7357
Standard Deviation: 0.11416352307107555
CV: 0.15517673381959432

Model: LGBM
Scores: [0.7972 0.6996 0.798  0.8956]
Mean Accuracy: 0.7976
Standard Deviation: 0.069297041783903
CV: 0.08688194807410106

Model: XGB
Scores: [0.8276 0.5404 0.7984 0.8956]
Mean Accuracy: 0.7655
Standard Deviation: 0.13466146442096935
CV: 0.17591308219591034
```

* Experiment with 3 people, excluding person 2's data, without normalization

```
Model: Ridge
Scores: [0.7688 0.7172 0.8   ]
Mean Accuracy: 0.762
Standard Deviation: 0.03414322773259732
CV: 0.044807385475849504

Model: Logistic Regression
Scores: [0.8944 0.6996 0.8   ]
Mean Accuracy: 0.798
Standard Deviation: 0.07953934037108093
CV: 0.09967335886100366

Model: SGD
Scores: [0.92 0.7  0.8 ]
Mean Accuracy: 0.8066666666666666
Standard Deviation: 0.08993825042154698
CV: 0.11149369886968634

Model: Perceptron
Scores: [0.7616 0.6988 0.8   ]
Mean Accuracy: 0.7534666666666666
Standard Deviation: 0.04171309413388349
CV: 0.055361565387387394

Model: Passive Aggressive
Scores: [0.926 0.7   0.8  ]
Mean Accuracy: 0.8086666666666668
Standard Deviation: 0.09246741167688338
CV: 0.11434552144709403

Model: SVM
Scores: [0.9476 0.778  0.8   ]
Mean Accuracy: 0.8418666666666667
Standard Deviation: 0.0753022944906432
CV: 0.08944681797273107

Model: KNN
Scores: [0.9288 0.7992 0.8   ]
Mean Accuracy: 0.8426666666666667
Standard Deviation: 0.060906339753938735
CV: 0.0722780930624273

Model: Decision Tree
Scores: [0.7476 0.5004 0.7628]
Mean Accuracy: 0.6702666666666667
Standard Deviation: 0.12027405742257512
CV: 0.17944209880034084

Model: Multi Layer Perceptron
Scores: [0.8568 0.7016 0.8   ]
Mean Accuracy: 0.7861333333333334
Standard Deviation: 0.06411434230255263
CV: 0.0815565751813339

Model: Random Forest
Scores: [0.7728 0.5992 0.7676]
Mean Accuracy: 0.7132
Standard Deviation: 0.08063812167124598
CV: 0.11306522948856701

Model: Gradient Boosting
Scores: [0.53   0.5    0.8008]
Mean Accuracy: 0.6102666666666666
Standard Deviation: 0.13528294628501977
CV: 0.22167841318279405

Model: Extra Trees
Scores: [0.8372 0.5004 0.8008]
Mean Accuracy: 0.7128
Standard Deviation: 0.15092285004818415
CV: 0.21173239344582512

Model: Bagging
Scores: [0.738  0.6988 0.7384]
Mean Accuracy: 0.7250666666666666
Standard Deviation: 0.01857405598019871
CV: 0.025617031969748127

Model: LGBM
Scores: [0.5136 0.4944 0.7788]
Mean Accuracy: 0.5956
Standard Deviation: 0.1297788888841325
CV: 0.21789605252540714

Model: XGB
Scores: [0.5236 0.4112 0.7724]
Mean Accuracy: 0.5690666666666666
Standard Deviation: 0.15092332122269536
CV: 0.26521202182994735
```

* Experiment with 3 people, excluding person 2's data, with normalization ```normalizer = Normalizer(norm='l2')```

```
Model: Ridge
Scores: [0.658  0.654  0.6996]
Mean Accuracy: 0.6705333333333333
Standard Deviation: 0.020618007232082874
CV: 0.030748668570415902

Model: Logistic Regression
Scores: [0.7552 0.7752 0.8   ]
Mean Accuracy: 0.7768
Standard Deviation: 0.018324482712116803
CV: 0.023589704830222453

Model: SGD
Scores: [0.6904 0.7008 0.7884]
Mean Accuracy: 0.7265333333333333
Standard Deviation: 0.04395189289312678
CV: 0.06049535634032866

Model: Perceptron
Scores: [0.6352 0.5536 0.5592]
Mean Accuracy: 0.5826666666666668
Standard Deviation: 0.037216961485621335
CV: 0.06387350369385811

Model: Passive Aggressive
Scores: [0.6956 0.6008 0.702 ]
Mean Accuracy: 0.6661333333333334
Standard Deviation: 0.046271469491349505
CV: 0.06946277445658952

Model: SVM
Scores: [0.8972 0.8228 0.8   ]
Mean Accuracy: 0.84
Standard Deviation: 0.04150373477170457
CV: 0.04940920806155306

Model: KNN
Scores: [0.8992 0.8116 0.8   ]
Mean Accuracy: 0.8369333333333332
Standard Deviation: 0.044283129468857034
CV: 0.05291117906905015

Model: Decision Tree
Scores: [0.6184 0.6636 0.8   ]
Mean Accuracy: 0.694
Standard Deviation: 0.07719136393837508
CV: 0.1112267491907422

Model: Multi Layer Perceptron
Scores: [0.8748 0.78   0.8   ]
Mean Accuracy: 0.8182666666666667
Standard Deviation: 0.0408004357275208
CV: 0.04986202834551181

Model: Random Forest
Scores: [0.7748 0.8    0.8   ]
Mean Accuracy: 0.7916000000000002
Standard Deviation: 0.011879393923933999
CV: 0.015006813951407271

Model: Gradient Boosting
Scores: [0.6788 0.8624 0.6904]
Mean Accuracy: 0.7438666666666666
Standard Deviation: 0.08394940275083694
CV: 0.11285544374104269

Model: Extra Trees
Scores: [0.8792 0.8208 0.9   ]
Mean Accuracy: 0.8666666666666667
Standard Deviation: 0.03352584409410482
CV: 0.03868366626242864

Model: Bagging
Scores: [0.6816 0.7632 0.8   ]
Mean Accuracy: 0.7482666666666665
Standard Deviation: 0.04947654887811893
CV: 0.06612154607731505

Model: LGBM
Scores: [0.7888 0.6992 0.7996]
Mean Accuracy: 0.7625333333333333
Standard Deviation: 0.04499995061725682
CV: 0.059013748842354635

Model: XGB
Scores: [0.778  0.6992 0.8   ]
Mean Accuracy: 0.7590666666666667
Standard Deviation: 0.04327442149302005
CV: 0.05701004061086428
```

* Experiment with 3 people, excluding person 2's data, with normalization ```X = X.apply(lambda x: x / np.linalg.norm(x), axis=1)```

```
Model: Ridge
Scores: [0.762  0.7036 0.7992]
Mean Accuracy: 0.7549333333333333
Standard Deviation: 0.03934711622921756
CV: 0.05211998794050366

Model: Logistic Regression
Scores: [0.8328 0.7008 0.8   ]
Mean Accuracy: 0.7778666666666666
Standard Deviation: 0.05611543657695468
CV: 0.07214017386478577

Model: SGD
Scores: [0.7956 0.7004 0.8   ]
Mean Accuracy: 0.7653333333333334
Standard Deviation: 0.04594992443470995
CV: 0.06003909987113669

Model: Perceptron
Scores: [0.7684 0.6996 0.8   ]
Mean Accuracy: 0.7559999999999999
Standard Deviation: 0.04191547049320415
CV: 0.05544374403862983

Model: Passive Aggressive
Scores: [0.7832 0.7016 0.8   ]
Mean Accuracy: 0.7615999999999999
Standard Deviation: 0.042977203259402554
CV: 0.05643015133850126

Model: SVM
Scores: [0.8988 0.8088 0.8   ]
Mean Accuracy: 0.8358666666666666
Standard Deviation: 0.04464536805637165
CV: 0.053412068977953

Model: KNN
Scores: [0.8992 0.8064 0.8   ]
Mean Accuracy: 0.8352
Standard Deviation: 0.045330195969868314
CV: 0.05427465992560861

Model: Decision Tree
Scores: [0.7436 0.6    0.8   ]
Mean Accuracy: 0.7145333333333334
Standard Deviation: 0.08419680648470124
CV: 0.11783467972294444

Model: Multi Layer Perceptron
Scores: [0.8232 0.7996 0.8   ]
Mean Accuracy: 0.8076
Standard Deviation: 0.011032074449833404
CV: 0.013660320022081977

Model: Random Forest
Scores: [0.81   0.7028 0.8   ]
Mean Accuracy: 0.7709333333333334
Standard Deviation: 0.04835020395222987
CV: 0.06271645272253962

Model: Gradient Boosting
Scores: [0.7724 0.7176 0.8668]
Mean Accuracy: 0.7856
Standard Deviation: 0.06162164122016442
CV: 0.07843895267332539

Model: Extra Trees
Scores: [0.8764 0.8848 0.8988]
Mean Accuracy: 0.8866666666666667
Standard Deviation: 0.009239528607504254
CV: 0.010420520985907053

Model: Bagging
Scores: [0.7056 0.6996 0.8   ]
Mean Accuracy: 0.7350666666666666
Standard Deviation: 0.04598009231057383
CV: 0.06255227504612801

Model: LGBM
Scores: [0.7772 0.6996 0.7792]
Mean Accuracy: 0.7519999999999999
Standard Deviation: 0.03706139051178014
CV: 0.049283763978431046

Model: XGB
Scores: [0.716  0.6996 0.8892]
Mean Accuracy: 0.7682666666666668
Standard Deviation: 0.08577448468059848
CV: 0.11164676069151137
```



### Experiments Records (with new dataset)

* Experiment with 9 people, including person 2's data, with normalization ```X = X.apply(lambda x: x / np.linalg.norm(x), axis=1)```

```
Model: Ridge
Scores: [0.9328 0.5996 0.6904 0.8956 0.8888 0.9536 0.916  0.7428 0.9736]
Mean Accuracy: 0.8436888888888889
Standard Deviation: 0.12477190250341215
CV: 0.14788852164182548

Model: Logistic Regression
Scores: [0.9936 0.796  0.7996 0.8088 0.9808 0.998  0.9992 0.8468 0.9984]
Mean Accuracy: 0.9134666666666666
Standard Deviation: 0.09118264942167208
CV: 0.09982044528719028

Model: SGD
Scores: [0.9512 0.6996 0.8    0.8004 0.9948 0.998  1.     0.7816 0.9768]
Mean Accuracy: 0.8891555555555555
Standard Deviation: 0.11063010551795376
CV: 0.12442154224502448

Model: Perceptron
Scores: [0.5532 0.6992 0.798  0.7068 0.8    0.898  0.8    0.8056 0.6988]
Mean Accuracy: 0.7510666666666668
Standard Deviation: 0.09314724305576032
CV: 0.12401994015945363

Model: Passive Aggressive
Scores: [0.8948 0.7576 0.7992 0.8108 0.8972 0.9976 0.9984 0.8796 0.7984]
Mean Accuracy: 0.8704
Standard Deviation: 0.08198341295652431
CV: 0.09419050201806561

Model: SVM
Scores: [0.9844 0.6996 0.898  0.8084 0.8832 0.9984 0.9256 0.8604 0.8996]
Mean Accuracy: 0.8841777777777777
Standard Deviation: 0.08538375824967828
CV: 0.09656854129977689

Model: KNN
Scores: [0.9876 0.6996 0.9    0.8808 0.9864 0.9984 0.9592 0.7672 0.9924]
Mean Accuracy: 0.9079555555555555
Standard Deviation: 0.10244711493265533
CV: 0.11283274198368716

Model: Decision Tree
Scores: [0.948  0.68   0.7988 0.8952 0.8876 0.9984 0.9036 0.8624 0.8984]
Mean Accuracy: 0.8747111111111111
Standard Deviation: 0.08594463908618336
CV: 0.09825488437778189

Model: Multi Layer Perceptron
Scores: [0.9852 0.6996 0.8208 0.83   0.982  0.9992 0.9856 0.8792 0.8992]
Mean Accuracy: 0.8978666666666667
Standard Deviation: 0.09596073270990703
CV: 0.10687637293203188

Model: Random Forest
Scores: [0.976  0.6996 0.894  0.8988 0.8904 0.9984 0.9136 0.866  0.8988]
Mean Accuracy: 0.8928444444444443
Standard Deviation: 0.0793234603255852
CV: 0.08884353911721177

Model: Gradient Boosting
Scores: [0.9564 0.768  0.8912 0.8924 0.89   0.998  0.814  0.8    0.8988]
Mean Accuracy: 0.8787555555555555
Standard Deviation: 0.06974530029744117
CV: 0.07936826101013687

Model: Extra Trees
Scores: [0.9832 0.7976 0.8572 0.898  0.9928 0.9984 0.9264 0.784  0.8992]
Mean Accuracy: 0.9040888888888888
Standard Deviation: 0.07567574753798008
CV: 0.08370387963841076

Model: Bagging
Scores: [0.96   0.7004 0.7988 0.8972 0.8864 0.998  0.82   0.866  0.8984]
Mean Accuracy: 0.8694666666666666
Standard Deviation: 0.0834308762456149
CV: 0.09595638273916758

Model: LGBM
Scores: [0.9872 0.7004 0.8176 0.8968 0.91   0.9976 0.8596 0.8004 0.8988]
Mean Accuracy: 0.8742666666666666
Standard Deviation: 0.08785321090698203
CV: 0.10048788802842233

Model: XGB
Scores: [0.97   0.6996 0.8988 0.8972 0.992  0.998  0.9128 0.8004 0.8988]
Mean Accuracy: 0.8964000000000001
Standard Deviation: 0.09012721872750515
CV: 0.10054352825469114
```

* Experiment with 9 people, including person 2's data, without normalization

```
Model: Ridge
Scores: [0.9632 0.7996 0.7    0.9    0.988  0.998  0.9932 0.5712 0.9996]
Mean Accuracy: 0.8792
Standard Deviation: 0.14691058505090773
CV: 0.16709575187773856

Model: Logistic Regression
Scores: [0.9936 0.7352 0.7012 0.9    0.9824 0.998  0.9    0.8368 0.9992]
Mean Accuracy: 0.8940444444444444
Standard Deviation: 0.10819265852184562
CV: 0.12101485467993271

Model: SGD
Scores: [0.9988 0.7012 0.7008 0.9    0.9524 0.998  1.     0.7804 0.9996]
Mean Accuracy: 0.8923555555555558
Standard Deviation: 0.12247977711634332
CV: 0.1372544568740773

Model: Perceptron
Scores: [0.9888 0.6992 0.7996 0.9    0.9012 0.9968 0.9    0.8016 0.9992]
Mean Accuracy: 0.8873777777777777
Standard Deviation: 0.09760876301482337
CV: 0.10999685304184743

Model: Passive Aggressive
Scores: [0.992  0.8868 0.7064 0.9    0.9908 0.9984 0.9828 0.8496 0.8996]
Mean Accuracy: 0.9118222222222223
Standard Deviation: 0.0895573531664086
CV: 0.09821799796471989

Model: SVM
Scores: [0.984  0.6548 0.8608 0.9    0.9968 0.9984 1.     0.7564 0.8996]
Mean Accuracy: 0.8945333333333334
Standard Deviation: 0.11456227806549395
CV: 0.12806932262501186

Model: KNN
Scores: [0.9896 0.638  0.9452 0.9    1.     0.7984 1.     0.7528 0.9992]
Mean Accuracy: 0.8914666666666667
Standard Deviation: 0.12467329750636706
CV: 0.13985188921593672

Model: Decision Tree
Scores: [0.8884 0.4592 0.7004 0.8784 0.9796 0.898  0.984  0.7636 0.704 ]
Mean Accuracy: 0.8061777777777778
Standard Deviation: 0.15844366526222622
CV: 0.19653688011467502

Model: Multi Layer Perceptron
Scores: [0.99   0.61   0.8336 0.9    0.9972 0.9988 0.9116 0.8508 0.8992]
Mean Accuracy: 0.8879111111111112
Standard Deviation: 0.11404619380315878
CV: 0.1284432556097243

Model: Random Forest
Scores: [0.9712 0.5992 0.9028 0.9    0.9948 0.898  0.9436 0.8628 0.8996]
Mean Accuracy: 0.8857777777777778
Standard Deviation: 0.10853236872808222
CV: 0.12252776198604365

Model: Gradient Boosting
Scores: [0.9252 0.6296 0.8324 0.8992 0.9    0.8988 0.816  0.814  0.8992]
Mean Accuracy: 0.8460444444444444
Standard Deviation: 0.08600701665317921
CV: 0.10165779967937237

Model: Extra Trees
Scores: [0.9884 0.7988 0.8992 0.9    0.9968 0.9036 0.952  0.77   0.9288]
Mean Accuracy: 0.9041777777777779
Standard Deviation: 0.07281345690305356
CV: 0.08053002262675506

Model: Bagging
Scores: [0.8912 0.4352 0.802  0.9    0.8956 0.8984 0.984  0.7636 0.8004]
Mean Accuracy: 0.8189333333333333
Standard Deviation: 0.14988042641459967
CV: 0.1830190814245356

Model: LGBM
Scores: [0.9684 0.5792 0.7252 0.8384 0.9952 0.8992 0.8924 0.7052 0.8   ]
Mean Accuracy: 0.8225777777777777
Standard Deviation: 0.1269564560327479
CV: 0.15433975906293645

Model: XGB
Scores: [0.9048 0.7152 0.898  0.8368 0.9964 0.898  0.8828 0.6988 0.8236]
Mean Accuracy: 0.8504888888888888
Standard Deviation: 0.08945599097314644
CV: 0.10518184557356788
```

* Experiment with 9 people, including person 2's data, with normalization ```X = X.apply(lambda x: x / x.sum(), axis=1)```

```
Model: Ridge
Scores: [0.8004 0.5292 0.652  0.6992 0.7772 0.898  0.8084 0.5    0.8   ]
Mean Accuracy: 0.7182666666666667
Standard Deviation: 0.12713457436905196
CV: 0.17700191345236488

Model: Logistic Regression
Scores: [0.9944 0.778  0.7996 0.8    0.9804 0.998  0.9996 0.8084 0.9984]
Mean Accuracy: 0.9063111111111112
Standard Deviation: 0.09864554328441068
CV: 0.10884291505979012

Model: SGD
Scores: [0.8996 0.6968 0.8    0.782  0.9972 0.998  1.     0.7544 0.9988]
Mean Accuracy: 0.8807555555555555
Standard Deviation: 0.11637769078985992
CV: 0.132133927575912

Model: Perceptron
Scores: [0.8056 0.7952 0.6    0.6128 0.5996 0.6988 0.6    0.5988 0.4   ]
Mean Accuracy: 0.6345333333333334
Standard Deviation: 0.1152621745799067
CV: 0.18164873068907336

Model: Passive Aggressive
Scores: [0.8996 0.7304 0.69   0.6892 0.7964 0.998  0.9872 0.7828 0.5348]
Mean Accuracy: 0.7898222222222222
Standard Deviation: 0.14263253312681798
CV: 0.180588148970424

Model: SVM
Scores: [0.9844 0.6996 0.898  0.808  0.8836 0.9984 0.9452 0.8592 0.8996]
Mean Accuracy: 0.8862222222222221
Standard Deviation: 0.08673230562775426
CV: 0.09786744616973275

Model: KNN
Scores: [0.9868 0.6996 0.8996 0.8808 0.9872 0.9984 0.9584 0.7676 0.9396]
Mean Accuracy: 0.902
Standard Deviation: 0.09878263455126558
CV: 0.109515115910494

Model: Decision Tree
Scores: [0.8812 0.544  0.792  0.8936 0.8728 0.998  0.8448 0.7972 0.8992]
Mean Accuracy: 0.8358666666666666
Standard Deviation: 0.11819757470721073
CV: 0.14140721172500886
```

* Experiment with 8 people, excluding person 2's data, with normalization ```X = X.apply(lambda x: x / np.linalg.norm(x), axis=1)```

```
Model: Ridge
Scores: [0.9076 0.6096 0.716  0.8904 0.9124 0.9184 0.652  0.9476]
Mean Accuracy: 0.81925
Standard Deviation: 0.12768890907200983
CV: 0.155860737347586

Model: Logistic Regression
Scores: [0.9936 0.7992 0.8    0.9756 0.998  1.     0.8272 0.9972]
Mean Accuracy: 0.9238500000000001
Standard Deviation: 0.08974830081956982
CV: 0.09714596614122402

Model: SGD
Scores: [0.994  0.7012 0.8    0.992  0.998  0.9852 0.8556 0.8988]
Mean Accuracy: 0.9031
Standard Deviation: 0.10342896112791616
CV: 0.11452658745201656

Model: Perceptron
Scores: [0.5792 0.6024 0.7156 0.7996 0.9344 0.9772 0.8872 0.7092]
Mean Accuracy: 0.7756000000000001
Standard Deviation: 0.13940530836377785
CV: 0.17973866472895544

Model: Passive Aggressive
Scores: [0.9604 0.782  0.7308 0.8996 0.9744 0.9768 0.782  0.8984]
Mean Accuracy: 0.87555
Standard Deviation: 0.09136376469914098
CV: 0.10435013956843239

Model: SVM
Scores: [0.9852 0.8992 0.8968 0.8836 0.9984 0.9768 0.8588 0.8992]
Mean Accuracy: 0.92475
Standard Deviation: 0.04989646781085809
CV: 0.05395671025775409

Model: KNN
Scores: [0.9896 0.9    0.8808 0.9864 0.998  0.9592 0.7672 0.9924]
Mean Accuracy: 0.9342
Standard Deviation: 0.07565289155081913
CV: 0.08098147243718597

Model: Decision Tree
Scores: [0.946  0.6876 0.8972 0.8504 0.9976 0.9036 0.8808 0.8988]
Mean Accuracy: 0.8827499999999999
Standard Deviation: 0.08457693243432278
CV: 0.09581074192503289
```

* Experiment with 8 people, excluding person 2's data, without normalization

```
Model: Ridge
Scores: [0.9668 0.7    0.8052 0.9928 0.9976 1.     0.6    0.9996]
Mean Accuracy: 0.8827499999999999
Standard Deviation: 0.14963561574705403
CV: 0.16951075134189073

Model: Logistic Regression
Scores: [0.9952 0.7832 0.9    0.9924 0.998  0.9    0.8044 0.9992]
Mean Accuracy: 0.92155
Standard Deviation: 0.08374949253577599
CV: 0.09087894583666213

Model: SGD
Scores: [0.9992 0.7012 0.8    0.9928 0.9976 0.9124 0.7632 0.9996]
Mean Accuracy: 0.89575
Standard Deviation: 0.11518644668536311
CV: 0.1285921816191606

Model: Perceptron
Scores: [0.994  0.83   0.9    0.9936 0.8984 0.7    0.756  0.8996]
Mean Accuracy: 0.87145
Standard Deviation: 0.09792526487071661
CV: 0.11237049156086593

Model: Passive Aggressive
Scores: [0.9912 0.8984 0.9    0.9916 0.998  0.9    0.8488 0.9992]
Mean Accuracy: 0.9409000000000001
Standard Deviation: 0.056340660273021294
CV: 0.05987954115529949

Model: SVM
Scores: [0.992  0.8988 0.8996 0.9992 0.9984 1.     0.7572 0.8992]
Mean Accuracy: 0.93055
Standard Deviation: 0.07977692335506552
CV: 0.08573093692446995

Model: KNN
Scores: [0.9896 0.8996 0.8996 1.     0.798  1.     0.7528 0.9992]
Mean Accuracy: 0.91735
Standard Deviation: 0.0918781666120956
CV: 0.10015606541897377

Model: Decision Tree
Scores: [0.9148 0.7984 0.8988 0.9804 0.8136 0.9    0.7092 0.8992]
Mean Accuracy: 0.8643000000000001
Standard Deviation: 0.07959390680196568
CV: 0.09209060141382121
```

### Hyperparameters Optimization

In [None]:
# Search spaces for BayesSearchCV, one dict per model. Keys are estimator
# constructor arguments; values are scikit-optimize dimensions.

parameters_ridge = {
  'alpha': Real(1e-5, 1e+5, prior='log-uniform', base=10),
  'class_weight': Categorical(['balanced', None]),
  'solver': Categorical(['auto', 'svd', 'cholesky', 'sparse_cg'])
}

parameters_logistic = {
  'solver': Categorical(['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']),
  # 'penalty': Categorical(['none', 'l1', 'l2', 'elasticnet']),
  'C': Real(1e-2, 1e+2, prior='log-uniform', base=10),
  'class_weight': Categorical(['balanced', None])
}

parameters_SGD = {
  'alpha': Real(1e-4, 1e+3, prior='log-uniform', base=10),
  'max_iter': Integer(100, 5000),
  'class_weight': Categorical(['balanced', None])
}

parameters_perceptron = {
  'eta0': Real(1e-4, 1, prior='log-uniform', base=10),
  'max_iter': Integer(100, 10000, prior='log-uniform', base=10),
  'class_weight': Categorical(['balanced', None])
}

parameters_PasAgg = {
  'C': Real(1e-2, 1e+2, prior='log-uniform', base=10),
  'max_iter': Integer(100, 10000, prior='log-uniform', base=10),
  'class_weight': Categorical(['balanced', None])
}

parameters_SVM = {
  'kernel': Categorical(['linear', 'poly', 'rbf', 'sigmoid']),
  'C': Real(1e-2, 1e+2, prior='log-uniform', base=10),
  'gamma': Categorical(['scale', 'auto']),
  'class_weight': Categorical(['balanced', None])
}

parameters_KN = {
  'n_neighbors' : Integer(1, 20),
  'metric':  Categorical(['euclidean', 'manhattan', 'minkowski']),
  'weights':  Categorical(['uniform', 'distance']),
  'leaf_size': Integer(10, 60),
  'algorithm': Categorical(['auto', 'ball_tree', 'kd_tree', 'brute'])
}

parameters_tree = {
  # Written as an explicit Categorical, like every other dimension in this cell.
  'max_depth': Categorical([3, 5, 7, None]),
  # An integer max_features may not exceed the number of input columns, so the
  # upper bound follows X instead of a fixed 10 (X holds at most 5 columns).
  'max_features': Integer(1, X.shape[1]),
  'min_samples_leaf': Integer(1, 10),
  'criterion': Categorical(['gini', 'entropy']),
  'class_weight': Categorical(['balanced', None])
}

parameters_MLP = {
  'max_iter': Integer(100, 5000),
  #'hidden_layer_sizes': [(50, 50, 50), (50, 100, 50), (100,)],
  'activation': Categorical(['tanh', 'relu']),
  'solver': Categorical(['sgd', 'adam']),
  'learning_rate': Categorical(['constant','adaptive'])
}

# 'auto' is no longer offered for max_features in the three tree ensembles below:
# scikit-learn deprecated it in 1.1 and removed it in 1.3, and for classifiers
# it was equivalent to 'sqrt', so the search space loses nothing.

parameters_RF = {
  'n_estimators': Integer(90, 200),
  'max_depth': Integer(1, 110),
  'min_samples_leaf': Integer(2, 10),
  'min_samples_split': Integer(2, 10),
  'max_features': Categorical(['log2', 'sqrt']),
  'class_weight': Categorical(['balanced', None])
}

parameters_GB = {
  'learning_rate': Real(1e-2, 2e-1),
  'max_depth': Integer(3, 14),
  # 'subsample' used to be listed twice with the same range; one entry is enough.
  'subsample': Real(0.5, 1.0),
  'min_samples_leaf': Integer(2, 10),
  'min_samples_split': Integer(2, 10),
  'max_features': Categorical(['log2', 'sqrt'])
}

parameters_ET = {
  'n_estimators': Integer(90, 200),
  'min_samples_leaf': Integer(2, 10),
  'min_samples_split': Integer(2, 10),
  'max_features': Categorical(['log2', 'sqrt']),
  'class_weight': Categorical(['balanced', None])
}

parameters_bagging = {
  'n_estimators': Integer(5, 200),
  'max_features': Real(0.3, 0.9),
  'max_samples': Real(0.05, 0.5)
}

parameters_LGBM = {
  'learning_rate': Real(1e-3, 3e-1),
  'n_estimators': Integer(50, 300),
  'max_depth': Integer(3, 15),
  'num_leaves': Integer(10, 3000)
}

parameters_XGB = {
  'learning_rate': Real(1e-4, 1.0),
  'max_depth' : Integer(3, 20),
  'gamma': Real(0.1 ,0.5),
  'colsample_bytree': Real(0.3, 1.0),
  'reg_alpha' : Real(1e-5, 1e+2, prior='log-uniform', base=10),
  'reg_lambda' : Real(1e-5, 1e+2, prior='log-uniform', base=10)
}

# Maps each search space to its estimator; the keys match those of `classifier_models`.
classifier_parameters = {
  'Ridge': parameters_ridge,
  'Logistic Regression': parameters_logistic,
  'SGD': parameters_SGD,
  'Perceptron': parameters_perceptron,
  'Passive Aggressive': parameters_PasAgg,
  'SVM': parameters_SVM,
  'KNN': parameters_KN,
  'Decision Tree': parameters_tree,
  'Multi Layer Perceptron': parameters_MLP,
  'Random Forest': parameters_RF,
  'Gradient Boosting': parameters_GB,
  'Extra Trees': parameters_ET,
  'Bagging': parameters_bagging,
  'LGBM': parameters_LGBM,
  'XGB': parameters_XGB
}

In [None]:
# Flatten the one-column label frame once so the search, the cross-validation
# helpers and the metrics all receive the same 1-D target. Passing the 2-D frame
# makes estimators emit a column-vector conversion warning, which the global
# warning filter hides.
y_flat = np.ravel(y)

for model_name, parameters in classifier_parameters.items():
  # Despite its name this is a Bayesian search (BayesSearchCV), not a grid search.
  # The variable name is unchanged in case other cells refer to it.
  grid_search_cv = BayesSearchCV(classifier_models[model_name], parameters, cv=folds_number, n_iter=50, verbose=1)
  grid_search_cv.fit(X, y_flat)
  best_model = grid_search_cv.best_estimator_

  # NOTE(review): the tuned model is scored on the same data the search used,
  # so these numbers are likely optimistic compared with a nested evaluation.
  scores = cross_val_score(best_model, X, y_flat, cv=folds_number, scoring='accuracy')
  y_pred = cross_val_predict(best_model, X, y_flat, cv=folds_number)
  precision_recall_report = classification_report(y_flat, y_pred)
  confusion_matrix_report = confusion_matrix(y_flat, y_pred)

  mean_accuracy = np.mean(scores)
  accuracy_std = np.std(scores)

  print(f'Model: {model_name}')
  print(f'Scores: {scores}')
  print(f'Mean Accuracy: {mean_accuracy}')
  print(f'Standard Deviation: {accuracy_std}')
  # "CV" is the coefficient of variation of the fold accuracies (std / mean).
  print(f'CV: {accuracy_std/mean_accuracy}', end='\n\n')
  print(confusion_matrix_report, end='\n\n')
  print(precision_recall_report, end='\n\n\n')

### Experiments Records (with old dataset)

```
Model: Ridge
Scores: [0.7684 0.6988 0.7996]
Mean Accuracy: 0.7555999999999999
Standard Deviation: 0.042135021063243815
CV: 0.05576365942726816

[[750   0   0   0   0   0   0   0   0   0]
 [  0 741   0   9   0   0   0   0   0   0]
 [  3   0 747   0   0   0   0   0   0   0]
 [  0   0   0 750   0   0   0   0   0   0]
 [  0   0   0   0 750   0   0   0   0   0]
 [  0   0   0   0   0 749   0   0   0   1]
 [250   0 250   0   0   0   0   0 250   0]
 [249  21 233 246   0   0   0   0   1   0]
 [  0   0   0   0   0   0   0   0 750   0]
 [  0   0   0   0  69 251   0   0   0 430]]

              precision    recall  f1-score   support

           0       0.60      1.00      0.75       750
           1       0.97      0.99      0.98       750
           2       0.61      1.00      0.75       750
           3       0.75      1.00      0.85       750
           4       0.92      1.00      0.96       750
           5       0.75      1.00      0.86       750
           6       0.00      0.00      0.00       750
           7       0.00      0.00      0.00       750
           8       0.75      1.00      0.86       750
           9       1.00      0.57      0.73       750

    accuracy                           0.76      7500
   macro avg       0.63      0.76      0.67      7500
weighted avg       0.63      0.76      0.67      7500



Model: Logistic Regression
Scores: [0.7728 0.7988 0.8   ]
Mean Accuracy: 0.7905333333333333
Standard Deviation: 0.012548926470242588
CV: 0.015874000426179695

[[750   0   0   0   0   0   0   0   0   0]
 [  0 741   0   9   0   0   0   0   0   0]
 [  0   0 747   0   0   0   3   0   0   0]
 [  0   0   0 750   0   0   0   0   0   0]
 [  0   0   0   0 750   0   0   0   0   0]
 [  0   0   0   0   0 749   0   0   0   1]
 [250   0 250   0   0   0   0 250   0   0]
 [  0 205  45 250   0   0   0   0 250   0]
 [  0   0   0   0   0   0   0   0 750   0]
 [  0   0   0   0  58   0   0   0   0 692]]

              precision    recall  f1-score   support

           0       0.75      1.00      0.86       750
           1       0.78      0.99      0.87       750
           2       0.72      1.00      0.83       750
           3       0.74      1.00      0.85       750
           4       0.93      1.00      0.96       750
           5       1.00      1.00      1.00       750
           6       0.00      0.00      0.00       750
           7       0.00      0.00      0.00       750
           8       0.75      1.00      0.86       750
           9       1.00      0.92      0.96       750

    accuracy                           0.79      7500
   macro avg       0.67      0.79      0.72      7500
weighted avg       0.67      0.79      0.72      7500



Model: SGD
Scores: [0.7716 0.7988 0.8   ]
Mean Accuracy: 0.7901333333333334
Standard Deviation: 0.013114199259665945
CV: 0.016597450969877588

[[750   0   0   0   0   0   0   0   0   0]
 [  0 740   0   1   0   0   0   9   0   0]
 [  3   0 747   0   0   0   0   0   0   0]
 [  0   0   0 750   0   0   0   0   0   0]
 [  0   0   0   0 750   0   0   0   0   0]
 [  0   0   0   0   0 749   0   0   0   1]
 [250   0 250   0   0   0   0   0 250   0]
 [  0 227 272 250   0   0   0   0   1   0]
 [  0   0   0   0   0   0   0   0 750   0]
 [  0   0   0   0  60   0   0   0   0 690]]

              precision    recall  f1-score   support

           0       0.75      1.00      0.86       750
           1       0.77      0.99      0.86       750
           2       0.59      1.00      0.74       750
           3       0.75      1.00      0.86       750
           4       0.93      1.00      0.96       750
           5       1.00      1.00      1.00       750
           6       0.00      0.00      0.00       750
           7       0.00      0.00      0.00       750
           8       0.75      1.00      0.86       750
           9       1.00      0.92      0.96       750

    accuracy                           0.79      7500
   macro avg       0.65      0.79      0.71      7500
weighted avg       0.65      0.79      0.71      7500



Model: Perceptron
Scores: [0.7684 0.6996 0.8   ]
Mean Accuracy: 0.7559999999999999
Standard Deviation: 0.04191547049320415
CV: 0.05544374403862983

[[669   0   0   0   0   0   0  81   0   0]
 [  0 727   0   0   5   0   0  18   0   0]
 [250   0 500   0   0   0   0   0   0   0]
 [  0   0   0 750   0   0   0   0   0   0]
 [  0   0   0   0 750   0   0   0   0   0]
 [  0   0   0   0   1 749   0   0   0   0]
 [500   0 250   0   0   0   0   0   0   0]
 [  1   7 300 249   0   0   0 193   0   0]
 [  1   0   0   0   0   0   0  26 723   0]
 [  0   0   0   0 138   0   0   0   3 609]]

              precision    recall  f1-score   support

           0       0.47      0.89      0.62       750
           1       0.99      0.97      0.98       750
           2       0.48      0.67      0.56       750
           3       0.75      1.00      0.86       750
           4       0.84      1.00      0.91       750
           5       1.00      1.00      1.00       750
           6       0.00      0.00      0.00       750
           7       0.61      0.26      0.36       750
           8       1.00      0.96      0.98       750
           9       1.00      0.81      0.90       750

    accuracy                           0.76      7500
   macro avg       0.71      0.76      0.72      7500
weighted avg       0.71      0.76      0.72      7500



Model: Passive Aggressive
Scores: [0.822  0.7016 0.8   ]
Mean Accuracy: 0.7745333333333333
Standard Deviation: 0.0523478960629959
CV: 0.0675863695080856

[[750   0   0   0   0   0   0   0   0   0]
 [  0 738   0   0   0   0   0  12   0   0]
 [248   0 502   0   0   0   0   0   0   0]
 [  0   0   0 750   0   0   0   0   0   0]
 [  0   0   0   0 750   0   0   0   0   0]
 [  0   0   0   0   0 749   0   0   0   1]
 [250   0 250   0   0   0   0   0 250   0]
 [250  68 114 248   0   0   0  70   0   0]
 [  0   0   0   0   0   0   0   0 750   0]
 [  0   0   0   0   0   0   0   0   0 750]]

              precision    recall  f1-score   support

           0       0.50      1.00      0.67       750
           1       0.92      0.98      0.95       750
           2       0.58      0.67      0.62       750
           3       0.75      1.00      0.86       750
           4       1.00      1.00      1.00       750
           5       1.00      1.00      1.00       750
           6       0.00      0.00      0.00       750
           7       0.85      0.09      0.17       750
           8       0.75      1.00      0.86       750
           9       1.00      1.00      1.00       750

    accuracy                           0.77      7500
   macro avg       0.73      0.77      0.71      7500
weighted avg       0.73      0.77      0.71      7500



Model: SVM
Scores: [0.8988 0.8088 0.8   ]
Mean Accuracy: 0.8358666666666666
Standard Deviation: 0.04464536805637165
CV: 0.053412068977953

[[750   0   0   0   0   0   0   0   0   0]
 [  0 750   0   0   0   0   0   0   0   0]
 [ 33   0 717   0   0   0   0   0   0   0]
 [  0   0   0 750   0   0   0   0   0   0]
 [  0   0   0   0 750   0   0   0   0   0]
 [  0   0   0   0   0 749   0   0   0   1]
 [296   0 228   0   0   0   0 226   0   0]
 [  1   0 220 195   0   0   1 303  30   0]
 [  0   0   0   0   0   0   0   0 750   0]
 [  0   0   0   0   0   0   0   0   0 750]]

              precision    recall  f1-score   support

           0       0.69      1.00      0.82       750
           1       1.00      1.00      1.00       750
           2       0.62      0.96      0.75       750
           3       0.79      1.00      0.88       750
           4       1.00      1.00      1.00       750
           5       1.00      1.00      1.00       750
           6       0.00      0.00      0.00       750
           7       0.57      0.40      0.47       750
           8       0.96      1.00      0.98       750
           9       1.00      1.00      1.00       750

    accuracy                           0.84      7500
   macro avg       0.76      0.84      0.79      7500
weighted avg       0.76      0.84      0.79      7500



Model: KNN
Scores: [0.8992 0.8064 0.8   ]
Mean Accuracy: 0.8352
Standard Deviation: 0.045330195969868314
CV: 0.05427465992560861

[[750   0   0   0   0   0   0   0   0   0]
 [  0 750   0   0   0   0   0   0   0   0]
 [  3   0 747   0   0   0   0   0   0   0]
 [  0   0   0 750   0   0   0   0   0   0]
 [  0   0   0   0 750   0   0   0   0   0]
 [  0   0   0   0   0 749   0   0   0   1]
 [556   0 194   0   0   0   0   0   0   0]
 [  0   0   0 231   0   0   1 268 250   0]
 [  0   0   0   0   0   0   0   0 750   0]
 [  0   0   0   0   0   0   0   0   0 750]]

              precision    recall  f1-score   support

           0       0.57      1.00      0.73       750
           1       1.00      1.00      1.00       750
           2       0.79      1.00      0.88       750
           3       0.76      1.00      0.87       750
           4       1.00      1.00      1.00       750
           5       1.00      1.00      1.00       750
           6       0.00      0.00      0.00       750
           7       1.00      0.36      0.53       750
           8       0.75      1.00      0.86       750
           9       1.00      1.00      1.00       750

    accuracy                           0.84      7500
   macro avg       0.79      0.84      0.79      7500
weighted avg       0.79      0.84      0.79      7500



Model: Decision Tree
Scores: [0.8316 0.7996 0.8616]
Mean Accuracy: 0.8309333333333333
Standard Deviation: 0.025315783394730056
CV: 0.030466684123953054

[[669   0  33   0   0   0  48   0   0   0]
 [  0 628   0   0   0   0 122   0   0   0]
 [  1   0 739   0   0   0   4   6   0   0]
 [  0   1   0 668   0   0   0  81   0   0]
 [  0   0   0   0 750   0   0   0   0   0]
 [  0   0   0   0   0 749   0   0   0   1]
 [  1   0 250  14   0   0 485   0   0   0]
 [  0   1   0 249   0   0   1 249 250   0]
 [  0   0   0   0   0   0   0  52 698   0]
 [  0   0   0   0   0 153   0   0   0 597]]

              precision    recall  f1-score   support

           0       1.00      0.89      0.94       750
           1       1.00      0.84      0.91       750
           2       0.72      0.99      0.83       750
           3       0.72      0.89      0.79       750
           4       1.00      1.00      1.00       750
           5       0.83      1.00      0.91       750
           6       0.73      0.65      0.69       750
           7       0.64      0.33      0.44       750
           8       0.74      0.93      0.82       750
           9       1.00      0.80      0.89       750

    accuracy                           0.83      7500
   macro avg       0.84      0.83      0.82      7500
weighted avg       0.84      0.83      0.82      7500



Model: Multi Layer Perceptron
Scores: [0.8952 0.8216 0.8064]
Mean Accuracy: 0.8410666666666667
Standard Deviation: 0.03877777141037834
CV: 0.0461054669590738

[[747   0   0   0   0   0   3   0   0   0]
 [  0 741   0   7   0   0   0   2   0   0]
 [  2   0 748   0   0   0   0   0   0   0]
 [  0   0   0 750   0   0   0   0   0   0]
 [  0   0   0   0 750   0   0   0   0   0]
 [  0   0   0   0   0 750   0   0   0   0]
 [231   0 250   0   0   0  19 250   0   0]
 [  0   0 226 194   0   0   3 303  24   0]
 [  0   0   0   0   0   0   0   0 750   0]
 [  0   0   0   0   0   0   0   0   0 750]]

              precision    recall  f1-score   support

           0       0.76      1.00      0.86       750
           1       1.00      0.99      0.99       750
           2       0.61      1.00      0.76       750
           3       0.79      1.00      0.88       750
           4       1.00      1.00      1.00       750
           5       1.00      1.00      1.00       750
           6       0.76      0.03      0.05       750
           7       0.55      0.40      0.46       750
           8       0.97      1.00      0.98       750
           9       1.00      1.00      1.00       750

    accuracy                           0.84      7500
   macro avg       0.84      0.84      0.80      7500
weighted avg       0.84      0.84      0.80      7500

Model: Random Forest
Scores: [0.81   0.7028 0.8   ]
Mean Accuracy: 0.7709333333333334
Standard Deviation: 0.04835020395222987
CV: 0.06271645272253962

[[729   0   0   0   0   0  21   0   0   0]
 [  0 739   0   6   0   0   0   5   0   0]
 [  2   0 506   0   0   0   0 242   0   0]
 [  0   0   0 750   0   0   0   0   0   0]
 [  0   0   0   0 750   0   0   0   0   0]
 [  0   0   0   0   0 750   0   0   0   0]
 [250   0 250   0   0   0   0 250   0   0]
 [ 11   0 405   4   0   0 234  95   1   0]
 [  0   0   0   0   0   0   0   0 750   0]
 [  0   0   0   0   0   0   0  37   0 713]]

              precision    recall  f1-score   support

           0       0.73      0.97      0.84       750
           1       1.00      0.99      0.99       750
           2       0.44      0.67      0.53       750
           3       0.99      1.00      0.99       750
           4       1.00      1.00      1.00       750
           5       1.00      1.00      1.00       750
           6       0.00      0.00      0.00       750
           7       0.15      0.13      0.14       750
           8       1.00      1.00      1.00       750
           9       1.00      0.95      0.97       750

    accuracy                           0.77      7500
   macro avg       0.73      0.77      0.75      7500
weighted avg       0.73      0.77      0.75      7500
```

### Experiment Records (with new dataset)

```
Model: Ridge
Scores: [0.9668 0.7    0.8056 0.9928 0.9976 1.     0.6    0.9996]
Mean Accuracy: 0.8828
Standard Deviation: 0.14960975903997709
CV: 0.16947186116898175

[[2000    0    0    0    0    0    0    0    0    0]
 [  10 1983    0    7    0    0    0    0    0    0]
 [ 272  247 1481    0    0    0    0    0    0    0]
 [   0    0    0 2000    0    0    0    0    0    0]
 [   0    3    0    0 1997    0    0    0    0    0]
 [   0    0    0    0    0 1999    0    0    0    1]
 [ 736    0    0    0    0    1 1263    0    0    0]
 [   0    1    0  501    0    0    1 1247  250    0]
 [   0    0    0    0    0    0    0    0 2000    0]
 [   0    0    0    0  314    0    0    0    0 1686]]

              precision    recall  f1-score   support

           0       0.66      1.00      0.80      2000
           1       0.89      0.99      0.94      2000
           2       1.00      0.74      0.85      2000
           3       0.80      1.00      0.89      2000
           4       0.86      1.00      0.93      2000
           5       1.00      1.00      1.00      2000
           6       1.00      0.63      0.77      2000
           7       1.00      0.62      0.77      2000
           8       0.89      1.00      0.94      2000
           9       1.00      0.84      0.91      2000

    accuracy                           0.88     20000
   macro avg       0.91      0.88      0.88     20000
weighted avg       0.91      0.88      0.88     20000



Model: Logistic Regression
Scores: [0.9932 0.8352 0.9    0.9952 0.998  0.9    0.8452 0.9992]
Mean Accuracy: 0.93325
Standard Deviation: 0.06666286447490835
CV: 0.07143087540842041

[[2000    0    0    0    0    0    0    0    0    0]
 [  10 1984    6    0    0    0    0    0    0    0]
 [  59  244 1447    0    0    0  250    0    0    0]
 [   0    0    0 2000    0    0    0    0    0    0]
 [   0    3    0    0 1997    0    0    0    0    0]
 [   0    0    0    0    0 1999    0    0    0    1]
 [  68    0  216    0    0    0 1715    1    0    0]
 [   0    0    0  226    0    0    1 1523  250    0]
 [   0    0    0    0    0    0    0    0 2000    0]
 [   0    0    0    0    0    0    0    0    0 2000]]

              precision    recall  f1-score   support

           0       0.94      1.00      0.97      2000
           1       0.89      0.99      0.94      2000
           2       0.87      0.72      0.79      2000
           3       0.90      1.00      0.95      2000
           4       1.00      1.00      1.00      2000
           5       1.00      1.00      1.00      2000
           6       0.87      0.86      0.86      2000
           7       1.00      0.76      0.86      2000
           8       0.89      1.00      0.94      2000
           9       1.00      1.00      1.00      2000

    accuracy                           0.93     20000
   macro avg       0.94      0.93      0.93     20000
weighted avg       0.94      0.93      0.93     20000



Model: SGD
Scores: [0.9972 0.7008 0.8    0.9712 0.998  1.     0.7536 0.9996]
Mean Accuracy: 0.90255
Standard Deviation: 0.11994947894843061
CV: 0.13290064699842735

[[2000    0    0    0    0    0    0    0    0    0]
 [   5 1995    0    0    0    0    0    0    0    0]
 [ 323  250 1427    0    0    0    0    0    0    0]
 [   0    0    0 2000    0    0    0    0    0    0]
 [   0    3    0    0 1997    0    0    0    0    0]
 [   0    0    0    0    0 1999    0    0    0    1]
 [ 737    0    0    0    0    1 1262    0    0    0]
 [   0    1    0  377    0    0    1 1371  250    0]
 [   0    0    0    0    0    0    0    0 2000    0]
 [   0    0    0    0    0    0    0    0    0 2000]]

              precision    recall  f1-score   support

           0       0.65      1.00      0.79      2000
           1       0.89      1.00      0.94      2000
           2       1.00      0.71      0.83      2000
           3       0.84      1.00      0.91      2000
           4       1.00      1.00      1.00      2000
           5       1.00      1.00      1.00      2000
           6       1.00      0.63      0.77      2000
           7       1.00      0.69      0.81      2000
           8       0.89      1.00      0.94      2000
           9       1.00      1.00      1.00      2000

    accuracy                           0.90     20000
   macro avg       0.93      0.90      0.90     20000
weighted avg       0.93      0.90      0.90     20000



Model: Perceptron
Scores: [0.9896 0.8284 0.9    0.9896 0.9984 0.7576 0.8224 0.9864]
Mean Accuracy: 0.90905
Standard Deviation: 0.08942503844002528
CV: 0.09837196902263383

[[2000    0    0    0    0    0    0    0    0    0]
 [  14 1976    0    0   10    0    0    0    0    0]
 [  32  249 1467    0    0    0    0  250    0    2]
 [   0  250    0 1745    0    0    0    5    0    0]
 [   0    3    0    0 1997    0    0    0    0    0]
 [   0    0    0    0    0 1999    0    0    0    1]
 [ 224    0  101    0    0    1 1539  135    0    0]
 [   0    1    0  291    0    0    0 1458  250    0]
 [   0    0    0    0    0    0    0    0 2000    0]
 [   0    0    0    0    0    0    0    0    0 2000]]

              precision    recall  f1-score   support

           0       0.88      1.00      0.94      2000
           1       0.80      0.99      0.88      2000
           2       0.94      0.73      0.82      2000
           3       0.86      0.87      0.86      2000
           4       1.00      1.00      1.00      2000
           5       1.00      1.00      1.00      2000
           6       1.00      0.77      0.87      2000
           7       0.79      0.73      0.76      2000
           8       0.89      1.00      0.94      2000
           9       1.00      1.00      1.00      2000

    accuracy                           0.91     20000
   macro avg       0.91      0.91      0.91     20000
weighted avg       0.91      0.91      0.91     20000



Model: Passive Aggressive
Scores: [0.9924 0.8404 0.9    0.9908 0.998  0.988  0.8496 0.9992]
Mean Accuracy: 0.9448000000000001
Standard Deviation: 0.06519877299458937
CV: 0.06900801544727918

[[2000    0    0    0    0    0    0    0    0    0]
 [   5 1982    0    0   13    0    0    0    0    0]
 [  38  238 1693    0    0    0   31    0    0    0]
 [   0    0    0 2000    0    0    0    0    0    0]
 [   0    3    0    0 1997    0    0    0    0    0]
 [   0    0    0    0    0 1999    0    0    0    1]
 [  33    0  250    0    0    1 1716    0    0    0]
 [   0    1    0  240    0    0    0 1509  250    0]
 [   0    0    0    0    0    0    0    0 2000    0]
 [   0    0    0    0    0    0    0    0    0 2000]]

              precision    recall  f1-score   support

           0       0.96      1.00      0.98      2000
           1       0.89      0.99      0.94      2000
           2       0.87      0.85      0.86      2000
           3       0.89      1.00      0.94      2000
           4       0.99      1.00      1.00      2000
           5       1.00      1.00      1.00      2000
           6       0.98      0.86      0.92      2000
           7       1.00      0.75      0.86      2000
           8       0.89      1.00      0.94      2000
           9       1.00      1.00      1.00      2000

    accuracy                           0.94     20000
   macro avg       0.95      0.94      0.94     20000
weighted avg       0.95      0.94      0.94     20000



Model: SVM
Scores: [0.9908 0.8984 0.8996 0.9988 0.998  0.996  0.758  0.9992]
Mean Accuracy: 0.94235
Standard Deviation: 0.08099788577487688
CV: 0.08595308088807437

[[2000    0    0    0    0    0    0    0    0    0]
 [   0 1976   20    0    0    0    4    0    0    0]
 [   6  249 1733    0    0    0   12    0    0    0]
 [   0    0    0 2000    0    0    0    0    0    0]
 [   0    3    0    0 1997    0    0    0    0    0]
 [   0    0    0    0    0 1999    0    0    0    1]
 [ 236    0  250    0    0    0 1513    1    0    0]
 [   0    0    0  120    0    0    0 1630  250    0]
 [   0    0    0    0    0    0    0    1 1999    0]
 [   0    0    0    0    0    0    0    0    0 2000]]

              precision    recall  f1-score   support

           0       0.89      1.00      0.94      2000
           1       0.89      0.99      0.93      2000
           2       0.87      0.87      0.87      2000
           3       0.94      1.00      0.97      2000
           4       1.00      1.00      1.00      2000
           5       1.00      1.00      1.00      2000
           6       0.99      0.76      0.86      2000
           7       1.00      0.81      0.90      2000
           8       0.89      1.00      0.94      2000
           9       1.00      1.00      1.00      2000

    accuracy                           0.94     20000
   macro avg       0.95      0.94      0.94     20000
weighted avg       0.95      0.94      0.94     20000



Model: KNN
Scores: [0.9904 0.8996 0.842  1.     0.8988 1.     0.7544 0.9992]
Mean Accuracy: 0.92305
Standard Deviation: 0.08538686959948819
CV: 0.0925051401326994

[[2000    0    0    0    0    0    0    0    0    0]
 [   0 1974   25    0    1    0    0    0    0    0]
 [   1  250 1749    0    0    0    0    0    0    0]
 [   0    1    0 1751    0    0    0  248    0    0]
 [   0    3    0    0 1997    0    0    0    0    0]
 [   0    0    0    0    0 2000    0    0    0    0]
 [ 381    0  248    0    0    0 1370    0    1    0]
 [   0    0    0  129    0    0    1 1620  250    0]
 [   0    0    0    0    0    0    0    0 2000    0]
 [   0    0    0    0    0    0    0    0    0 2000]]

              precision    recall  f1-score   support

           0       0.84      1.00      0.91      2000
           1       0.89      0.99      0.93      2000
           2       0.86      0.87      0.87      2000
           3       0.93      0.88      0.90      2000
           4       1.00      1.00      1.00      2000
           5       1.00      1.00      1.00      2000
           6       1.00      0.69      0.81      2000
           7       0.87      0.81      0.84      2000
           8       0.89      1.00      0.94      2000
           9       1.00      1.00      1.00      2000

    accuracy                           0.92     20000
   macro avg       0.93      0.92      0.92     20000
weighted avg       0.93      0.92      0.92     20000



Model: Decision Tree
Scores: [0.9188 0.7988 0.8988 0.9996 0.8988 0.9    0.746  0.8992]
Mean Accuracy: 0.8825000000000001
Standard Deviation: 0.07227081015181719
CV: 0.08189326929384383

[[1832    0  168    0    0    0    0    0    0    0]
 [   0 1962   20   18    0    0    0    0    0    0]
 [ 252  250 1498    0    0    0    0    0    0    0]
 [   0    0    0 1500    0    0    0  500    0    0]
 [   0    3    0    0 1997    0    0    0    0    0]
 [   0    0    0    0    0 1999    0    0    0    1]
 [ 137   11  236    0    0    1 1615    0    0    0]
 [   0  249    0  251    0    0    0 1250  250    0]
 [   0    0    0    0    0    0    0    3 1997    0]
 [   0    0    0    0    0    0    0    0    0 2000]]

              precision    recall  f1-score   support

           0       0.82      0.92      0.87      2000
           1       0.79      0.98      0.88      2000
           2       0.78      0.75      0.76      2000
           3       0.85      0.75      0.80      2000
           4       1.00      1.00      1.00      2000
           5       1.00      1.00      1.00      2000
           6       1.00      0.81      0.89      2000
           7       0.71      0.62      0.67      2000
           8       0.89      1.00      0.94      2000
           9       1.00      1.00      1.00      2000

    accuracy                           0.88     20000
   macro avg       0.88      0.88      0.88     20000
weighted avg       0.88      0.88      0.88     20000



Model: Multi Layer Perceptron
Scores: [0.9936 0.7996 0.9    0.9932 0.998  0.9    0.8476 0.9992]
Mean Accuracy: 0.9289000000000001
Standard Deviation: 0.07334514298847605
CV: 0.07895913767733453

[[2000    0    0    0    0    0    0    0    0    0]
 [   8 1984    8    0    0    0    0    0    0    0]
 [  22  249 1479    0    0    0  250    0    0    0]
 [   1    0    0 1999    0    0    0    0    0    0]
 [   0    3    0    0 1997    0    0    0    0    0]
 [   0    0    0    0    0 1999    0    0    0    1]
 [  34    0  250    0    0    0 1715    1    0    0]
 [   0    0    0   98    0    0  246 1406  250    0]
 [   0    0    0    0    0    0    0    1 1999    0]
 [   0    0    0    0    0    0    0    0    0 2000]]

              precision    recall  f1-score   support

           0       0.97      1.00      0.98      2000
           1       0.89      0.99      0.94      2000
           2       0.85      0.74      0.79      2000
           3       0.95      1.00      0.98      2000
           4       1.00      1.00      1.00      2000
           5       1.00      1.00      1.00      2000
           6       0.78      0.86      0.81      2000
           7       1.00      0.70      0.83      2000
           8       0.89      1.00      0.94      2000
           9       1.00      1.00      1.00      2000

    accuracy                           0.93     20000
   macro avg       0.93      0.93      0.93     20000
weighted avg       0.93      0.93      0.93     20000



Model: Random Forest
Scores: [0.942  0.7988 0.9    0.9992 0.9952 1.     0.8632 0.8992]
Mean Accuracy: 0.9247
Standard Deviation: 0.06835868635367418
CV: 0.07392525830396256

[[1884    0  116    0    0    0    0    0    0    0]
 [   0 1970   22    6    0    0    0    2    0    0]
 [   3  250 1746    0    0    0    1    0    0    0]
 [   0    0    0 1500    0    0    1  499    0    0]
 [   0    3    0    0 1997    0    0    0    0    0]
 [   0    0    0    0    0 2000    0    0    0    0]
 [   0    0  251    0    0    1 1748    0    0    0]
 [   0    6    0   93    0    0    0 1651  250    0]
 [   0    0    0    0    0    0    0    2 1998    0]
 [   0    0    0    0    0    0    0    0    0 2000]]

              precision    recall  f1-score   support

           0       1.00      0.94      0.97      2000
           1       0.88      0.98      0.93      2000
           2       0.82      0.87      0.84      2000
           3       0.94      0.75      0.83      2000
           4       1.00      1.00      1.00      2000
           5       1.00      1.00      1.00      2000
           6       1.00      0.87      0.93      2000
           7       0.77      0.83      0.79      2000
           8       0.89      1.00      0.94      2000
           9       1.00      1.00      1.00      2000

    accuracy                           0.92     20000
   macro avg       0.93      0.92      0.92     20000
weighted avg       0.93      0.92      0.92     20000



Model: Gradient Boosting
Scores: [0.9188 0.68   0.9    0.9984 0.9244 0.8572 0.7804 0.8992]
Mean Accuracy: 0.8698
Standard Deviation: 0.09210841438218333
CV: 0.10589608459666973

[[1712    0  104    0    0    0  184    0    0    0]
 [   0 1910   22    0   45    0   15    8    0    0]
 [   5  250 1484    0    0    0  261    0    0    0]
 [   0    0    0 1596    0    0    1  403    0    0]
 [   0    3    0    0 1997    0    0    0    0    0]
 [   0    0    0    0  250 1750    0    0    0    0]
 [   0    0  250    0    0    0 1749    0    1    0]
 [   0    0    0  131    0    0    0 1619   76  174]
 [   0    0    0    0    0    0    0    3 1997    0]
 [   0    0    0    0  133    0    0  285    0 1582]]

              precision    recall  f1-score   support

           0       1.00      0.86      0.92      2000
           1       0.88      0.95      0.92      2000
           2       0.80      0.74      0.77      2000
           3       0.92      0.80      0.86      2000
           4       0.82      1.00      0.90      2000
           5       1.00      0.88      0.93      2000
           6       0.79      0.87      0.83      2000
           7       0.70      0.81      0.75      2000
           8       0.96      1.00      0.98      2000
           9       0.90      0.79      0.84      2000

    accuracy                           0.87     20000
   macro avg       0.88      0.87      0.87     20000
weighted avg       0.88      0.87      0.87     20000



Model: Bagging
Scores: [0.9208 0.7984 0.9    0.9992 0.9956 1.     0.8628 0.8992]
Mean Accuracy: 0.9219999999999999
Standard Deviation: 0.06823459533110751
CV: 0.07400715328753527

[[1842    0  158    0    0    0    0    0    0    0]
 [   0 1969   22    4    0    0    0    5    0    0]
 [   4  250 1735    0    0    0   11    0    0    0]
 [   0    0    0 1500    0    0    0  500    0    0]
 [   0    3    0    0 1997    0    0    0    0    0]
 [   0    0    0    0    0 2000    0    0    0    0]
 [   0    0  252    0    0    0 1748    0    0    0]
 [   0    5    0   93    0    0    0 1652  250    0]
 [   0    0    0    0    0    0    0    3 1997    0]
 [   0    0    0    0    0    0    0    0    0 2000]]

              precision    recall  f1-score   support

           0       1.00      0.92      0.96      2000
           1       0.88      0.98      0.93      2000
           2       0.80      0.87      0.83      2000
           3       0.94      0.75      0.83      2000
           4       1.00      1.00      1.00      2000
           5       1.00      1.00      1.00      2000
           6       0.99      0.87      0.93      2000
           7       0.76      0.83      0.79      2000
           8       0.89      1.00      0.94      2000
           9       1.00      1.00      1.00      2000

    accuracy                           0.92     20000
   macro avg       0.93      0.92      0.92     20000
weighted avg       0.93      0.92      0.92     20000



Model: Extra Trees
Scores: [0.99   0.8984 0.9    0.996  0.9912 1.     0.7548 0.9988]
Mean Accuracy: 0.9411499999999999
Standard Deviation: 0.08134333101121442
CV: 0.08642972003529133

[[1983    0    0    0    0    0   17    0    0    0]
 [   0 1975   23    2    0    0    0    0    0    0]
 [  11  250 1738    0    0    0    1    0    0    0]
 [   0    0    0 1998    0    0    1    1    0    0]
 [   0    3    0    0 1997    0    0    0    0    0]
 [   0    0    0    0    0 1999    0    0    0    1]
 [ 234    0  250    0    0    0 1515    1    0    0]
 [   0    0    0  130    0    0    0 1620  250    0]
 [   0    0    0    0    0    0    0    2 1998    0]
 [   0    0    0    0    0    0    0    0    0 2000]]

              precision    recall  f1-score   support

           0       0.89      0.99      0.94      2000
           1       0.89      0.99      0.93      2000
           2       0.86      0.87      0.87      2000
           3       0.94      1.00      0.97      2000
           4       1.00      1.00      1.00      2000
           5       1.00      1.00      1.00      2000
           6       0.99      0.76      0.86      2000
           7       1.00      0.81      0.89      2000
           8       0.89      1.00      0.94      2000
           9       1.00      1.00      1.00      2000

    accuracy                           0.94     20000
   macro avg       0.95      0.94      0.94     20000
weighted avg       0.95      0.94      0.94     20000



Model: LGBM
Scores: [0.9124 0.5284 0.8988 0.8936 0.6992 0.8896 0.8612 0.8992]
Mean Accuracy: 0.8228
Standard Deviation: 0.12871425717456478
CV: 0.1564344399301954

[[1534    0  216    0    0    0  250    0    0    0]
 [ 240 1690   25    0   25   17    0    3    0    0]
 [  15  250 1733    0    0    0    2    0    0    0]
 [   0  211    0 1289    0    0  249  251    0    0]
 [   0  250    0    0 1750    0    0    0    0    0]
 [   0    0    0    0  187 1812    0    0    1    0]
 [   0   26  225    0    0    0 1748    1    0    0]
 [   0  248    0   98    0    0  252 1402    0    0]
 [   0    0    0    0    0    0    0  252 1748    0]
 [   0    0    0    0    0  250    0    0    0 1750]]



Model: XGB
Scores: [0.9012 0.7984 0.966  0.9976 0.8864 0.8004 0.8496 0.8992]
Mean Accuracy: 0.88735
Standard Deviation: 0.06668161290790739
CV: 0.07514691261385856

[[1790    0  168    0    0    0   42    0    0    0]
 [   0 1935   17    0   46    0    0    2    0    0]
 [   7  250 1482    0    0    0  261    0    0    0]
 [   0  211    0 1538    0    0    0  251    0    0]
 [   0  250    0    0 1750    0    0    0    0    0]
 [   0    0    0    0    0 1999    0    0    0    1]
 [   0   12  238    0    1    0 1749    0    0    0]
 [   0   32    0  126    0    0    0 1757   85    0]
 [   0    0    0    0    0    0    0    3 1997    0]
 [   0    0    0    0    0   89    0  161    0 1750]]

              precision    recall  f1-score   support

           0       1.00      0.90      0.94      2000
           1       0.72      0.97      0.83      2000
           2       0.78      0.74      0.76      2000
           3       0.92      0.77      0.84      2000
           4       0.97      0.88      0.92      2000
           5       0.96      1.00      0.98      2000
           6       0.85      0.87      0.86      2000
           7       0.81      0.88      0.84      2000
           8       0.96      1.00      0.98      2000
           9       1.00      0.88      0.93      2000

    accuracy                           0.89     20000
   macro avg       0.90      0.89      0.89     20000
weighted avg       0.90      0.89      0.89     20000
```

### Combining Classification

#### Getting Models

In [None]:
# Names of the base models that are tuned here and saved to '<name>.joblib'.
selected_model_names = [
  'Logistic Regression',
  'Passive Aggressive',
  'KNN',
  'SVM',
  'Random Forest',
  'Extra Trees',
  'Bagging'
]
selected_models = []

for model_name in selected_model_names:
  # Bayesian hyper-parameter search (BayesSearchCV, despite the "grid_search" name).
  grid_search_cv = BayesSearchCV(classifier_models[model_name], classifier_parameters[model_name], cv=folds_number, n_iter=50, verbose=1)
  grid_search_cv.fit(X, y)
  model = grid_search_cv.best_estimator_

  # Cross-validated evaluation of the tuned estimator.
  scores = cross_val_score(model, X, np.ravel(y), cv=folds_number, scoring='accuracy')
  y_pred = cross_val_predict(model, X, y, cv=folds_number)
  precision_recall_report = classification_report(y, y_pred)
  confusion_matrix_report = confusion_matrix(y, y_pred)

  # Keep the tuned estimator in memory and on disk for the ensemble cells.
  selected_models.append(model)
  joblib.dump(model, f'{model_name}.joblib')

  mean_accuracy = np.mean(scores)
  std_accuracy = np.std(scores)

  print(f'Model: {model_name}')
  print(f'Scores: {scores}')
  print(f'Mean Accuracy: {mean_accuracy}')
  print(f'Standard Deviation: {std_accuracy}')
  # CV = coefficient of variation (std / mean) of the fold accuracies.
  print(f'CV: {std_accuracy/mean_accuracy}', end='\n\n')
  display(model)
  print(confusion_matrix_report, end='\n\n')
  print(precision_recall_report, end='\n\n\n')

NameError: ignored

#### Voting Classifier

In [None]:
# Remember to upload the saved '<name>.joblib' model files first.
# Maps each estimator name to the file it was dumped to.
model_files = {
  'Logistic Regression': 'Logistic Regression.joblib',
  'Passive Aggressive': 'Passive Aggressive.joblib',
  'KNN': 'KNN.joblib',
  'SVM': 'SVM.joblib',
  'Random Forest': 'Random Forest.joblib',
  'Extra Trees': 'Extra Trees.joblib',
  'Bagging': 'Bagging.joblib',
}

# (name, estimator) pairs in the order listed above.
estimators = [(name, joblib.load(path)) for name, path in model_files.items()]

# Individual handles, in the same order as `estimators`.
model_0, model_1, model_2, model_3, model_4, model_5, model_6 = (
  loaded for _, loaded in estimators
)

In [None]:
def get_combination_subsets(estimators, size):
  """Return every `size`-element combination of `estimators`.

  Combinations are produced by `itertools.combinations`, so they keep the
  input order; each one is returned as a list.
  """
  return [list(combo) for combo in itertools.combinations(estimators, size)]

In [None]:
# Search over every subset of 2 to 6 base estimators and keep the hard-voting
# ensemble with the highest mean cross-validated accuracy.
best_model = None
best_model_accuracy = 0.0
for subset_size in tqdm(range(2, 7)):
  for estimators_list in tqdm(get_combination_subsets(estimators, subset_size)):
    model = VotingClassifier(estimators=estimators_list, voting='hard')
    scores = cross_val_score(model, X, np.ravel(y), cv=folds_number, scoring='accuracy')

    mean_accuracy = np.mean(scores)
    if mean_accuracy > best_model_accuracy:
      best_model_accuracy = mean_accuracy
      best_model = model

In [None]:
# Show the winning ensemble followed by its mean cross-validated accuracy.
display(best_model, best_model_accuracy)

0.95465

In [None]:
# Baseline: hard-voting ensemble built from all loaded estimators.
all_estimators_model = VotingClassifier(estimators=estimators, voting='hard')
# Evaluate the ensemble built above, not the `model` variable left over from the search loop.
scores = cross_val_score(all_estimators_model, X, np.ravel(y), cv=folds_number, scoring='accuracy')
display(np.mean(scores))

0.9439

In [None]:
# Full cross-validated report for the selected voting ensemble.
scores = cross_val_score(best_model, X, np.ravel(y), cv=folds_number, scoring='accuracy')
y_pred = cross_val_predict(best_model, X, y, cv=folds_number)
precision_recall_report = classification_report(y, y_pred)
confusion_matrix_report = confusion_matrix(y, y_pred)

mean_accuracy = np.mean(scores)
std_accuracy = np.std(scores)

display(best_model)
print(f'Scores: {scores}')
print(f'Mean Accuracy: {mean_accuracy}')
print(f'Standard Deviation: {std_accuracy}')
# CV = coefficient of variation (std / mean) of the fold accuracies.
print(f'CV: {std_accuracy/mean_accuracy}', end='\n\n')
print(confusion_matrix_report, end='\n\n')
print(precision_recall_report, end='\n\n\n')

Scores: [0.9916 0.8988 0.9    0.9992 0.998  1.     0.8504 0.9992]
Mean Accuracy: 0.95465
Standard Deviation: 0.0572761512324283
Standard Deviation: 0.0572761512324283
CV: 0.05999701590365925

[[2000    0    0    0    0    0    0    0    0    0]
 [   0 1978   22    0    0    0    0    0    0    0]
 [   4  250 1745    0    0    0    1    0    0    0]
 [   0    1    0 1999    0    0    0    0    0    0]
 [   0    3    0    0 1997    0    0    0    0    0]
 [   0    0    0    0    0 2000    0    0    0    0]
 [  31    0  251    0    0    0 1718    0    0    0]
 [   0    0    0   94    0    0    0 1656  250    0]
 [   0    0    0    0    0    0    0    0 2000    0]
 [   0    0    0    0    0    0    0    0    0 2000]]

              precision    recall  f1-score   support

           0       0.98      1.00      0.99      2000
           1       0.89      0.99      0.93      2000
           2       0.86      0.87      0.87      2000
           3       0.96      1.00      0.98      2000
     

In [None]:
# cross_val_score / cross_val_predict only fit clones, so `best_model` itself is
# still unfitted here. Fit it on the full dataset so the saved file can predict
# directly after joblib.load.
best_model.fit(X, np.ravel(y))
joblib.dump(best_model, 'voting_classifier_best_model.joblib')

['voting_classifier_best_model.joblib']

#### Stacking Classifier

In [None]:
# Remember to upload the saved '<name>.joblib' model files first.
# Maps each estimator name to the file it was dumped to.
model_files = {
  'Logistic Regression': 'Logistic Regression.joblib',
  'Passive Aggressive': 'Passive Aggressive.joblib',
  'KNN': 'KNN.joblib',
  'SVM': 'SVM.joblib',
  'Random Forest': 'Random Forest.joblib',
  'Extra Trees': 'Extra Trees.joblib',
  'Bagging': 'Bagging.joblib',
}

# (name, estimator) pairs in the order listed above.
estimators = [(name, joblib.load(path)) for name, path in model_files.items()]

# Individual handles, in the same order as `estimators`.
model_0, model_1, model_2, model_3, model_4, model_5, model_6 = (
  loaded for _, loaded in estimators
)

In [None]:
def get_combination_subsets(estimators, size):
  """Return every `size`-element combination of `estimators`.

  Combinations are produced by `itertools.combinations`, so they keep the
  input order; each one is returned as a list.
  """
  return [list(combo) for combo in itertools.combinations(estimators, size)]

In [None]:
# Search over every combination of 3 to 7 estimators. In each combination the
# first entry supplies the final (meta) estimator and the remaining entries are
# the stacked base estimators. Keep the stack with the highest mean accuracy.
best_model = None
best_model_accuracy = 0.0
for subset_size in tqdm(range(3, 8)):
  for estimators_list in tqdm(get_combination_subsets(estimators, subset_size)):
    (_, final_estimator), base_estimators = estimators_list[0], estimators_list[1:]
    model = StackingClassifier(estimators=base_estimators, final_estimator=final_estimator, cv=folds_number)
    scores = cross_val_score(model, X, np.ravel(y), cv=folds_number, scoring='accuracy')

    mean_accuracy = np.mean(scores)
    if mean_accuracy > best_model_accuracy:
      best_model_accuracy = mean_accuracy
      best_model = model

In [None]:
# Show the winning stack followed by its mean cross-validated accuracy.
display(best_model, best_model_accuracy)

0.9351

In [None]:
# cross_val_score only fits clones, so `best_model` itself is still unfitted
# here. Fit it on the full dataset so the saved file can predict directly after
# joblib.load.
best_model.fit(X, np.ravel(y))
joblib.dump(best_model, 'stacking_classifier_best_model.joblib')

['stacking_classifier_best_model.joblib']

# Generate charts

In [None]:
# Confusion-matrix counts hard-coded for the chart (same values as the matrix
# printed for the best voting classifier). Deliberately NOT named
# `confusion_matrix`: that would shadow the sklearn.metrics function imported at
# the top of the notebook and break any re-run of the evaluation cells.
confusion_matrix_values = np.array(
  [[2000,    0,    0,    0,    0,    0,    0,    0,    0,    0],
  [   0, 1978,   22,    0,    0,    0,    0,    0,    0,    0],
  [   4,  250, 1745,    0,    0,    0,    1,    0,    0,    0],
  [   0,    1,    0, 1999,    0,    0,    0,    0,    0,    0],
  [   0,    3,    0,    0, 1997,    0,    0,    0,    0,    0],
  [   0,    0,    0,    0,    0, 2000,    0,    0,    0,    0],
  [  31,    0,  251,    0,    0,    0, 1718,    0,    0,    0],
  [   0,    0,    0,   94,    0,    0,    0, 1656,  250,    0],
  [   0,    0,    0,    0,    0,    0,    0,    0, 2000,    0],
  [   0,    0,    0,    0,    0,    0,    0,    0,    0, 2000]]
)

# Axis tick labels, one per gesture class.
labels = ['G1', 'G2', 'G3', 'G4', 'G5', 'G6', 'G7', 'G8', 'G9', 'G10']

fig = ff.create_annotated_heatmap(
  z=confusion_matrix_values,
  x=labels,
  y=labels,
  colorscale='Blues'
)

fig.update_layout(
  xaxis=dict(title='Predicted'),
  # Reverse the y axis so the first class is drawn in the top row.
  yaxis=dict(title='Actual', autorange='reversed'),
  height=800,
  width=800,
  margin=dict(l=20, r=20, t=20, b=20),
)

pio.show(fig)