In [9]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score


In [10]:
df = pd.read_csv('../datasets/WZUM_dataset_v3.csv', index_col=0)
columns_to_drop = ['world_landmark_0.x','world_landmark_0.y','world_landmark_0.z','world_landmark_1.x',
                   'world_landmark_1.y','world_landmark_1.z','world_landmark_2.x','world_landmark_2.y',
                   'world_landmark_2.z','world_landmark_3.x','world_landmark_3.y','world_landmark_3.z',
                   'world_landmark_4.x','world_landmark_4.y','world_landmark_4.z','world_landmark_5.x',
                   'world_landmark_5.y','world_landmark_5.z','world_landmark_6.x','world_landmark_6.y',
                   'world_landmark_6.z','world_landmark_7.x','world_landmark_7.y','world_landmark_7.z',
                   'world_landmark_8.x','world_landmark_8.y','world_landmark_8.z','world_landmark_9.x',
                   'world_landmark_9.y','world_landmark_9.z','world_landmark_10.x','world_landmark_10.y',
                   'world_landmark_10.z','world_landmark_11.x','world_landmark_11.y','world_landmark_11.z',
                   'world_landmark_12.x','world_landmark_12.y','world_landmark_12.z','world_landmark_13.x',
                   'world_landmark_13.y','world_landmark_13.z','world_landmark_14.x','world_landmark_14.y',
                   'world_landmark_14.z','world_landmark_15.x','world_landmark_15.y','world_landmark_15.z',
                   'world_landmark_16.x','world_landmark_16.y','world_landmark_16.z','world_landmark_17.x',
                   'world_landmark_17.y','world_landmark_17.z','world_landmark_18.x','world_landmark_18.y',
                   'world_landmark_18.z','world_landmark_19.x','world_landmark_19.y','world_landmark_19.z',
                   'world_landmark_20.x','world_landmark_20.y','world_landmark_20.z','handedness.score', 'letter','handedness.label'] #,'handedness.label'
X = df.drop(columns_to_drop, axis=1)
y = df['letter']

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y, shuffle=True)

In [12]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [5]:
from sklearn.svm import SVC
mdl = SVC(C=1000, degree=1, gamma=0.9, kernel='poly')
mdl.fit(X_train, y_train)

In [8]:
score = mdl.score(X_test, y_test)
print(f'Mean score TEST : {score}')
score = mdl.score(X_train, y_train)
print(f'Mean score TRAIN: {score}')
print(f'F1 score: {f1_score(y_test, mdl.predict(X_test),average=None)}')

Mean score TEST : 0.9309338521400778
Mean score TRAIN: 0.9905086395716719
F1 score: [0.78350515 0.97674419 0.96629213 0.93023256 0.95348837 0.98850575
 0.95555556 1.         0.97777778 0.95238095 0.90909091 0.81818182
 0.95238095 1.         0.98850575 0.96470588 0.84337349 0.78947368
 0.96296296 0.81395349 0.9047619  1.         0.9382716  0.97560976]


# Grid search

In [15]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
classifier = SVC(kernel = 'rbf')
parameters = [{'C': [0.25, 0.5, 0.75, 1, 10, 100, 1000], 'kernel': ['linear']},
              {'C': [0.25, 0.5, 0.75, 1, 10, 100, 1000], 'kernel': ['poly'], 'degree':[1,2,3, 4, 5, 6, 7, 8, 9, 10], 'gamma': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9,1, 0.1, 0.01, 0.001, 0.0001, 'auto']},
              {'C': [0.25, 0.5, 0.75, 1, 10, 100, 1000], 'kernel': ['rbf'], 'gamma': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9,1, 0.1, 0.01, 0.001, 0.0001, 'auto']},
              {'C': [0.25, 0.5, 0.75, 1, 10, 100, 1000], 'kernel': ['sigmoid'], 'gamma': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9,1, 0.1, 0.01, 0.001, 0.0001, 'auto']}]
grid_search = GridSearchCV(estimator = classifier,
                           param_grid = parameters,
                           scoring = 'f1_micro',
                           cv = 10,
                           n_jobs = -1)
grid_search.fit(X_train, y_train)
best_accuracy = grid_search.best_score_
best_parameters = grid_search.best_params_
print("Best Accuracy: {:.2f} %".format(best_accuracy*100))
print("Best Parameters:", best_parameters)

Best Accuracy: 94.09 %
Best Parameters: {'C': 1000, 'degree': 1, 'gamma': 0.2, 'kernel': 'poly'}


In [16]:
score = grid_search.score(X_test, y_test)
print(f'Mean score TEST : {score}')
score = grid_search.score(X_train, y_train)
print(f'Mean score TRAIN: {score}')
print(f'F1 score: {f1_score(y_test, grid_search.predict(X_test), average=None)}')

Mean score TEST : 0.9348249027237355
Mean score TRAIN: 0.9866147481138964
F1 score: [0.82474227 0.97674419 0.96629213 0.93023256 0.95348837 0.98850575
 0.95555556 1.         0.97777778 0.95238095 0.90909091 0.86363636
 0.95238095 1.         0.98850575 0.96470588 0.84705882 0.78947368
 0.96296296 0.80952381 0.9047619  1.         0.9382716  0.97560976]


# Voting

In [20]:
from sklearn.ensemble import VotingClassifier
from sklearn.svm import LinearSVC
from sklearn.neural_network import MLPClassifier
clf = VotingClassifier([
    ('SVC_1', SVC(C=1000, degree=1, gamma=0.9, kernel='poly')),
    ('MLP', MLPClassifier(max_iter=1000)),
    ('Lin_SVC_1', LinearSVC(C=1000, dual=False, loss='squared_hinge', multi_class='ovr', penalty='l2',max_iter=10000 ))
])
clf.fit(X_train, y_train)
# print(clf.score(X_test, y_test))
score = clf.score(X_test, y_test)
print(f'Mean score TEST : {score}')
score = clf.score(X_train, y_train)
print(f'Mean score TRAIN: {score}')
print(f'F1 score: {f1_score(y_test, clf.predict(X_test), average=None)}')

Mean score TEST : 0.9494163424124513
Mean score TRAIN: 0.9934290581650036
F1 score: [0.84848485 0.97674419 0.95454545 0.94252874 0.95454545 0.98850575
 0.96629213 1.         0.97777778 0.95238095 0.96470588 0.9047619
 0.95238095 0.98823529 0.97674419 0.96470588 0.90697674 0.81578947
 0.98795181 0.88372093 0.9382716  1.         0.96385542 0.97560976]


# Stacking

In [19]:
from sklearn.ensemble import StackingClassifier
from sklearn.svm import LinearSVC
from sklearn.neural_network import MLPClassifier
clf_2 = StackingClassifier([
    ('SVC_1', SVC(C=1000, degree=1, gamma=0.9, kernel='poly')),
    ('MLP', MLPClassifier()),
    ('Lin_SVC_1', LinearSVC(C=1000, dual=False, loss='squared_hinge', multi_class='ovr', penalty='l2',max_iter=10000 ))
],SVC())
clf_2.fit(X_train, y_train)
# print(clf.score(X_test, y_test))
score = clf_2.score(X_test, y_test)
print(f'Mean score TEST : {score}')
score = clf_2.score(X_train, y_train)
print(f'Mean score TRAIN: {score}')
print(f'F1 score: {f1_score(y_test, clf_2.predict(X_test), average=None)}')



Mean score TEST : 0.8793774319066148
Mean score TRAIN: 0.9155512290094914
F1 score: [0.61904762 0.98850575 0.97674419 0.8988764  0.89655172 0.95555556
 0.95555556 1.         0.96703297 0.93975904 0.92857143 0.7311828
 0.87179487 0.92857143 0.95348837 0.96385542 0.71794872 0.58426966
 0.98823529 0.63366337 0.73972603 1.         0.9382716  0.95121951]
