# Importing Packges

In [1]:
%matplotlib inline

import numpy as np

import joblib


import pandas as pd
import matplotlib.pyplot as plt
import math
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score, recall_score,precision_score, classification_report, confusion_matrix
import collections
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import precision_recall_curve, roc_curve
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import label_binarize

np.random.seed(1337)  # for reproducibility



from sklearn.ensemble import VotingClassifier

# Loading data (CTTD features and Labels)

In [8]:
X_train = np.load('../data/train/X_train.npy') 

Y_train = np.load('../data/train/Y_train.npy')

X_test = np.load('../data/test/set1/X_test.npy')

Y_test = np.load('../data/test/set1/Y_test.npy')

X_test2 = np.load('../data/test/set2/X_test2.npy')

Y_test2 = np.load('../data/test/set2/Y_test2.npy')

# Scaling features

In [9]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)
X_test2 = scaler.fit_transform(X_test2)

In [4]:
print(X_train.shape)
print(Y_train.shape)

print(X_test.shape)
print(Y_test.shape)


print(X_test2.shape)
print(Y_test2.shape)

(28368, 42)
(28368,)
(12168, 42)
(12168,)
(7040, 42)
(7040,)


# Loading models 

In [10]:
SVM_clf = joblib.load('../models/svm_emg_clf.pkl')

LR_clf = joblib.load('../models/LR_emg_clf.pkl')

RF_clf = joblib.load('../models/RF_emg_clf.pkl')

ET_clf = joblib.load('../models/ET_emg_clf.pkl')



In [7]:
estimators=[('LR',LR_clf),('SVM',SVM_clf),('RF',RF_clf),('ET',ET_clf)]

In [8]:
voting_clf=VotingClassifier(estimators,voting='soft')

In [9]:
voting_clf.fit(X_train,Y_train)

VotingClassifier(estimators=[('LR',
                              LogisticRegression(C=100, class_weight=None,
                                                 dual=False, fit_intercept=True,
                                                 intercept_scaling=1,
                                                 l1_ratio=None, max_iter=10000,
                                                 multi_class='multinomial',
                                                 n_jobs=-1, penalty='l2',
                                                 random_state=None,
                                                 solver='saga', tol=0.0001,
                                                 verbose=0, warm_start=False)),
                             ('SVM',
                              SVC(C=35.4, break_ties=False, cache_size=200,
                                  class_weight='balanced...
                                                   criterion='gini',
                                          

In [10]:
Y_predict = voting_clf.predict(X_test)

In [11]:
accuracy_score(Y_test, Y_predict)

0.9326101249178173

# SVC class don't compute probability by default, so we have to train the model by setting the parameter probability=True.

In [13]:
SVM_clf = SVC(kernel = 'rbf', C = 35.4, gamma= 'scale', class_weight = 'balanced',probability=True)

In [None]:
SVM_clf.fit(X_train, Y_train)

# Evaluation on Test set 1

In [15]:
for clf in (LR_clf,SVM_clf,RF_clf,ET_clf,voting_clf):
    y_predict=clf.predict(X_test)
    print(clf.__class__.__name__,accuracy_score(Y_test, y_predict)) 

LogisticRegression 0.9178994082840237
SVC 0.9231591058514136
RandomForestClassifier 0.9059007232084155
ExtraTreesClassifier 0.9275147928994083
VotingClassifier 0.9326101249178173


# Evaluation on Test set 2

In [16]:
for clf in (LR_clf,SVM_clf,RF_clf,ET_clf,voting_clf):
    y_predict=clf.predict(X_test2)
    print(clf.__class__.__name__,accuracy_score(Y_test2, y_predict)) 

LogisticRegression 0.9676136363636364
SVC 0.9762784090909091
RandomForestClassifier 0.953125
ExtraTreesClassifier 0.9627840909090909
VotingClassifier 0.9738636363636364
