# Import required libraries

In [7]:
import numpy as np
import os
import glob
from matplotlib import pyplot
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.externals import joblib
from sklearn import metrics
import pandas as pd
import h5py

  from ._conv import register_converters as _register_converters


# Create machine learning models

In [8]:
# Candidate classifiers as (short name, unfitted estimator) pairs.
# The seeded estimators all use random_state=9 so repeated runs are comparable.
models = [
    ('NB', GaussianNB()),
    ('SVM', SVC(random_state=9)),
    ('KNN', KNeighborsClassifier()),
    ('CART', DecisionTreeClassifier(random_state=9)),
    ('RF', RandomForestClassifier(n_estimators=100, random_state=9)),
]

In [9]:
# Accumulators filled by the cross-validation cell: one score array and one
# model name per evaluated classifier.
results, names = [], []

# Metric name handed to cross_val_score
scoring = "accuracy"

# Import feature vectors and training labels

In [10]:
# Load the feature matrix and the label vector from their HDF5 files.
# The `with` blocks guarantee both files are closed even if a dataset lookup
# or the array conversion raises; np.array() copies the data into memory so
# `features` and `labels` stay usable after the files are closed.
with h5py.File('output/data.h5', 'r') as h5f_data:
    features = np.array(h5f_data['dataset_1'])

with h5py.File('output/labels.h5', 'r') as h5f_label:
    labels = np.array(h5f_label['dataset_1'])

In [11]:
# Sanity check: report the dimensions of the loaded arrays, then announce
# that the training phase begins.
shape_report = [
    "Features shape: {}".format(features.shape),
    "Labels shape: {}".format(labels.shape),
    "",
    "Training started...",
]
print("\n".join(shape_report[:2]))
print(shape_report[-1])

Features shape: (321, 62500)
Labels shape: (321,)
Training started...


## Splitting training and testing data

In [14]:
# Hold out 10% of the samples for testing; the fixed seed keeps the
# partition identical from run to run.
trainData, testData, trainLabels, testLabels = train_test_split(
    np.asarray(features),
    np.asarray(labels),
    test_size=.10,
    random_state=9,
)


In [15]:
# Summarise the shapes of the four arrays produced by the split
split_report = [
    "Train and Test data splitted",
    "Train data  : {}".format(trainData.shape),
    "Test data   : {}".format(testData.shape),
    "Train labels: {}".format(trainLabels.shape),
    "Test labels : {}".format(testLabels.shape),
]
print("\n".join(split_report))

Train and Test data splitted
Train data  : (288, 62500)
Test data   : (33, 62500)
Train labels: (288,)
Test labels : (33,)


In [16]:
# Install a blanket "ignore" filter so no warning of any category is shown
# from this point on.
import warnings

warnings.filterwarnings(action='ignore')

# 10-fold cross validation

In [17]:
# Score every candidate model with 10-fold cross-validation on the training split.
#
# The splitter is built once, outside the loop, and without random_state:
# shuffling is off, so the seed never influenced the folds, and
# scikit-learn >= 0.24 raises ValueError when random_state is combined with
# shuffle=False. The folds are therefore the same as before.
#
# NOTE: `results` and `names` are appended to, so re-running this cell without
# re-running the cell that creates them duplicates the entries.
kfold = KFold(n_splits=10)

for name, model in models:
    cv_results = cross_val_score(model, trainData, trainLabels, cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    # Report the mean accuracy with its standard deviation across the folds
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)


NB: 0.486946 (0.111215)
SVM: 0.371921 (0.076150)
KNN: 0.608744 (0.110002)
CART: 0.542241 (0.068803)
RF: 0.601232 (0.082756)


In [18]:
# Fresh, unfitted estimators for the train/test evaluation cells that follow.
# They are separate instances from the ones stored in `models`.
rf = RandomForestClassifier(random_state=9, n_estimators=100)
dt = DecisionTreeClassifier(random_state=9)
knn = KNeighborsClassifier()
svc = SVC(random_state=9)
nb = GaussianNB()

In [19]:
# Aliases for the split arrays, under the names the per-model cells use
training_features, training_target = trainData, trainLabels
test_features, test_target = testData, testLabels

# Naive Bayes

In [20]:
# Gaussian Naive Bayes: fit once on the training split, then evaluate.
# fit() returns the estimator itself, so the second fit() call that used to
# follow only repeated the same training and has been dropped.
trained_model = nb.fit(training_features, training_target)
predictions = trained_model.predict(test_features)

# Accuracy on the data the model was fitted on vs. on the held-out split
Train_Accuracy = accuracy_score(training_target, trained_model.predict(training_features))
Test_Accuracy = accuracy_score(test_target, predictions)
# Confusion matrix on the test split: rows are true classes, columns predicted
Confusion_Matrix = confusion_matrix(test_target, predictions)
print('NB')
print(Train_Accuracy)
print(Test_Accuracy)
print(Confusion_Matrix)

NB
0.5277777777777778
0.5757575757575758
[[7 1 5]
 [3 6 5]
 [0 0 6]]


# Support Vector Machine

In [21]:
# Support Vector Classifier: fit once on the training split, then evaluate.
# fit() returns the estimator itself, so the second fit() call that used to
# follow only repeated the same training and has been dropped.
trained_model = svc.fit(training_features, training_target)
predictions = trained_model.predict(test_features)

# Accuracy on the data the model was fitted on vs. on the held-out split
Train_Accuracy = accuracy_score(training_target, trained_model.predict(training_features))
Test_Accuracy = accuracy_score(test_target, predictions)
# Confusion matrix on the test split: rows are true classes, columns predicted
Confusion_Matrix = confusion_matrix(test_target, predictions)
print('SVC')
print(Train_Accuracy)
print(Test_Accuracy)
print(Confusion_Matrix)

SVC
0.3784722222222222
0.21212121212121213
[[ 1  0 12]
 [ 3  0 11]
 [ 0  0  6]]


# K-Nearest Neighbours

In [22]:
# K-Nearest Neighbours: fit once on the training split, then evaluate.
# fit() returns the estimator itself, so the second fit() call that used to
# follow only repeated the same training and has been dropped.
trained_model = knn.fit(training_features, training_target)
predictions = trained_model.predict(test_features)

# Accuracy on the data the model was fitted on vs. on the held-out split
Train_Accuracy = accuracy_score(training_target, trained_model.predict(training_features))
Test_Accuracy = accuracy_score(test_target, predictions)
# Confusion matrix on the test split: rows are true classes, columns predicted
Confusion_Matrix = confusion_matrix(test_target, predictions)
print('KNN')
print(Train_Accuracy)
print(Test_Accuracy)
print(Confusion_Matrix)

KNN
0.6805555555555556
0.42424242424242425
[[8 3 2]
 [8 3 3]
 [3 0 3]]


# Decision Trees

In [23]:
# Decision tree: fit once on the training split, then evaluate.
# fit() returns the estimator itself, so the second fit() call that used to
# follow only repeated the same training (the estimator is seeded with
# random_state=9) and has been dropped.
trained_model = dt.fit(training_features, training_target)
predictions = trained_model.predict(test_features)

# Accuracy on the data the model was fitted on vs. on the held-out split
Train_Accuracy = accuracy_score(training_target, trained_model.predict(training_features))
Test_Accuracy = accuracy_score(test_target, predictions)
# Confusion matrix on the test split: rows are true classes, columns predicted
Confusion_Matrix = confusion_matrix(test_target, predictions)
print('Decision Tree')
print(Train_Accuracy)
print(Test_Accuracy)
print(Confusion_Matrix)

Decision Tree
1.0
0.45454545454545453
[[8 3 2]
 [5 4 5]
 [2 1 3]]


# Random Forest

In [24]:
# Random forest: fit once on the training split, then evaluate.
# fit() returns the estimator itself, so the second fit() call that used to
# follow only repeated the same training (the estimator is seeded with
# random_state=9) and has been dropped.
trained_model = rf.fit(training_features, training_target)
predictions = trained_model.predict(test_features)

# Accuracy on the data the model was fitted on vs. on the held-out split
Train_Accuracy = accuracy_score(training_target, trained_model.predict(training_features))
Test_Accuracy = accuracy_score(test_target, predictions)
# Confusion matrix on the test split: rows are true classes, columns predicted
Confusion_Matrix = confusion_matrix(test_target, predictions)
print('Randor Forest')
print(Train_Accuracy)
print(Test_Accuracy)
print(Confusion_Matrix)

Randor Forest
1.0
0.5151515151515151
[[8 3 2]
 [8 4 2]
 [1 0 5]]
