In [1]:
import numpy as np
import pandas as pd
import datetime
import time
from sklearn.model_selection import train_test_split


from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn import tree
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import AdaBoostClassifier
from xgboost import XGBClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression

# Metrics to use other than the accuracy (Success rate)
# https://scikit-learn.org/stable/modules/classes.html?highlight=metric#module-sklearn.metrics

from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score

In [2]:
def specificity(ytest,ypred):
    tn, fp, fn, tp = confusion_matrix(ytest,ypred).ravel()
    spec = tn/(tn+fp)
    return spec

In [3]:
def results(model,X_train,y_train,X_test,y_test):
    t01 = time.time()
    model.fit(X_train,y_train)
    t02 = time.time()
    print(type(model).__name__)
    print("Train Time (sec):", t02 - t01)
    traint = t02 - t01
    t1 = time.time()
    print("Acc:  ", model.score(X_test,y_test))
    t2 = time.time()
    print("Test Time  (sec):", t2 - t1,'\n')
    testt = t2 - t1
    y_pred = model.predict(X_test)
    prec = precision_score(y_test,y_pred)
    print("Prec: ",prec)
    spec = specificity(y_test,y_pred)
    print("Spec: ", spec)
    sens = recall_score(y_test,y_pred)
    print("Sens: ", sens)
    f1 = f1_score(y_test,y_pred)
    print("F1: ",f1)
    res = {"Model_Name":type(model).__name__, "Accuracy": model.score(X_test,y_test),
    "Precision":prec, "Specificity":spec, "Sensitivity": sens, "F1":f1,
    "Train_time": traint,'Test_time':testt }
    
    return res

In [4]:
#read csv
df = pd.read_csv('processed_data/filtered_psr_data18channels.csv')
df = df.drop(df.columns[0],axis=1)
df.head()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,29,30,31,32,33,34,35,36,37,class
0,3.0,2.7105e-20,-8e-06,6.7763e-21,1.4e-05,1.0164e-20,-4e-06,2.7105e-20,-8.3395e-06,3.7268999999999996e-20,...,-2e-06,4.0657999999999996e-20,-1.5e-05,-1.3553e-20,-1.8e-05,-2.7105e-20,2.6593e-06,6.7763e-21,-2.8e-05,1
1,4.0,-3.1193e-06,-1e-05,6.885e-06,1.3e-05,-1.7404e-07,-8e-06,-3.8346e-06,-8.1509e-06,1.7034e-06,...,-3e-06,-4.9808e-06,-2.1e-05,-7.7025e-06,-2e-05,2.5342e-06,-5.8557e-07,-9.5114e-06,-3.6e-05,1
2,5.0,-5.9938e-06,-1.2e-05,1.2092e-05,9e-06,-1.2552e-06,-1.3e-05,-6.8211e-06,-6.4266e-06,3.3209e-06,...,-5e-06,-1.0114e-05,-2.5e-05,-1.4139e-05,-2.1e-05,3.7004e-06,-5.2285e-06,-1.8832e-05,-4.4e-05,1
3,6.0,-8.4383e-06,-1.3e-05,1.4385e-05,3e-06,-3.7916e-06,-1.7e-05,-8.3395e-06,-3.6574e-06,4.6819e-06,...,-6e-06,-1.5418e-05,-2.9e-05,-1.8452e-05,-2e-05,2.6593e-06,-1.0034e-05,-2.777e-05,-5.1e-05,1
4,7.0,-1.0371e-05,-1.4e-05,1.3265e-05,-5e-06,-7.758e-06,-2.1e-05,-8.1509e-06,-4.7382e-07,5.4952e-06,...,-7e-06,-2.0678e-05,-3.1e-05,-2.045e-05,-1.9e-05,-5.8557e-07,-1.3874e-05,-3.615e-05,-5.7e-05,1


In [5]:
X = df.iloc[:,:-1]
y = df.iloc[:,-1]

#test train split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, stratify=y)
#split the dataset into train and test sets in a way that preserves the 
# same proportions of examples in each class as observed in the original dataset

In [6]:
#models
model1 = XGBClassifier()
model2 = RandomForestClassifier()
model3 = AdaBoostClassifier()

model4 = LogisticRegression()
model5 = KNeighborsClassifier()
model6 = LinearDiscriminantAnalysis()

In [7]:
cols = ['Model_Name','Accuracy','Precision','Specificity', 'Sensitivity','F1','Train_time','Test_time'] 
resDF = pd.DataFrame(columns = cols)
print(resDF.head())
#resDF = resDF.append(results(model1,X_train,y_train,X_test,y_test),ignore_index=True)
resDF = resDF.append(results(model2,X_train,y_train,X_test,y_test),ignore_index=True)
#resDF = resDF.append(results(model3,X_train,y_train,X_test,y_test),ignore_index=True)
#resDF = resDF.append(results(model4,X_train,y_train,X_test,y_test),ignore_index=True)
#resDF = resDF.append(results(model5,X_train,y_train,X_test,y_test),ignore_index=True)
#resDF = resDF.append(results(model6,X_train,y_train,X_test,y_test),ignore_index=True)
resDF.to_csv('results/filtered_psr_data_18channels.csv')

Empty DataFrame
Columns: [Model_Name, Accuracy, Precision, Specificity, Sensitivity, F1, Train_time, Test_time]
Index: []
RandomForestClassifier
Train Time (sec): 7838.6098420619965
Acc:   0.9864900650751686
Test Time  (sec): 74.92429065704346 

Prec:  0.9861580286904211
Spec:  0.986148534250976
Sens:  0.9868315949109179
F1:  0.9864946968250387


  resDF = resDF.append(results(model2,X_train,y_train,X_test,y_test),ignore_index=True)
