In [None]:
import pandas as pd
import numpy as np

from sklearn import model_selection
from sklearn import tree,svm
from sklearn.neural_network import MLPClassifier

from sklearn.ensemble import RandomForestClassifier, BaggingClassifier, AdaBoostClassifier
from sklearn import metrics
import seaborn as sn
import matplotlib.pyplot as plt

In [None]:
def normConf(conf):
    """Row-normalise a confusion matrix and report the mean per-class accuracy.

    Every row (one true class) is divided by its row total, so entry
    ``[i][j]`` of the result is the fraction of class-``i`` samples that were
    predicted as class ``j``. The diagonal then holds the per-class accuracy,
    and the reported score is the mean of that diagonal.

    The result has the same shape as the input (any number of classes, not
    just five). A row whose total is zero is left as all zeros and is
    excluded from the mean instead of producing a division by zero.

    Parameters
    ----------
    conf : array-like of shape (n_classes, n_classes)
        Confusion matrix of raw counts, rows = actual, columns = predicted.

    Returns
    -------
    outputConf : numpy.ndarray of shape (n_classes, n_classes)
        The row-normalised matrix.
    acc : float
        Mean of the diagonal of ``outputConf`` over the rows that contain at
        least one sample; ``0.0`` when no row does.

    Raises
    ------
    ValueError
        If ``conf`` is not a square two-dimensional matrix.
    """
    conf = np.asarray(conf, dtype=float)
    if conf.ndim != 2 or conf.shape[0] != conf.shape[1]:
        raise ValueError("conf must be a square 2-D confusion matrix")

    rowTotals = conf.sum(axis=1, keepdims=True)
    populated = rowTotals[:, 0] != 0
    # Divide empty rows by 1 instead of 0 so they stay all-zero.
    safeTotals = np.where(rowTotals != 0, rowTotals, 1.0)
    outputConf = conf / safeTotals

    perClass = np.diagonal(outputConf)[populated]
    acc = float(perClass.mean()) if perClass.size else 0.0
    print("Acc: ", acc)
    return outputConf, acc


def plotConf(conf, labels="ABCDE"):
    """Show a confusion matrix as an annotated heatmap.

    Parameters
    ----------
    conf : array-like of shape (n_classes, n_classes)
        Confusion matrix; rows are labelled 'Actual', columns 'Predicted'.
    labels : iterable of str, optional
        One label per class, used for both axes. Defaults to the five
        letters "ABCDE". The number of labels has to match the size of
        ``conf`` (pandas rejects a mismatch when building the frame).

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    classLabels = list(labels)
    df_cm = pd.DataFrame(conf, index=classLabels, columns=classLabels)
    plt.figure(figsize = (10,7))
    # Label the axes the heatmap was drawn on. Asking pyplot for an axes
    # afterwards can add a second, empty axes on top of the plot, and the
    # heatmap already places one tick per class, so ticks are not overridden.
    ax = sn.heatmap(df_cm, annot=True, fmt='g')
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    plt.show()
    return

def kcrossfold(X,y, model):
    """Evaluate ``model`` with 10-fold cross-validated predictions.

    Prints the overall accuracy and the per-class classification report,
    then plots the confusion matrix of raw counts via ``plotConf``.
    Nothing is returned.

    Parameters
    ----------
    X : feature table passed to ``cross_val_predict``.
    y : true class labels aligned with ``X``.
    model : unfitted scikit-learn estimator.
    """
    predicted = model_selection.cross_val_predict(model, X, y=y, cv=10)

    print(metrics.accuracy_score(y_true=y, y_pred=predicted))
    print(metrics.classification_report(y_true=y, y_pred=predicted))

    # Raw counts are plotted; row-normalising them with normConf is
    # currently switched off.
    plotConf(metrics.confusion_matrix(y_true=y, y_pred=predicted))


def leaveOneOut(X,y,username,model):
    """Evaluate ``model`` with leave-one-group-out cross validation.

    Each distinct value in ``username`` is held out once as the test set
    while the model is trained on all remaining groups. Prints the overall
    accuracy and the per-class classification report, then plots the
    confusion matrix of raw counts via ``plotConf``. Nothing is returned.

    Parameters
    ----------
    X : feature table passed to ``cross_val_predict``.
    y : true class labels aligned with ``X``.
    username : group label for every row of ``X``; rows sharing a label are
        never split between training and test data.
    model : unfitted scikit-learn estimator.
    """
    # One fold per distinct group, so the split count follows the data
    # instead of being fixed at six.
    logo = model_selection.LeaveOneGroupOut()

    ypred = model_selection.cross_val_predict(model,X, y, cv=logo, groups=username)
    conf = metrics.confusion_matrix(y, ypred)
    print(metrics.accuracy_score(y,ypred))
    print(metrics.classification_report(y, ypred))
    # Raw counts are plotted; row-normalising them with normConf is
    # currently switched off.
    plotConf(conf)

In [None]:
# Evaluation protocol switch: True -> k-fold cross validation (kcrossfold),
# False -> group-wise validation by user (leaveOneOut).
usingCV = False

# Each protocol reads its own raw-feature CSV. The alternative files
# "fs_stats_cv.csv" / "fs_stats_logo.csv" can be substituted here.
filename = "fs_raw_cv.csv" if usingCV else "fs_raw_logo.csv"
df = pd.read_csv(filename, header=0)

# Group labels, target labels, and the feature matrix (every other column).
names = df.user_name
y = df.classe
X = df.drop(columns=['classe', 'user_name'])

print(X.shape)



In [None]:
# Bagged random forests -- the configuration used in the previous study:
# 10 bagged copies of a 10-tree forest built with max_features=None.
model = RandomForestClassifier(max_features=None, n_estimators=10)
bag = BaggingClassifier(model, n_estimators=10)

# Score with whichever validation protocol the notebook is set to.
if not usingCV:
    leaveOneOut(X, y, names, bag)
else:
    kcrossfold(X, y, bag)

In [None]:
# 1. Single decision tree with scikit-learn's default settings.
model = tree.DecisionTreeClassifier()

# Score with whichever validation protocol the notebook is set to.
if not usingCV:
    leaveOneOut(X, y, names, model)
else:
    kcrossfold(X, y, model)

In [None]:
# 2. Bagging ensemble of 100 default decision trees.
model = tree.DecisionTreeClassifier()
bag = BaggingClassifier(model, n_estimators=100)

# Score with whichever validation protocol the notebook is set to.
if not usingCV:
    leaveOneOut(X, y, names, bag)
else:
    kcrossfold(X, y, bag)

In [None]:
# Boosted Decision Tree: AdaBoost over 100 default decision trees.
model = tree.DecisionTreeClassifier()
# The weak learner is passed positionally (as BaggingClassifier is elsewhere
# in this notebook) because its keyword was renamed from `base_estimator` to
# `estimator` in scikit-learn 1.2; the positional form works on both.
clf = AdaBoostClassifier(model, n_estimators=100)

if usingCV:
    kcrossfold(X,y,clf)
else:
    leaveOneOut(X,y,names,clf)

In [None]:
# Support vector classifier, RBF kernel, gamma='scale'.
model = svm.SVC(gamma='scale', kernel='rbf')

# Score with whichever validation protocol the notebook is set to.
if not usingCV:
    leaveOneOut(X, y, names, model)
else:
    kcrossfold(X, y, model)

In [None]:
# Support vector classifier, linear kernel.
model = svm.SVC(kernel='linear')

# Score with whichever validation protocol the notebook is set to.
if not usingCV:
    leaveOneOut(X, y, names, model)
else:
    kcrossfold(X, y, model)

In [None]:
# Support vector classifier, polynomial kernel, gamma='scale'.
model = svm.SVC(gamma='scale', kernel='poly')

# Score with whichever validation protocol the notebook is set to.
if not usingCV:
    leaveOneOut(X, y, names, model)
else:
    kcrossfold(X, y, model)

In [None]:
# Support vector classifier, sigmoid kernel, gamma='scale'.
model = svm.SVC(gamma='scale', kernel='sigmoid')

# Score with whichever validation protocol the notebook is set to.
if not usingCV:
    leaveOneOut(X, y, names, model)
else:
    kcrossfold(X, y, model)

In [None]:
# Bagging ensemble of 5 RBF-kernel support vector classifiers.
# NOTE(review): probability=True is presumably set so the ensemble can
# combine class probabilities from each SVC -- confirm it is needed.
model = svm.SVC(probability=True, gamma='scale', kernel='rbf')
bag = BaggingClassifier(model, n_estimators=5)

# Score with whichever validation protocol the notebook is set to.
if not usingCV:
    leaveOneOut(X, y, names, bag)
else:
    kcrossfold(X, y, bag)