In [1]:
from __future__ import division, print_function, absolute_import

import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.metrics import confusion_matrix
from sklearn.svm import SVC, LinearSVC
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, ExtraTreesClassifier, RandomTreesEmbedding, GradientBoostingClassifier

# Feature for pumping and non-pumping from the Load file
featuresTrueOrig = pd.read_excel('pumpFeatures.xlsx', sheet_name = 'FeaturesTrue')
featuresFalseOrig = pd.read_excel('pumpFeatures.xlsx', sheet_name = 'FeaturesFalse')


cm_train = np.zeros([2, 2]) # Initialize empty Confusion Matrix
numOfSubjects = featuresFalseOrig['Subject'].values[-1] + 1 # Number of test subjects read from the file, plus one because indexed starting with zero

for subject in range(0, numOfSubjects):

    # Split data into test and training data
    traingDataTrue = featuresTrueOrig[featuresTrueOrig.Subject != subject]
    testDataTrue = featuresTrueOrig[featuresTrueOrig.Subject == subject]
    traingDataFalse = featuresFalseOrig[featuresFalseOrig.Subject != subject]
    testDataFalse = featuresFalseOrig[featuresFalseOrig.Subject == subject]

    # select any feature set
    #features = ['Length','MinMaxDif', 'Steepness', 'SquaredSum', 'EMG', 'NumOfLines', 'Sax1', 'Sax2', 'Sax3', 'Sax4', 'Sax5', 'Sax6', 'Sax7', 'Sax8']
    #features = ['Length','MinMaxDif','SquaredSum','EMG','NumOfLines','Sax1','Sax2','Sax3','Sax4','Sax5','Sax6','Sax7','Sax8','Sax2Emg','Sax3Emg','Sax4Emg','Sax5Emg']
    #features = [ 'Length', 'MinMaxDif', 'Steepness', 'SquaredSum', 'EMG', 'NumOfLines', 'Sax1', 'Sax2', 'Sax3', 'Sax4', 'Sax5', 'Sax6', 'Sax7', 'Sax8', 'Sax1Emg', 'Sax2Emg', 'Sax3Emg', 'Sax4Emg', 'Sax5Emg', 'Sax6Emg', 'Sax7Emg', 'Sax8Emg']
    features = [ 'Length', 'MinMaxDif', 'Steepness', 'SquaredSum', 'EMG', 'NumOfLines', 'Sax1', 'Sax2', 'Sax3', 'Sax4','Sax5', 'Sax6', 'Sax7', 'Sax8']
    #features = [ 'Length','MinMaxDif','Steepness','SquaredSum','EMG','EMGPower','EMGSlope','NumOfLines','Hight','SqSumUp','SqSumDown','BIMassCenter','Sax1','Sax2','Sax3','Sax4','Sax5','Sax6','Sax7','Sax8','Sax1Emg','Sax2Emg','Sax3Emg','Sax4Emg','Sax5Emg','Sax6Emg','Sax7Emg','Sax8Emg']

    traingDataTrue =  traingDataTrue [features]
    testDataTrue =    testDataTrue   [features]
    traingDataFalse = traingDataFalse[features]
    testDataFalse =   testDataFalse  [features]

    # connect data sets of correct and incorrect bump movements
    frames = [traingDataTrue, traingDataFalse]
    traingData = pd.concat(frames)

    frames = [testDataTrue, testDataFalse]
    testData = pd.concat(frames)

    # scale data
    min_max_scaler = preprocessing.MinMaxScaler(feature_range=(-1,1))
    trainingData = min_max_scaler.fit_transform(traingData.values)
    testData = min_max_scaler.transform(testData.values)

    # create vector with labels for training
    labels = np.zeros([trainingData.shape[0]])
    labels[0:traingDataTrue.values.shape[0]] = np.ones(labels[0:traingDataTrue.values.shape[0]].shape)

    labelsTest = np.zeros([testData.shape[0]])
    labelsTest[0:testDataTrue.values.shape[0]] = np.ones(labelsTest[0:testDataTrue.values.shape[0]].shape)

    # Select classifier and set options
    #clf = sklearn.tree.DecisionTreeClassifier(criterion = "gini", min_samples_leaf = 15,min_samples_split=10,max_depth=3,class_weight='balanced')
    clf = RandomForestClassifier(n_estimators=40, min_samples_leaf = 15, min_samples_split=12, max_depth=8, class_weight='balanced')
    #clf = KNeighborsClassifier(10) # classifier 12
    #clf = SVC(kernel="rbf", class_weight="balanced")
    #clf = LinearSVC(class_weight="balanced")

    clf.fit(trainingData, labels) # Training the classifier
    predictedLabels = clf.predict(testData) # Testing the classifier

    matrix = confusion_matrix(labelsTest, predictedLabels,labels=[1,0]) # calculate Confusion Matrix
    print("calculate Confusion Matrix for")
    print("subject: " +  str(subject))
    print(matrix)
    cm_train += matrix # add Confusion Matrix from all tests

# Calculate and output quality criteria
confM = cm_train
precission = (confM[0,0]/np.sum(confM[:,0]))
sensitivity = (confM[0,0]/np.sum(confM[0,:]))
specificity = (confM[1,1]/np.sum(confM[1,:]))
accuracy = ((confM[0,0]+confM[1,1])/(np.sum(confM[0,:])+np.sum(confM[1,:])))
beta = 1
fScore1 = (1 + beta*beta) * confM[0, 0] / ((1+beta*beta) * confM[0, 0] + beta * confM[1, 0] + confM[0, 1])
beta = 2
fScore2 = (1 + beta*beta) * confM[0, 0] / ((1+beta*beta) * confM[0, 0] + beta * confM[1, 0] + confM[0, 1])

print("Total")
print(confM)
print("\n")
print("precission: " + str(precission))
print("sensitivity: " + str(sensitivity))
print("specificity: " + str(specificity))
print("accuracy: " + str(accuracy))
print("f1Score: " + str(fScore1))
print("f2Score: " + str(fScore2))

calculate Confusion Matrix for
subject: 0
[[ 189  171]
 [ 314 1138]]
calculate Confusion Matrix for
subject: 1
[[1043  365]
 [ 121  247]]
calculate Confusion Matrix for
subject: 2
[[532 185]
 [179 505]]
calculate Confusion Matrix for
subject: 3
[[ 103   30]
 [ 356 1053]]
calculate Confusion Matrix for
subject: 4
[[104  38]
 [217 678]]
calculate Confusion Matrix for
subject: 5
[[ 65  12]
 [ 72 174]]
calculate Confusion Matrix for
subject: 6
[[  4   2]
 [ 80 396]]
calculate Confusion Matrix for
subject: 7
[[959 174]
 [224 578]]
calculate Confusion Matrix for
subject: 8
[[  27   18]
 [ 280 1136]]
calculate Confusion Matrix for
subject: 9
[[ 16   6]
 [ 63 198]]
Total
[[3042. 1001.]
 [1906. 6103.]]


precission: 0.6147938561034761
sensitivity: 0.752411575562701
specificity: 0.7620177300536896
accuracy: 0.7587952207102555
f1Score: 0.6766766766766766
f2Score: 0.7596264296059532
