In [1]:
import pandas as pd
import numpy as np
import os #file merging
from matplotlib import pyplot as plt
from sklearn.ensemble import VotingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import MultinomialNB, GaussianNB
import itertools
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score
import pickle

In [6]:
#combine 9 files 
path = 'C:/Users/Trang/Downloads/PAMAP2_Dataset/PAMAP2_Dataset/Protocol'
file = os.listdir(path)
for i in sorted(file):
    filename = 'PAMAP.txt'
    with open(filename, 'a') as outfile:
        with open(path + '/' + i, 'r') as infile: 
            outfile.write(infile.read())

In [7]:
data = pd.read_csv('PAMAP.txt', ' ', header= None)

In [8]:
#adding column names for 54 columns
colNames = ["timestamp", "activityID", "heartrate"]

IMUhand = ['handTemperature', 
           'handAcc16_1', 'handAcc16_2', 'handAcc16_3', 
           'handAcc6_1', 'handAcc6_2', 'handAcc6_3', 
           'handGyro1', 'handGyro2', 'handGyro3', 
           'handMagne1', 'handMagne2', 'handMagne3',
           'handOrientation1', 'handOrientation2', 'handOrientation3', 'handOrientation4']

IMUchest = ['chestTemperature', 
           'chestAcc16_1', 'chestAcc16_2', 'chestAcc16_3', 
           'chestAcc6_1', 'chestAcc6_2', 'chestAcc6_3', 
           'chestGyro1', 'chestGyro2', 'chestGyro3', 
           'chestMagne1', 'chestMagne2', 'chestMagne3',
           'chestOrientation1', 'chestOrientation2', 'chestOrientation3', 'chestOrientation4']

IMUankle = ['ankleTemperature', 
           'ankleAcc16_1', 'ankleAcc16_2', 'ankleAcc16_3', 
           'ankleAcc6_1', 'ankleAcc6_2', 'ankleAcc6_3', 
           'ankleGyro1', 'ankleGyro2', 'ankleGyro3', 
           'ankleMagne1', 'ankleMagne2', 'ankleMagne3',
           'ankleOrientation1', 'ankleOrientation2', 'ankleOrientation3', 'ankleOrientation4']

columns = colNames + IMUhand + IMUchest + IMUankle

data.columns = columns

In [9]:
#Data cleaning
def cleandata(data):
    #remove zero value in activityID column
    data = data.drop(data[data['activityID'] == 0].index)
    data = data.apply(pd.to_numeric, errors = 'coerce') #invalid parsing will be set as NaN
    data = data.interpolate() #remove NaN value
    return data

In [10]:
data1 = cleandata(data)
data1.reset_index(drop = True, inplace = True)

display(data1.head(10))

Unnamed: 0,timestamp,activityID,heartrate,handTemperature,handAcc16_1,handAcc16_2,handAcc16_3,handAcc6_1,handAcc6_2,handAcc6_3,...,ankleGyro1,ankleGyro2,ankleGyro3,ankleMagne1,ankleMagne2,ankleMagne3,ankleOrientation1,ankleOrientation2,ankleOrientation3,ankleOrientation4
0,37.66,1,,30.375,2.2153,8.27915,5.58753,2.24689,8.55387,5.77143,...,0.002908,-0.027714,0.001752,-61.1081,-36.8636,-58.3696,1.0,0.0,0.0,0.0
1,37.67,1,,30.375,2.29196,7.67288,5.74467,2.27373,8.14592,5.78739,...,0.020882,0.000945,0.006007,-60.8916,-36.3197,-58.3656,1.0,0.0,0.0,0.0
2,37.68,1,,30.375,2.2909,7.1424,5.82342,2.26966,7.66268,5.78846,...,-0.035392,-0.052422,-0.004882,-60.3407,-35.7842,-58.6119,1.0,0.0,0.0,0.0
3,37.69,1,,30.375,2.218,7.14365,5.8993,2.22177,7.25535,5.88,...,-0.032514,-0.018844,0.02695,-60.7646,-37.1028,-57.8799,1.0,0.0,0.0,0.0
4,37.7,1,100.0,30.375,2.30106,7.25857,6.09259,2.2072,7.24042,5.95555,...,0.001351,-0.048878,-0.006328,-60.204,-37.1225,-57.8847,1.0,0.0,0.0,0.0
5,37.71,1,100.0,30.375,2.07165,7.25965,6.01218,2.19238,7.21038,6.01604,...,0.003793,-0.026906,0.004125,-61.3257,-36.9744,-57.7501,1.0,0.0,0.0,0.0
6,37.72,1,100.0,30.375,2.41148,7.5978,5.93915,2.23988,7.46679,6.03053,...,0.036814,-0.032277,-0.006866,-61.552,-36.9632,-57.9957,1.0,0.0,0.0,0.0
7,37.73,1,100.0,30.375,2.32815,7.63431,5.70686,2.31663,7.64745,6.01495,...,-0.010352,-0.016621,0.006548,-61.5738,-36.1724,-59.3487,1.0,0.0,0.0,0.0
8,37.74,1,100.0,30.375,2.25096,7.78598,5.62821,2.28637,7.70801,5.93935,...,0.039346,0.020393,-0.01188,-61.7741,-37.1744,-58.1199,1.0,0.0,0.0,0.0
9,37.75,1,100.0,30.375,2.14107,7.52262,5.78141,2.31538,7.72276,5.78828,...,0.029874,-0.010763,0.005133,-60.768,-37.4206,-58.8735,1.0,0.0,0.0,0.0


In [11]:
data1.isnull().sum()

timestamp            0
activityID           0
heartrate            4
handTemperature      0
handAcc16_1          0
handAcc16_2          0
handAcc16_3          0
handAcc6_1           0
handAcc6_2           0
handAcc6_3           0
handGyro1            0
handGyro2            0
handGyro3            0
handMagne1           0
handMagne2           0
handMagne3           0
handOrientation1     0
handOrientation2     0
handOrientation3     0
handOrientation4     0
chestTemperature     0
chestAcc16_1         0
chestAcc16_2         0
chestAcc16_3         0
chestAcc6_1          0
chestAcc6_2          0
chestAcc6_3          0
chestGyro1           0
chestGyro2           0
chestGyro3           0
chestMagne1          0
chestMagne2          0
chestMagne3          0
chestOrientation1    0
chestOrientation2    0
chestOrientation3    0
chestOrientation4    0
ankleTemperature     0
ankleAcc16_1         0
ankleAcc16_2         0
ankleAcc16_3         0
ankleAcc6_1          0
ankleAcc6_2          0
ankleAcc6_3

In [12]:
data1 = data1.drop(data1.index[range(4)])
data1.isnull().sum()

timestamp            0
activityID           0
heartrate            0
handTemperature      0
handAcc16_1          0
handAcc16_2          0
handAcc16_3          0
handAcc6_1           0
handAcc6_2           0
handAcc6_3           0
handGyro1            0
handGyro2            0
handGyro3            0
handMagne1           0
handMagne2           0
handMagne3           0
handOrientation1     0
handOrientation2     0
handOrientation3     0
handOrientation4     0
chestTemperature     0
chestAcc16_1         0
chestAcc16_2         0
chestAcc16_3         0
chestAcc6_1          0
chestAcc6_2          0
chestAcc6_3          0
chestGyro1           0
chestGyro2           0
chestGyro3           0
chestMagne1          0
chestMagne2          0
chestMagne3          0
chestOrientation1    0
chestOrientation2    0
chestOrientation3    0
chestOrientation4    0
ankleTemperature     0
ankleAcc16_1         0
ankleAcc16_2         0
ankleAcc16_3         0
ankleAcc6_1          0
ankleAcc6_2          0
ankleAcc6_3

In [13]:
data1.to_csv('PAMAP.csv')

In [41]:
data2 = data1.sample(n = 100000)

In [44]:
cols = ['timestamp', 'activityID', 'heartrate', 'handTemperature',
       'handTemperature', 'handAcc16_1', 'handAcc16_2', 'handAcc16_3', 
           'handAcc6_1', 'handAcc6_2', 'handAcc6_3', 
           'handGyro1', 'handGyro2', 'handGyro3', 
           'handMagne1', 'handMagne2', 'handMagne3',
           'handOrientation1', 'handOrientation2', 'handOrientation3', 'handOrientation4']
data3 = data2[cols]
data3.head(10)

Unnamed: 0,timestamp,activityID,heartrate,handTemperature,handTemperature.1,handAcc16_1,handAcc16_2,handAcc16_3,handAcc6_1,handAcc6_2,...,handGyro1,handGyro2,handGyro3,handMagne1,handMagne2,handMagne3,handOrientation1,handOrientation2,handOrientation3,handOrientation4
1176584,3504.05,5,165.0,33.8125,33.8125,-20.848,15.7648,7.75698,-19.2658,13.5584,...,0.402113,0.243749,-4.03177,8.3857,-41.5251,-25.8448,0.550539,-0.50027,-0.592947,-0.308304
775023,1001.7,17,106.0,33.5625,33.5625,-6.99086,6.42249,6.26413,-6.92679,6.52165,...,-3.13443,-0.460888,-0.221861,36.7799,-47.6306,-23.2407,0.242438,0.309361,-0.477592,-0.785764
700206,200.87,1,72.0,32.75,32.75,5.16003,0.312252,8.27566,5.25924,0.303869,...,-0.014256,-0.012441,0.000486,17.8663,7.09486,-49.9119,0.957541,0.008959,0.274596,0.087366
1732122,707.75,3,79.0,34.5,34.5,-9.37532,-1.03189,2.21238,-9.20836,-1.04688,...,-0.012025,-0.001403,0.005049,46.573,-2.60712,-19.3341,0.369246,0.567823,-0.217675,-0.702746
627697,1453.8,16,98.0,33.1875,33.1875,-10.5505,2.89542,-0.897459,-10.1834,3.36638,...,-1.21848,0.146901,1.88838,40.0568,-40.3069,-15.4051,0.175355,0.679625,-0.403645,-0.586883
525748,290.53,1,70.0,31.375,31.375,6.95631,2.48065,6.25542,7.03966,2.59972,...,-0.001723,0.022194,-0.024972,5.95011,2.09594,-44.8783,0.899073,-0.078218,0.39993,0.16002
179332,2614.05,7,145.0,32.5,32.5,-12.4559,5.50086,0.258318,-13.1207,5.72978,...,-0.437407,-1.32587,4.99038,41.5198,-23.7053,-52.1603,1.0,0.0,0.0,0.0
1798596,1476.93,16,114.0,34.625,34.625,-5.76049,0.204962,-0.547222,-6.12834,-1.08695,...,-0.722295,0.159704,2.18355,32.0501,15.4266,-6.02839,0.628345,0.066045,-0.71381,0.302151
1489743,620.0,3,70.0,32.875,32.875,-8.86808,2.5662,1.93843,-8.86515,2.71057,...,-0.227396,0.021409,0.029712,31.9127,-10.3789,-7.0527,0.668507,0.093555,-0.643177,-0.361482
26998,307.64,1,86.0,31.875,31.875,1.71677,9.3501,1.88166,1.83365,9.6257,...,0.411484,0.372538,0.191443,22.2589,-63.6447,-7.35176,1.0,0.0,0.0,0.0


In [45]:
scaler = StandardScaler()
X = data3.drop('activityID', axis=1)
X = scaler.fit_transform(X)
y = data3['activityID']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 100)

In [47]:
knn1 = KNeighborsClassifier(n_neighbors = 7)
cart1 = DecisionTreeClassifier(random_state = 100, max_depth = 20)
svm1 = SVC(decision_function_shape = 'ovr', probability = True, kernel = 'linear')
ann1 = MLPClassifier(hidden_layer_sizes = (50,), max_iter = 250, random_state = 100)

wmv1 = VotingClassifier(estimators = [('BDT', cart),('knn', knn),('svm', svm),
                                     ('ann', ann)], voting='soft', n_jobs=-1)


wmv1.fit(X_train,y_train)

VotingClassifier(estimators=[('BDT',
                              DecisionTreeClassifier(ccp_alpha=0.0,
                                                     class_weight=None,
                                                     criterion='gini',
                                                     max_depth=20,
                                                     max_features=None,
                                                     max_leaf_nodes=None,
                                                     min_impurity_decrease=0.0,
                                                     min_impurity_split=None,
                                                     min_samples_leaf=1,
                                                     min_samples_split=2,
                                                     min_weight_fraction_leaf=0.0,
                                                     presort='deprecated',
                                                     random_state=100,
     

In [48]:
y_pred = wmv1.predict(X_test)
confusion_matrix(y_test, y_pred)

array([[2063,    1,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0],
       [   0, 1906,    1,    0,    0,    0,    0,    0,    0,    0,    0,
           0],
       [   0,    2, 2015,    0,    0,    0,    0,    0,    0,    0,    2,
           0],
       [   0,    0,    0, 2429,    0,    1,    6,    0,    0,    0,    0,
           0],
       [   0,    0,    0,    0,  952,    2,    1,    0,    0,    0,    0,
           0],
       [   0,    0,    0,    1,    3, 1634,    5,    0,    0,    0,    0,
           0],
       [   0,    0,    0,    2,    0,    1, 1895,    0,    1,    0,    0,
           0],
       [   0,    0,    0,    5,    0,    0,    1, 1134,   28,    2,    0,
           0],
       [   0,    0,    0,    4,    0,    0,    1,   26, 1050,    0,    0,
           0],
       [   0,    0,    0,    0,    0,    0,    0,    1,    0, 1869,    6,
           0],
       [   0,    0,    3,    0,    0,    0,    0,    0,    0,    0, 2440,
           0],
       [   0,    0,  

In [49]:
wmv1.score(X_test, y_test)

0.9946

In [50]:
print('Ann: ', {wmv1.named_estimators_['ann'].score(X_test, y_test)})
print('BDT: ', {wmv1.named_estimators_['BDT'].score(X_test, y_test)})
print('SVM: ', {wmv1.named_estimators_['svm'].score(X_test, y_test)})
print('knn: ', {wmv1.named_estimators_['knn'].score(X_test, y_test)})

Ann:  {0.0009}
BDT:  {0.00105}
SVM:  {0.00945}
knn:  {0.00235}
