In [1]:
"""loop through files in S1_Dataset directory"""
"""create 2D data matrix by appending each new dataset to the bottom of data matrixx"""
import os
import pandas as pd
S1_PATH = os.path.join('..','..','Datasets_Healthy_Older_People','S1_Dataset')
S2_PATH = os.path.join('..','..','Datasets_Healthy_Older_People','S2_Dataset')
print('Importing Data...')
s1_data = pd.DataFrame()
for filename in os.listdir(S1_PATH):
    if filename != 'README.txt':
        data_path = os.path.join(S1_PATH, filename)
        data=pd.read_csv(data_path, header=None)
        s1_data = s1_data.append(data, ignore_index=True)
s1_data.columns = ['time','frontal','vertical','lateral','id','rssi','phase','frequency','activity']
print('Done')
s1_data.info()
#s1_data.tail()

Importing Data...
Done
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 52482 entries, 0 to 52481
Data columns (total 9 columns):
time         52482 non-null float64
frontal      52482 non-null float64
vertical     52482 non-null float64
lateral      52482 non-null float64
id           52482 non-null int64
rssi         52482 non-null float64
phase        52482 non-null float64
frequency    52482 non-null float64
activity     52482 non-null int64
dtypes: float64(7), int64(2)
memory usage: 3.6 MB


In [2]:
"""split the data into training and test"""
from sklearn.model_selection import train_test_split
s1_train_set, s1_test_set = train_test_split(s1_data, test_size = 0.2, random_state = 1)
#s1_train_set.info()
#s1_train_set.head()


In [3]:
"""for this task (stage 1 decition tree 2), keep the acclerometer and RSSI"""
s1_train_set_s1dt =  s1_train_set.drop(columns=['time','phase','frequency'])
s1_train_set_s1dt_features = s1_train_set_s1dt.drop(columns=['activity'])
s1_train_set_s1dt_labels = s1_train_set_s1dt.drop(columns=['frontal','vertical','lateral', 'id', 'rssi'])
#s1_train_set_s1dt.head()
#s1_train_set_s1dt_features.head()
#s1_train_set_s1dt_labels.head()


In [4]:
"""normalize the features"""
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(s1_train_set_s1dt_features)
s1_train_set_s1dt_features = scaler.transform(s1_train_set_s1dt_features)

  return self.partial_fit(X, y)
  """


In [5]:
"""train the decision tree model"""
from sklearn import tree
dt_clf = tree.DecisionTreeClassifier(criterion='gini',
                                     splitter='best',
                                     max_depth=100, 
                                     min_samples_split=2,
                                     presort=False)
#a complex tree increases recall at the expense of precision
dt_clf = dt_clf.fit(s1_train_set_s1dt_features, s1_train_set_s1dt_labels)

In [6]:
"""evaluate the trained model using cross validation score"""
from  sklearn.model_selection import cross_val_score
cross_val_score(dt_clf, s1_train_set_s1dt_features, s1_train_set_s1dt_labels, cv=3, scoring='accuracy')

array([0.9897828 , 0.98942479, 0.9891382 ])

In [7]:
"""evaluate the trained model using cross validation predict and the confusion matrix"""
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix
s1_train_set_s1dt_predict = cross_val_predict(dt_clf, s1_train_set_s1dt_features, s1_train_set_s1dt_labels, cv=3)
#s1_train_set_s1dt_predict = dt_clf.predict(s1_train_set_s1dt_features)
s1dt_conf_mx = confusion_matrix(s1_train_set_s1dt_labels, s1_train_set_s1dt_predict)
s1dt_conf_mx

array([[11920,    27,     7,   157],
       [    3,  3460,     0,    38],
       [   12,     0, 24790,     7],
       [  152,    42,     2,  1368]], dtype=int64)

In [8]:
"""evaluate the trained model in terms of precision and recall"""
"""1: sit on bed, 2: sit on chair, 3: lying, 4: ambulating"""
for index, activity in [(0, "sit on bed"), (1, "sit on chair"), (2, "lying"), (3, "ambulating")]:
    precision = s1dt_conf_mx[index,index]/sum(s1dt_conf_mx[:,index])
    recall = s1dt_conf_mx[index,index]/sum(s1dt_conf_mx[index,:])
    print(activity + " precision: " + str(precision))
    print(activity + " recall: " + str(recall))

sit on bed precision: 0.9861835029370398
sit on bed recall: 0.9842292131120469
sit on chair precision: 0.9804477189005384
sit on chair recall: 0.9882890602684947
lying precision: 0.9996370821404089
lying recall: 0.9992341488975774
ambulating precision: 0.8713375796178344
ambulating recall: 0.8746803069053708


In [9]:
"""visualize the decision tree"""
"""graph exported as an .svg because it is huge"""
import graphviz
dot_data = tree.export_graphviz(dt_clf, out_file=None, 
                                feature_names=['frontal','vertical','lateral', 'id', 'rssi'], 
                                class_names=['sit on bed', 'sit on chair', 'lying', 'ambulating'],
                                rounded=True, filled=True)
graph = graphviz.Source(dot_data)
graph.format = 'svg'
graph.render("s1dt_rssi") 

's1dt_rssi.svg'

In [10]:
"""evaluate performance on the test set"""

s1_test_set_s1dt =  s1_test_set.drop(columns=['time','phase','frequency'])
s1_test_set_s1dt_features = s1_test_set_s1dt.drop(columns=['activity'])
s1_test_set_s1dt_labels = s1_test_set_s1dt.drop(columns=['frontal','vertical','lateral', 'id', 'rssi'])

scaler = StandardScaler()
scaler.fit(s1_test_set_s1dt_features)
s1_test_set_s1dt_features = scaler.transform(s1_test_set_s1dt_features)

s1_test_set_s1dt_predict = dt_clf.predict(s1_test_set_s1dt_features)
s1dt_test_set_conf_mx = confusion_matrix(s1_test_set_s1dt_labels, s1_test_set_s1dt_predict)
print(s1dt_test_set_conf_mx)
print()
for index, activity in [(0, "sit on bed"), (1, "sit on chair"), (2, "lying"), (3, "ambulating")]:
    precision = s1dt_test_set_conf_mx[index,index]/sum(s1dt_test_set_conf_mx[:,index])
    recall = s1dt_test_set_conf_mx[index,index]/sum(s1dt_test_set_conf_mx[index,:])
    print(activity + " precision: " + str(precision))
    print(activity + " recall: " + str(recall))



'\ns1_test_set_s1dt =  s1_test_set.drop(columns=[\'time\',\'phase\',\'frequency\'])\ns1_test_set_s1dt_features = s1_test_set_s1dt.drop(columns=[\'activity\'])\ns1_test_set_s1dt_labels = s1_test_set_s1dt.drop(columns=[\'frontal\',\'vertical\',\'lateral\', \'id\', \'rssi\'])\n\nscaler = StandardScaler()\nscaler.fit(s1_test_set_s1dt_features)\ns1_test_set_s1dt_features = scaler.transform(s1_test_set_s1dt_features)\n\ns1_test_set_s1dt_predict = dt_clf.predict(s1_test_set_s1dt_features)\ns1dt_test_set_conf_mx = confusion_matrix(s1_test_set_s1dt_labels, s1_test_set_s1dt_predict)\nprint(s1dt_test_set_conf_mx)\nprint()\nfor index, activity in [(0, "sit on bed"), (1, "sit on chair"), (2, "lying"), (3, "ambulating")]:\n    precision = s1dt_test_set_conf_mx[index,index]/sum(s1dt_test_set_conf_mx[:,index])\n    recall = s1dt_test_set_conf_mx[index,index]/sum(s1dt_test_set_conf_mx[index,:])\n    print(activity + " precision: " + str(precision))\n    print(activity + " recall: " + str(recall))\n'