In [1]:

# -*- coding: utf-8 -*-
"""
This is the script used to train an activity recognition 
classifier on accelerometer data.

"""

import os
import sys
import numpy as np
from sklearn.tree import export_graphviz
from features import extract_features
from util import slidingWindow, reorient, reset_vars
import pickle


# %%---------------------------------------------------------------------------
#
#		                 Load Data From Disk
#
# -----------------------------------------------------------------------------

print("Loading data...")
sys.stdout.flush()
data_file = 'my-activity-data.csv'
data = np.genfromtxt(data_file, delimiter=',')
data = data[~np.isnan(data).any(axis=1)]
print("Loaded {} raw labelled activity data samples.".format(len(data)))
sys.stdout.flush()

# %%---------------------------------------------------------------------------
#
#		                    Pre-processing
#
# -----------------------------------------------------------------------------

print("Reorienting accelerometer data...")
sys.stdout.flush()
reset_vars()
reoriented = np.asarray([reorient(data[i,1], data[i,2], data[i,3]) for i in range(len(data))])
reoriented_data_with_timestamps = np.append(data[:,0:1],reoriented,axis=1)
data = np.append(reoriented_data_with_timestamps, data[:,-1:], axis=1)

# %%---------------------------------------------------------------------------
#
#		                Extract Features & Labels
#
# -----------------------------------------------------------------------------

window_size = 20
step_size = 20

# sampling rate should be about 25 Hz; you can take a brief window to confirm this
n_samples = 1000
time_elapsed_seconds = (data[n_samples,0] - data[0,0]) / 1000
sampling_rate = n_samples / time_elapsed_seconds

# TODO: list the class labels that you collected data for in the order of label_index (defined while collecting data)
class_names = ["cycling", "hopping", "sitting", "walking"] #...

print("Extracting features and labels for window size {} and step size {}...".format(window_size, step_size))
sys.stdout.flush()

X = []
Y = []

for i,window_with_timestamp_and_label in slidingWindow(data, window_size, step_size):
    window = window_with_timestamp_and_label[:,1:-1] 
    feature_names, x = extract_features(window)
    X.append(x)
    Y.append(window_with_timestamp_and_label[10, -1])

X = np.asarray(X)
Y = np.asarray(Y)

n_features = len(X)
    
print("Finished feature extraction over {} windows".format(len(X)))
print("Unique labels found: {}".format(set(Y)))
print("\n")
sys.stdout.flush()

Loading data...
Loaded 146927 raw labelled activity data samples.
Reorienting accelerometer data...
Extracting features and labels for window size 20 and step size 20...


  sampling_rate = n_samples / time_elapsed_seconds
  entropyy = -(data*np.log(np.abs(data)))
  entropyy = -(data*np.log(np.abs(data)))
  return np.mean(np.fft.rfft(np.sqrt(window[0]**2+window[1]**2+window[2]**2), axis = 0).astype(float))


Finished feature extraction over 7346 windows
Unique labels found: {1.0, 2.0, 3.0, 4.0}




In [2]:
# %%---------------------------------------------------------------------------
#
#		                Train & Evaluate Classifier
#
# -----------------------------------------------------------------------------


# TODO: split data into train and test datasets using 10-fold cross validation

from sklearn.model_selection import KFold
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, recall_score, precision_score

cv = KFold(n_splits=10, random_state=None, shuffle=True)


# """
# TODO: iterating over each fold, fit a decision tree classifier on the training set.
# Then predict the class labels for the test set and compute the confusion matrix
# using predicted labels and ground truth values. Print the accuracy, precision and recall
# for each fold.
# """

accuracy = precision = recall = 0
# X = X[~np.isnan(X).any(axis=1)]
print(len(X))

for train_index, test_index in cv.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = Y[train_index], Y[test_index]
    tree = DecisionTreeClassifier(criterion="entropy", max_depth=4)
    tree.fit(X_train, y_train)
    y_pred = tree.predict(X_test)
#     print(y_pred)
    from collections import Counter
    print(Counter(y_pred))
    conf = confusion_matrix(y_test, y_pred)
    accuracy += accuracy_score(y_test, y_pred)
    precision += precision_score(y_test, y_pred, average=None)
    recall += recall_score(y_test, y_pred, average=None)

print("Accuracy: ", accuracy/10)
print("Precision: ", precision/10)
print("Recall: ", recall/10)

# # TODO: calculate and print the average accuracy, precision and recall values over all 10 folds
# TODO: train the decision tree classifier on entire dataset
# TODO: Save the decision tree visualization to disk - replace 'tree' with your decision tree and run the below line



export_graphviz(tree, out_file='tree.dot', feature_names = feature_names)




# TODO: Save the classifier to disk - replace 'tree' with your decision tree and run the below line


with open('classifier.pickle', 'wb') as f:
    pickle.dump(tree, f)

7346
Counter({2.0: 217, 1.0: 189, 3.0: 171, 4.0: 158})
Counter({2.0: 209, 1.0: 198, 3.0: 166, 4.0: 162})
Counter({2.0: 219, 3.0: 192, 1.0: 183, 4.0: 141})
Counter({2.0: 225, 3.0: 184, 1.0: 178, 4.0: 148})
Counter({1.0: 213, 2.0: 197, 3.0: 187, 4.0: 138})
Counter({2.0: 216, 3.0: 189, 1.0: 178, 4.0: 152})
Counter({2.0: 212, 1.0: 198, 3.0: 166, 4.0: 158})
Counter({2.0: 206, 1.0: 196, 3.0: 193, 4.0: 139})
Counter({1.0: 208, 2.0: 208, 3.0: 173, 4.0: 145})
Counter({2.0: 202, 1.0: 199, 4.0: 177, 3.0: 156})
Accuracy:  0.9115166175462012
Precision:  [0.90947842 0.83926468 0.99322897 0.91775624]
Recall:  [0.94718324 0.97210865 0.97214152 0.75497253]


In [3]:
# Last Part

In [4]:
print("Loading data...")
sys.stdout.flush()
data_file = 'testdata.csv'
data = np.genfromtxt(data_file, delimiter=',')
data = data[~np.isnan(data).any(axis=1)]
print("Loaded {} raw labelled activity data samples.".format(len(data)))
sys.stdout.flush()

# %%---------------------------------------------------------------------------
#
#		                    Pre-processing
#
# -----------------------------------------------------------------------------

print("Reorienting accelerometer data...")
sys.stdout.flush()
reset_vars()
reoriented = np.asarray([reorient(data[i,1], data[i,2], data[i,3]) for i in range(len(data))])
reoriented_data_with_timestamps = np.append(data[:,0:1],reoriented,axis=1)
data = np.append(reoriented_data_with_timestamps, data[:,-1:], axis=1)

window_size = 5
step_size = 5

# sampling rate should be about 25 Hz; you can take a brief window to confirm this
n_samples = 1000
time_elapsed_seconds = (data[n_samples,0] - data[0,0]) / 1000
sampling_rate = n_samples / time_elapsed_seconds

# TODO: list the class labels that you collected data for in the order of label_index (defined while collecting data)
class_names = ["cycling", "hopping", "sitting", "walking"] #...

print("Extracting features and labels for window size {} and step size {}...".format(window_size, step_size))
sys.stdout.flush()

X = []

for i,window_with_timestamp_and_label in slidingWindow(data, window_size, step_size):
    window = window_with_timestamp_and_label[:,2:]
    feature_names, x = extract_features(window)
    X.append(x)

X = np.asarray(X)

n_features = len(X)
    
print("Finished feature extraction over {} windows".format(len(X)))
print("\n")
sys.stdout.flush()

Loading data...
Loaded 12041 raw labelled activity data samples.
Reorienting accelerometer data...
Extracting features and labels for window size 5 and step size 5...


  entropyy = -(data*np.log(np.abs(data)))
  entropyy = -(data*np.log(np.abs(data)))
  return np.mean(np.fft.rfft(np.sqrt(window[0]**2+window[1]**2+window[2]**2), axis = 0).astype(float))


Finished feature extraction over 2408 windows




In [5]:
loaded_model = pickle.load(open('classifier.pickle', 'rb'))
# result = loaded_model.score(X_test, y_test)
prediction = loaded_model.predict(X)
print(len(prediction))

2408


In [6]:
from collections import Counter
print(Counter(prediction))

Counter({3.0: 1128, 1.0: 983, 2.0: 286, 4.0: 11})
