##### Imports the required libraries

In [1]:
""" Imports the required libraries """

# import tensorflow as tf
import keras
from keras.models import Sequential
from keras.optimizers import Adam
from keras.layers import Dense,  Activation, Conv2D, MaxPool2D, MaxPooling2D
from keras.layers import Flatten, Dropout, BatchNormalization, Reshape
from keras.utils.vis_utils import plot_model

import os
import numpy as np
import pandas as pd
import scipy.stats as stats
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from mlxtend.plotting import plot_confusion_matrix
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

Using TensorFlow backend.


##### Stores the path of the sensor files in the corresponding list

In [2]:
""" Stores the path of the sensor files in the corresponding list """

base_path = "./data/raw_data/"


def collect_sensor_file_paths(root_dir):
    """ Groups the sensor files found under root_dir by device and sensor

    A file is assigned to the first (device, sensor) pair whose two keywords
    both occur in its file name; files matching no pair are ignored.

    Parameters:
        root_dir: directory that is walked recursively.

    Returns:
        dict mapping ("phone"|"watch", "accel"|"gyro") to a sorted list of paths.
    """

    grouped_paths = {
        ("phone", "accel"): [],
        ("phone", "gyro"): [],
        ("watch", "accel"): [],
        ("watch", "gyro"): [],
    }

    for current_dir, _subdirectories, filenames in os.walk(root_dir):
        for filename in filenames:
            for (device, sensor), bucket in grouped_paths.items():
                if device in filename and sensor in filename:
                    # Join with the directory that was actually visited so the
                    # stored path always points at an existing file.
                    bucket.append(os.path.join(current_dir, filename))
                    break

    # os.walk order depends on the file system; sorting keeps the order in
    # which the files are later concatenated reproducible between runs.
    for bucket in grouped_paths.values():
        bucket.sort()

    return grouped_paths


sensor_file_paths = collect_sensor_file_paths(base_path)
phone_accel_file_paths = sensor_file_paths[("phone", "accel")]
phone_gyro_file_paths = sensor_file_paths[("phone", "gyro")]
watch_accel_file_paths = sensor_file_paths[("watch", "accel")]
watch_gyro_file_paths = sensor_file_paths[("watch", "gyro")]

##### Stores the actual name of each activity in the dictionary

In [3]:
""" Stores the actual name of each activity in the dictionary """

# Maps the single-letter activity code found in the raw files to a readable
# name. There is no entry for the letter "N".
activity_dict= {"A":"Walking",
                "B":"Jogging",
                "C":"Stairs",
                "D":"Sitting",
                "E":"Standing",
                "F":"Typing",
                "G":"Brushing",
                "H":"Eat Soup",
                "I":"Eat Chips",
                "J":"Eat Pasta",
                "K":"Drinking",
                "L":"Eat Sandwich",
                "M":"Kicking",
                "O":"Playing",
                "P":"Dribbling",
                "Q":"Writing",
                "R":"Clapping",
                "S":"Folding"}

##### Removes the columns "SubjectID" and "Timestamp" from the dataframe

In [4]:
def clean_data(dataframe):
    """ Returns a copy of the dataframe without the "SubjectID" and "Timestamp" columns """

    unwanted_columns = ["SubjectID", "Timestamp"]

    # drop() already returns a new frame; the extra copy() guarantees the
    # result shares no data with the input.
    return dataframe.drop(columns = unwanted_columns).copy()

##### Encodes the activity labels with LabelEncoder() and standardizes the X, Y, Z columns with StandardScaler()

In [5]:
def scale_data(data):
    """ Encodes the activity labels and standardizes the X, Y, Z columns

    Parameters:
        data: DataFrame with the columns 'X', 'Y', 'Z' and 'ActivityCode'.
              It is left unmodified.

    Returns:
        A new DataFrame (fresh default index) holding the scaled 'X', 'Y', 'Z'
        columns and the integer-encoded 'ActivityCode' column.

    Note: both the LabelEncoder and the StandardScaler are fitted on the frame
    passed in, so two calls on different frames do not share an encoding or
    scaling statistics.
    """

    feature_columns = ['X', 'Y', 'Z']

    # Encode into a separate array instead of writing back into `data`, so
    # the caller's frame keeps its original labels.
    encoded_labels = LabelEncoder().fit_transform(data['ActivityCode'])

    scaled_features = StandardScaler().fit_transform(data[feature_columns])

    scaled_df = pd.DataFrame(data = scaled_features, columns = feature_columns)
    scaled_df['ActivityCode'] = encoded_labels

    return scaled_df

##### Maps each encoded activity label back to its original activity code and stores the mapping in a dictionary

In [6]:
def activity_dictionary(dataframe):
    """ Maps each encoded activity label back to its original activity code

    Parameters:
        dataframe: DataFrame with an "ActivityCode" column.

    Returns:
        dict whose keys are the integer codes assigned by a LabelEncoder
        fitted on the column and whose values are the original labels.
    """

    le = LabelEncoder()
    # Only the fitted classes are needed, so the column itself is not transformed.
    le.fit(dataframe["ActivityCode"])
    mapped_labels = dict(zip(le.transform(le.classes_), le.classes_))

    return mapped_labels

##### Preprocesses the data using the clean_data() and scale_data() functions

In [7]:
def preprocess_data(dataframe):
    """ Preprocesses the data by applying clean_data() followed by scale_data() """

    return scale_data(clean_data(dataframe))

##### Plots training & validation accuracy and loss values

In [8]:
def plot_learningCurve(history, epochs):
    """ Plots training & validation accuracy values, then training & validation loss values

    `history.history` must provide the keys 'accuracy', 'val_accuracy', 'loss'
    and 'val_loss', each with one value per epoch in 1..epochs.
    """

    x_axis = range(1, epochs+1)

    # (training key, validation key, title, y-axis label, legend position)
    panels = (
        ('accuracy', "val_accuracy", 'Model accuracy', 'Accuracy', 'lower right'),
        ('loss', 'val_loss', 'Model loss', 'Loss', 'upper right'),
    )

    for train_key, val_key, title, y_label, legend_position in panels:
        plt.plot(x_axis, history.history[train_key])
        plt.plot(x_axis, history.history[val_key])
        plt.title(title)
        plt.ylabel(y_label)
        plt.xlabel('Epoch')
        plt.legend(['Train', 'Val'], loc=legend_position)
        plt.show()

##### Divides data into 2D frames
###### To write this code cell, we used part of this tutorial: https://www.youtube.com/watch?v=lUI6VMj43PE&t=2112s

In [9]:
""" Divides data into 2D frames """

frequency = 20 # Based on Hertz
time_period = 10 # Based on Second
frame_size = frequency * time_period
step_size = frame_size # In order not to have an overlap

def get_frames(df, frame_size, step_size):
    """ Cuts the signal into fixed-length windows and labels each window

    Parameters:
        df: DataFrame with the columns 'X', 'Y', 'Z' and 'ActivityCode'.
        frame_size: number of consecutive rows in one window.
        step_size: number of rows between the starts of two windows.

    Returns:
        frames: array of shape (n_frames, frame_size, 3); frames[k, t] holds
                the (X, Y, Z) values of row t of window k.
        labels: array of shape (n_frames,) with the most frequent
                'ActivityCode' of each window (smallest value on ties).
    """
    n_features = 3
    signals = df[['X', 'Y', 'Z']].values
    activity_codes = df['ActivityCode'].values

    frames = []
    labels = []
    # "+ 1" keeps the last window when it ends exactly on the final row.
    for start in range(0, len(df) - frame_size + 1, step_size):
        stop = start + frame_size

        # Slicing rows keeps every time step as one (X, Y, Z) triple. Stacking
        # the three channels first and reshaping afterwards would mix values
        # of different time steps and axes within a row.
        frames.append(signals[start: stop])

        # np.unique returns sorted values, so argmax picks the smallest of the
        # most frequent codes.
        codes, counts = np.unique(activity_codes[start: stop], return_counts=True)
        labels.append(codes[np.argmax(counts)])

    # The reshape only fixes the shape of the empty result; non-empty input
    # already has the shape (n_frames, frame_size, n_features).
    frames = np.asarray(frames).reshape(-1, frame_size, n_features)
    labels = np.asarray(labels)

    return frames, labels

##### Builds the model (the Convolutional Neural Network)

In [10]:
def get_model(input_shape=None, n_classes=18):
    """ Builds the model (the Convolutional Neural Network)

    Parameters:
        input_shape: shape of one input sample. When omitted, the shape of
                     the first sample of the global X_train is used, which
                     is what the parameterless call relied on.
        n_classes: number of units of the softmax output layer.

    Returns:
        The compiled Sequential model.
    """

    if input_shape is None:
        # Backward-compatible fallback for callers that invoke get_model()
        # after defining X_train at notebook level.
        input_shape = X_train[0].shape

    # Defines model
    model = Sequential()
    model.add(Conv2D(64, (2, 2), activation = 'relu', input_shape = input_shape))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(64, activation = 'relu'))
    model.add(Dropout(0.5))
    model.add(Dense(n_classes, activation='softmax'))

    # Compiles model
    # The sparse loss expects integer class ids as targets.
    model.compile(optimizer=Adam(learning_rate = 0.001), 
                  loss = 'sparse_categorical_crossentropy', 
                  metrics = ['accuracy'])
    
    return model

## Phone Accelerometer

In [12]:
""" Phone Accelerometer """

# Leave-one-subject-out evaluation: for every subject a fresh model is trained
# on all other subjects and scored on the held-out subject. All result
# dictionaries are keyed by the held-out subject id.
phone_accel_accuracy = {}
phone_accel_precision = {}
phone_accel_recall = {}
phone_accel_f1 = {}

phone_accel_matrix = {}
phone_accel_activity_accuracy = {}
phone_accel_classification_reports={}

data = pd.concat(map(pd.read_csv, phone_accel_file_paths))

subjectIDs = data["SubjectID"].unique()

# The code <-> activity mapping depends only on the full data set, so it is
# computed once instead of once per held-out subject.
activity_by_code = activity_dictionary(data)
activity_labels = list(activity_by_code.values())
class_ids = list(activity_by_code.keys())
code_by_activity = {activity: code for code, activity in activity_by_code.items()}

for subjectid in subjectIDs:
    train_data = data[data["SubjectID"] != subjectid]
    test_data = data[data["SubjectID"] == subjectid]

    # NOTE(review): preprocess_data() standardizes each frame with its own
    # statistics, so the held-out subject is not scaled with the training
    # statistics - confirm this is intended.
    processed_train_data = preprocess_data(train_data)
    processed_test_data = preprocess_data(test_data)

    # preprocess_data() fits a new label encoder on every call, so a subject
    # that lacks an activity would get shifted codes. Re-encode both splits
    # with the data-set-wide mapping (row order is preserved by preprocessing).
    processed_train_data["ActivityCode"] = train_data["ActivityCode"].map(code_by_activity).values
    processed_test_data["ActivityCode"] = test_data["ActivityCode"].map(code_by_activity).values

    X_train, y_train = get_frames(processed_train_data, frame_size, step_size)
    # Frames of the held-out subject; building them from the training frame
    # would score the model on the data it was fitted on.
    X_test, y_test = get_frames(processed_test_data, frame_size, step_size)
    
    # Adds the trailing channel axis expected by Conv2D.
    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 3, 1)
    X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 3, 1)
    
    model = get_model()
        
    history = model.fit(X_train, y_train, batch_size=1, epochs=70, verbose=1)

    scores = model.evaluate(X_test, y_test, verbose=0)
    
    y_true = y_test
    # argmax over the softmax output replaces Sequential.predict_classes(),
    # which no longer exists in current Keras releases.
    y_pred = np.argmax(model.predict(X_test, verbose=0), axis=-1)
    
    # Accuracy: (tp + tn) / (p + n)
    phone_accel_accuracy[subjectid] = accuracy_score(y_true, y_pred)

    # labels=class_ids forces one score per known class, in the same order as
    # activity_labels, even when a class is absent for this subject.

    # Precision tp / (tp + fp)
    precision = precision_score(y_true, y_pred, labels=class_ids, average=None, zero_division=1)
    phone_accel_precision[subjectid] = dict(zip(activity_labels, precision))

    # Recall: tp / (tp + fn)
    recall = recall_score(y_true, y_pred, labels=class_ids, average=None, zero_division=1)
    phone_accel_recall[subjectid] = dict(zip(activity_labels, recall))

    # F1: 2 tp / (2 tp + fp + fn)
    f1 = f1_score(y_true, y_pred, labels=class_ids, average=None, zero_division=1)
    phone_accel_f1[subjectid] = dict(zip(activity_labels, f1))
    
    phone_accel_classification_reports[subjectid] = classification_report(y_true, y_pred, labels=class_ids, zero_division=1)
    
    matrix = confusion_matrix(y_true, y_pred, labels=class_ids)
    phone_accel_matrix[subjectid] = matrix
    
    # Per-class accuracy; classes without any true sample are reported as NaN
    # instead of triggering a division by zero.
    class_totals = matrix.sum(axis=1)
    accu_per_class = np.divide(matrix.diagonal(), class_totals,
                               out=np.full(len(class_totals), np.nan),
                               where=class_totals > 0)
    phone_accel_activity_accuracy[subjectid] = dict(zip(activity_labels, accu_per_class))

## Phone Gyroscope

In [15]:
""" Phone Gyroscope """

# Leave-one-subject-out evaluation: for every subject a fresh model is trained
# on all other subjects and scored on the held-out subject. All result
# dictionaries are keyed by the held-out subject id.
phone_gyro_accuracy = {}
phone_gyro_precision = {}
phone_gyro_recall = {}
phone_gyro_f1 = {}

phone_gyro_matrix = {}
phone_gyro_activity_accuracy = {}
phone_gyro_classification_reports={}

data = pd.concat(map(pd.read_csv, phone_gyro_file_paths))

subjectIDs = data["SubjectID"].unique()

# The code <-> activity mapping depends only on the full data set, so it is
# computed once instead of once per held-out subject.
activity_by_code = activity_dictionary(data)
activity_labels = list(activity_by_code.values())
class_ids = list(activity_by_code.keys())
code_by_activity = {activity: code for code, activity in activity_by_code.items()}

for subjectid in subjectIDs:
    train_data = data[data["SubjectID"] != subjectid]
    test_data = data[data["SubjectID"] == subjectid]

    # NOTE(review): preprocess_data() standardizes each frame with its own
    # statistics, so the held-out subject is not scaled with the training
    # statistics - confirm this is intended.
    processed_train_data = preprocess_data(train_data)
    processed_test_data = preprocess_data(test_data)

    # preprocess_data() fits a new label encoder on every call, so a subject
    # that lacks an activity would get shifted codes. Re-encode both splits
    # with the data-set-wide mapping (row order is preserved by preprocessing).
    processed_train_data["ActivityCode"] = train_data["ActivityCode"].map(code_by_activity).values
    processed_test_data["ActivityCode"] = test_data["ActivityCode"].map(code_by_activity).values

    X_train, y_train = get_frames(processed_train_data, frame_size, step_size)
    # Frames of the held-out subject; building them from the training frame
    # would score the model on the data it was fitted on.
    X_test, y_test = get_frames(processed_test_data, frame_size, step_size)
    
    # Adds the trailing channel axis expected by Conv2D.
    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 3, 1)
    X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 3, 1)
    
    model = get_model()
        
    history = model.fit(X_train, y_train, batch_size=1, epochs=70, verbose=1)

    scores = model.evaluate(X_test, y_test, verbose=0)
    
    y_true = y_test
    # argmax over the softmax output replaces Sequential.predict_classes(),
    # which no longer exists in current Keras releases.
    y_pred = np.argmax(model.predict(X_test, verbose=0), axis=-1)
    
    # Accuracy: (tp + tn) / (p + n)
    phone_gyro_accuracy[subjectid] = accuracy_score(y_true, y_pred)

    # labels=class_ids forces one score per known class, in the same order as
    # activity_labels, even when a class is absent for this subject.

    # Precision tp / (tp + fp)
    precision = precision_score(y_true, y_pred, labels=class_ids, average=None, zero_division=1)
    phone_gyro_precision[subjectid] = dict(zip(activity_labels, precision))

    # Recall: tp / (tp + fn)
    recall = recall_score(y_true, y_pred, labels=class_ids, average=None, zero_division=1)
    phone_gyro_recall[subjectid] = dict(zip(activity_labels, recall))

    # F1: 2 tp / (2 tp + fp + fn)
    f1 = f1_score(y_true, y_pred, labels=class_ids, average=None, zero_division=1)
    phone_gyro_f1[subjectid] = dict(zip(activity_labels, f1))
    
    phone_gyro_classification_reports[subjectid] = classification_report(y_true, y_pred, labels=class_ids, zero_division=1)
    
    matrix = confusion_matrix(y_true, y_pred, labels=class_ids)
    phone_gyro_matrix[subjectid] = matrix
    
    # Per-class accuracy; classes without any true sample are reported as NaN
    # instead of triggering a division by zero.
    class_totals = matrix.sum(axis=1)
    accu_per_class = np.divide(matrix.diagonal(), class_totals,
                               out=np.full(len(class_totals), np.nan),
                               where=class_totals > 0)
    phone_gyro_activity_accuracy[subjectid] = dict(zip(activity_labels, accu_per_class))

## Watch Accelerometer

In [20]:
""" Watch Accelerometer """

# Leave-one-subject-out evaluation: for every subject a fresh model is trained
# on all other subjects and scored on the held-out subject. All result
# dictionaries are keyed by the held-out subject id.
watch_accel_accuracy = {}
watch_accel_precision = {}
watch_accel_recall = {}
watch_accel_f1 = {}

watch_accel_matrix = {}
watch_accel_activity_accuracy = {}
watch_accel_classification_reports={}

data = pd.concat(map(pd.read_csv, watch_accel_file_paths))

subjectIDs = data["SubjectID"].unique()

# The code <-> activity mapping depends only on the full data set, so it is
# computed once instead of once per held-out subject.
activity_by_code = activity_dictionary(data)
activity_labels = list(activity_by_code.values())
class_ids = list(activity_by_code.keys())
code_by_activity = {activity: code for code, activity in activity_by_code.items()}

for subjectid in subjectIDs:
    train_data = data[data["SubjectID"] != subjectid]
    test_data = data[data["SubjectID"] == subjectid]

    # NOTE(review): preprocess_data() standardizes each frame with its own
    # statistics, so the held-out subject is not scaled with the training
    # statistics - confirm this is intended.
    processed_train_data = preprocess_data(train_data)
    processed_test_data = preprocess_data(test_data)

    # preprocess_data() fits a new label encoder on every call, so a subject
    # that lacks an activity would get shifted codes. Re-encode both splits
    # with the data-set-wide mapping (row order is preserved by preprocessing).
    processed_train_data["ActivityCode"] = train_data["ActivityCode"].map(code_by_activity).values
    processed_test_data["ActivityCode"] = test_data["ActivityCode"].map(code_by_activity).values

    X_train, y_train = get_frames(processed_train_data, frame_size, step_size)
    # Frames of the held-out subject; building them from the training frame
    # would score the model on the data it was fitted on.
    X_test, y_test = get_frames(processed_test_data, frame_size, step_size)
    
    # Adds the trailing channel axis expected by Conv2D.
    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 3, 1)
    X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 3, 1)
    
    model = get_model()
        
    history = model.fit(X_train, y_train, batch_size=1, epochs=70, verbose=1)

    scores = model.evaluate(X_test, y_test, verbose=0)
    
    y_true = y_test
    # argmax over the softmax output replaces Sequential.predict_classes(),
    # which no longer exists in current Keras releases.
    y_pred = np.argmax(model.predict(X_test, verbose=0), axis=-1)
    
    # Accuracy: (tp + tn) / (p + n)
    watch_accel_accuracy[subjectid] = accuracy_score(y_true, y_pred)

    # labels=class_ids forces one score per known class, in the same order as
    # activity_labels, even when a class is absent for this subject.

    # Precision tp / (tp + fp)
    precision = precision_score(y_true, y_pred, labels=class_ids, average=None, zero_division=1)
    watch_accel_precision[subjectid] = dict(zip(activity_labels, precision))

    # Recall: tp / (tp + fn)
    recall = recall_score(y_true, y_pred, labels=class_ids, average=None, zero_division=1)
    watch_accel_recall[subjectid] = dict(zip(activity_labels, recall))

    # F1: 2 tp / (2 tp + fp + fn)
    f1 = f1_score(y_true, y_pred, labels=class_ids, average=None, zero_division=1)
    watch_accel_f1[subjectid] = dict(zip(activity_labels, f1))
    
    watch_accel_classification_reports[subjectid] = classification_report(y_true, y_pred, labels=class_ids, zero_division=1)
    
    matrix = confusion_matrix(y_true, y_pred, labels=class_ids)
    watch_accel_matrix[subjectid] = matrix
    
    # Per-class accuracy; classes without any true sample are reported as NaN
    # instead of triggering a division by zero.
    class_totals = matrix.sum(axis=1)
    accu_per_class = np.divide(matrix.diagonal(), class_totals,
                               out=np.full(len(class_totals), np.nan),
                               where=class_totals > 0)
    watch_accel_activity_accuracy[subjectid] = dict(zip(activity_labels, accu_per_class))

## Watch Gyroscope

In [22]:
""" Watch Gyroscope """

# Leave-one-subject-out evaluation: for every subject a fresh model is trained
# on all other subjects and scored on the held-out subject. All result
# dictionaries are keyed by the held-out subject id.
watch_gyro_accuracy = {}
watch_gyro_precision = {}
watch_gyro_recall = {}
watch_gyro_f1 = {}

watch_gyro_matrix = {}
watch_gyro_activity_accuracy = {}
watch_gyro_classification_reports={}

data = pd.concat(map(pd.read_csv, watch_gyro_file_paths))

subjectIDs = data["SubjectID"].unique()

# The code <-> activity mapping depends only on the full data set, so it is
# computed once instead of once per held-out subject.
activity_by_code = activity_dictionary(data)
activity_labels = list(activity_by_code.values())
class_ids = list(activity_by_code.keys())
code_by_activity = {activity: code for code, activity in activity_by_code.items()}

for subjectid in subjectIDs:
    train_data = data[data["SubjectID"] != subjectid]
    test_data = data[data["SubjectID"] == subjectid]

    # NOTE(review): preprocess_data() standardizes each frame with its own
    # statistics, so the held-out subject is not scaled with the training
    # statistics - confirm this is intended.
    processed_train_data = preprocess_data(train_data)
    processed_test_data = preprocess_data(test_data)

    # preprocess_data() fits a new label encoder on every call, so a subject
    # that lacks an activity would get shifted codes. Re-encode both splits
    # with the data-set-wide mapping (row order is preserved by preprocessing).
    processed_train_data["ActivityCode"] = train_data["ActivityCode"].map(code_by_activity).values
    processed_test_data["ActivityCode"] = test_data["ActivityCode"].map(code_by_activity).values

    X_train, y_train = get_frames(processed_train_data, frame_size, step_size)
    # Frames of the held-out subject; building them from the training frame
    # would score the model on the data it was fitted on.
    X_test, y_test = get_frames(processed_test_data, frame_size, step_size)
    
    # Adds the trailing channel axis expected by Conv2D.
    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 3, 1)
    X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 3, 1)
    
    model = get_model()
        
    history = model.fit(X_train, y_train, batch_size=1, epochs=70, verbose=1)

    scores = model.evaluate(X_test, y_test, verbose=0)
    
    y_true = y_test
    # argmax over the softmax output replaces Sequential.predict_classes(),
    # which no longer exists in current Keras releases.
    y_pred = np.argmax(model.predict(X_test, verbose=0), axis=-1)
    
    # Accuracy: (tp + tn) / (p + n)
    watch_gyro_accuracy[subjectid] = accuracy_score(y_true, y_pred)

    # labels=class_ids forces one score per known class, in the same order as
    # activity_labels, even when a class is absent for this subject.

    # Precision tp / (tp + fp)
    precision = precision_score(y_true, y_pred, labels=class_ids, average=None, zero_division=1)
    watch_gyro_precision[subjectid] = dict(zip(activity_labels, precision))

    # Recall: tp / (tp + fn)
    recall = recall_score(y_true, y_pred, labels=class_ids, average=None, zero_division=1)
    watch_gyro_recall[subjectid] = dict(zip(activity_labels, recall))

    # F1: 2 tp / (2 tp + fp + fn)
    f1 = f1_score(y_true, y_pred, labels=class_ids, average=None, zero_division=1)
    watch_gyro_f1[subjectid] = dict(zip(activity_labels, f1))
    
    watch_gyro_classification_reports[subjectid] = classification_report(y_true, y_pred, labels=class_ids, zero_division=1)
    
    matrix = confusion_matrix(y_true, y_pred, labels=class_ids)
    watch_gyro_matrix[subjectid] = matrix
    
    # Per-class accuracy; classes without any true sample are reported as NaN
    # instead of triggering a division by zero.
    class_totals = matrix.sum(axis=1)
    accu_per_class = np.divide(matrix.diagonal(), class_totals,
                               out=np.full(len(class_totals), np.nan),
                               where=class_totals > 0)
    watch_gyro_activity_accuracy[subjectid] = dict(zip(activity_labels, accu_per_class))