### CISC 601 - Scientific Computing II
* Assignment: Artificial Neural Network (ANN) for activity recognition
* Data set and problem description: https://www.neuraldesigner.com/learning/examples/activity-recognition#DataSet

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, accuracy_score
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint
# import os
# os.chdir('/home/roman/Documents/HU/CISC601_ScientificComputingII/Assignments/ann')

Using TensorFlow backend.


In [2]:
# load data:
data = pd.read_csv('activity_recognition.csv', delimiter=';')
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() only appends a stray "None" line to the output.
data.info()
# Features are every column except the target. Dropping 'label' by name keeps
# this consistent with how Y is built and does not rely on column position.
X = data.drop(columns='label')
# One-hot encode the activity label: one indicator column per activity.
Y = pd.get_dummies(data['label'])

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10299 entries, 0 to 10298
Columns: 562 entries, tBodyAcc_mean_X to label
dtypes: float64(561), object(1)
memory usage: 44.2+ MB
None


In [None]:
# select most important features:
# A one-vs-rest gradient boosting model is fitted per activity column of Y;
# the per-feature importances are summed over all activities and the
# n_features highest-scoring columns are kept.
# NOTE(review): the ranking is fitted on the full X/Y, i.e. before the
# train/validation/test split, so validation and test rows influence which
# features are selected. Consider ranking on the training split only.
n_features = 50
feat_importances = []
for target in Y.columns:
    # Fixed seed so the ranking (and hence the selected columns) is
    # reproducible across kernel restarts.
    gbm = GradientBoostingClassifier(random_state=123)
    gbm.fit(X, Y.loc[:, target])
    print('ran GBM done with target variable: ', target)
    feat_importances.append(pd.Series(gbm.feature_importances_, index=X.columns))
# Series indexed by feature name, sorted by summed importance (descending).
features = (
    pd.concat(feat_importances, axis=1)
    .sum(axis=1)
    .sort_values(ascending=False)
    .head(n_features)
)

ran GBM done with target variable:  LAYING
ran GBM done with target variable:  SITTING
ran GBM done with target variable:  STANDING
ran GBM done with target variable:  WALKING
ran GBM done with target variable:  WALKING_DOWNSTAIRS


In [None]:
# prepare data set:
# Restrict the feature frame to the selected columns, in ranking order.
X = X.loc[:, features.index]
df = pd.concat([X, Y], axis=1)
# First cut: 60% training, 40% held out.
X_train, X_val_test, Y_train, Y_val_test = train_test_split(
    X,
    Y,
    test_size=0.4,
    random_state=123,
)
# Second cut: the held-out part is halved into validation and test sets.
X_val, X_test, Y_val, Y_test = train_test_split(
    X_val_test,
    Y_val_test,
    test_size=0.5,
    random_state=123,
)

In [None]:
# define ANN:
def ann(indput_dim, output_dim, dropout=0.2):
    """Build (but do not compile) the feed-forward classifier.

    The network has three tanh hidden layers of 60, 60 and 25 units, each
    followed by a Dropout layer, and a softmax output layer.

    Parameters
    ----------
    indput_dim : int
        Number of input features fed to the first Dense layer.
    output_dim : int
        Number of units in the softmax output layer.
    dropout : float, default 0.2
        Rate passed to every Dropout layer.

    Returns
    -------
    Sequential
        The uncompiled Keras model.
    """
    model = Sequential()
    # Only the first layer needs to be told the input width.
    model.add(Dense(units=60, kernel_initializer='uniform', input_dim=indput_dim, activation='tanh'))
    model.add(Dropout(dropout))
    for width in (60, 25):
        model.add(Dense(units=width, kernel_initializer='uniform', activation='tanh'))
        model.add(Dropout(dropout))
    model.add(Dense(output_dim, kernel_initializer='uniform', activation='softmax'))
    return model

In [None]:
# fit model:
# Checkpoint callback: after each epoch, overwrite the file only when the
# validation loss improved, so it ends up holding the best weights seen.
filepath = 'weights.best.hdf5'
checkpoints = [ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')]
clf = ann(
    indput_dim=X_train.shape[1],
    output_dim=Y_train.shape[1],
    dropout=0.2
)
# The targets are one-hot rows and the output layer is a softmax, i.e. this is
# a single-label multi-class problem, so the matching loss is categorical
# cross-entropy. 'binary_crossentropy' would score every output unit as an
# independent yes/no problem and, in Keras versions that derive the 'accuracy'
# variant from the loss, would report the (inflated) per-unit binary accuracy.
clf.compile(
    optimizer=Adam(lr=1e-3),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
# `history.history` holds the per-epoch 'loss' / 'val_loss' curves.
history = clf.fit(
    x=X_train,
    y=Y_train,
    validation_data=(X_val, Y_val),
    callbacks=checkpoints,
    batch_size=10,
    epochs=300,
    verbose=1
)

In [None]:
# plot history of training and validation loss:
# Per-epoch loss curves recorded by the fit call.
train_loss = history.history['loss']
val_loss = history.history['val_loss']
plt.rcParams.update({'figure.figsize': [8, 5.5], 'font.size': 16})
# Explicit figure/axes interface instead of the implicit pyplot state machine.
fig, ax = plt.subplots()
for curve, legend_label in ((train_loss, 'training loss'), (val_loss, 'validation loss')):
    ax.plot(curve, label=legend_label)
ax.set_xlabel('epoch')
ax.set_ylabel('loss')
ax.set_title('Training vs validation loss after each epoch')
ax.legend()
plt.show()

In [None]:
# predict
# Restore the weights written by the ModelCheckpoint callback (lowest
# validation loss) instead of using the weights from the final epoch.
clf.load_weights('weights.best.hdf5')
# Sequential.predict_classes() is not available in newer Keras/TensorFlow
# releases. For a softmax output it is just the arg-max over the class
# probabilities, which works on old and new versions alike.
Y_pred_train = np.argmax(clf.predict(X_train, verbose=1), axis=-1)
Y_pred_val = np.argmax(clf.predict(X_val, verbose=1), axis=-1)
Y_pred_test = np.argmax(clf.predict(X_test, verbose=1), axis=-1)

In [None]:
# transform Y_true and Y_pred to classes:
# Predicted class index -> activity name, following the column order of the
# one-hot frame Y.
classes = dict(enumerate(Y.columns))
Y_pred_train_classes = [classes[i] for i in Y_pred_train]
Y_pred_val_classes = [classes[i] for i in Y_pred_val]
Y_pred_test_classes = [classes[i] for i in Y_pred_test]
# For a one-hot row, idxmax(axis=1) returns the label of the column holding
# the 1. This replaces the mask/stack/reset_index chain, whose `.drop(0, 1)`
# passes `axis` positionally (rejected by pandas >= 2.0) and which relied on
# stack() silently dropping the NaNs produced by the mask.
# reset_index(drop=True) keeps the 0..n-1 index the previous code produced;
# row order is unchanged, so it lines up with the prediction lists above.
Y_train_classes = Y_train.idxmax(axis=1).reset_index(drop=True)
Y_val_classes = Y_val.idxmax(axis=1).reset_index(drop=True)
Y_test_classes = Y_test.idxmax(axis=1).reset_index(drop=True)

In [None]:
# evaluate accuracies:
# Fraction of exactly matching labels on each of the three splits.
train_accuracy, val_accuracy, test_accuracy = (
    accuracy_score(true_labels, predicted_labels)
    for true_labels, predicted_labels in (
        (Y_train_classes, Y_pred_train_classes),
        (Y_val_classes, Y_pred_val_classes),
        (Y_test_classes, Y_pred_test_classes),
    )
)
for caption, score in (
    ('training accuracy: ', train_accuracy),
    ('validation accuracy: ', val_accuracy),
    ('test accuracy: ', test_accuracy),
):
    print(caption, score)