In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
# Activities are the class labels
# It is a 6 class classification
ACTIVITIES = {
    0: 'WALKING',
    1: 'WALKING_UPSTAIRS',
    2: 'WALKING_DOWNSTAIRS',
    3: 'SITTING',
    4: 'STANDING',
    5: 'LAYING',
}

In [71]:
# utility function to print confusion matrix
def confusion_mat(y_true,y_pred):
    y_true = pd.Series([ACTIVITIES[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([ACTIVITIES[y] for y in np.argmax(y_pred, axis=1)])
    return pd.crosstab(y_true,y_pred)

# Data imports

In [4]:
datadir = 'UCI_HAR_Dataset'

In [5]:
# Raw data signals
# Signals are from Accelerometer and Gyroscope
# The signals are in x,y,z directions
# Sensor signals are filtered to have only body acceleration
# excluding the acceleration due to gravity
# Triaxial acceleration from the accelerometer is total acceleration
SIGNALS = [
    "body_acc_x",
    "body_acc_y",
    "body_acc_z",
    "body_gyro_x",
    "body_gyro_y",
    "body_gyro_z",
    "total_acc_x",
    "total_acc_y",
    "total_acc_z"
]

In [6]:
# utility function to read the data

def _read_csv(filename):
    df = pd.read_csv(filename, delim_whitespace=True,header=None)
    return df

In [23]:
# utility function to load the data 
def load_signal(subset):
    signal_data = []
    for signal in SIGNALS:
        filename = f'UCI_HAR_Dataset/{subset}/Inertial Signals/{signal}_{subset}.txt'
        signal_data.append(_read_csv(filename).to_numpy()
                          )
    return np.transpose(signal_data,(1,2,0))

In [24]:
def load_y(subset):
    """
    The objective that we are trying to predict is a integer, from 1 to 6,
    that represents a human activity. We return a binary representation of 
    every sample objective as a 6 bits vector using One Hot Encoding
    (https://pandas.pydata.org/pandas-docs/stable/generated/pandas.get_dummies.html)
    """
    filename = f'UCI_HAR_Dataset/{subset}/y_{subset}.txt'
    y = _read_csv(filename)[0]
    return pd.get_dummies(y).to_numpy()
    

In [14]:
def load_data():
    x_train,x_test = load_signal('train'), load_signal('test')
    y_train,y_test = load_y('train'), load_y('test')
    return x_train,x_test,y_train,y_test    

In [17]:
# Importing tensorflow
import tensorflow as tf
from tensorflow import keras
from keras.layers import Dense, Dropout, LSTM
from keras.models import Sequential

In [18]:
# Initializing parameters
epochs = 30
batch_size = 16
n_hidden = 32

In [19]:
# Utility function to count the number of classes
def _count_classes(y):
    return len(set([tuple(category) for category in y]))

In [25]:
# Loading the train and test data
X_train, X_test, Y_train, Y_test = load_data()

In [34]:
timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = _count_classes(Y_train)

print(timesteps)
print(input_dim)
print(len(X_train))

128
9
7352


In [35]:
# Initiliazing the sequential model
model = Sequential()
# Configuring the parameters
model.add(LSTM(n_hidden, input_shape=(timesteps, input_dim)))
# Adding a dropout layer
model.add(Dropout(0.5))
# Adding a dense output layer with sigmoid activation
model.add(Dense(n_classes, activation='sigmoid'))
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 32)                5376      
                                                                 
 dropout (Dropout)           (None, 32)                0         
                                                                 
 dense (Dense)               (None, 6)                 198       
                                                                 
Total params: 5,574
Trainable params: 5,574
Non-trainable params: 0
_________________________________________________________________


In [36]:
# Compiling the model
model.compile(loss='categorical_crossentropy',
              optimizer='Adam',
              metrics=['accuracy'])


In [37]:
# Training the model
model.fit(X_train,
          Y_train,
          batch_size=batch_size,
          validation_data=(X_test, Y_test),
          epochs=epochs)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x29f5c159f30>

In [54]:
y_pred = model.predict(X_test)



In [73]:
confusion_mat(Y_test,y_pred)

col_0,LAYING,SITTING,STANDING,WALKING,WALKING_DOWNSTAIRS,WALKING_UPSTAIRS
row_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
LAYING,510,0,0,0,0,27
SITTING,0,363,118,1,0,9
STANDING,0,63,465,4,0,0
WALKING,0,0,0,424,27,45
WALKING_DOWNSTAIRS,0,0,0,1,416,3
WALKING_UPSTAIRS,0,0,0,1,20,450


In [74]:
score = model.evaluate(X_test, Y_test)



In [78]:
print('accuracy of the nominal model is:',score[1])
print('loss of the nominal model is:',score[0])

accuracy of the nominal model is: 0.8917543292045593
loss of the nominal model is: 0.39546895027160645


# Conclusion
    ** With a simple 2 layer architecture we got 90.09% accuracy and a loss of 0.30 **
    ** We can further imporve the performace with Hyperparameter tuning **