In [1]:
# Importing Libraries

In [152]:
import pandas as pd
import numpy as np
from keras.models import Sequential, load_model
from keras.layers import LSTM
from keras.layers.core import Dense, Dropout
import pickle
from sklearn.metrics import accuracy_score

In [2]:
# Activities are the class labels
# It is a 6 class classification
ACTIVITIES = {
    0: 'WALKING',
    1: 'WALKING_UPSTAIRS',
    2: 'WALKING_DOWNSTAIRS',
    3: 'SITTING',
    4: 'STANDING',
    5: 'LAYING',
}

# Utility function to print the confusion matrix
def confusion_matrix(Y_true, Y_pred):
    Y_true = pd.Series([ACTIVITIES[y] for y in np.argmax(Y_true, axis=1)])
    Y_pred = pd.Series([ACTIVITIES[y] for y in np.argmax(Y_pred, axis=1)])

    return pd.crosstab(Y_true, Y_pred, rownames=['True'], colnames=['Pred'])

### Data

In [3]:
# Data directory
DATADIR = 'UCI_HAR_Dataset'

In [4]:
# Raw data signals
# Signals are from Accelerometer and Gyroscope
# The signals are in x,y,z directions
# Sensor signals are filtered to have only body acceleration
# excluding the acceleration due to gravity
# Triaxial acceleration from the accelerometer is total acceleration
SIGNALS = [
    "body_acc_x",
    "body_acc_y",
    "body_acc_z",
    "body_gyro_x",
    "body_gyro_y",
    "body_gyro_z",
    "total_acc_x",
    "total_acc_y",
    "total_acc_z"
]

In [5]:
# Utility function to read the data from csv file
def _read_csv(filename):
    return pd.read_csv(filename, delim_whitespace=True, header=None)

# Utility function to load the load
def load_signals(subset):
    signals_data = []

    for signal in SIGNALS:
        filename = f'UCI_HAR_Dataset/{subset}/Inertial Signals/{signal}_{subset}.txt'
        signals_data.append(
            _read_csv(filename).as_matrix()
        ) 

    # Transpose is used to change the dimensionality of the output,
    # aggregating the signals by combination of sample/timestep.
    # Resultant shape is (7352 train/2947 test samples, 128 timesteps, 9 signals)
    return np.transpose(signals_data, (1, 2, 0))

In [6]:

def load_y(subset):
    """
    The objective that we are trying to predict is a integer, from 1 to 6,
    that represents a human activity. We return a binary representation of 
    every sample objective as a 6 bits vector using One Hot Encoding
    (https://pandas.pydata.org/pandas-docs/stable/generated/pandas.get_dummies.html)
    """
    filename = f'UCI_HAR_Dataset/{subset}/y_{subset}.txt'
    y = _read_csv(filename)[0]

    return pd.get_dummies(y).as_matrix()

In [7]:
def load_data():
    """
    Obtain the dataset from multiple files.
    Returns: X_train, X_test, y_train, y_test
    """
    X_train, X_test = load_signals('train'), load_signals('test')
    y_train, y_test = load_y('train'), load_y('test')

    return X_train, X_test, y_train, y_test

In [8]:
# Importing tensorflow
np.random.seed(42)
import tensorflow as tf
tf.set_random_seed(42)

In [9]:
# Configuring a session
session_conf = tf.ConfigProto(
    intra_op_parallelism_threads=1,
    inter_op_parallelism_threads=1
)

In [10]:
# Import Keras
from keras import backend as K
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)

In [11]:
# Initializing parameters
epochs = 30
batch_size = 16
n_hidden = 32

In [12]:
# Utility function to count the number of classes
def _count_classes(y):
    return len(set([tuple(category) for category in y]))

In [13]:
# Loading the train and test data
X_train, X_test, Y_train, Y_test = load_data()

  if sys.path[0] == '':
  # This is added back by InteractiveShellApp.init_path()


In [19]:
timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = _count_classes(Y_train)

print(timesteps)
print(input_dim)
print(len(X_train))

128
9
7352


- Defining the Architecture of LSTM

In [20]:
# Initiliazing the sequential model
model = Sequential()
# Configuring the parameters
model.add(LSTM(n_hidden, input_shape=(timesteps, input_dim)))
# Adding a dropout layer
model.add(Dropout(0.5))
# Adding a dense output layer with sigmoid activation
model.add(Dense(n_classes, activation='sigmoid'))
model.summary()

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 32)                5376      
_________________________________________________________________
dropout_1 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 6)                 198       
Total params: 5,574
Trainable params: 5,574
Non-trainable params: 0
_________________________________________________________________


In [22]:
# Compiling the model
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

In [23]:
# Training the model
model.fit(X_train,
          Y_train,
          batch_size=batch_size,
          validation_data=(X_test, Y_test),
          epochs=epochs)

Train on 7352 samples, validate on 2947 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x29b5ee36a20>

In [24]:
# Confusion Matrix
print(confusion_matrix(Y_test, model.predict(X_test)))

Pred                LAYING  SITTING  STANDING  WALKING  WALKING_DOWNSTAIRS  \
True                                                                         
LAYING                 512        0        25        0                   0   
SITTING                  3      410        75        0                   0   
STANDING                 0       87       445        0                   0   
WALKING                  0        0         0      481                   2   
WALKING_DOWNSTAIRS       0        0         0        0                 382   
WALKING_UPSTAIRS         0        0         0        2                  18   

Pred                WALKING_UPSTAIRS  
True                                  
LAYING                             0  
SITTING                            3  
STANDING                           0  
WALKING                           13  
WALKING_DOWNSTAIRS                38  
WALKING_UPSTAIRS                 451  


In [27]:
score = model.evaluate(X_test, Y_test)



In [28]:
score

[0.3087582236972612, 0.9097387173396675]

- With a simple 2 layer architecture we got 90.09% accuracy and a loss of 0.30
- We can further imporve the performace with Hyperparameter tuning

## LSTM models with 9 hidden units

In [21]:
n_hidden = 9

In [22]:
# Initiliazing the sequential model
model = Sequential()
# Configuring the parameters
model.add(LSTM(n_hidden, input_shape=(timesteps, input_dim)))
# Adding a dropout layer
model.add(Dropout(0.5))
# Adding a dense output layer with sigmoid activation
model.add(Dense(n_classes, activation='sigmoid'))
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_2 (LSTM)                (None, 9)                 684       
_________________________________________________________________
dropout_2 (Dropout)          (None, 9)                 0         
_________________________________________________________________
dense_2 (Dense)              (None, 6)                 60        
Total params: 744
Trainable params: 744
Non-trainable params: 0
_________________________________________________________________


In [23]:
# Compiling the model
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

In [24]:
# Training the model
model_result = model.fit(X_train,
          Y_train,
          batch_size=batch_size,
          validation_data=(X_test, Y_test),
          epochs=epochs)

Instructions for updating:
Use tf.cast instead.
Train on 7352 samples, validate on 2947 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [25]:
# Confusion Matrix
print(confusion_matrix(Y_test, model.predict(X_test)))

Pred                LAYING  SITTING  STANDING  WALKING  WALKING_DOWNSTAIRS  \
True                                                                         
LAYING                 510        0         5        0                   0   
SITTING                  1      398        92        0                   0   
STANDING                 0      103       420        1                   0   
WALKING                  0        0        13      355                  39   
WALKING_DOWNSTAIRS       1        0         1      320                  86   
WALKING_UPSTAIRS         0        0         0       75                  18   

Pred                WALKING_UPSTAIRS  
True                                  
LAYING                            22  
SITTING                            0  
STANDING                           8  
WALKING                           89  
WALKING_DOWNSTAIRS                12  
WALKING_UPSTAIRS                 378  


In [26]:
score = model.evaluate(X_test, Y_test)



In [27]:
score

[0.6751788828571267, 0.7285374957583983]

- with fewer number of hidden units and dropout of 0.5, getting accuracy of 72%

## Model having 32 hidden units and 0.3 dropout rate

In [98]:
n_hidden = 32


# Initiliazing the sequential model
model = Sequential()
# Configuring the parameters
model.add(LSTM(n_hidden, input_shape=(timesteps, input_dim)))
# Adding a dropout layer
model.add(Dropout(0.3))
# Adding a dense output layer with sigmoid activation
model.add(Dense(n_classes, activation='sigmoid'))
model.summary()

Model: "sequential_18"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_23 (LSTM)               (None, 32)                5376      
_________________________________________________________________
dropout_14 (Dropout)         (None, 32)                0         
_________________________________________________________________
dense_12 (Dense)             (None, 6)                 198       
Total params: 5,574
Trainable params: 5,574
Non-trainable params: 0
_________________________________________________________________


In [99]:
# Compiling the model
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

In [100]:
# Training the model
model_result = model.fit(X_train,
          Y_train,
          batch_size=batch_size,
          validation_data=(X_test, Y_test),
          epochs=epochs)

Train on 7352 samples, validate on 2947 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [101]:
# Confusion Matrix
print(confusion_matrix(Y_test, model.predict(X_test)))

Pred                LAYING  SITTING  STANDING  WALKING  WALKING_DOWNSTAIRS  \
True                                                                         
LAYING                 537        0         0        0                   0   
SITTING                  0      412        76        0                   2   
STANDING                 0      111       420        0                   0   
WALKING                  0        0         0      446                  13   
WALKING_DOWNSTAIRS       0        0         0        3                 347   
WALKING_UPSTAIRS         0        1         0        1                  15   

Pred                WALKING_UPSTAIRS  
True                                  
LAYING                             0  
SITTING                            1  
STANDING                           1  
WALKING                           37  
WALKING_DOWNSTAIRS                70  
WALKING_UPSTAIRS                 454  


In [102]:
score = model.evaluate(X_test, Y_test)



In [103]:
score

[0.4676143296350844, 0.8876823888700374]

## Model with 2 LSTM layers

In [111]:
n_hidden = 20

In [112]:
# Initiliazing the sequential model
model = Sequential()
# Configuring the parameters
model.add(LSTM(n_hidden, input_shape=(timesteps, input_dim), return_sequences=True))
model.add(LSTM(n_hidden))
# Adding a dropout layer
model.add(Dropout(0.6))
# Adding a dense output layer with sigmoid activation
model.add(Dense(n_classes, activation='sigmoid'))
model.summary()

Model: "sequential_20"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_26 (LSTM)               (None, 128, 20)           2400      
_________________________________________________________________
lstm_27 (LSTM)               (None, 20)                3280      
_________________________________________________________________
dropout_16 (Dropout)         (None, 20)                0         
_________________________________________________________________
dense_14 (Dense)             (None, 6)                 126       
Total params: 5,806
Trainable params: 5,806
Non-trainable params: 0
_________________________________________________________________


In [113]:
# Compiling the model
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

In [114]:
# Training the model
model_result = model.fit(X_train,
          Y_train,
          batch_size=batch_size,
          validation_data=(X_test, Y_test),
          epochs=40)

Train on 7352 samples, validate on 2947 samples
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


In [115]:
# Confusion Matrix
print(confusion_matrix(Y_test, model.predict(X_test)))

Pred                LAYING  SITTING  STANDING  WALKING  WALKING_DOWNSTAIRS  \
True                                                                         
LAYING                 510        0        27        0                   0   
SITTING                  0      416        67        1                   0   
STANDING                 0       95       433        4                   0   
WALKING                  0        0         0      438                   0   
WALKING_DOWNSTAIRS       0        0         0        0                 367   
WALKING_UPSTAIRS         0        1         0        3                   2   

Pred                WALKING_UPSTAIRS  
True                                  
LAYING                             0  
SITTING                            7  
STANDING                           0  
WALKING                           58  
WALKING_DOWNSTAIRS                53  
WALKING_UPSTAIRS                 465  


In [117]:
score = model.evaluate(X_test, Y_test)



In [118]:
score

[0.39639775041275266, 0.8920936545639634]

# Using divide an conquer 1DCNN

Reference
https://app.dimensions.ai/details/publication/pub.1101891256#readcube-epdf

Classification is divided into 2 stages:
- First stage consist of decision tree classifier where it classifies wheter activity is static or dynamic.
- Second stage consist of 2 models(1DCNN based) which predicts following:
    - if first stage predicts static: then this stage predict whether it's LAYING, SITTING, STANDING
    - if first stage predicts dynamic: then this stage predict whether it's WALKING, UP_STAIRS, DONW_STAIRS

## getting data

In [127]:
train = pd.read_csv('UCI_HAR_dataset/csv_files/train.csv')
test = pd.read_csv('UCI_HAR_dataset/csv_files/test.csv')
print(train.shape, test.shape)

(7352, 564) (2947, 564)


In [134]:
# get X_train and y_train from csv files
X_train = train.drop(['subject', 'Activity', 'ActivityName'], axis=1)
y_train = train.Activity

# get X_test and y_test from test csv file
X_test = test.drop(['subject', 'Activity', 'ActivityName'], axis=1)
y_test = test.Activity

### Loading Saved Models

In [122]:
first_stage_model = pickle.load(open('trained_models/first_stage_model.sav', 'rb'))

In [130]:
dynamic_model_path = "trained_models/dynamic_hra_model.h5"
dynamic_model = load_model(dynamic_model_path)

In [131]:
static_model_path = "trained_models/static_hra_model.h5"
static_model = load_model(static_model_path)

In [155]:
y_pred = []
for i in range(len(X_test)):
    first_stage_op = first_stage_model.predict([X_test.loc[i]])[0]
    if first_stage_op == 0: #it's dynamic activity
        predicted_values_prob = dynamic_model.predict(np.expand_dims([X_test.loc[i]], axis=2))
        predicted_value = np.argmax(predicted_values_prob[0])
        predicted_value += 1
        y_pred.append(predicted_value)
    else:
        predicted_values_prob = static_model.predict(np.expand_dims([X_test.loc[i]], axis=2))
        predicted_value = np.argmax(predicted_values_prob[0])
        predicted_value += 4
        y_pred.append(predicted_value)

In [156]:
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(accuracy)

0.9636918900576857


In [161]:
Y_true = pd.Series([ACTIVITIES[y-1] for y in y_test])
Y_pred = pd.Series([ACTIVITIES[y-1] for y in y_pred])

print(pd.crosstab(Y_true, Y_pred, rownames=['True'], colnames=['Pred']))

Pred                LAYING  SITTING  STANDING  WALKING  WALKING_DOWNSTAIRS  \
True                                                                         
LAYING                 537        0         0        0                   0   
SITTING                  0      463        28        0                   0   
STANDING                 0       36       496        0                   0   
WALKING                  0        0         0      496                   0   
WALKING_DOWNSTAIRS       0        0         0        1                 416   
WALKING_UPSTAIRS         0        0         0       36                   3   

Pred                WALKING_UPSTAIRS  
True                                  
LAYING                             0  
SITTING                            0  
STANDING                           0  
WALKING                            0  
WALKING_DOWNSTAIRS                 3  
WALKING_UPSTAIRS                 432  


In [162]:
# Please compare all your models using Prettytable library
from prettytable import PrettyTable

table = PrettyTable();
table.field_names = ['Model', 'Accuracy'];
table.add_row(['LSTM Layers(2), Dropout(0.5), hidden_units(32)', '90'])
table.add_row(['LSTM Layers(1), Dropout(0.5), hidden_units(9)', '72.8'])
table.add_row(['LSTM Layers(1), Dropout(0.3), hidden_units(32)', '88.7'])
table.add_row(['LSTM Layers(2), Dropout(0.6), hidden_units(20)', '89.2'])
table.add_row(['Divide and conquer CNN', '96.36'])

print(table)

+------------------------------------------------+----------+
|                     Model                      | Accuracy |
+------------------------------------------------+----------+
| LSTM Layers(2), Dropout(0.5), hidden_units(32) |    90    |
| LSTM Layers(1), Dropout(0.5), hidden_units(9)  |   72.8   |
| LSTM Layers(1), Dropout(0.3), hidden_units(32) |   88.7   |
| LSTM Layers(2), Dropout(0.6), hidden_units(20) |   89.2   |
|             Divide and conquer CNN             |  96.36   |
+------------------------------------------------+----------+
