In [1]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from sklearn import metrics

import os


# Useful Constants

# Those are separate normalised input features for the neural network
INPUT_SIGNAL_TYPES = [
    "body_acc_x_",
    "body_acc_y_",
    "body_acc_z_",
    "body_gyro_x_",
    "body_gyro_y_",
    "body_gyro_z_",
    "total_acc_x_",
    "total_acc_y_",
    "total_acc_z_"
]

# Output classes to learn how to classify
LABELS = [
    "WALKING", 
    "WALKING_UPSTAIRS", 
    "WALKING_DOWNSTAIRS", 
    "SITTING", 
    "STANDING", 
    "LAYING"
] 


# ## Let's start by downloading the data: 


import os
# Note: Linux bash commands start with a "!" inside those "ipython notebook" cells

DATA_PATH = "./"

get_ipython().system('python download_dataset.py')

DATASET_PATH = DATA_PATH + "UCI HAR Dataset/"
print("\n" + "Dataset is now located at: " + DATASET_PATH)


# ## Preparing dataset:


TRAIN = "train/"
TEST = "test/"


# Load "X" (the neural network's training and testing inputs)

def load_X(X_signals_paths):
    X_signals = []
    
    for signal_type_path in X_signals_paths:
        file = open(signal_type_path, 'r')
        # Read dataset from disk, dealing with text files' syntax
        X_signals.append(
            [np.array(serie, dtype=np.float32) for serie in [
                row.replace('  ', ' ').strip().split(' ') for row in file
            ]]
        )
        file.close()
    
    return np.transpose(np.array(X_signals), (1, 2, 0))

X_train_signals_paths = [
    DATASET_PATH + TRAIN + "Inertial Signals/" + signal + "train.txt" for signal in INPUT_SIGNAL_TYPES
]
X_test_signals_paths = [
    DATASET_PATH + TEST + "Inertial Signals/" + signal + "test.txt" for signal in INPUT_SIGNAL_TYPES
]

X_train = load_X(X_train_signals_paths)
X_test = load_X(X_test_signals_paths)


# Load "y" (the neural network's training and testing outputs)

def load_y(y_path):
    file = open(y_path, 'r')
    # Read dataset from disk, dealing with text file's syntax
    y_ = np.array(
        [elem for elem in [
            row.replace('  ', ' ').strip().split(' ') for row in file
        ]], 
        dtype=np.int32
    )
    file.close()
    
    # Substract 1 to each output class for friendly 0-based indexing 
    return y_ - 1

y_train_path = DATASET_PATH + TRAIN + "y_train.txt"
y_test_path = DATASET_PATH + TEST + "y_test.txt"

y_train = load_y(y_train_path)
y_test = load_y(y_test_path)


# ## Additionnal Parameters:
# 
# Here are some core parameter definitions for the training. 
# 
# The whole neural network's structure could be summarised by enumerating those parameters and the fact an LSTM is used. 
# dropout=0.2, recurrent_dropout=0.2
# nb_epoch=5, batch_size=64
# 
# Keras LSTM with dropout is used.
# A convolutional layer and max pooling can also be tested, before the LSTM cell.

# In[5]:



# Some debugging info

print("Some useful info to get an insight on dataset's shape and normalisation:")
print("(X shape, y shape, every X's mean, every X's standard deviation)")
print(X_test.shape, y_test.shape, np.mean(X_test), np.std(X_test))
print("The dataset is therefore properly normalised, as expected, but not yet one-hot encoded.")


Downloading...
Dataset already downloaded. Did not download twice.

Extracting...
Dataset already extracted. Did not extract twice.


Dataset is now located at: ../data/UCI HAR Dataset/
Some useful info to get an insight on dataset's shape and normalisation:
(X shape, y shape, every X's mean, every X's standard deviation)
(2947, 128, 9) (2947, 1) 0.0991399 0.395671
The dataset is therefore properly normalised, as expected, but not yet one-hot encoded.


In [None]:
'''
from keras.models import Sequential
from keras.layers import LSTM, Dense, Conv1D, MaxPooling1D
import keras.utils as np_utils
# truncate and pad input sequences
# create the model
model = Sequential()

# optional:
model.add(Conv1D(filters=32, kernel_size=3, padding='same', activation='relu', input_shape = (128,9)))
model.add(MaxPooling1D(pool_size=2))

# Keras v1:
# model.add(LSTM(128, input_shape = (128,9), dropout=0.2, recurrent_dropout=0.2))
# model.add(LSTM(128, input_shape = (128,9), dropout_W=0.2, dropout_U=0.2))
model.add(LSTM(128, dropout_W=0.2, dropout_U=0.2))
model.add(Dense(6, activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())
model.fit(X_train, np_utils.to_categorical(y_train), nb_epoch=5, batch_size=64)
# Final evaluation of the model
scores = model.evaluate(X_test, np_utils.to_categorical(y_test), verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))


# Results
accuracy = scores[1]

predictions = model.predict(X_test).argmax(1)

print("Testing Accuracy: {}%".format(100*accuracy))

print("")
'''

In [2]:
batch_size = 64
from mxnet import gluon
train_data = gluon.data.DataLoader(gluon.data.ArrayDataset(X_train,y_train),
                                      batch_size, shuffle=True)
test_data = gluon.data.DataLoader(gluon.data.ArrayDataset(X_test,y_test),
                                     batch_size, shuffle=False)



In [10]:
epochs = 10

import mxnet as mx
from mxnet import gluon, autograd, nd
from mxnet.gluon import nn, rnn

net = mx.gluon.nn.Sequential()
with net.name_scope():
    net.add(mx.gluon.nn.Conv1D(channels=32, kernel_size=3, activation='relu'))
    net.add(mx.gluon.nn.MaxPool1D(pool_size=2)) # , strides=2)
    net.add(mx.gluon.rnn.LSTM(128, 1))#, dropout = 0.2
    net.add(mx.gluon.nn.Dense(6, flatten=True, activation='relu'))
#net.initialize()
#net(mx.nd.ones((2,3)))

ctx = mx.gpu(0)
net.collect_params().initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)

softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

trainer = gluon.Trainer(net.collect_params(), 'Adam')#, {'learning_rate': .1})

def evaluate_accuracy(data_iterator, net):
    acc = mx.metric.Accuracy()
    for i, (data, label) in enumerate(data_iterator):
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        output = net(data)
        predictions = nd.argmax(output, axis=1)
        acc.update(preds=predictions, labels=label)
    return acc.get()[1]

import time
smoothing_constant = .01


for e in range(epochs):
    start = time.time()
    for i, (data, label) in enumerate(train_data):
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        trainer.step(data.shape[0])
        
        # commented because they slow down the training
        ##########################
        #  Keep a moving average of the losses
        ##########################
        curr_loss = nd.mean(loss).asscalar()
        moving_loss = (curr_loss if ((i == 0) and (e == 0)) 
                       else (1 - smoothing_constant) * moving_loss + smoothing_constant * curr_loss)
        
    test_accuracy = evaluate_accuracy(test_data, net)
    train_accuracy = evaluate_accuracy(train_data, net)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" % (e, moving_loss, train_accuracy, test_accuracy))  
    print(time.time()-start)
    print('seconds per epoch')
    
    

Epoch 0. Loss: 1.28355383553, Train_acc 0.669477693145, Test_acc 0.635900916186
14.74898624420166
seconds per epoch
Epoch 1. Loss: 0.8503383417, Train_acc 0.765505984766, Test_acc 0.722090261283
14.444300413131714
seconds per epoch
Epoch 2. Loss: 0.644945110392, Train_acc 0.791077257889, Test_acc 0.744825246013
15.711294412612915
seconds per epoch
Epoch 3. Loss: 0.540760662778, Train_acc 0.796517954298, Test_acc 0.753987105531
15.75019907951355
seconds per epoch
Epoch 4. Loss: 0.492776917981, Train_acc 0.801006528836, Test_acc 0.747200542925
15.277543306350708
seconds per epoch
Epoch 5. Loss: 0.463150467495, Train_acc 0.811887921654, Test_acc 0.767220902613
16.257535934448242
seconds per epoch
Epoch 6. Loss: 0.446246977299, Train_acc 0.810119695321, Test_acc 0.771971496437
15.51608920097351
seconds per epoch
Epoch 7. Loss: 0.431683437547, Train_acc 0.819368879217, Test_acc 0.781472684086
16.275869607925415
seconds per epoch
Epoch 8. Loss: 0.423386925614, Train_acc 0.818144722524, Test_

In [14]:

output = net(X_test)
predictions=nd.argmax(output, axis=1)
print("Precision: {}%".format(100*metrics.precision_score(y_test, predictions, average="weighted")))
print("Recall: {}%".format(100*metrics.recall_score(y_test, predictions, average="weighted")))
print("f1_score: {}%".format(100*metrics.f1_score(y_test, predictions, average="weighted")))

print("")
print("Confusion Matrix:")
confusion_matrix = metrics.confusion_matrix(y_test, predictions)
print(confusion_matrix)
normalised_confusion_matrix = np.array(confusion_matrix, dtype=np.float32)/np.sum(confusion_matrix)*100

print("")
print("Confusion matrix (normalised to % of total test data):")
print(normalised_confusion_matrix)
print("Note: training and testing data is not equally distributed amongst classes, ")
print("so it is normal that more than a 6th of the data is correctly classifier in the last category.")

# Plot Results: 
width = 12
height = 12
plt.figure(figsize=(width, height))
plt.imshow(
    normalised_confusion_matrix, 
    interpolation='nearest', 
    cmap=plt.cm.rainbow
)
plt.title("Confusion matrix \n(normalised to % of total test data)")
plt.colorbar()
n_classes = 6 # Total classes 
tick_marks = np.arange(n_classes)
plt.xticks(tick_marks, LABELS, rotation=90)
plt.yticks(tick_marks, LABELS)
plt.tight_layout()
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()

AssertionError: HybridBlock requires the first argument to forward be either Symbol or NDArray, but got <class 'numpy.ndarray'>