# Dataset

The data was collected from 30 subjects aged between 19 and 48 years old performing one of six standard activities while wearing a waist-mounted smartphone that recorded the movement data. Video was recorded of each subject performing the activities and the movement data was labeled manually from these videos.

The six activities performed were as follows:
- Walking
- Walking Upstairs
- Walking Downstairs
- Sitting
- Standing
- Laying

# Develop an LSTM Network Model

In [None]:
# lstm model
from numpy import mean
from numpy import std
from numpy import dstack
from pandas import read_csv
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers import LSTM
from keras.utils import to_categorical
from matplotlib import pyplot

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

In [19]:
# load a single file as a numpy array
def load_file(filepath):
    """Read a header-less, whitespace-delimited text file into a NumPy array.

    Args:
        filepath: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        The parsed table as a 2D NumPy array (``DataFrame.values``).
    """
    # A regex whitespace separator is the supported replacement for
    # delim_whitespace=True, which pandas deprecated (2.2) and later removed.
    dataframe = read_csv(filepath, header=None, sep=r'\s+')
    return dataframe.values

In [6]:
# load a list of files into a 3D array of [samples, timesteps, features]
def load_group(filenames, prefix=''):
    """Load every named file and stack the results along the third axis.

    Args:
        filenames: Iterable of file names; each is appended to ``prefix``.
        prefix: String prepended to every file name (e.g. a directory path).

    Returns:
        The ``numpy.dstack`` of the arrays returned by ``load_file``, so
        each file occupies one slot of the third dimension.
    """
    per_file_arrays = [load_file(prefix + name) for name in filenames]
    # depth-wise stacking turns the list of 2D arrays into one 3D array
    return dstack(per_file_arrays)

In [7]:
# load a dataset group, such as train or test
def load_dataset_group(group, prefix=''):
    """Load the nine inertial-signal files and the label file of one group.

    Args:
        group: Group name used in both directory and file names
            (the callers in this file pass 'train' or 'test').
        prefix: Path prefix that contains the group directory.

    Returns:
        Tuple ``(X, y)``: ``X`` is the stacked signal array produced by
        ``load_group`` and ``y`` the array loaded from the ``y_<group>.txt``
        file.
    """
    signals_dir = prefix + group + '/Inertial Signals/'
    file_suffix = '_' + group + '.txt'
    # Order matters for the feature axis: total acceleration, body
    # acceleration, body gyroscope -- each as x, y, z.
    signal_files = [
        sensor + '_' + axis + file_suffix
        for sensor in ('total_acc', 'body_acc', 'body_gyro')
        for axis in ('x', 'y', 'z')
    ]
    # input data: all 9 signal files as a single array
    X = load_group(signal_files, signals_dir)
    # class output lives next to (not inside) the signals directory
    y = load_file(prefix + group + '/y_' + group + '.txt')
    return X, y

The output data is defined as an integer for the class number. We must one hot encode these class integers so that the data is suitable for fitting a neural network multi-class classification model. We can do this by calling the `to_categorical()` Keras function.

In [8]:
# load the dataset, returns train and test X and y elements
def load_dataset(prefix=''):
    """Load the train and test groups and one-hot encode their labels.

    Shapes are printed after each group is loaded and again after encoding.

    Args:
        prefix: Path prefix in front of the ``HARDataset/`` directory.

    Returns:
        Tuple ``(trainX, trainy, testX, testy)`` where the ``y`` arrays have
        been shifted down by one and passed through ``to_categorical``.
    """
    dataset_root = prefix + 'HARDataset/'
    # training split
    trainX, trainy = load_dataset_group('train', dataset_root)
    print(trainX.shape, trainy.shape)
    # test split
    testX, testy = load_dataset_group('test', dataset_root)
    print(testX.shape, testy.shape)
    # zero-offset the class values, then one hot encode them
    trainy = to_categorical(trainy - 1)
    testy = to_categorical(testy - 1)
    print(trainX.shape, trainy.shape, testX.shape, testy.shape)
    return trainX, trainy, testX, testy

# Fit and Evaluate Model
We define `evaluate_model()`, which takes the `train` and `test` datasets. It fits the model on the `train` dataset and evaluates the result on the `test` dataset.

The LSTM model requires a three-dimensional input with `[samples, time steps, features]`.

In [9]:
# fit and evaluate a model
def evaluate_model(trainX, trainy, testX, testy):
    """Train an LSTM classifier on the train data and score it on the test data.

    Args:
        trainX: Training inputs; axes 1 and 2 are used as the time-step and
            feature counts of the LSTM input shape.
        trainy: Training targets; axis 1 gives the number of output units.
        testX: Test inputs passed to ``model.evaluate``.
        testy: Test targets passed to ``model.evaluate``.

    Returns:
        The second value returned by ``model.evaluate`` (the 'accuracy'
        metric the model is compiled with).
    """
    # training configuration
    verbose = 0
    epochs = 15
    batch_size = 64
    # dimensions are read off the data itself
    n_timesteps = trainX.shape[1]
    n_features = trainX.shape[2]
    n_outputs = trainy.shape[1]
    # LSTM -> dropout -> dense hidden layer -> softmax output
    layers = [
        LSTM(100, input_shape=(n_timesteps, n_features)),
        Dropout(0.5),
        Dense(100, activation='relu'),
        Dense(n_outputs, activation='softmax'),
    ]
    model = Sequential()
    for layer in layers:
        model.add(layer)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    # fit network
    model.fit(trainX, trainy, epochs=epochs, batch_size=batch_size, verbose=verbose)
    # evaluate model; the first returned value (the loss) is not needed
    _loss, accuracy = model.evaluate(testX, testy, batch_size=batch_size, verbose=0)
    return accuracy

In [12]:
# summarize scores
def summarize_results(scores):
    """Print the raw scores, then their mean and standard deviation.

    Args:
        scores: Sequence of numeric scores (``run_experiment`` passes
            accuracies already scaled to percent).
    """
    print(scores)
    average = mean(scores)
    spread = std(scores)
    print('Accuracy: %.3f%% (+/-%.3f)' % (average, spread))

In [13]:
# run an experiment
def run_experiment(repeats=10):
    """Load the dataset once, then fit and evaluate the model repeatedly.

    Each run's accuracy is converted to percent, printed, and collected;
    the collected scores are finally passed to ``summarize_results``.

    Args:
        repeats: Number of times ``evaluate_model`` is called.
    """
    # the data is loaded a single time and reused by every run
    trainX, trainy, testX, testy = load_dataset()
    scores = []
    for run_number in range(1, repeats + 1):
        accuracy_pct = evaluate_model(trainX, trainy, testX, testy) * 100.0
        print('>#%d: %.3f' % (run_number, accuracy_pct))
        scores.append(accuracy_pct)
    # summarize results
    summarize_results(scores)

In [None]:
# run the experiment
# (uses the default of 10 repeats; prints the dataset shapes, one score
# line per repeat, and finally the summary)
run_experiment()

(7352, 128, 9) (7352, 1)
(2947, 128, 9) (2947, 1)
(7352, 128, 9) (7352, 6) (2947, 128, 9) (2947, 6)

>#1: 88.259


# Deconstructing the above code

In [28]:
# directory that holds the inertial-signal files of the training group
traindir = 'HARDataset/train/Inertial Signals/'

# the nine training signal files: total acceleration, body acceleration
# and body gyroscope, each along the x, y and z axes (in that order)
trainfiles = [
    f'{sensor}_{axis}_train.txt'
    for sensor in ('total_acc', 'body_acc', 'body_gyro')
    for axis in ('x', 'y', 'z')
]

In [29]:
# load a single file as a numpy array
def load_file(filepath):
    """Read a header-less, whitespace-delimited text file into a NumPy array.

    Args:
        filepath: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        The parsed table as a 2D NumPy array (``DataFrame.values``).
    """
    # A regex whitespace separator is the supported replacement for
    # delim_whitespace=True, which pandas deprecated (2.2) and later removed.
    dataframe = read_csv(filepath, header=None, sep=r'\s+')
    return dataframe.values

In [33]:
# Load every training signal file listed in `trainfiles`.
# (The loop previously iterated over `filenames`, a name that only exists
# as a local variable inside load_dataset_group, so running this cell in a
# fresh session raised NameError.)
loaded = [load_file(traindir + name) for name in trainfiles]
# stack group so that features are the 3rd dimension
loaded = dstack(loaded)
loaded.shape

(7352, 128, 9)

In [31]:
# exploring single file
# sep=r'\s+' splits on runs of whitespace; it replaces delim_whitespace=True,
# which pandas deprecated (2.2) and later removed.
df = read_csv(traindir + trainfiles[0], header=None, sep=r'\s+')
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,118,119,120,121,122,123,124,125,126,127
0,1.012817,1.022833,1.022028,1.017877,1.02368,1.016974,1.017746,1.019263,1.016417,1.020745,...,1.020981,1.018065,1.019638,1.020017,1.018766,1.019815,1.01929,1.018445,1.019372,1.021171
1,1.018851,1.02238,1.020781,1.020218,1.021344,1.020522,1.01979,1.019216,1.018307,1.017996,...,1.019291,1.019258,1.020736,1.02095,1.020491,1.018685,1.01566,1.014788,1.016499,1.017849
2,1.023127,1.021882,1.019178,1.015861,1.012893,1.016451,1.020331,1.020266,1.021759,1.018649,...,1.020304,1.021516,1.019417,1.019312,1.019448,1.019434,1.019916,1.021041,1.022935,1.022019
3,1.017682,1.018149,1.019854,1.01988,1.019121,1.020479,1.020595,1.01634,1.010611,1.009013,...,1.021295,1.022934,1.022183,1.021637,1.020598,1.018887,1.019161,1.019916,1.019602,1.020735
4,1.019952,1.019616,1.020933,1.023061,1.022242,1.020867,1.021939,1.0223,1.022302,1.022254,...,1.022687,1.02367,1.019899,1.017381,1.020389,1.023884,1.021753,1.019425,1.018896,1.016787


In [32]:
# the DataFrame's contents as a plain NumPy array (what load_file returns)
df.values

array([[1.012817 , 1.022833 , 1.022028 , ..., 1.018445 , 1.019372 ,
        1.021171 ],
       [1.018851 , 1.02238  , 1.020781 , ..., 1.014788 , 1.016499 ,
        1.017849 ],
       [1.023127 , 1.021882 , 1.019178 , ..., 1.021041 , 1.022935 ,
        1.022019 ],
       ...,
       [0.7548917, 0.8043137, 0.831714 , ..., 0.6956257, 0.7479103,
        0.776768 ],
       [0.9279268, 0.9129872, 0.9246597, ..., 0.6753473, 0.6603377,
        0.719353 ],
       [0.7980909, 0.8192417, 0.8658821, ..., 0.8980947, 0.8283723,
        0.8002428]])

In [34]:
# (rows, columns) of the single signal file loaded above
df.shape

(7352, 128)