In [1]:
# All Includes

import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import tensorflow as tf  # Version 1.0.0 (some previous versions are used in past commits)
from sklearn import metrics

import os

In [2]:
# Useful Constants

# Separate normalised input features for the neural network: every
# combination of three signal groups with the x, y and z axes, in the
# order the rest of the notebook uses to stack them.
INPUT_SIGNAL_TYPES = [
    signal_group + "_" + axis + "_"
    for signal_group in ("body_acc", "body_gyro", "total_acc")
    for axis in ("x", "y", "z")
]

# Output classes to learn how to classify
LABELS = (
    "WALKING WALKING_UPSTAIRS WALKING_DOWNSTAIRS "
    "SITTING STANDING LAYING"
).split()


In [3]:
# Note: Linux bash commands start with a "!" inside those "ipython notebook" cells

DATA_PATH = "data/"

!pwd && ls
os.chdir(DATA_PATH)
!pwd && ls

!python download_dataset.py

!pwd && ls
os.chdir("..")
!pwd && ls

DATASET_PATH = DATA_PATH + "UCI HAR Dataset/"
print("\n" + "Dataset is now located at: " + DATASET_PATH)


/home/msq-4/ML/Level2/Human-Activity-Recognition-Using-RNN-LSTM
 data  'Human Activity Recognition Using RNN LSTM.ipynb'   README.md
/home/msq-4/ML/Level2/Human-Activity-Recognition-Using-RNN-LSTM/data
 download_dataset.py   __MACOSX    'UCI HAR Dataset'
 launch.sh	       source.txt  'UCI HAR Dataset.zip'

Downloading...
Dataset already downloaded. Did not download twice.

Extracting...
Dataset already extracted. Did not extract twice.

/home/msq-4/ML/Level2/Human-Activity-Recognition-Using-RNN-LSTM/data
 download_dataset.py   __MACOSX    'UCI HAR Dataset'
 launch.sh	       source.txt  'UCI HAR Dataset.zip'
/home/msq-4/ML/Level2/Human-Activity-Recognition-Using-RNN-LSTM
 data  'Human Activity Recognition Using RNN LSTM.ipynb'   README.md

Dataset is now located at: data/UCI HAR Dataset/


In [4]:
# Sub-directory names (with trailing slash) of the two dataset splits
TRAIN, TEST = "train/", "test/"


# Load "X" (the neural network's training and testing inputs)

def load_X(X_signals_paths):
    """Load several signal text files and stack them into one 3-D array.

    Every file is read line by line; each non-blank line is split on any
    run of whitespace and parsed as a row of float32 values.

    Args:
        X_signals_paths: iterable of paths, one text file per signal type.
            All files are expected to hold the same number of rows and the
            same number of values per row, otherwise stacking fails.

    Returns:
        np.ndarray of dtype float32 with shape
        (rows per file, values per row, number of files).

    Raises:
        OSError: if a file cannot be opened.
        ValueError: if a token cannot be parsed as a float, or if the
            stacked data is not 3-dimensional (e.g. no paths were given).
    """
    X_signals = []

    for signal_type_path in X_signals_paths:
        # `with` releases the file handle even when parsing raises.
        with open(signal_type_path, 'r') as file:
            # str.split() with no argument collapses any amount of
            # whitespace (spaces, tabs) and ignores leading/trailing blanks;
            # blank lines are skipped so they cannot produce ragged rows.
            X_signals.append([
                np.array(row.split(), dtype=np.float32)
                for row in file
                if row.strip()
            ])

    # Stacked as (file, row, value); reorder to (row, value, file).
    return np.transpose(np.array(X_signals), (1, 2, 0))

# One path per input signal, in INPUT_SIGNAL_TYPES order, for each split;
# each split is loaded as soon as its paths are known.
X_train_signals_paths = [
    "".join((DATASET_PATH, TRAIN, "Inertial Signals/", signal_name, "train.txt"))
    for signal_name in INPUT_SIGNAL_TYPES
]
X_train = load_X(X_train_signals_paths)

X_test_signals_paths = [
    "".join((DATASET_PATH, TEST, "Inertial Signals/", signal_name, "test.txt"))
    for signal_name in INPUT_SIGNAL_TYPES
]
X_test = load_X(X_test_signals_paths)


# Load "y" (the neural network's training and testing outputs)

def load_y(y_path):
    """Load class labels from a text file and shift them to 0-based indices.

    Every non-blank line is split on any run of whitespace and parsed as a
    row of int32 values.

    Args:
        y_path: path of the label text file.

    Returns:
        np.ndarray of dtype int32 with one row per non-blank line, each
        value being the value read from the file minus 1.

    Raises:
        OSError: if the file cannot be opened.
        ValueError: if a token cannot be parsed as an integer.
    """
    # `with` releases the file handle even when parsing raises.
    with open(y_path, 'r') as file:
        # str.split() with no argument copes with tabs, repeated spaces and
        # leading/trailing blanks; blank lines are skipped.
        y_ = np.array(
            [row.split() for row in file if row.strip()],
            dtype=np.int32
        )

    # Subtract 1 from each output class for friendly 0-based indexing
    return y_ - 1

# Label files sit directly inside each split's directory; each split is
# loaded as soon as its path is known.
y_train_path = "".join((DATASET_PATH, TRAIN, "y_train.txt"))
y_train = load_y(y_train_path)

y_test_path = "".join((DATASET_PATH, TEST, "y_test.txt"))
y_test = load_y(y_test_path)


In [5]:
# Input Data

# X_train is laid out as (series, timesteps, input parameters):
#   7352 training series (with 50% overlap between each serie),
#   128 timesteps per series, 9 input parameters per timestep
training_data_count, n_steps, n_input = X_train.shape
test_data_count = X_test.shape[0]  # 2947 testing series


# LSTM Neural Network's internal structure

n_hidden = 32  # Hidden layer num of features
n_classes = 6  # Total number of output classes


# Training

learning_rate = 0.0025
lambda_loss_amount = 0.0015
batch_size = 1500
display_iter = 30000  # To show test set accuracy during training
training_iters = training_data_count * 300  # Loop 300 times on the dataset


# Some debugging info

print(
    "Some useful info to get an insight on dataset's shape and normalisation:",
    "(X shape, y shape, every X's mean, every X's standard deviation)",
    sep="\n"
)
print(X_test.shape, y_test.shape, X_test.mean(), X_test.std())
print("The dataset is therefore properly normalised, as expected, but not yet one-hot encoded.")


Some useful info to get an insight on dataset's shape and normalisation:
(X shape, y shape, every X's mean, every X's standard deviation)
(2947, 128, 9) (2947, 1) 0.09913992 0.39567086
The dataset is therefore properly normalised, as expected, but not yet one-hot encoded.


In [13]:
# Scratch check: slicing with 0:1 keeps the leading axis, so one series
# still has three dimensions.
X_train[0:1].shape

(1, 128, 9)

In [16]:
# Scratch check: the label of the first training series, as a 0-based
# class index in a length-1 array.
y_train[0]

array([4], dtype=int32)

In [18]:
# Scratch check: perm=[2,1,0] reverses the order of all three axes.
tf.transpose(X_train[0:1], perm=[2,1,0]).shape

TensorShape([9, 128, 1])

In [17]:
# Scratch check: perm=[1,0,2] swaps the first two axes and leaves the
# last one in place.
tf.transpose(X_train[0:1], perm=[1,0,2]).shape

TensorShape([128, 1, 9])

In [34]:
from tensorflow import keras

In [None]:
# Keras LSTM input layout: (batch_size, time_steps, n_features)
# >>> inputs = tf.random.normal([32, 10, 8])
# >>> lstm = tf.keras.layers.LSTM(4)
# >>> output = lstm(inputs)
# >>> print(output.shape)
# (32, 4)

In [41]:
# One-hot vector with its single 1 at index 4
a = [0, 0, 0, 0, 1, 0]

In [42]:
# One-hot vector with its single 1 at index 3
b = [0, 0, 0, 1, 0, 0]

In [43]:
# Scratch check of the metric used when compiling the model: `a` and `b`
# have their 1 at different positions, so the single pair scores 0.
keras.metrics.categorical_accuracy([a], [b])

<tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.], dtype=float32)>

In [59]:
# Model: two stacked LSTM layers followed by a softmax classifier.
nbr_hidden = 32  # units in each LSTM layer
nbr_steps = len(X_train[0])  # timesteps per series
nbr_signals = len(X_train[0][0])  # input signals per timestep

model = keras.models.Sequential([
    # return_sequences=True makes the first LSTM emit one vector per
    # timestep, which is what the second LSTM consumes.
    keras.layers.LSTM(nbr_hidden, return_sequences=True, input_shape=(nbr_steps, nbr_signals)),
    keras.layers.LSTM(nbr_hidden),
    # One output unit per class; reuse n_classes instead of repeating the
    # literal so the layer cannot drift from the configuration.
    keras.layers.Dense(n_classes, activation='softmax')
])

# `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=[keras.metrics.categorical_accuracy]
)

In [60]:
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the 0-based class indices.
# The categories are listed explicitly so the encoding always has exactly
# n_classes columns in class-index order, whatever labels a split contains.
# The encoder is fitted on the training labels, never on the test labels.
enc = OneHotEncoder(categories=[list(range(n_classes))])
enc.fit(y_train)

# transform() returns a sparse matrix; the model is fed dense arrays.
y_train_ = enc.transform(y_train).toarray()
y_test_ = enc.transform(y_test).toarray()

In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


In [61]:
callbacks = [
    # patience=0: stop as soon as val_loss fails to improve on the previous best.
    tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=0),
    # Referenced through tf.keras.callbacks because a bare `ModelCheckpoint`
    # is never imported in this notebook and raises NameError on a fresh kernel.
    tf.keras.callbacks.ModelCheckpoint('model_weights', monitor='val_loss', save_best_only=True, verbose=0)
]

# validation_split=0.1 holds out 10% of the training data to compute val_loss.
model.fit(X_train, y_train_, epochs=3, validation_split=0.1, callbacks=callbacks)

Train on 7352 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x7f73e4379050>