In [1]:
from lstm_architecture import one_hot, run_with_config
import numpy as np
import os

# Neural net's config.

In [3]:
class Config(object):
    """
    Bundle of the network's hyper-parameters and data dimensions.

    The constructor receives the training and the testing feature tensors
    and derives the data-dependent sizes from them; all the other values
    are fixed defaults that a subclass can overwrite after calling
    this constructor.
    """

    def __init__(self, X_train, X_test):
        # The first training series is taken as representative of all
        # series for the per-series dimensions.
        first_series = X_train[0]

        # ---- Data shaping ----
        # Series counts (7352 training / 2947 testing with the data loaded
        # in this notebook).
        self.train_count = len(X_train)
        self.test_data_count = len(X_test)
        # Time steps in one series (128 with the data of this notebook).
        self.n_steps = len(first_series)
        # Number of final output classes.
        self.n_classes = 6

        # ---- Training ----
        self.learning_rate = 0.001
        self.lambda_loss_amount = 0.005
        self.training_epochs = 100
        self.batch_size = 100
        self.clip_gradients = 15.0
        self.gradient_noise_scale = None
        # Dropout is applied on the inputs and after each stacked layer,
        # but not between residual layers.
        self.keep_prob_for_dropout = 0.85  # **(1/3.0)

        # ---- Linear+relu structure ----
        self.bias_mean = 0.3
        # Recommended range is 0.1 to 1.0; alternatively switch to a xavier
        # initializer.
        self.weights_stddev = 0.2

        ########
        # NOTE: when any parameter below is changed, it is best to re-tune
        # every parameter of the "Training" section above, so that
        # architectures are only compared once each has been optimised.
        ########

        # ---- LSTM structure ----
        # Features per time step (9 with this notebook's data: three 3D
        # sensor features over time).
        self.n_inputs = len(first_series[0])
        # Number of neurons inside the neural network.
        self.n_hidden = 28
        # Whether every LSTM cell is bidirectional.
        self.use_bidirectionnal_cells = False

        # ---- High-level deep architecture ----
        self.also_add_dropout_between_stacked_cells = False

# Constant parameters

In [7]:
# File-name prefixes of the separately normalised input features of the
# neural network: each of the three sensor signals along the x, y and z axes.
INPUT_SIGNAL_TYPES = list(
    "{}_{}_".format(sensor_name, axis_name)
    for sensor_name in ("body_acc", "body_gyro", "total_acc")
    for axis_name in ("x", "y", "z")
)

# Output classes the network learns to classify.
LABELS = [
    "WALKING", "WALKING_UPSTAIRS", "WALKING_DOWNSTAIRS",
    "SITTING", "STANDING", "LAYING",
]

# Dataset location on disk and the sub-folders of its two splits.
DATA_PATH = "data/"
DATASET_PATH = "{}UCI HAR Dataset/".format(DATA_PATH)

TRAIN, TEST = "train/", "test/"

# Load data

In [13]:
# Load "X" (the neural network's training and testing inputs)

def load_X(X_signals_paths):
    """
    Read one text file per input signal and stack them into a single
    feature tensor.

    Every file must hold one series per line, written as floats separated
    by any amount of whitespace; blank lines are ignored. All files must
    contain the same number of series and of values per series.

        argument:   X_signals_paths iterable of str, the path of one file
                    per input signal (feature)
        return:     np ndarray of float32, of shape
                    [sample_sequence_idx, time_step, feature_num]
    """
    X_signals = []

    for signal_type_path in X_signals_paths:
        # Text mode, so that rows are str on Python 2 and 3 alike (binary
        # mode yields bytes on Python 3, which cannot be split with str
        # separators). The context manager closes the file even when a
        # row fails to parse.
        with open(signal_type_path, 'r') as signal_file:
            # split() without argument collapses any run of whitespace and
            # drops the leading/trailing one, which copes with the files'
            # variable column padding.
            X_signals.append([
                np.array(row.split(), dtype=np.float32)
                for row in signal_file
                if row.strip()
            ])

    # [feature, sample, time_step] -> [sample, time_step, feature]
    return np.transpose(np.array(X_signals), (1, 2, 0))

# One file path per entry of INPUT_SIGNAL_TYPES, for the training split and
# for the testing split respectively.
X_train_signals_paths = [
    "{}{}Inertial Signals/{}train.txt".format(DATASET_PATH, TRAIN, signal)
    for signal in INPUT_SIGNAL_TYPES
]
X_test_signals_paths = [
    "{}{}Inertial Signals/{}test.txt".format(DATASET_PATH, TEST, signal)
    for signal in INPUT_SIGNAL_TYPES
]

# Feature tensors of both splits.
X_train = load_X(X_train_signals_paths)
X_test = load_X(X_test_signals_paths)


# Load "y" (the neural network's training and testing outputs)

def load_y(y_path):
    """
    Read the file of class labels to be predicted and one-hot encode them.

    The file must hold one integer label per line, numbered from 1;
    blank lines are ignored.

        argument: y_path str, path of the label file
        return: the result of one_hot() applied to the 0-based labels
                (an array of shape (2947, 6) for this notebook's test split)
    """
    # Text mode, so that rows are str on Python 2 and 3 alike; the context
    # manager closes the file even when a row fails to parse.
    with open(y_path, 'r') as label_file:
        # Each row becomes a list of its whitespace-separated tokens, so
        # the array keeps one row per sample.
        y_ = np.array(
            [row.split() for row in label_file if row.strip()],
            dtype=np.int32
        )

    # Subtract 1 from each output class for friendly 0-based indexing
    return one_hot(y_ - 1)

# Label files of the training and of the testing split.
y_train_path = "{}{}y_train.txt".format(DATASET_PATH, TRAIN)
y_test_path = "{}{}y_test.txt".format(DATASET_PATH, TEST)

# One-hot encoded labels of both splits.
y_train = load_y(y_train_path)
y_test = load_y(y_test_path)

# Quick look at the shapes of the training features and the testing labels.
print(X_train.shape, y_test.shape)

((7352, 128, 9), (2947, 6))


# Training 

In [5]:
# Architecture under test; also used to name the results file.
n_layers_in_highway = 0
n_stacked_layers = 3
trial_name = "{}x{}".format(n_layers_in_highway, n_stacked_layers)

# Grid search over the training hyper-parameters: one full training run per
# combination, each appending a row to the trial's results file.
# All print calls below take a single argument so that they produce the same
# output under Python 2 and Python 3.
for learning_rate in [0.001]:  # [0.01, 0.007, 0.001, 0.0007, 0.0001]:
    for lambda_loss_amount in [0.005]:
        for clip_gradients in [15.0]:
            print("learning_rate: {}".format(learning_rate))
            print("lambda_loss_amount: {}".format(lambda_loss_amount))
            print("")

            class EditedConfig(Config):
                """Config overridden with this iteration's hyper-parameters."""

                def __init__(self, X, Y):
                    # X and Y are forwarded unchanged to Config.__init__,
                    # which names them X_train and X_test.
                    super(EditedConfig, self).__init__(X, Y)

                    # Edit only some parameters:
                    self.learning_rate = learning_rate
                    self.lambda_loss_amount = lambda_loss_amount
                    self.clip_gradients = clip_gradients
                    # Architecture params:
                    self.n_layers_in_highway = n_layers_in_highway
                    self.n_stacked_layers = n_stacked_layers

            # NOTE: to keep a long sweep alive after a failed run (e.g. not
            # enough memory), wrap this call in `try/except Exception` and
            # record sentinel scores instead.
            accuracy_out, best_accuracy, f1_score_out, best_f1_score = (
                run_with_config(EditedConfig, X_train, y_train, X_test, y_test)
            )
            # Printed as one tuple.
            print((accuracy_out, best_accuracy, f1_score_out, best_f1_score))

            # One row per run: hyper-parameters first, then the scores,
            # separated by ' \t' and followed by an empty line.
            result_row = ' \t'.join(str(value) for value in (
                learning_rate, lambda_loss_amount, clip_gradients,
                accuracy_out, best_accuracy, f1_score_out, best_f1_score,
            )) + '\n\n'
            with open('{}_result_HAR_6.txt'.format(trial_name), 'a') as f:
                f.write(result_row)

            print("______________________________________")
        print("")
print("Done.")

learning_rate: 0.001
lambda_loss_amount: 0.005

Some useful info to get an insight on dataset's shape and normalisation:
features shape, labels shape, each features mean, each features standard deviation
((2947, 128, 9), (2947, 6), 0.099139921, 0.39567086)
the dataset is therefore properly normalised, as expected.
(128, ?, 9)
(?, 9)
(128, '(?, 9)')

Creating hidden #1:
(128, '(?, 28)')

Creating hidden #2:
(128, '(?, 28)')

Creating hidden #3:
(128, '(?, 28)')

Unregularised variables:
LSTM_network/layer_1/relu_fc_biases_noreg:0
LSTM_network/layer_2/relu_fc_biases_noreg:0
LSTM_network/layer_3/relu_fc_biases_noreg:0
LSTM_network/relu_fc_biases_noreg:0
iter: 0, train loss: 1.48976099491, train accuracy: 0.589999973774, train F1-score: 0.417193900053, test loss: 1.55180799961, test accuracy: 0.497455000877, test F1-score: 0.399625026882


  'precision', 'predicted', average, warn_for)


iter: 1, train loss: 1.07946181297, train accuracy: 0.659999966621, train F1-score: 0.692696126349, test loss: 1.12139594555, test accuracy: 0.635222196579, test F1-score: 0.600443796554
iter: 2, train loss: 1.02660059929, train accuracy: 0.689999938011, train F1-score: 0.580651247718, test loss: 1.04967176914, test accuracy: 0.634543597698, test F1-score: 0.566870563919
iter: 3, train loss: 0.952469825745, train accuracy: 0.719999969006, train F1-score: 0.710868993751, test loss: 1.12477648258, test accuracy: 0.64540207386, test F1-score: 0.594899063648
iter: 4, train loss: 0.958036482334, train accuracy: 0.659999966621, train F1-score: 0.615029437229, test loss: 1.08741021156, test accuracy: 0.569392561913, test F1-score: 0.508553364665
iter: 5, train loss: 0.846531391144, train accuracy: 0.769999980927, train F1-score: 0.637677613574, test loss: 0.995781242847, test accuracy: 0.678656220436, test F1-score: 0.6308162926
iter: 6, train loss: 0.846986651421, train accuracy: 0.729999959

iter: 45, train loss: 0.279854238033, train accuracy: 0.980000019073, train F1-score: 0.941298701299, test loss: 0.502293109894, test accuracy: 0.896504878998, test F1-score: 0.897532230814
iter: 46, train loss: 0.265030443668, train accuracy: 0.980000019073, train F1-score: 0.96, test loss: 0.491663336754, test accuracy: 0.900237441063, test F1-score: 0.90096592612
iter: 47, train loss: 0.375032931566, train accuracy: 0.950000047684, train F1-score: 0.908781431335, test loss: 0.594067156315, test accuracy: 0.865965306759, test F1-score: 0.865719824144
iter: 48, train loss: 0.474652975798, train accuracy: 0.920000016689, train F1-score: 0.898156647274, test loss: 0.988148450851, test accuracy: 0.795385062695, test F1-score: 0.786086371769
iter: 49, train loss: 0.273162096739, train accuracy: 0.969999969006, train F1-score: 0.959843137255, test loss: 0.463391721249, test accuracy: 0.908381342888, test F1-score: 0.90911984935
iter: 50, train loss: 0.254147827625, train accuracy: 0.990000

iter: 89, train loss: 0.237226620317, train accuracy: 0.97000002861, train F1-score: 0.939912905113, test loss: 0.41417658329, test accuracy: 0.920936405659, test F1-score: 0.921058289456
iter: 90, train loss: 0.268868744373, train accuracy: 0.97000002861, train F1-score: 0.88858560794, test loss: 0.424510061741, test accuracy: 0.918221831322, test F1-score: 0.918442938015
iter: 91, train loss: 0.26006925106, train accuracy: 0.950000047684, train F1-score: 0.969907206119, test loss: 0.510244190693, test accuracy: 0.877163171768, test F1-score: 0.876800762414
iter: 92, train loss: 0.229205608368, train accuracy: 0.990000009537, train F1-score: 0.96, test loss: 0.482990443707, test accuracy: 0.903970003128, test F1-score: 0.904985979587
iter: 93, train loss: 0.221538588405, train accuracy: 0.97000002861, train F1-score: 0.910266075388, test loss: 0.479527592659, test accuracy: 0.901594758034, test F1-score: 0.902435416707
iter: 94, train loss: 0.232344046235, train accuracy: 0.9700000286

iter: 133, train loss: 0.26315972209, train accuracy: 0.950000047684, train F1-score: 0.940146929825, test loss: 0.45954990387, test accuracy: 0.900916099548, test F1-score: 0.901994806214
iter: 134, train loss: 0.207938224077, train accuracy: 0.980000019073, train F1-score: 0.970049140049, test loss: 0.466618686914, test accuracy: 0.900237441063, test F1-score: 0.900908934592
iter: 135, train loss: 0.235848933458, train accuracy: 0.980000019073, train F1-score: 0.98, test loss: 0.382405161858, test accuracy: 0.921954452991, test F1-score: 0.922149277053
iter: 136, train loss: 0.309099256992, train accuracy: 0.939999997616, train F1-score: 0.930248746397, test loss: 0.457054078579, test accuracy: 0.891075611115, test F1-score: 0.892356458176
iter: 137, train loss: 0.260082662106, train accuracy: 0.969999969006, train F1-score: 0.930507731253, test loss: 0.429689407349, test accuracy: 0.911774516106, test F1-score: 0.912449810618
iter: 138, train loss: 0.234112262726, train accuracy: 0.

iter: 177, train loss: 0.245626986027, train accuracy: 0.960000038147, train F1-score: 0.969655172414, test loss: 0.352493077517, test accuracy: 0.923990368843, test F1-score: 0.92387155813
iter: 178, train loss: 0.203803420067, train accuracy: 0.969999969006, train F1-score: 0.959619047619, test loss: 0.485212504864, test accuracy: 0.89989811182, test F1-score: 0.900957349637
iter: 179, train loss: 0.26878875494, train accuracy: 0.959999978542, train F1-score: 0.969967814874, test loss: 0.393452316523, test accuracy: 0.913471221924, test F1-score: 0.913819431952
iter: 180, train loss: 0.182964220643, train accuracy: 0.989999949932, train F1-score: 0.95033257393, test loss: 0.368343234062, test accuracy: 0.917203783989, test F1-score: 0.916890598644
iter: 181, train loss: 0.201235711575, train accuracy: 0.980000019073, train F1-score: 0.939110023702, test loss: 0.389360934496, test accuracy: 0.920936405659, test F1-score: 0.920704247237
iter: 182, train loss: 0.187376886606, train accu

iter: 221, train loss: 0.187316983938, train accuracy: 0.990000009537, train F1-score: 0.919389978214, test loss: 0.462219119072, test accuracy: 0.904988050461, test F1-score: 0.905188312817
iter: 222, train loss: 0.18885371089, train accuracy: 0.980000019073, train F1-score: 0.96, test loss: 0.36445787549, test accuracy: 0.925347626209, test F1-score: 0.925197319335
iter: 223, train loss: 0.259456008673, train accuracy: 0.97000002861, train F1-score: 0.951103255057, test loss: 0.501508414745, test accuracy: 0.906005978584, test F1-score: 0.906743713978
iter: 224, train loss: 0.203630849719, train accuracy: 0.980000019073, train F1-score: 0.950392327812, test loss: 0.388038218021, test accuracy: 0.926026284695, test F1-score: 0.926104528374
iter: 225, train loss: 0.242014586926, train accuracy: 0.97000002861, train F1-score: 0.97019138756, test loss: 0.492975473404, test accuracy: 0.904988050461, test F1-score: 0.905857316575
iter: 226, train loss: 0.223167911172, train accuracy: 0.970