In [1]:
import numpy as np
import os
import matplotlib.pyplot as plt
from sklearn import cross_validation
from sklearn.metrics import classification_report, confusion_matrix
from lstm_architecture import one_hot, run_with_config



# label

In [1]:
# Human-readable message-type names, keyed by the string form of their
# position (0..13) in the list below.
labels = {
    str(index): name
    for index, name in enumerate([
        'file', 'network', 'service', 'database', 'communication', 'memory',
        'driver', 'system', 'application', 'io', 'others', 'security',
        'disk', 'processor',
    ])
}

# load data

In [6]:
def load_X(X_path):
    """Load a tab-separated feature file into a 3-D float32 array.

    Every non-blank line of the file is one sample; its tab-separated
    fields are parsed as float32 values.

    Args:
        X_path: path of the text file to read.

    Returns:
        numpy array of shape (n_lines, n_fields_per_line, 1).

    Raises:
        ValueError: if a field cannot be parsed as a float (raised by numpy).

    Note: all lines are assumed to hold the same number of fields.
    """
    # `with` guarantees the handle is closed even if parsing raises.
    with open(X_path, 'r') as feature_file:
        rows = [
            np.array(line.strip().split('\t'), dtype=np.float32)
            for line in feature_file
            if line.strip()  # skip blank lines (e.g. a trailing newline at EOF)
        ]
    # Wrapping in a list yields shape (1, n_lines, n_fields); moving that
    # leading axis to the end gives (n_lines, n_fields, 1).
    return np.transpose(np.array([rows]), (1, 2, 0))


# Load "y" (the neural network's training and testing outputs)
def load_y(y_path):
    """Load a tab-separated label file and one-hot encode it.

    Every non-blank line of the file is split on tabs and parsed as int32.

    Args:
        y_path: path of the text file to read.

    Returns:
        Whatever `one_hot` (imported from lstm_architecture) returns for the
        parsed int32 label array.

    Raises:
        ValueError: if a field cannot be parsed as an integer (raised by numpy).
    """
    # `with` guarantees the handle is closed even if parsing raises.
    with open(y_path, 'r') as label_file:
        rows = [
            line.strip().split('\t')
            for line in label_file
            if line.strip()  # skip blank lines (e.g. a trailing newline at EOF)
        ]
    y_ = np.array(rows, dtype=np.int32)
    # The labels are handed to one_hot unchanged: no index shift is applied
    # here, so the file is expected to already hold the indices one_hot needs.
    return one_hot(y_)

# Locations of the tab-separated feature and label files.
dataset_path = "data_msg_type/"
X_path = os.path.join(dataset_path, "semantic_sim.txt")
y_path = os.path.join(dataset_path, "semantic_label_index.txt")

# Read the features and the one-hot encoded labels from disk.
X = load_X(X_path)
y = load_y(y_path)

# Hold out 20% of the samples for testing; the fixed random_state keeps the
# split identical from one run to the next.
print("Separating data into 80% training set & 20% test set...")
train_test_parts = cross_validation.train_test_split(
    X, y, test_size=0.2, random_state=33)
X_train, X_test, y_train, y_test = train_test_parts
print("Dataset separated.\n")

print(X_train.shape, y_train.shape, y_test.shape)

Separating data into 80% training set & 20% test set...
Dataset separated.

((160000, 14, 1), (160000, 14), (40000, 14))


# network config

In [2]:
class Config(object):
    """
    Plain container for the hyper-parameters of the LSTM network.

    The dataset-dependent sizes (number of series, time steps per series and
    features per time step) are derived from the training and testing feature
    arrays handed to the constructor; every other value is a fixed default
    that subclasses may override after calling this constructor.
    """

    def __init__(self, X_train, X_test):
        # ---- Dataset dimensions -------------------------------------------
        self.train_count = len(X_train)  # number of training series
        self.test_data_count = len(X_test)  # number of testing series
        first_series = X_train[0]
        self.n_steps = len(first_series)  # time steps per series
        self.n_classes = 14  # number of output classes (hard-coded)

        # ---- Optimisation -------------------------------------------------
        self.learning_rate = 0.0001
        self.lambda_loss_amount = 0.005
        self.training_epochs = 200
        self.batch_size = 128
        self.clip_gradients = 15.0
        self.gradient_noise_scale = None
        # Keep-probability of the dropout applied on the inputs and after
        # each stacked layer (but not between residual layers).
        self.keep_prob_for_dropout = 0.85  # **(1/3.0)

        # ---- Linear + ReLU initialisation ---------------------------------
        self.bias_mean = 0.3
        # A value between 0.1 and 1.0 is suggested; a xavier initializer
        # would be an alternative.
        self.weights_stddev = 0.2

        ########
        # NOTE: when any of the structural parameters below is changed, the
        # "Optimisation" values above should be re-tuned as well, so that
        # architectures are only compared once each one is optimised.
        ########

        # ---- LSTM structure -----------------------------------------------
        # Number of features observed at each time step, taken from the data.
        self.n_inputs = len(first_series[0])
        self.n_hidden = 128  # neurons per hidden layer
        # Whether every LSTM cell is bidirectional:
        self.use_bidirectionnal_cells = False

        # ---- High-level deep architecture ---------------------------------
        self.also_add_dropout_between_stacked_cells = False

# train and test

In [4]:
# Hyper-parameter sweep: for every (learning_rate, lambda_loss_amount,
# clip_gradients) combination, train and evaluate the network through
# run_with_config and append the resulting metrics to a per-trial text file.
n_layers_in_highway = 0
n_stacked_layers = 3
trial_name = "{}x{}".format(n_layers_in_highway, n_stacked_layers)

# Create the output directory *before* training: otherwise a missing
# directory is only discovered when the results are written, after the whole
# (long) training run, and the metrics are lost.
result_path = 'result/{}_result_.txt'.format(trial_name)
result_dir = os.path.dirname(result_path)
if not os.path.isdir(result_dir):
    os.makedirs(result_dir)

for learning_rate in [0.0001]:  # [0.01, 0.007, 0.001, 0.0007, 0.0001]:
    for lambda_loss_amount in [0.005]:
        for clip_gradients in [15.0]:
            # Single-argument, parenthesised prints produce the same output
            # under Python 2 and match the style of the data-loading cell.
            print("learning_rate: {}".format(learning_rate))
            print("lambda_loss_amount: {}".format(lambda_loss_amount))
            print("")

            class EditedConfig(Config):
                """Config with this iteration's swept values applied."""

                def __init__(self, X, Y):
                    # Both arguments are forwarded positionally to
                    # Config.__init__, so `Y` fills its `X_test` parameter.
                    super(EditedConfig, self).__init__(X, Y)

                    # Edit only some parameters:
                    self.learning_rate = learning_rate
                    self.lambda_loss_amount = lambda_loss_amount
                    self.clip_gradients = clip_gradients
                    # Architecture params:
                    self.n_layers_in_highway = n_layers_in_highway
                    self.n_stacked_layers = n_stacked_layers

            accuracy_out, best_accuracy, f1_score_out, best_f1_score = (
                run_with_config(EditedConfig, X_train, y_train, X_test, y_test)
            )
            # str() of the tuple keeps the "(a, b, c, d)" output format.
            print(str((accuracy_out, best_accuracy, f1_score_out, best_f1_score)))

            # One record per run: fields separated by ' \t', followed by a
            # blank line.
            record_fields = (
                learning_rate, lambda_loss_amount, clip_gradients,
                accuracy_out, best_accuracy, f1_score_out, best_f1_score,
            )
            with open(result_path, 'a') as f:
                f.write(' \t'.join(str(field) for field in record_fields) + '\n\n')

            print("______________________________________")
        print("")
print("Done.")

learning_rate: 0.001
lambda_loss_amount: 0.005

Some useful info to get an insight on dataset's shape and normalisation:
features shape, labels shape, each features mean, each features standard deviation
((40000, 14, 1), (40000, 14), 0.11178214, 0.41461828)
the dataset is therefore properly normalised, as expected.
(14, ?, 1)
(?, 1)
(14, '(?, 1)')

Creating hidden #1:
(14, '(?, 128)')

Creating hidden #2:
(14, '(?, 128)')

Creating hidden #3:
(14, '(?, 128)')

Unregularised variables:
LSTM_network/layer_1/relu_fc_biases_noreg:0
LSTM_network/layer_2/relu_fc_biases_noreg:0
LSTM_network/layer_3/relu_fc_biases_noreg:0
LSTM_network/relu_fc_biases_noreg:0


  'precision', 'predicted', average, warn_for)


iter: 0, train loss: 0.497359424829, train accuracy: 0.890625, train F1-score: 0.890148809524, test loss: 0.358922839165, test accuracy: 0.926674127579, test F1-score: 0.894389265215
iter: 1, train loss: 0.414793372154, train accuracy: 0.9296875, train F1-score: 0.890160467596, test loss: 0.27986100316, test accuracy: 0.956549108028, test F1-score: 0.945356158529
iter: 2, train loss: 0.455747634172, train accuracy: 0.8984375, train F1-score: 0.930678475936, test loss: 0.271679371595, test accuracy: 0.958349168301, test F1-score: 0.947470328203
iter: 3, train loss: 0.404860436916, train accuracy: 0.921875, train F1-score: 0.963214361929, test loss: 0.281264841557, test accuracy: 0.957374095917, test F1-score: 0.946350164747
iter: 4, train loss: 0.311050117016, train accuracy: 0.9375, train F1-score: 0.932641173246, test loss: 0.248396068811, test accuracy: 0.956924080849, test F1-score: 0.945641014636


  'recall', 'true', average, warn_for)


iter: 5, train loss: 0.470039665699, train accuracy: 0.90625, train F1-score: 0.891021397783, test loss: 0.245587170124, test accuracy: 0.958899080753, test F1-score: 0.948040815633
iter: 6, train loss: 0.400867342949, train accuracy: 0.9140625, train F1-score: 0.92565252658, test loss: 0.260438680649, test accuracy: 0.956924080849, test F1-score: 0.94574240992
iter: 7, train loss: 0.425896584988, train accuracy: 0.9140625, train F1-score: 0.863270078114, test loss: 0.300373375416, test accuracy: 0.949799060822, test F1-score: 0.935308657609
iter: 8, train loss: 0.416236639023, train accuracy: 0.9140625, train F1-score: 0.922157551573, test loss: 0.279675513506, test accuracy: 0.956799149513, test F1-score: 0.945502277686
iter: 9, train loss: 0.324032753706, train accuracy: 0.9375, train F1-score: 0.896118435692, test loss: 0.294657498598, test accuracy: 0.957399129868, test F1-score: 0.946386633889
iter: 10, train loss: 0.418241947889, train accuracy: 0.9140625, train F1-score: 0.9220

iter: 50, train loss: 0.324056565762, train accuracy: 0.9375, train F1-score: 0.924618085192, test loss: 0.19765137136, test accuracy: 0.979799091816, test F1-score: 0.973224208233
iter: 51, train loss: 0.339298158884, train accuracy: 0.9375, train F1-score: 0.955577151598, test loss: 0.179092109203, test accuracy: 0.977624177933, test F1-score: 0.971076165664
iter: 52, train loss: 0.335378408432, train accuracy: 0.9453125, train F1-score: 0.939812184343, test loss: 0.169038593769, test accuracy: 0.983849048615, test F1-score: 0.977501305897
iter: 53, train loss: 0.323740690947, train accuracy: 0.9375, train F1-score: 0.938330885879, test loss: 0.162726998329, test accuracy: 0.983099102974, test F1-score: 0.976814531961
iter: 54, train loss: 0.336368918419, train accuracy: 0.9453125, train F1-score: 0.949245043167, test loss: 0.213970392942, test accuracy: 0.973824083805, test F1-score: 0.966054878264
iter: 55, train loss: 0.275933682919, train accuracy: 0.953125, train F1-score: 0.923

iter: 95, train loss: 0.287406802177, train accuracy: 0.953125, train F1-score: 0.902447805615, test loss: 0.176517412066, test accuracy: 0.977099120617, test F1-score: 0.970494792991
iter: 96, train loss: 0.364624321461, train accuracy: 0.921875, train F1-score: 0.977015961857, test loss: 0.206892132759, test accuracy: 0.976899027824, test F1-score: 0.970503545414
iter: 97, train loss: 0.272039592266, train accuracy: 0.953125, train F1-score: 0.924493810528, test loss: 0.171179562807, test accuracy: 0.981949090958, test F1-score: 0.975478912753
iter: 98, train loss: 0.286349773407, train accuracy: 0.9453125, train F1-score: 0.964457208188, test loss: 0.163076460361, test accuracy: 0.984099090099, test F1-score: 0.977817869638
iter: 99, train loss: 0.327093958855, train accuracy: 0.9375, train F1-score: 0.95257260101, test loss: 0.168915092945, test accuracy: 0.98239916563, test F1-score: 0.975996863463
iter: 100, train loss: 0.390889436007, train accuracy: 0.9296875, train F1-score: 0

KeyboardInterrupt: 