In [1]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import tensorflow as tf  # Version 1.0.0 
from sklearn import metrics

import os


INPUT_SIGNAL_TYPES = [
    "body_acc_x_",
    "body_acc_y_",
    "body_acc_z_"
#     "body_rotation_x_",
#     "body_rotation_y_",
#     "body_rotation_z_",
#     "body_attitude_x_",
#     "body_attitude_y_",
#     "body_attitude_z_",
#     "body_gravity_x_",
#     "body_gravity_y_",
#     "body_gravity_z_"
]



# Output classes
LABELS = [
    "0",
    "1",
    "2",
    "3",
    "4",
    "5"
] 



DATASET_PATH = "/home/soon2soon/Notebooks/DreamChallenge/sc2/processed_data/"


DATASET_FINAL_TEST_PATH = "/home/soon2soon/Notebooks/DreamChallenge/data/processed_data_test/"


print("\n" + "Dataset is now located at: " + DATASET_PATH)



Dataset is now located at: /home/soon2soon/Notebooks/DreamChallenge/sc2/processed_data/


## Preparing dataset:

In [2]:
TRAIN = "train/"
TEST = "test/"


def load_X(X_signals_paths):
    X_signals = []
    
    for signal_type_path in X_signals_paths:
        file = open(signal_type_path, 'r')
        print(signal_type_path)
        
        X_signals.append(
            [np.array(serie, dtype=np.float32) for serie in [
                row.replace('  ', ' ').strip().split(' ') for row in file
                
            ]]
        )
        
        file.close()

    print(np.array(X_signals).shape)
    return np.transpose(np.array(X_signals), (1, 2, 0))

X_train_signals_paths = [
    DATASET_PATH + TRAIN + signal + "train.txt" for signal in INPUT_SIGNAL_TYPES
]


X_test_signals_paths = [
    DATASET_PATH + TEST + signal + "test.txt" for signal in INPUT_SIGNAL_TYPES
]

X_test_final_signals_paths = [
    DATASET_FINAL_TEST_PATH + 'test/' + signal + "test.txt" for signal in INPUT_SIGNAL_TYPES
]



X_train = load_X(X_train_signals_paths)

X_test = load_X(X_test_signals_paths)

# X_test_final = load_X(X_test_final_signals_paths)



def load_y(y_path):
    file = open(y_path, 'r')
    print(y_path)
    # Read dataset from disk, dealing with text file's syntax
    y_ = np.array(
        [elem for elem in [
            row.replace('  ', ' ').strip().split(' ') for row in file
        ]], 
        dtype=np.int32
    )
    file.close()
    
    return y_

y_train_path = DATASET_PATH + TRAIN + "y_train.txt"

y_test_path = DATASET_PATH + TEST + "y_test.txt"

# y_test_final_path = DATASET_FINAL_TEST_PATH + TEST + "y_test.txt"



y_train = load_y(y_train_path)

y_test = load_y(y_test_path)

# y_test_final = load_y(y_test_final_path)




print("Dataset load done.")


/home/soon2soon/Notebooks/DreamChallenge/sc2/processed_data/train/body_acc_x_train.txt
/home/soon2soon/Notebooks/DreamChallenge/sc2/processed_data/train/body_acc_y_train.txt
/home/soon2soon/Notebooks/DreamChallenge/sc2/processed_data/train/body_acc_z_train.txt
(3, 4651, 499)
/home/soon2soon/Notebooks/DreamChallenge/sc2/processed_data/test/body_acc_x_test.txt
/home/soon2soon/Notebooks/DreamChallenge/sc2/processed_data/test/body_acc_y_test.txt
/home/soon2soon/Notebooks/DreamChallenge/sc2/processed_data/test/body_acc_z_test.txt
(3, 517, 499)
/home/soon2soon/Notebooks/DreamChallenge/sc2/processed_data/train/y_train.txt
/home/soon2soon/Notebooks/DreamChallenge/sc2/processed_data/test/y_test.txt
Dataset load done.


## Additionnal Parameters:


In [3]:
# Input Data 

training_data_count = len(X_train)  # training series (with 50% overlap between each serie)


print("+ training_data_count",training_data_count )

test_data_count = len(X_test)  # number of testing series
n_steps = len(X_train[0])  # timesteps per series
n_input = len(X_train[0][0])  # number of input parameters per timestep

print("+ test_data_count",test_data_count )

print("+ n_steps", n_steps)
print("+ n_input", n_input)



n_hidden = 96 # Hidden layer num of features
n_classes = 6 
n_hidden_3 = 32

# Training parameters

learning_rate = 0.001
lambda_loss_amount = 0.0015
training_iters = training_data_count * 30  # Loop xx times on the dataset
batch_size = 100
display_iter = 5000  # To show test set accuracy during training



X_train = np.nan_to_num(X_train)
X_test = np.nan_to_num(X_test)


print("(X_train shape, y shape, every X_train's mean, every X_train's standard deviation)")
print(X_train.shape, y_train.shape, np.mean(X_train), np.std(X_train))
print("(X_test shape, y shape, every X_test's mean, every X_test's standard deviation)")
print(X_test.shape, y_test.shape, np.mean(X_test), np.std(X_test))



+ training_data_count 4651
+ test_data_count 517
+ n_steps 499
+ n_input 3
(X_train shape, y shape, every X_train's mean, every X_train's standard deviation)
(4651, 499, 3) (4651, 1) -1.25515 5.71322
(X_test shape, y shape, every X_test's mean, every X_test's standard deviation)
(517, 499, 3) (517, 1) -1.34615 5.76437


## Utility functions for training:

In [4]:
def BiRNN(x, weights, biases):

    # Unstack to get a list of 'timesteps' tensors of shape (batch_size, num_input)
    x = tf.unstack(x, n_steps, 1)

    # Forward direction cell
    lstm_fw_cell = tf.contrib.rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
    
    lstm_fw_cell = tf.contrib.rnn.DropoutWrapper(lstm_fw_cell, output_keep_prob=0.7) 
    
    # Backward direction cell
    lstm_bw_cell = tf.contrib.rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
    
    lstm_bw_cell = tf.contrib.rnn.DropoutWrapper(lstm_bw_cell, output_keep_prob=0.7) 

    # Get lstm cell output
    
    outputs, _, _ = tf.contrib.rnn.static_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x, dtype=tf.float32)


    # Linear activation, using rnn inner loop last output
    return tf.matmul(outputs[-1], weights['out']) + biases['out']

# def BiRNN_attn(x, weights, biases):

#     # Prepare data shape to match `rnn` function requirements
#     # Current data input shape: (batch_size, timesteps, n_input)
#     # Required shape: 'timesteps' tensors list of shape (batch_size, num_input)

#     # Unstack to get a list of 'timesteps' tensors of shape (batch_size, num_input)
#     x = tf.unstack(x, n_steps, 1)

#     # Define lstm cells with tensorflow
#     # Forward direction cell
#     lstm_fw_cell = tf.contrib.rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
    
#     drop = tf.contrib.rnn.DropoutWrapper(lstm_fw_cell, input_keep_prob = 0.7)
    
#     lstm_fw_cell = tf.contrib.rnn.AttentionCellWrapper(drop, attn_length=4, state_is_tuple = True)

    
#     # Backward direction cell
#     lstm_bw_cell = tf.contrib.rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
    
#     drop = tf.contrib.rnn.DropoutWrapper(lstm_bw_cell, input_keep_prob = 0.7)
    
#     lstm_bw_cell = tf.contrib.rnn.AttentionCellWrapper(drop, attn_length=4, state_is_tuple = True)
    
    
#     # Get lstm cell output
    
#     outputs, _, _ = tf.contrib.rnn.static_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x, dtype=tf.float32)


#     # Linear activation, using rnn inner loop last output
#     return tf.matmul(outputs[-1], weights['out']) + biases['out']


def LSTM_RNN_3(_X, _weights, _biases):
    # Reshape to split input to 3 channel List
    _X = tf.transpose(_X, [2,0,1])
    _X = tf.unstack(_X)
    
    _X_acc = tf.stack(_X[:3])
    _X_rot = tf.stack(_X[3:6])
    _X_gra = tf.stack(_X[6:])
    # New shape: (3, batch_size, n_steps)
    
    _X_acc = tf.transpose(_X_acc, [2, 1, 0])
    _X_rot = tf.transpose(_X_rot, [2, 1, 0])
    _X_gra = tf.transpose(_X_gra, [2, 1, 0])

    # Reshape to prepare input to hidden activation
    _X_acc = tf.reshape(_X_acc, [-1, 3])
    _X_rot = tf.reshape(_X_rot, [-1, 3])
    _X_gra = tf.reshape(_X_gra, [-1, 3])
    # new shape: (3*batch_size, n_input)

    # Linear activation
    _X_acc = tf.nn.relu(tf.matmul(_X_acc, _weights['hidden_acc']) + _biases['hidden_acc'])
    _X_rot = tf.nn.relu(tf.matmul(_X_rot, _weights['hidden_rot']) + _biases['hidden_rot'])
    _X_gra = tf.nn.relu(tf.matmul(_X_gra, _weights['hidden_gra']) + _biases['hidden_gra'])
    
    _X = tf.concat([_X_acc, _X_rot, _X_gra],1)
    # Split data because rnn cell needs a list of inputs for the RNN inner loop
    _X = tf.split(_X, n_steps, 0)
    # new shape: n_steps * (batch_size, n_hidden)


    # Define two stacked LSTM cells (two recurrent layers deep) with tensorflow
    lstm_cell_1 = tf.contrib.rnn.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True)
    
    lstm_cell_1 = tf.contrib.rnn.DropoutWrapper(lstm_cell_1, input_keep_prob=0.7, output_keep_prob=0.7)
    
    lstm_cell_2 = tf.contrib.rnn.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True)
    
    lstm_cell_2 = tf.contrib.rnn.DropoutWrapper(lstm_cell_2, input_keep_prob=0.7, output_keep_prob=0.7) 
    
        
    lstm_cells = tf.contrib.rnn.MultiRNNCell([lstm_cell_1, lstm_cell_2], state_is_tuple=True)
    


    # Get LSTM cell output
    outputs, states = tf.contrib.rnn.static_rnn(lstm_cells, _X, dtype=tf.float32)
    

    # Get last time step's output feature for a "many to one" style classifier
    lstm_last_output = outputs[-1]
    
    # Linear activation
    return tf.matmul(lstm_last_output, _weights['out']) + _biases['out'], lstm_last_output

def LSTM_RNN(_X, _weights, _biases):
    
    # input shape: (batch_size, n_steps, n_input)
    _X = tf.transpose(_X, [1, 0, 2])  # permute n_steps and batch_size
    # Reshape to prepare input to hidden activation
    _X = tf.reshape(_X, [-1, n_input]) 
    # new shape: (n_steps*batch_size, n_input)
    
    # Linear activation
    _X = tf.nn.relu(tf.matmul(_X, _weights['hidden']) + _biases['hidden'])
    # Split data because rnn cell needs a list of inputs for the RNN inner loop
    _X = tf.split(_X, n_steps, 0) 
    # new shape: n_steps * (batch_size, n_hidden)

    # Define two stacked LSTM cells (two recurrent layers deep) with tensorflow
    lstm_cell_1 = tf.contrib.rnn.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True)
    
    lstm_cell_1 = tf.contrib.rnn.DropoutWrapper(lstm_cell_1, input_keep_prob=0.7, output_keep_prob=0.7)
    
    lstm_cell_2 = tf.contrib.rnn.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True)
    
    lstm_cell_2 = tf.contrib.rnn.DropoutWrapper(lstm_cell_2, input_keep_prob=0.7, output_keep_prob=0.7) 
    
        
    lstm_cells = tf.contrib.rnn.MultiRNNCell([lstm_cell_1, lstm_cell_2], state_is_tuple=True)
    


    # Get LSTM cell output
    outputs, states = tf.contrib.rnn.static_rnn(lstm_cells, _X, dtype=tf.float32)
    

    # Get last time step's output feature for a "many to one" style classifier
    lstm_last_output = outputs[-1]
    
    # Linear activation
    return tf.matmul(lstm_last_output, _weights['out']) + _biases['out'], lstm_last_output
 

def extract_batch_size(_train, step, batch_size):
    # Function to fetch a "batch_size" amount of data from "(X|y)_train" data. 
    
    shape = list(_train.shape)
    shape[0] = batch_size
    batch_s = np.empty(shape)

    for i in range(batch_size):
        # Loop index
        index = ((step-1)*batch_size + i) % len(_train)
        batch_s[i] = _train[index] 

    return batch_s


def one_hot(y_):
    # Function to encode output labels from number indexes 
    # e.g.: [[5], [0], [3]] --> [[0, 0, 0, 0, 0, 1], [1, 0, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0]]
    
    y_ = y_.reshape(len(y_))
      
    n_values = int(np.max(y_)) + 1
    
    if n_values == 1:
        n_values = 2
    
    return np.eye(n_values)[np.array(y_, dtype=np.int32)]  # Returns FLOATS


print("Function ready.")

Function ready.


In [5]:

# Graph input/output
x = tf.placeholder(tf.float32, [None, n_steps, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])


# Graph weights
weights = {
    'hidden': tf.Variable(tf.random_normal([n_input, n_hidden])), # Hidden layer weights
    'out': tf.Variable(tf.random_normal([n_hidden, n_classes], mean=1.0))
}
biases = {
    'hidden': tf.Variable(tf.random_normal([n_hidden])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}

weights_3 = {
    'hidden_acc': tf.Variable(tf.random_normal([3, n_hidden_3])), # Hidden layer weights
    'hidden_rot': tf.Variable(tf.random_normal([3, n_hidden_3])),
    'hidden_gra': tf.Variable(tf.random_normal([3, n_hidden_3])),
    'out': tf.Variable(tf.random_normal([n_hidden, n_classes], mean=1.0))
}
biases_3 = {
    'hidden_acc': tf.Variable(tf.random_normal([n_hidden_3])),
    'hidden_rot': tf.Variable(tf.random_normal([n_hidden_3])),
    'hidden_gra': tf.Variable(tf.random_normal([n_hidden_3])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}

# pred = LSTM_RNN_3(x, weights, biases)
# pred, last_state = LSTM_RNN_3(x, weights_3, biases_3)
pred, last_state = LSTM_RNN(x, weights, biases)
# pred = BiRNN(x, weights_bi, biases)
# pred = BiRNN_attn(x, weights_bi, biases) 



# Loss, optimizer and evaluation
l2 = lambda_loss_amount * sum(
    tf.nn.l2_loss(tf_var) for tf_var in tf.trainable_variables()
) # L2 loss prevents this overkill neural network to overfit the data

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=pred)) + l2 # Softmax loss
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost) # Adam Optimizer
# optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(cost) # RMS Optimizer

correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))



print("Parameter ready.")

Parameter ready.


## Train the neural network:

In [6]:
# To keep track of training's performance
test_losses = []
test_accuracies = []
train_losses = []
train_accuracies = []

# Launch the graph
sess = tf.InteractiveSession(config=tf.ConfigProto(log_device_placement=True))
init = tf.global_variables_initializer()
sess.run(init)



saver = tf.train.Saver()
save_path = "/home/soon2soon/Notebooks/DreamChallenge/sc2/model_output/model.ckpt"

# Perform Training steps with "batch_size" amount of example data at each loop
step = 1
while step * batch_size <= training_iters:
    batch_xs =         extract_batch_size(X_train, step, batch_size)
    batch_ys = one_hot(extract_batch_size(y_train, step, batch_size))

    # Fit training using batch data
    _, loss, acc = sess.run(
        [optimizer, cost, accuracy],
        feed_dict={
            x: batch_xs, 
            y: batch_ys
        }
    )
    train_losses.append(loss)
    train_accuracies.append(acc)
    
    # Evaluate network only at some steps for faster training: 
    if (step*batch_size % display_iter == 0) or (step == 1) or (step * batch_size > training_iters):
        
        # To not spam console, show training accuracy/loss in this "if"
        print("Training iter #" + str(step*batch_size) + \
              ":   Batch Loss = " + "{:.6f}".format(loss) + \
              ", Accuracy = {}".format(acc))
        
        
        # Evaluation on the test set (no learning made here - just evaluation for diagnosis)
        loss, acc = sess.run(
            [cost, accuracy], 
            feed_dict={
                x: X_test,
                y: one_hot(y_test)
            }
        )
        test_losses.append(loss)
        test_accuracies.append(acc)
        print("PERFORMANCE ON TEST SET: " + \
              "Batch Loss = {}".format(loss) + \
              ", Accuracy = {}".format(acc))
        
        
        
        print("Model saved in file: %s" % saver.save(sess, save_path, global_step=step))
        
        
        
    step += 1

print("Optimization Finished!")





# save_path = saver.save(sess, model_path)


# Accuracy for test data





# one_hot_predictions, accuracy, final_loss, l_h = sess.run(
#     [pred, accuracy, cost, last_state],
#     feed_dict={
#         x: X_test,
#         y: one_hot(y_test)
#     }
# )


# print("FINAL RESULT: " + \
#       "Batch Loss = {}".format(final_loss) + \
#       ", Accuracy = {}".format(accuracy))


# np.savetxt("output_hidden_test.txt", l_h)

# print("hidden test write donw")
#####################


# itter = 1
# chunk_size = 500
# while itter * chunk_size <= len(X_test_final):
#     batch_xs =         extract_batch_size(X_test_final, step, chunk_size)
#     batch_ys = one_hot(extract_batch_size(y_test_final, step, chunk_size))

#     # Fit training using batch data

#     a_t, _, l_h_t = sess.run(
#         [accuracy, cost, last_state],
#         feed_dict={
#             x: batch_xs,
#             y: batch_ys
#         }
#     )

#     print("FINAL RESULT: " + \
#           ", Accuracy = {}".format(a_t))

#     np.savetxt('output_state_result/'+"output_hidden_train"+str(itter)+".txt", l_h_t)

#     print(itter, "hidden test final write donw!")
#     itter= itter+1
    
# print("iter done.")






Training iter #100:   Batch Loss = 6.325527, Accuracy = 0.07999999821186066
PERFORMANCE ON TEST SET: Batch Loss = 5.8628740310668945, Accuracy = 0.13346228003501892
Model saved in file: /home/soon2soon/Notebooks/DreamChallenge/sc2/model_output/model.ckpt-1
Training iter #5000:   Batch Loss = 4.780066, Accuracy = 0.26999998092651367
PERFORMANCE ON TEST SET: Batch Loss = 3.5649452209472656, Accuracy = 0.551257312297821
Model saved in file: /home/soon2soon/Notebooks/DreamChallenge/sc2/model_output/model.ckpt-50
Training iter #10000:   Batch Loss = 3.337459, Accuracy = 0.6200000047683716
PERFORMANCE ON TEST SET: Batch Loss = 3.333059310913086, Accuracy = 0.6208897829055786
Model saved in file: /home/soon2soon/Notebooks/DreamChallenge/sc2/model_output/model.ckpt-100
Training iter #15000:   Batch Loss = 3.864054, Accuracy = 0.38999998569488525
PERFORMANCE ON TEST SET: Batch Loss = 3.205857992172241, Accuracy = 0.6421663761138916
Model saved in file: /home/soon2soon/Notebooks/DreamChallenge/s

In [None]:
sess.close()