In [71]:
import os
import random
import numpy as np
import pandas as pd
import tensorflow as tf
from utils import *

### Reading the Training and Test data
The dimension of the data shall be Trials x Contacts x Time

In [36]:
# Root folders holding the EEG recordings and their class labels
EEG_data = "Data/EEG"
Label_data = "Data/Labels"

# Training EEG data: header-less CSV loaded straight into a float32 array
train_data = pd.read_csv(os.path.join(EEG_data, 'training_set.csv'), header=None).values.astype('float32')

# Test EEG data, loaded the same way
test_data = pd.read_csv(os.path.join(EEG_data, 'test_set.csv'), header=None).values.astype('float32')

In [58]:
# Class labels of both splits: header-less CSV files cast to integer arrays
train_labels = pd.read_csv(os.path.join(Label_data, 'training_label.csv'), header=None).values.astype('int')
test_labels = pd.read_csv(os.path.join(Label_data, 'test_label.csv'), header=None).values.astype('int')

Both the Training and Test Data have a 64x64 feature size per trial, which comes from the 64-contact EEG signal sampled at 64 time points \
and which will be used for classification purposes

In [38]:
# Report the (rows, columns) shape of each split, one per line
print('Train Data Size: {}'.format(train_data.shape),
      'Test Data Size: {}'.format(test_data.shape),
      sep='\n')

Train Data Size: (76356, 4096)
Test Data Size: (8484, 4096)


In [59]:
# Start from an empty default graph and open a session whose GPU memory
# allocation grows on demand instead of being reserved up front
tf.reset_default_graph()
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
# The GPU options only take effect when the config is handed to the session
sess = tf.Session(config=config)

In [60]:
# One-hot representation of train and test labels.
# The integer labels are expanded to depth 4, size-1 dimensions are squeezed
# away, and the tensors are evaluated to NumPy arrays in the shared session
# (Tensor.eval takes the lowercase keyword `session`).
# NOTE: this cell overwrites the integer label arrays; re-run the label-loading
# cell before executing it a second time.
train_labels = tf.squeeze(tf.one_hot(indices=train_labels, depth=4)).eval(session=sess)
test_labels = tf.squeeze(tf.one_hot(indices=test_labels, depth=4)).eval(session=sess)

### Initializing the Model Hyper-parameters

In [63]:
# --- Input geometry ---
n_contacts = 64         # EEG contacts recorded at each time point
n_time = 64             # EEG time points fed to the model

# --- Network size ---
n_neurons_lstm = 256    # neurons in the LSTM layer
n_attention = 8         # neurons in the attention layer
n_neurons_FC = 64       # hidden units in the fully connected layer
n_class = 4             # number of classification classes

# --- Training schedule ---
num_epoch = 300         # number of epochs the model is trained for
keep_rate = 0.75        # keep rate of the dropout
batch_size = 128
n_batch = len(train_data) // batch_size    # whole batches per epoch

# --- Learning rate ---
lr = tf.constant(1e-4, dtype=tf.float32)   # learning rate
lr_decay_epoch = 50     # the learning rate decays every 50 epochs
lr_decay = 0.50         # multiplicative decay factor (50%)

### Initializing the Weights and Biases and creating placeholders for Input and Output

In [None]:
# Initializing the weights using normal distributed small random numbers.
# W_1 maps the concatenated forward/backward LSTM features
# (2 * n_neurons_lstm wide) to the hidden fully connected layer.
W_1 = tf.Variable(tf.truncated_normal([2 * n_neurons_lstm, n_neurons_FC], stddev=0.01))
b_1  = tf.Variable(tf.constant(0.01, shape=[n_neurons_FC]))
# W_2 maps the hidden layer to one score per class.
W_2 = tf.Variable(tf.truncated_normal([n_neurons_FC, n_class], stddev=0.01))
b_2  = tf.Variable(tf.constant(0.01, shape=[n_class]))

# Placeholders are sized from the hyper-parameters so they stay in sync with
# the model when n_time, n_contacts or n_class are changed.
X = tf.placeholder(tf.float32, [None, n_time * n_contacts])   # flattened EEG trials
y = tf.placeholder(tf.float32, [None, n_class])               # one-hot labels
dropout_prob = tf.placeholder(tf.float32)                     # dropout keep probability

### Defining Loss and Evaluation Functions

In [None]:
def loss_l2(y, y_pred,l2_norm):
    """Mean squared error between `y` and `y_pred` plus an L2 penalty.

    The penalty is `l2_norm` times the sum of `tf.nn.l2_loss` over every
    variable returned by `tf.trainable_variables()` at the moment this
    function is called. Returns the resulting scalar loss tensor.
    """
    weight_penalties = [tf.nn.l2_loss(variable) for variable in tf.trainable_variables()]
    penalty = l2_norm * tf.reduce_sum(weight_penalties)
    mse = tf.reduce_mean(tf.square(y - y_pred))
    return tf.reduce_mean(mse + penalty)

def evaluation(y, y_pred):
    """Return the fraction of rows where arg-max of `y_pred` equals arg-max of `y`.

    Both arg-max operations run along axis 1; the matches are cast to float32
    and averaged into a single accuracy tensor.
    """
    predicted_class = tf.argmax(y_pred, 1)
    true_class = tf.argmax(y, 1)
    hits = tf.cast(tf.equal(predicted_class, true_class), tf.float32)
    return tf.reduce_mean(hits)

### Creating the Attention Mechanism

In [65]:
def Attention_Layer(inputs, n_attention):
    """Collapse axis 1 of the concatenated inputs with learned attention weights.

    Args:
        inputs: tensors to concatenate along axis 2 (the model passes the
            forward/backward output pair of the bidirectional RNN; presumably
            each is (batch, time, units) - confirm against the caller).
        n_attention: width of the hidden attention projection.

    Returns:
        The concatenated inputs summed over axis 1 after weighting each
        position by a softmax score.
    """
    sequence = tf.concat(inputs, 2)
    feature_dim = sequence.shape[2].value

    # Trainable projection matrix, bias and context vector
    projection = tf.Variable(tf.random_normal([feature_dim, n_attention], stddev=0.1))
    bias = tf.Variable(tf.random_normal([n_attention], stddev=0.1))
    context = tf.Variable(tf.random_normal([n_attention], stddev=0.1))

    with tf.name_scope('v'):
        hidden = tf.tanh(tf.tensordot(sequence, projection, axes=1) + bias)

    # One scalar score per position, normalised over the last axis of the scores
    scores = tf.tensordot(hidden, context, axes=1, name='vu')
    weights = tf.nn.softmax(scores, name='alphas')

    weighted = sequence * tf.expand_dims(weights, -1)
    return tf.reduce_sum(weighted, 1)

### Creating the Main Model
The main model is a combination of a Bidirectional LSTM, an Attention Mechanism, a Fully Connected layer and a Softmax output.

In [72]:
def BiLSTM_Attention_FC_Sofmax(Input, n_time, n_contacts, n_neurons_lstm, n_attention, keep_prob,
                          W_1, b_1, W_2, b_2, is_training=True):
    """Build the BiLSTM -> attention -> fully connected -> softmax graph.

    Args:
        Input: tensor that is reshaped to [-1, n_time, n_contacts].
        n_time: size of the second axis after the reshape.
        n_contacts: size of the third axis after the reshape.
        n_neurons_lstm: number of units of each LSTM cell (forward and backward).
        n_attention: hidden width passed to `Attention_Layer`.
        keep_prob: keep probability used for the LSTM input dropout and for
            both `tf.nn.dropout` calls.
        W_1, b_1: weights and bias of the hidden fully connected layer.
        W_2, b_2: weights and bias of the output layer.
        is_training: Python bool or boolean tensor forwarded to
            `tf.layers.batch_normalization(training=...)`. The default `True`
            keeps the previous behaviour (batch statistics are always used).
            Pass a placeholder to switch to the moving averages at evaluation
            time; in that case the ops collected in `tf.GraphKeys.UPDATE_OPS`
            must be run together with the training op so the moving averages
            are actually updated.

    Returns:
        Softmax of the output layer.
    """
    Input = tf.reshape(Input, [-1, n_time, n_contacts])

    forward_lstm = tf.contrib.rnn.BasicLSTMCell(num_units=n_neurons_lstm,activation='tanh')
    backward_lstm = tf.contrib.rnn.BasicLSTMCell(num_units=n_neurons_lstm,activation='tanh')

    # Dropout is applied to the inputs of each LSTM cell
    lstm_fw_drop = tf.contrib.rnn.DropoutWrapper(cell=forward_lstm, input_keep_prob=keep_prob)
    lstm_bw_drop = tf.contrib.rnn.DropoutWrapper(cell=backward_lstm, input_keep_prob=keep_prob)

    # `outputs` is the (forward, backward) pair; the final states are discarded
    outputs, _ = tf.nn.bidirectional_dynamic_rnn(lstm_fw_drop, lstm_bw_drop, Input, dtype=tf.float32)

    attention_output = Attention_Layer(outputs,n_attention)
    attention_output_drop = tf.nn.dropout(attention_output, keep_prob)

    # Hidden fully connected layer: affine -> batch norm -> softplus -> dropout
    FC_1 = tf.matmul(attention_output_drop, W_1) + b_1
    FC_1 = tf.layers.batch_normalization(FC_1, training=is_training)
    FC_1 = tf.nn.softplus(FC_1)
    FC_1 = tf.nn.dropout(FC_1, keep_prob)

    FC_2 = tf.matmul(FC_1, W_2) + b_2

    output = tf.nn.softmax(FC_2)

    return output

### Loading the Model

In [86]:
# Build the network on the input placeholder; its dropout keep probability is
# supplied at run time through `dropout_prob`
y_pred = BiLSTM_Attention_FC_Sofmax(
    Input=X,
    n_time=n_time,
    n_contacts=n_contacts,
    n_neurons_lstm=n_neurons_lstm,
    n_attention=n_attention,
    keep_prob=dropout_prob,
    W_1=W_1, b_1=b_1,
    W_2=W_2, b_2=b_2,
)

### Loading Loss Function, Optimizer and Evaluation Function

In [None]:
# Regularised loss on the model output
loss_fn = loss_l2(y=y, y_pred=y_pred,l2_norm=0.001)

# tf.layers.batch_normalization registers its moving-average updates in the
# UPDATE_OPS collection; tying them to the train op makes every optimisation
# step refresh them as well.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    Optimizer = tf.train.AdamOptimizer(lr).minimize(loss_fn)
Global_Average_Accuracy = evaluation(y=y, y_pred=y_pred)

# merge_all() returns None when no summary op exists, which cannot be fetched
# with sess.run; register the two tracked metrics first.
tf.summary.scalar('loss', loss_fn)
tf.summary.scalar('accuracy', Global_Average_Accuracy)
merged = tf.summary.merge_all()

In [None]:
# Run the initializer op of all global variables in the shared session
tf.global_variables_initializer().run(session=sess)

### Training and Testing the Model

In [None]:
# The optimizer op is bound to the `lr` tensor it was created with, so
# rebinding the Python name `lr` to a new tensor never reaches it. The current
# rate is therefore tracked as a plain number and fed in place of `lr` on
# every optimisation step.
learning_rate = sess.run(lr)

for epoch in range(num_epoch):

    # Decaying the learning rate every `lr_decay_epoch` epochs (never at epoch 0)
    # as long as it is still above the 1e-6 floor
    if epoch % lr_decay_epoch == 0 and epoch != 0 and learning_rate > 1e-6:
        learning_rate = np.float32(learning_rate * lr_decay)

    # One epoch = n_batch optimisation steps on randomly drawn mini-batches
    for _ in range(n_batch):
        random_batch = random.sample(range(train_data.shape[0]), batch_size)
        batch_xs = train_data[random_batch]
        batch_ys = train_labels[random_batch]
        sess.run(Optimizer, feed_dict={X: batch_xs, y: batch_ys,
                                       dropout_prob: keep_rate, lr: learning_rate})

    # Training metrics on the first 100 training trials, dropout disabled
    train_accuracy, train_loss = sess.run([Global_Average_Accuracy, loss_fn],
                                          feed_dict={X: train_data[0:100], y: train_labels[0:100], dropout_prob: 1.0})
    # Test metrics on the full test set, dropout disabled
    test_accuracy, test_loss = sess.run([Global_Average_Accuracy, loss_fn],
                                        feed_dict={X: test_data, y: test_labels, dropout_prob: 1.0})

    # Print Model Accuracy
    print("Iter " + str(epoch) + ", Testing Accuracy: " + str(test_accuracy) + ", Training Accuracy: " + str(train_accuracy))
    print("Iter " + str(epoch) + ", Testing Loss: " + str(test_loss) + ", Training Loss: " + str(train_loss))
    print("Learning rate is ", learning_rate)
    print('\n')

# Predictions of the final model on the test set; computed after the loop
# because `epoch` never equals `num_epoch` inside range(num_epoch)
output_prediction = sess.run(y_pred, feed_dict={X: test_data, y: test_labels, dropout_prob: 1.0})

sess.close()