In [1]:
import tensorflow as tf
slim = tf.contrib.slim
import numpy as np
from IPython.display import clear_output
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

In [2]:
# Load the precomputed feature matrix and its labels from disk.
X = np.load('../data/features/data_x.npy')
Y = np.load('../data/features/data_y.npy')

# Shuffle features and labels together (fixed seed), then keep only the
# first 30% of the shuffled rows as the working subset.
tr_features, tr_labels = shuffle(X, Y, random_state=0)
a = tr_features.shape[0]
subset_rows = int(a*0.3)
tr_features, tr_labels = tr_features[:subset_rows], tr_labels[:subset_rows]

# Hold out 30% of the working subset for evaluation (fixed seed).
train_features, test_features, train_labels, test_labels = train_test_split(
    tr_features, tr_labels, test_size=0.3, random_state=42)

print('Train shape: ', train_features.shape)
print('Test shape: ', test_features.shape)

Train shape:  (573254, 20)
Test shape:  (245681, 20)


# CNN+RNN architecture from

https://www.isca-speech.org/archive/Interspeech_2017/pdfs/0360.PDF

In [3]:
# Network and training hyper-parameters.
# Everything a reader might want to tune lives in this cell.

n_dim = tr_features.shape[1]   # width of one feature row
n_steps = 30                   # rows grouped into one input sequence
n_hidden = 300                 # units in each GRU direction
n_classes = 2                  # width of the label / logit vectors
learning_rate = 0.01           # step size passed to the Adam optimizer
batch_size = 100               # training batch size (sequences per step)

# The training loop reads both of these but they were never defined.
# NOTE(review): the values are placeholders -- tune them for real runs.
epochs = 10                    # full passes over the training data
display_freq = 100             # print batch loss/accuracy every N iterations


In [4]:
# Network structure


def PReLU(inputs, scope):
    """Parametric ReLU with one learnable negative-side slope per channel.

    Computes max(0, x) + alpha * min(0, x).

    Args:
        inputs: float tensor whose last axis is the channel axis.
        scope: variable-scope name under which the slope variable ``alpha``
            is created.

    Returns:
        A tensor with the same shape as ``inputs``.
    """
    with tf.variable_scope(scope):
        n_channels = inputs.shape.as_list()[-1]
        alpha = tf.get_variable('alpha',
                                shape=[n_channels],
                                initializer=tf.constant_initializer(0.25),
                                dtype=tf.float32)
        # min(0, x) == -relu(-x)
        return tf.nn.relu(inputs) - alpha * tf.nn.relu(-inputs)


# Inputs are sequences of n_steps feature rows; labels are one row per sequence.
X = tf.placeholder(tf.float32, [None, n_steps, n_dim])
Y = tf.placeholder(tf.float32, [None, n_classes])

# slim.conv2d needs NHWC input, so add a trailing single-channel axis:
# [batch, n_steps, n_dim] -> [batch, n_steps, n_dim, 1].
net = tf.expand_dims(X, -1)

# slim.conv2d applies ReLU unless told otherwise; that would feed PReLU only
# non-negative values and make it a no-op, so every conv uses
# activation_fn=None and the explicit PReLU is the only non-linearity.

############ Conv-1 ###############
net = slim.conv2d(net, 16, [5, 5], stride=[1, 1], activation_fn=None, scope='conv1')
net = PReLU(net, 'conv1_activation')
# Stride [2, 1]: halves axis 1 (steps), shrinks axis 2 by one.
net = tf.nn.max_pool2d(net, strides=[2,1], ksize=[2,2], padding='VALID', name='pool1')

############ Conv-2 ###############
net = slim.conv2d(net, 16, [1, 1], stride=[1, 1], activation_fn=None, scope='conv2a')
net = PReLU(net, 'conv2a_activation')
net = slim.conv2d(net, 32, [1, 1], stride=[1, 1], activation_fn=None, scope='conv2b')
net = PReLU(net, 'conv2b_activation')
net = tf.nn.max_pool2d(net, strides=[2,1], ksize=[2,2], padding='VALID', name='pool2')

############ Conv-3 ###############
net = slim.conv2d(net, 32, [1, 1], stride=[1, 1], activation_fn=None, scope='conv3a')
net = PReLU(net, 'conv3a_activation')
net = slim.conv2d(net, 16, [3, 3], stride=[1, 1], activation_fn=None, scope='conv3b')
net = PReLU(net, 'conv3b_activation')
net = tf.nn.max_pool2d(net, strides=[2,1], ksize=[2,2], padding='VALID', name='pool3')

############ BGRU ###############
def BGRU(x, weight, bias, n_steps, n_hidden):
    """Bidirectional GRU followed by a linear projection of the last step.

    Args:
        x: tensor of shape (batch_size, n_steps, n_input).
        weight: projection matrix applied to the last RNN output. The
            bidirectional RNN concatenates forward and backward outputs, so
            its first dimension must be ``2 * n_hidden``.
        bias: bias added after the projection.
        n_steps: number of time steps along axis 1 of ``x``.
        n_hidden: number of units in each direction's GRU cell.

    Returns:
        ``matmul(last_output, weight) + bias`` (no activation applied).
    """
    # static_bidirectional_rnn wants a length-n_steps list of
    # (batch_size, n_input) tensors, so split x along the time axis.
    step_inputs = tf.unstack(x, n_steps, 1)

    # One GRU cell per direction. GRUCell has no `forget_bias` argument
    # (that belongs to the LSTM cells), so none is passed.
    gru_fw_cell = tf.nn.rnn_cell.GRUCell(n_hidden)
    gru_bw_cell = tf.nn.rnn_cell.GRUCell(n_hidden)

    # Each element of `outputs` is the depth-concatenated forward and
    # backward output for one time step: (batch_size, 2 * n_hidden).
    outputs, _, _ = tf.nn.static_bidirectional_rnn(
        gru_fw_cell, gru_bw_cell, step_inputs, dtype=tf.float32)

    # Linear activation, using the RNN's last output.
    return tf.matmul(outputs[-1], weight) + bias
    
# Collapse everything after the step axis of the conv output into one feature
# vector per step, so the RNN receives a rank-3 (batch, steps, features) tensor.
# The step count is read from `net` itself rather than taken from n_steps.
# NOTE(review): assumes axis 1 of `net` is the step axis and that all
# non-batch dimensions are statically known -- confirm.
conv_shape = net.shape.as_list()
rnn_steps = conv_shape[1]
rnn_features = int(np.prod(conv_shape[2:]))
rnn_input = tf.reshape(net, [-1, rnn_steps, rnn_features])

# The bidirectional RNN output concatenates both directions, so the projection
# takes 2 * n_hidden inputs.
weight = tf.Variable(tf.random_normal([2 * n_hidden, n_classes]))
bias = tf.Variable(tf.random_normal([n_classes]))
output_logits = BGRU(rnn_input, weight, bias, rnn_steps, n_hidden)
y_pred = tf.nn.softmax(output_logits)


In [None]:
########## cost function and optimizer #############
# Predicted class index for every example in the batch.
cls_prediction = tf.argmax(output_logits, axis=1, name='predictions')

# Mean softmax cross-entropy between the label rows in Y and the raw logits.
# The _v2 op replaces the deprecated softmax_cross_entropy_with_logits; Y is a
# placeholder, so allowing gradients into the labels changes nothing here.
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y, logits=output_logits),
    name='loss')
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, name='Adam-op').minimize(loss)

# Accuracy: fraction of examples whose predicted class matches argmax of Y.
# Reuses cls_prediction instead of building a second identical argmax op.
correct_prediction = tf.equal(cls_prediction, tf.argmax(Y, 1), name='correct_pred')
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')

# Op that initializes every variable created so far.
init = tf.global_variables_initializer()

In [None]:
# TensorBoard: open a session and write its graph under ./logdir so the
# network structure can be inspected.
session = tf.Session()
writer = tf.summary.FileWriter(logdir='logdir', graph=session.graph)

In [None]:
def get_next_batch(x, y, start, end):
    """Return the ``[start:end)`` slice of both ``x`` and ``y`` as a pair.

    Args:
        x: sliceable sequence of inputs.
        y: sliceable sequence of targets.
        start: index of the first item in the batch.
        end: index one past the last item in the batch.

    Returns:
        Tuple ``(x[start:end], y[start:end])``.
    """
    window = slice(start, end)
    return x[window], y[window]


def make_sequences(features, labels, n_steps):
    """Group consecutive rows into non-overlapping windows of ``n_steps`` rows.

    Trailing rows that do not fill a whole window are dropped.

    Args:
        features: array of shape (n_rows, ...).
        labels: array with one entry per row of ``features``.
        n_steps: number of rows per window.

    Returns:
        Tuple ``(seq_features, seq_labels)`` where ``seq_features`` has shape
        (n_windows, n_steps, ...) and ``seq_labels`` holds the label of the
        last row of each window.
    """
    n_windows = len(features) // n_steps
    usable = n_windows * n_steps
    seq_features = features[:usable].reshape((n_windows, n_steps) + features.shape[1:])
    seq_labels = labels[:usable].reshape((n_windows, n_steps) + labels.shape[1:])[:, -1]
    return seq_features, seq_labels


# The network consumes sequences of n_steps rows, so the row-level arrays are
# windowed once up front. Previously each batch of batch_size rows was reshaped
# to (batch_size, n_steps, n_dim), which cannot succeed because the element
# counts differ.
# NOTE(review): labelling a window by its last row is an assumption, and the
# rows were shuffled individually when the data was loaded, so windows do not
# contain temporally adjacent frames -- confirm the intended sequence
# construction.
train_seq_x, train_seq_y = make_sequences(train_features, train_labels, n_steps)
valid_seq_x, valid_seq_y = make_sequences(test_features, test_labels, n_steps)
# Validate on at most the first 1000 sequences.
valid_seq_x, valid_seq_y = valid_seq_x[:1000], valid_seq_y[:1000]

sess = tf.InteractiveSession()
sess.run(init)
global_step = 0
# Number of training iterations in each epoch
num_tr_iter = len(train_seq_x) // batch_size
for epoch in range(epochs):
    print('Training epoch: {}'.format(epoch + 1))
    for iteration in range(num_tr_iter):
        global_step += 1
        start = iteration * batch_size
        end = start + batch_size
        x_batch, y_batch = get_next_batch(train_seq_x, train_seq_y, start, end)
        # Run optimization op (backprop). The feed keys are the X / Y
        # placeholders; lowercase x / y were never defined.
        feed_dict_batch = {X: x_batch, Y: y_batch}
        sess.run(optimizer, feed_dict=feed_dict_batch)

        if iteration % display_freq == 0:
            # Calculate and display the batch loss and accuracy
            loss_batch, acc_batch = sess.run([loss, accuracy],
                                             feed_dict=feed_dict_batch)

            print("iter {0:3d}:\t Loss={1:.2f},\tTraining Accuracy={2:.01%}".
                  format(iteration, loss_batch, acc_batch))

    # Run validation after every epoch
    feed_dict_valid = {X: valid_seq_x, Y: valid_seq_y}
    loss_valid, acc_valid = sess.run([loss, accuracy], feed_dict=feed_dict_valid)
    print('---------------------------------------------------------')
    print("Epoch: {0}, validation loss: {1:.2f}, validation accuracy: {2:.01%}".
          format(epoch + 1, loss_valid, acc_valid))
    print('---------------------------------------------------------')
 