# Building and Training a Neural Network

In this notebook, we load a dataset of positives and negatives and split each of them into 80% for training, 10% for validation and 10% for testing. We then build a Multi-Layer Perceptron (MLP) neural network using TensorFlow, train it on the training dataset, and subsequently evaluate it on the validation and testing datasets.

Finally, we will use `tf.train.Saver` to save a checkpoint of the model.

In [1]:
import numpy as np
import tensorflow as tf
np.random.seed(0)

### Load and Split the Dataset

We first load the dataset files and split them into train (80%), validation (10%) and test (10%).

In [2]:
def load_dataset(name, perc):
    """Load a random subset of the processed files for ``name`` into one array.

    Files matching ``datasets/processed/<name>.*`` are listed in sorted order,
    shuffled with NumPy's global RNG, and the first ``perc`` fraction of them
    is loaded with ``np.load`` and stacked row-wise.

    Args:
        name: dataset prefix, e.g. ``'positives'``.
        perc: fraction of the matching files to load (the count is rounded down).

    Returns:
        The result of ``np.vstack`` over the loaded arrays.

    Raises:
        ValueError: if no file matches or ``perc`` selects zero files.
    """
    import glob
    pattern = 'datasets/processed/{}.*'.format(name)
    # sort before shuffling so the selection depends on the RNG state,
    # not on the order in which glob happens to list the files
    files = sorted(glob.glob(pattern))
    np.random.shuffle(files)
    files = files[:int(len(files) * perc)]
    if not files:
        # np.vstack([]) would fail with a message that hides the real cause
        raise ValueError('no files selected for {!r} (pattern {!r}, perc={})'.format(
            name, pattern, perc))
    return np.vstack([np.load(f) for f in files])

# Load 30% of the positive files and 5% of the negative files
# (perc is the fraction of matching files that load_dataset keeps).
positives = load_dataset('positives', perc=.3)
negatives = load_dataset('negatives', perc=.05)

In [3]:
def split_dataset(data, train_frac=.8, valid_frac=.1):
    """Shuffle ``data`` and split it into train / validation / test parts.

    The input is not modified: the rows are reordered through a random
    permutation (NumPy's global RNG), which produces a shuffled copy.

    Args:
        data: array-like whose first axis indexes the samples.
        train_frac: fraction of the samples for the training part (default 0.8).
        valid_frac: fraction of the samples for the validation part (default 0.1).

    Returns:
        ``(train, valid, test)``; sizes are rounded down and the test part
        receives whatever remains after the other two.
    """
    data = np.asarray(data)
    # Index with a permutation instead of np.random.shuffle(data): shuffling in
    # place would silently reorder the caller's array on every call.
    shuffled = data[np.random.permutation(len(data))]
    train_s = int(len(data) * train_frac)
    valid_s = int(len(data) * valid_frac)
    train = shuffled[:train_s]
    valid = shuffled[train_s:train_s + valid_s]
    test = shuffled[train_s + valid_s:]
    return train, valid, test

# Each class is split on its own, so every split receives its share of both classes
train_p, valid_p, test_p = split_dataset(positives)
train_n, valid_n, test_n = split_dataset(negatives)

# print(...) with a single argument behaves the same on Python 2 and Python 3
print('Train: [N] {} [P] {}'.format(len(train_n), len(train_p)))
print('Valid: [N] {} [P] {}'.format(len(valid_n), len(valid_p)))
print('Test:  [N] {} [P] {}'.format(len(test_n), len(test_p)))

Train: [N] 5760 [P] 3680
Valid: [N] 720 [P] 460
Test:  [N] 720 [P] 460


In [4]:
# Merge positive and negative training inputs and labels and shuffle them

def merge_datasets(negatives, positives):
    """Stack negatives and positives into one shuffled set with 0/1 labels.

    Returns a pair ``(samples, labels)``: ``samples`` is the concatenation of
    both inputs along the first axis and ``labels`` is a float32 column that
    holds 0 for rows taken from ``negatives`` and 1 for rows taken from
    ``positives``. Both are reordered with the same random permutation.
    """
    n_neg, n_pos = len(negatives), len(positives)
    merged = np.concatenate([negatives, positives], axis=0)
    # labels line up with `merged`: a block of zeros followed by a block of ones
    labels = np.concatenate([
        np.zeros((n_neg, 1), dtype=np.float32),
        np.ones((n_pos, 1), dtype=np.float32),
    ], axis=0)
    # one shared ordering keeps every sample paired with its label
    order = np.random.permutation(len(merged))
    return merged[order], labels[order]


# Build the shuffled training set: `train` holds the feature rows, `labels` the matching 0/1 targets
train, labels = merge_datasets(train_n, train_p)

In [5]:
# Generator function for yielding batches from a dataset

def batch_looper(inputs, labels, batch_size, loop=False):
    """Yield ``(inputs_batch, labels_batch)`` pairs of up to ``batch_size`` records.

    Each yielded value is a 2-tuple: a tuple of input records and a tuple of
    the corresponding labels.

    Args:
        inputs: iterable of input records.
        labels: iterable of labels, paired with ``inputs`` via ``zip``.
        batch_size: number of records per full batch.
        loop: if False, make a single pass and finish with the remaining
            partial batch (if any). If True, keep cycling over the data;
            a batch may then span the end of one pass and the start of the next.

    No empty batch is ever yielded, and the generator stops instead of
    spinning forever when a pass produces no records.
    """
    i = 0
    batch = []
    while True:
        saw_record = False
        for record in zip(inputs, labels):
            saw_record = True
            batch.append(record)
            i += 1
            if i % batch_size == 0:
                # transpose [(input, label), ...] into ((inputs...), (labels...))
                yield tuple(zip(*batch))
                batch = []
        if not loop or not saw_record:
            # only emit the leftover records when there are some; an empty
            # batch cannot be unpacked into (inputs, labels) by the caller
            if batch:
                yield tuple(zip(*batch))
            break

In [6]:
# Peek at one batch of 5 records to sanity-check the generator output.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
batches = batch_looper(train, labels, 5)
b1, l1 = next(batches)
print('features')
print(np.vstack(b1))
print('labels')
print(np.vstack(l1))

features
[[ 0.  1.  0. ...,  0.  0.  0.]
 [ 1.  0.  0. ...,  0.  0.  0.]
 [ 1.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 1.  0.  0. ...,  0.  0.  0.]]
labels
[[ 0.]
 [ 0.]
 [ 0.]
 [ 1.]
 [ 1.]]


### Construct the Neural Network

We construct an MLP with two hidden layers.

#### Placeholders

We define placeholders for our input features (`x`) and labels (`y`)

In [7]:
# number of features per example, i.e. the width of the training matrix
input_size = train.shape[1]
# batch of input feature vectors; the leading None leaves the batch size open
x = tf.placeholder(tf.float32, shape=[None, input_size])
# batch of labels, one value per example
y = tf.placeholder(tf.float32, shape=[None, 1])

#### Hyper Parameters

Define the number of layers and their respective sizes as well as the learning rate.

In [8]:
# Hyper parameters:
#   layers - one (size, activation) pair per hidden layer, in order
#   lr     - learning rate
HP = dict(
    layers=[(1000, tf.sigmoid), (900, tf.sigmoid)],
    lr=0.005,
)

#### Layers

We define two hidden layers (sizes `1000` and `900` respectively) and the output layer (size 1) and apply the `tf.sigmoid` activation in each layer.

In [9]:
def init_layer(input_tensor, input_size, output_size, activation, name):
    """Create one fully connected layer and return its activated output.

    Two variables are created through ``tf.get_variable``:
    ``<name>_W`` with shape ``(input_size, output_size)``, initialised from a
    truncated normal with stddev 0.1, and ``<name>_b`` with shape
    ``(output_size,)``, initialised to the constant 0.1.

    Returns ``activation(matmul(input_tensor, W) + b)``.
    """
    weights = tf.get_variable(
        name='{}_W'.format(name),
        shape=(input_size, output_size),
        initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32),
        dtype=tf.float32)
    biases = tf.get_variable(
        name='{}_b'.format(name),
        shape=(output_size,),
        initializer=tf.constant_initializer(value=0.1, dtype=tf.float32),
        dtype=tf.float32)
    pre_activation = tf.matmul(input_tensor, weights) + biases
    return activation(pre_activation)

In [10]:
# Chain the hidden layers: each layer consumes the previous layer's output
current_input = x
current_size = input_size
layers = []

for i, (layer_size, activation) in enumerate(HP['layers'], 1):
    layer = init_layer(current_input, current_size, layer_size, activation, name='layer_{}'.format(i))
    current_input = layer
    current_size = layer_size
    layers.append(layer)

# output layer: a single sigmoid unit.
# It is fed `current_input` rather than the loop variable `layer`: the two are
# the same tensor after the loop, but `layer` is undefined when HP['layers']
# is empty, whereas `current_input` then falls back to the raw input `x`.
output = init_layer(current_input, current_size, 1, tf.sigmoid, name='output')

#### Loss function

In [11]:
# loss function: mean of the squared difference between the network output and the label
loss = tf.reduce_mean(tf.pow(output - y, 2))

In [12]:
# Summaries for Tensorboard
# number of training steps run so far (fed on every step); divisor of the running mean
current_epoch = tf.placeholder(tf.float32)
# running sum of the per-step losses; kept out of the trainable variables
sum_loss = tf.Variable(initial_value=0., trainable=False, dtype=tf.float32)
add_loss = tf.assign_add(sum_loss, loss)
# Derive the mean from the value returned by assign_add (the updated sum).
# Reading `sum_loss` directly has no defined ordering relative to `add_loss`
# inside a single session.run, so the mean could miss the current step's loss.
# Consequence: evaluating `mean_loss` (or the merged summaries) also performs the add.
mean_loss = add_loss / current_epoch
tf.summary.scalar('loss', loss)
tf.summary.scalar('mean_loss', mean_loss)

<tf.Tensor 'mean_loss:0' shape=() dtype=string>

#### Optimizer

We train using Gradient Descent with a learning rate of `0.005`.

In [13]:
# train step: one gradient-descent update that minimizes `loss`, with the learning rate taken from HP['lr']
train_step = tf.train.GradientDescentOptimizer(learning_rate=HP['lr']).minimize(loss)

In [14]:
# Initialize a saver for saving the model (used at the end of the notebook to write a checkpoint)
saver = tf.train.Saver()

### Training

We start a new TensorFlow `Session` and initialize all variables (the weight and bias matrices defined in each layer). We then iterate through batches from our training dataset and run the training step with each batch.

In [15]:
# initialize a new tensorflow session with the default Graph
session = tf.Session()
# single op that evaluates all summaries registered so far
merged = tf.summary.merge_all()
# summary writer for Tensorboard: writes to ./train and is given the session graph
train_summary = tf.summary.FileWriter('./train', session.graph)
# run the initializers of all global variables before any training step
session.run(tf.global_variables_initializer())

In [16]:
# Train on mini-batches of 10 examples; with loop=True the generator keeps cycling over the training set.
train_batches = batch_looper(train, labels, 10, loop=True)
# NOTE: despite its name, total_epochs counts training steps (batches), not passes over the data.
total_epochs = 0
for e in range(1, 80 + 1):        # 80 reporting rounds ...
    for i in range(1, 20 + 1):    # ... of 20 training steps each
        total_epochs += 1
        batch, lbl = next(train_batches)
        soutput = session.run([merged, train_step, loss, add_loss, mean_loss],
                              feed_dict={x: batch, y: lbl, current_epoch: total_epochs})
        summary = soutput[0]
        train_summary.add_summary(summary, total_epochs)
    # mean_loss is the last fetch: running mean of the loss over all steps so far
    meanloss = soutput[-1]
    # print(...) with a single argument behaves the same on Python 2 and Python 3
    print('[{}] Loss: {:.5f}'.format(e, meanloss))
# the writer buffers events; flush so Tensorboard sees the complete run
train_summary.flush()

[1] Loss: 0.31048
[2] Loss: 0.27416
[3] Loss: 0.26139
[4] Loss: 0.25563
[5] Loss: 0.25179
[6] Loss: 0.24979
[7] Loss: 0.24697
[8] Loss: 0.24483
[9] Loss: 0.24377
[10] Loss: 0.24293
[11] Loss: 0.24299
[12] Loss: 0.24099
[13] Loss: 0.23922
[14] Loss: 0.23900
[15] Loss: 0.23818
[16] Loss: 0.23652
[17] Loss: 0.23565
[18] Loss: 0.23428
[19] Loss: 0.23493
[20] Loss: 0.23397
[21] Loss: 0.23331
[22] Loss: 0.23320
[23] Loss: 0.23293
[24] Loss: 0.23219
[25] Loss: 0.23186
[26] Loss: 0.23189
[27] Loss: 0.23116
[28] Loss: 0.23101
[29] Loss: 0.23086
[30] Loss: 0.23079
[31] Loss: 0.23067
[32] Loss: 0.23057
[33] Loss: 0.23008
[34] Loss: 0.22989
[35] Loss: 0.22923
[36] Loss: 0.22893
[37] Loss: 0.22887
[38] Loss: 0.22833
[39] Loss: 0.22784
[40] Loss: 0.22753
[41] Loss: 0.22740
[42] Loss: 0.22713
[43] Loss: 0.22689
[44] Loss: 0.22638
[45] Loss: 0.22592
[46] Loss: 0.22574
[47] Loss: 0.22555
[48] Loss: 0.22546
[49] Loss: 0.22506
[50] Loss: 0.22471
[51] Loss: 0.22454
[52] Loss: 0.22457
[53] Loss: 0.22415
[5

### Testing

To evaluate accuracy, we score both negatives and positives using the trained model.

We compute the accuracy by treating scores > 0.5 as positives (**1**) and all other scores as negatives (**0**), and comparing the result with the correct labels. The accuracy is the fraction of correct predictions.

In [17]:
def evaluate(positives, negatives, threshold=.5, batch_size=100):
    """Score both classes with the trained model, print and return the accuracy.

    Relies on the notebook globals ``session``, ``output`` and ``x``.

    Args:
        positives: samples whose correct label is 1.
        negatives: samples whose correct label is 0.
        threshold: scores strictly above this value count as positive (default 0.5).
        batch_size: number of samples fed to the model per run (default 100).

    Returns:
        The fraction of samples whose thresholded score matches its label.

    Raises:
        ValueError: if both ``positives`` and ``negatives`` are empty.
    """
    def _score(samples):
        # Slice the samples directly, so there is never an empty batch to feed
        scores = []
        for start in range(0, len(samples), batch_size):
            chunk = samples[start:start + batch_size]
            scores.extend(session.run(output, feed_dict={x: chunk}).flatten())
        return scores

    if len(positives) + len(negatives) == 0:
        raise ValueError('evaluate() needs at least one sample')

    # positives are scored first, so the labels follow the same order
    scores = np.array(_score(positives) + _score(negatives), dtype=np.float32)
    # builtin bool instead of the np.bool alias, which recent NumPy releases no longer provide
    labels = np.concatenate([np.ones(len(positives), dtype=bool),
                             np.zeros(len(negatives), dtype=bool)])

    # compute accuracy
    predictions = scores > threshold
    accuracy = (predictions == labels).mean()

    # print(...) with a single argument behaves the same on Python 2 and Python 3
    print('Accuracy: {}'.format(accuracy))
    return accuracy

In [18]:
# accuracy on the validation split
evaluate(valid_p, valid_n)

Accuracy: 0.699152542373


In [19]:
# accuracy on the held-out test split
evaluate(test_p, test_n)

Accuracy: 0.708474576271


### Save the model

In [20]:
# add placeholder and prediction op into collections for easy access on load:
# 'inputs' holds the feature placeholder `x`, 'predictor' holds the network output
tf.add_to_collection('inputs', x)
tf.add_to_collection('predictor', output)

In [21]:
# write the checkpoint with prefix ./models/mlp; global_step appends the step count to the name
saver.save(session, './models/mlp', global_step=total_epochs)

'./models/mlp-1600'