### Importing libraries

In [1]:
import os
import torch
import numpy as np
import torch.nn as nn
import tensorflow as tf
import matplotlib.pyplot as plt
import optparse

### Loading data

In [2]:
# Load data
trainx = np.load('data/train.npy', encoding='bytes')
trainy = np.load('data/train_labels.npy', encoding='bytes')
testx = np.load('data/test.npy', encoding='bytes')
valx = np.load('data/dev.npy', encoding='bytes')
valy = np.load('data/dev_labels.npy', encoding='bytes')

# Preprocessing data
trainx = np.concatenate(trainx.tolist())
trainy = np.concatenate(trainy.tolist())
testx = np.concatenate(testx.tolist())
valx = np.concatenate(valx.tolist())
valy = np.concatenate(valy.tolist())

# Turn into tensors
trainx = torch.from_numpy(trainx).float()
trainy = torch.from_numpy(trainy.astype(int))
testx = torch.from_numpy(testx).float()
valx = torch.from_numpy(valx).float()
valy = torch.from_numpy(valy.astype(int))

### Logging for Tensorboard

In [3]:
class Logger(object):
    """Logging in tensorboard without tensorflow ops."""

    def __init__(self, log_dir):
        self.writer = tf.summary.FileWriter(log_dir)

    def log_scalar(self, tag, value, step):
        """Log a scalar variable.
        Parameter
        ----------
        tag : Name of the scalar
        value : value itself
        step :  training iteration
        """
        # Notice we're using the Summary "class" instead of the "tf.summary" public API.
        summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)])
        self.writer.add_summary(summary, step)

    def log_histogram(self, tag, values, step, bins=1000):
        """Logs the histogram of a list/vector of values."""
        # Convert to a numpy array
        values = np.array(values)
        
        # Create histogram using numpy        
        counts, bin_edges = np.histogram(values, bins=bins)

        # Fill fields of histogram proto
        hist = tf.HistogramProto()
        hist.min = float(np.min(values))
        hist.max = float(np.max(values))
        hist.num = int(np.prod(values.shape))
        hist.sum = float(np.sum(values))
        hist.sum_squares = float(np.sum(values**2))

        # Requires equal number as bins, where the first goes from -DBL_MAX to bin_edges[1]
        # See https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/summary.proto#L30
        # Thus, we drop the start of the first bin
        bin_edges = bin_edges[1:]

        # Add bin edges and counts
        for edge in bin_edges:
            hist.bucket_limit.append(edge)
        for c in counts:
            hist.bucket.append(c)

        # Create and write Summary
        summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)])
        self.writer.add_summary(summary, step)
        self.writer.flush()

### Training

In [4]:
def training_routine(net, 
                     dataset,
                     n_iters, 
                     criterion=nn.CrossEntropyLoss(), 
                     batch_size=5000,
                     optim=None,
                     stats_freq=10, 
                     lr=0.1,
                     batch_val=False,
                     val_acc_logger=None,
                     train_acc_logger=None):

    # loggers for tensorboard visualization
    if val_acc_logger:
        vLog = Logger('./logs/{}'.format(val_acc_logger))
    if train_acc_logger:
        tLog = Logger('./logs/{}'.format(train_acc_logger))
    
    # organize the data
    train_data, train_labels, val_data, val_labels = dataset

    if not optim:
        optimizer=torch.optim.SGD(net.parameters(), lr=lr)
    else:
        optimizer = optim

    # GPU
    gpu = torch.cuda.is_available()
    if gpu:
        print('Using GPU')
        net = net.cuda()
    else:
        print('Not using GPU')

    # training
    for i in range(n_iters):

        train_output = []
        train_loss = []

        for j in range(0, train_data.shape[0], batch_size):
            # print progress
            if j % (batch_size * 50) == 0:
                print('\rEpoch {:4} Batch {:6} ({:.2%})'.format(i+1, j // batch_size, j / train_data.shape[0]), end='')
            
            # create batches
            b_train_data, b_train_labels = train_data[j : j + batch_size], train_labels[j : j + batch_size]

            # use gpu if possible
            if gpu:
                b_train_data, b_train_labels = b_train_data.cuda(), b_train_labels.cuda()

            # forward pass
            b_train_output = net(b_train_data)
            b_train_loss = criterion(b_train_output, b_train_labels)
            
            # backward pass and optimization
            b_train_loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            
            # save output
            if len(b_train_output.shape) != 0:
                x = b_train_output
                x = x.cpu().detach()
                train_output.append(x)
            if b_train_loss:
                train_loss.append(b_train_loss.cpu().detach())
        
        if train_acc_logger:
            train_output = torch.cat(train_output)
            train_loss = torch.FloatTensor(train_loss)
            train_prediction = train_output.cpu().detach().argmax(dim=1)
            train_accuracy = (train_prediction.numpy()==train_labels.numpy()).mean()

            # saving training accuracy for tensorboard
            tr_info = { 'accuracy': train_accuracy }
            for tag, value in tr_info.items():
                tLog.log_scalar(tag, value, i+1)
            
#             # 2. Log values and gradients of the parameters (histogram summary)
#             for tag, value in net.named_parameters():
#                 tag = tag.replace('.', '/')
#                 tLog.log_histogram(tag, value.data.cpu().numpy(), i+1)
#                 tLog.log_histogram(tag+'/grad', value.grad.data.cpu().numpy(), i+1)
        

        # Once every 10 iterations, print statistics
        if (i+1) % stats_freq == 0 or i == 0:
            # override carriage return
            print("\rStatistics for epoch", i+1)

            # computing overall output and loss
            if not train_acc_logger:
                # we will have done it already, if training logging is active
                train_output = torch.cat(train_output)
                train_loss = torch.FloatTensor(train_loss)
                # compute the accuracy of the prediction
                train_prediction = train_output.cpu().detach().argmax(dim=1)
                train_accuracy = (train_prediction.numpy()==train_labels.numpy()).mean()
            
            if batch_val:
                total_val_loss = 0
                total_val_accuracy = 0
                for j in range(0, val_data.shape[0], batch_size):
                    # create batches
                    val_data_b = val_data[j : j + batch_size]
                    val_labels_b = val_labels[j : j + batch_size]
                    # use GPU if possible 
                    if gpu:
                        val_data_b, val_labels_b = val_data_b.cuda(), val_labels_b.cuda()
                    # Now for the validation set
                    val_output = net(val_data_b)
                    val_loss = criterion(val_output, val_labels_b)
                    # compute the accuracy of the prediction
                    val_prediction = val_output.cpu().detach().argmax(dim=1)
                    val_accuracy = (val_prediction.numpy() == val_labels_b.cpu().detach().numpy()).mean()
                    # sum up to get mean later
                    total_val_loss += val_loss
                    total_val_accuracy += val_accuracy
                # compute mean validation loss and accuracy for all batches
                val_loss = total_val_loss / (val_data.shape[0] // j)
                val_accuracy = total_val_accuracy / (val_data.shape[0] // j)
                
            else:
                # use GPU if possible 
                if gpu:
                    val_data, val_labels = val_data.cuda(), val_labels.cuda()
                # Now for the validation set
                val_output = net(val_data)
                val_loss = criterion(val_output, val_labels)
                # compute accuracy
                val_prediction = val_output.cpu().detach().argmax(dim=1)
                val_accuracy = (val_prediction.numpy() == val_labels.cpu().detach().numpy()).mean()
                
            
            print("Training loss :",train_loss.cpu().detach().numpy())
            print("Training accuracy :",train_accuracy)
            print("Validation loss :",val_loss.cpu().detach().numpy())
            print("Validation accuracy :",val_accuracy)
            print()
            
            if val_acc_logger:
                tr_info = { 'accuracy': val_accuracy }
                for tag, value in tr_info.items():
                    vLog.log_scalar(tag, value, i+1) 

            # try to save the neural network
            try:
                torch.save(net, 'neural_net')
            except:
                print('Could not save neural network.')

    net = net.cpu()

## Creating the model

In [5]:
# Creating NN
net = nn.Sequential(
    nn.Linear(40, 200),
    nn.LeakyReLU(),
    nn.Linear(200, 200),
    nn.LeakyReLU(),
    nn.Linear(200, 138)
)
epochs = 100
learning_rate = 0.1
batch_size = 10000
model_name = 'small_lrelu_momtm05'
optim = torch.optim.SGD(net.parameters(), lr=learning_rate, momentum=0.5)

In [None]:
# Creating NN
net = nn.Sequential(
    nn.Linear(40, 200),
    nn.LeakyReLU(),
    nn.Linear(200, 200),
    nn.LeakyReLU(),
    nn.Linear(200, 138)
)
epochs = 100
learning_rate = 0.1
batch_size = 10000
model_name = 'small_lrelu_momtm05_lrdecay'
optim = torch.optim.SGD(net.parameters(), lr=learning_rate, momentum=0.5)
torch.optim.lr_scheduler.ReduceLROnPlateau(optim)

In [10]:
# Creating NN
net = nn.Sequential(
    nn.Linear(40, 200),
    nn.Tanh(),
    nn.Linear(200, 200),
    nn.Tanh(),
    nn.Linear(200, 138)
)
epochs = 100
learning_rate = 0.1
batch_size = 10000
model_name = 'small_tanh'
optim = None

In [5]:
# Creating NN
net = nn.Sequential(
    nn.Linear(40, 200),
    nn.LeakyReLU(0.3),
    nn.Linear(200, 200),
    nn.LeakyReLU(0.3),
    nn.Linear(200, 138)
)
epochs = 100
learning_rate = 0.1
batch_size = 10000
model_name = 'small_lrelu03'
optim = None

In [9]:
# Creating NN
net = nn.Sequential(
    nn.Linear(40, 200),
    nn.LeakyReLU(0.05),
    nn.Linear(200, 200),
    nn.LeakyReLU(0.05),
    nn.Linear(200, 138)
)
epochs = 100
learning_rate = 0.1
batch_size = 10000
model_name = 'small_lrelu005'
optim = None

In [None]:
# Creating NN
net = nn.Sequential(
    nn.Linear(40, 200),
    nn.Tanh(),
    nn.Linear(200, 200),
    nn.Tanh(),
    nn.Linear(200, 138)
)
epochs = 100
learning_rate = 0.1
batch_size = 10000
model_name = 'small_tanh'
optim = None

### Training the model

In [None]:
training_routine(net, 
                 (trainx, trainy, valx, valy), 
                 epochs, 
                 lr=learning_rate, 
                 batch_size=batch_size, 
                 optim=optim,
                 train_acc_logger='train_acc_{}'.format(model_name),
                 val_acc_logger='val_acc_{}'.format(model_name))

Using GPU
Statistics for epoch 1
Training loss : [6.199172  9.244638  7.620674  ... 2.9390204 2.905887  2.5937262]
Training accuracy : 0.21786150485161326
Validation loss : 3.254351
Validation accuracy : 0.20562413528285028

Statistics for epoch 10
Training loss : [3.3608332 2.8489623 3.0729864 ... 2.6450016 2.635248  2.2794402]
Training accuracy : 0.29015752345867174
Validation loss : 3.0705924
Validation accuracy : 0.23563187478148617

Epoch   19 Batch   1500 (97.09%)

### Saving the model

In [9]:
try:
    torch.save(net, 'neural_net_{}'.format(model_name))
except:
    print('Could not save neural network.')