In [61]:
import torch
import random
import numpy as np  # numpy
import torch.nn as nn  # nn objects
import torch.optim as optim  # nn optimizers
import matplotlib.pyplot as plt
%matplotlib notebook

## custom packages ##
from networkUtils import recurrentNet as rn
from taskUtils import generate as gen
from trainUtils import trainer as tn
from testUtils import test, plot

# Seed every RNG the notebook may draw from so that Restart Kernel -> Run All
# starts from the same random state each time (network initialisation and
# dataset generation are otherwise unseeded).
# NOTE(review): which RNG the project helpers actually use is not visible here,
# so Python's, NumPy's and PyTorch's generators are all seeded.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Set device to gpu if possible
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [62]:
# Hyperparameters shared by every network trained in this notebook
input_size = 5  # input size passed to the network constructor, the dataset generator and the trainer
num_classes = 2  # the number of units in the output layer
hidden_size = 10  # the number of units in the recurrent layer
batch_size = 1  # batch size = number of samples to average when computing the gradient
num_layers = 1  # number of stacked RNN layers
eta = 0.001  # learning rate handed to the Adam optimizer
epochs = 500  # epochs = number of full passes through the dataset
num_networks = 5  # number of independently trained networks whose losses are averaged

In [63]:
# Loss function shared by all training runs (the optimizer and the learning-rate
# scheduler are created per network inside mean_loss)
criterion = nn.CrossEntropyLoss()  # loss function

In [64]:
def mean_loss(num_networks, condition, verbose=False):
    """Train several freshly initialised networks on one condition and average their losses.

    For each repetition a new ``RecurrentXORNet`` is built, a dataset is
    generated for ``condition``, and the network is trained with Adam and a
    ``ReduceLROnPlateau`` scheduler. The values returned by
    ``tn.train_network`` are summed and divided by ``num_networks``.

    Reads the module-level settings ``input_size``, ``hidden_size``,
    ``num_layers``, ``num_classes``, ``batch_size``, ``eta``, ``epochs``,
    ``criterion`` and ``device``.

    Args:
        num_networks: Number of networks to train and average over.
        condition: Indexable with at least three entries; ``condition[0]``,
            ``condition[1]`` and ``condition[2]`` are forwarded, in that order,
            to ``gen.generate_dataset``.
        verbose: Forwarded unchanged to ``tn.train_network``.

    Returns:
        The sum of the per-network losses divided by ``num_networks``. When
        ``num_networks`` is zero or negative no network is trained and an
        empty NumPy array is returned.

    Note:
        Assumes ``tn.train_network`` returns something that supports
        element-wise ``+`` and ``/`` (e.g. a NumPy array of per-epoch
        losses) -- TODO confirm against trainUtils.
    """
    seqlen1, seqlen2, seqlen3 = condition[0], condition[1], condition[2]
    loss_sum = None  # running sum of the losses returned by each training run
    for _ in range(num_networks):
        net = rn.RecurrentXORNet(input_size, hidden_size, num_layers, num_classes, batch_size).to(device)
        # Adam updates every parameter of the network with step size eta.
        optimizer = optim.Adam(net.parameters(), lr=eta)
        # Reduces the learning rate once the monitored metric has stopped
        # improving for more than `patience` scheduler steps. The deprecated
        # `verbose` keyword is omitted; False was its default.
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=5)
        dataset, targets, sequence_length = gen.generate_dataset(False, input_size, seqlen1, seqlen2, seqlen3)
        loss = tn.train_network(net, dataset, targets, sequence_length, input_size, batch_size,
                                epochs, optimizer, criterion, scheduler, verbose)
        loss_sum = loss if loss_sum is None else loss_sum + loss
    if loss_sum is None:
        # No training run happened; keep the historical empty-array result.
        return np.array([])
    return loss_sum / num_networks

In [65]:
# Each condition is indexed as [0], [1], [2] by mean_loss and forwarded, in that
# order, as the three sequence-length arguments of gen.generate_dataset.
# NOTE(review): all four conditions are currently the identical list [0, 0, 10],
# so the four loss curves compare the same setup. Presumably each condition
# should use different segment lengths to match its label -- confirm the intended
# values against gen.generate_dataset.
inputs_at_beginning = [0, 0, 10] # inputs placed in the beginning
inputs_at_end = [0, 0, 10] # inputs placed in the end
inputs_at_beginning_middle = [0, 0, 10] # inputs placed in the beginning and middle
inputs_at_middle = [0, 0, 10] # inputs placed directly in the middle

In [66]:
# Train and average num_networks networks for each input-position condition,
# in the same order as the legend entries used when plotting.
mean_loss_1, mean_loss_2, mean_loss_3, mean_loss_4 = (
    mean_loss(num_networks, position_condition)
    for position_condition in (
        inputs_at_beginning,
        inputs_at_end,
        inputs_at_beginning_middle,
        inputs_at_middle,
    )
)

In [67]:
# Plot the four averaged loss curves on one figure.
# The title is built from num_networks so it stays correct when that
# hyperparameter is changed.
plot.plot_four_losses(
    f"Average Effect of Position on Network Loss for {num_networks} networks",
    mean_loss_1, mean_loss_2, mean_loss_3, mean_loss_4,
)
plt.legend(["Inputs at beginning", "Inputs at end", "Inputs at beginning/middle", "Inputs at middle"])
plt.show()

<IPython.core.display.Javascript object>