# Analogous Recurrent ANN Trainer

This notebook can be used to generate and train a recurrent ANN so that the weights can be copied over to a SNN with the same architecture.

## Imports

In [2]:
import torch
import numpy as np
from torch import nn
import matplotlib.pyplot as plt
from gensim.models import Word2Vec
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader

## Load Data

In [3]:
# load all data and prepare for vector conversion

# load data
f = open("Training Data\\train_5500.txt")
data = f.read()

# split data into sentences
sents = data.split('\n')

# split each sentence into words
for i in range(len(sents)):
    sents[i] = sents[i].split(' ')[:-1]

In [4]:
# perform word to vector conversion

# load in word to vector converter
w2v_load = Word2Vec.load("Usable Word2Vec Model\\word2vec.model")
wv = w2v_load.wv

# perform conversion
vec_data = []
for sent in sents[:-1]:
    vecs = []
    vecs.append(sent[0])
    for word in sent[1:]:
        try:
            vecs.append(torch.from_numpy(wv[word]))
        except:
            pass
    vecs.append(torch.zeros(64))
    vec_data.append(vecs)

# pad all sentences to length of longest sentence
max_len = max([len(sent) for sent in vec_data])
vec_data_pad = []
for sent in vec_data:
    pad_len = max_len - len(sent)
    for i in range(pad_len):
        sent.append(torch.zeros(64))
    vec_data_pad.append(sent)
vec_data = vec_data_pad

# split into training and test data
train_data = vec_data[:5000]
test_data = vec_data[5000:-1]

# NOTE: first word of each sentence is correct categ. -- last sentence is empty (excluded)

  vecs.append(torch.from_numpy(wv[word]))


In [5]:
# create DataSet which can be used with PyTorch DataLoader

ans_key = { 'DESC' :  0,
            'ENTY' :  1,
            'ABBR' :  2,
            'HUM'  :  3,
            'LOC'  :  4,
            'NUM'  :  5 }

class QuestionDataset(Dataset):
    """ Question Dataset """
    
    def __init__(self, data):
        """
        data = list of (list of words -- first word is label)
        """
        self.labels = []
        self.sents = []
        for sent in data:
            lab_val = ans_key[sent[0].split(":")[0]]
            lab_arr = torch.tensor(lab_val)
            self.labels.append(lab_arr)
            self.sents.append(sent[1:])
    
    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        
        sent = self.sents[idx]
        label = self.labels[idx]
        return sent, label

train_DSet = QuestionDataset(train_data)
test_DSet = QuestionDataset(test_data)

In [6]:
# Create data loaders.

batch_size = 1
train_dataloader = DataLoader(train_DSet, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_DSet, batch_size=batch_size, shuffle=True)

for X, y in test_dataloader:
    print(X)
    print(y)
    break

[tensor([[-0.7501, -0.3085, -0.3712,  1.2873, -0.3685, -0.1576,  0.6909, -0.1197,
         -1.3665, -0.5399,  0.5410,  0.3175, -0.8711, -1.6558, -0.6625,  1.4596,
         -0.4190, -0.1719, -0.4393,  0.9215,  0.7218,  0.0266,  1.7388, -0.4094,
          0.4647,  2.1667,  0.4310, -0.7656, -0.4223, -1.2149, -0.5025, -1.2658,
         -2.3934,  0.1058, -1.4723,  0.7023,  0.6207, -1.3136, -0.9164,  0.7105,
         -0.5051,  0.5171,  0.5749, -1.5660,  2.4641, -1.1979,  0.6296, -0.5382,
          0.9257, -0.2696,  0.7026,  0.8137,  0.6088,  3.1219, -0.6065,  0.5419,
          0.7573, -0.5837,  0.0873,  0.8292,  0.5570,  0.4734,  0.3817,  2.1990]]), tensor([[-0.1187,  0.4110,  0.4454,  0.4726, -0.5734, -0.6245,  1.2417, -0.2847,
         -0.9390, -0.3459,  0.2911,  0.8602, -1.7420, -0.7528, -0.7632,  1.7993,
          1.9305, -0.6733,  0.1016,  0.8474,  1.0793,  0.7145, -0.0355, -1.8987,
          0.5883,  1.7181, -1.1460,  0.1994, -0.3177, -2.5541,  1.2290, -0.7425,
         -2.8377,  0.805

## Create Model

In [6]:
# Get cpu, gpu or mps device for training.
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

# Define model
class NeuralNetwork(nn.Module):
    def __init__(self):
        """ Builds recurrent neural network model """
        super().__init__()
        self.ff = nn.Sequential(
            nn.Linear(64, 48, bias=False),
            nn.ReLU(),
        )
        self.rnn = nn.RNN(48, 16, nonlinearity='relu', bias=False)
        self.out = nn.Sequential(
            nn.Linear(16, 6, bias=False),
            nn.LogSoftmax(dim=1),
        )

    def forward(self, q):
        """ Implements feed-forward then recurrent layer """
        ff_q = []
        for word in q:
            ff_q.append(self.ff(word))
        ff_q = torch.stack(ff_q)
        h_N = Variable(torch.zeros(1, 16)).to(device)
        for word in ff_q:
            if not torch.all(word.eq(0)):
                rnn_out, h_N = self.rnn(word, h_N)
            else:
                break
        output = self.out(rnn_out)
        return output

model = NeuralNetwork().to(device)
print(model)

Using cuda device
NeuralNetwork(
  (ff): Sequential(
    (0): Linear(in_features=64, out_features=48, bias=False)
    (1): ReLU()
  )
  (rnn): RNN(48, 16, bias=False)
  (out): Sequential(
    (0): Linear(in_features=16, out_features=6, bias=False)
    (1): LogSoftmax(dim=1)
  )
)


In [1]:
torch.qint4

NameError: name 'torch' is not defined

In [7]:
# check model

param_list = [*model.parameters()]
len(param_list)

4

## Optimize Model

In [8]:
# create loss function and optimizer
loss_fn = nn.NLLLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# define training routine
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = torch.stack(X).to(device), y.to(device)

        # Compute prediction error and optimize
        optimizer.zero_grad()
        for word in X:
            if torch.all(word.eq(0)):
                print("FLAG!")
                print(X)
                break
            else:
                break
        pred = model(X)
        loss = loss_fn(pred, y)
        loss.backward()
        optimizer.step()
        
        # clamp diagonal hidden-hidden weights of RNN layer to 0
        model.rnn._parameters['weight_hh_l0'].data.diagonal().clamp_(min=0, max=0)
        
        # print loss and accuracy at selected iterations
        if batch % 1000 == 0:
            loss, current = loss.item(), (batch + 1)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

# define test routine
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    i = 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = torch.stack(X).to(device), y.to(device) #torch.FloatTensor(y).to(device)
            pred = model(X)
            target = y#.argmax(1)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == target).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [9]:
total_epochs = 0

In [14]:
# perform training and test performance over epochs

epochs = 60
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!")
total_epochs += epochs
print("\nTotal epochs: {0}".format(total_epochs))

Epoch 1
-------------------------------
loss: 2.115939  [    1/ 5000]
loss: 0.000001  [ 1001/ 5000]
loss: 1.166997  [ 2001/ 5000]
loss: 0.673601  [ 3001/ 5000]
loss: 1.069983  [ 4001/ 5000]
Test Error: 
 Accuracy: 70.3%, Avg loss: 0.796557 

Epoch 2
-------------------------------
loss: 0.629578  [    1/ 5000]
loss: 1.545170  [ 1001/ 5000]
loss: 0.558245  [ 2001/ 5000]
loss: 0.004281  [ 3001/ 5000]
loss: 0.016951  [ 4001/ 5000]
Test Error: 
 Accuracy: 72.7%, Avg loss: 0.787113 

Epoch 3
-------------------------------
loss: 2.344981  [    1/ 5000]
loss: 0.496069  [ 1001/ 5000]
loss: 0.483845  [ 2001/ 5000]
loss: 0.234901  [ 3001/ 5000]
loss: 0.318510  [ 4001/ 5000]
Test Error: 
 Accuracy: 72.9%, Avg loss: 0.767776 

Epoch 4
-------------------------------
loss: 0.039172  [    1/ 5000]
loss: 0.000000  [ 1001/ 5000]
loss: 0.135399  [ 2001/ 5000]
loss: 0.182714  [ 3001/ 5000]
loss: 0.446136  [ 4001/ 5000]
Test Error: 
 Accuracy: 73.4%, Avg loss: 0.726187 

Epoch 5
------------------------

Test Error: 
 Accuracy: 69.8%, Avg loss: 0.888721 

Epoch 35
-------------------------------
loss: 0.037196  [    1/ 5000]
loss: 0.000156  [ 1001/ 5000]
loss: 0.012394  [ 2001/ 5000]
loss: 0.000389  [ 3001/ 5000]
loss: 0.000002  [ 4001/ 5000]
Test Error: 
 Accuracy: 71.2%, Avg loss: 0.903190 

Epoch 36
-------------------------------
loss: 0.007299  [    1/ 5000]
loss: 0.000213  [ 1001/ 5000]
loss: 0.562013  [ 2001/ 5000]
loss: 1.536533  [ 3001/ 5000]
loss: 0.000000  [ 4001/ 5000]
Test Error: 
 Accuracy: 72.9%, Avg loss: 0.821677 

Epoch 37
-------------------------------
loss: 8.949102  [    1/ 5000]
loss: 0.142331  [ 1001/ 5000]
loss: 2.277160  [ 2001/ 5000]
loss: 0.000000  [ 3001/ 5000]
loss: 0.006997  [ 4001/ 5000]
Test Error: 
 Accuracy: 72.1%, Avg loss: 0.917136 

Epoch 38
-------------------------------
loss: 0.337495  [    1/ 5000]
loss: 0.082520  [ 1001/ 5000]
loss: 0.000000  [ 2001/ 5000]
loss: 0.035197  [ 3001/ 5000]
loss: 1.696650  [ 4001/ 5000]
Test Error: 
 Accuracy: 72.1

In [15]:
# in case training takes a wrong turn
temp_model_backup = model

In [16]:
# check all weights

param_list = [*model.parameters()]
i = 0
for lay in param_list:
    i += 1
    print("Layer {0}".format(i))
    print(lay.shape)
    print(lay)

Layer 1
torch.Size([48, 64])
Parameter containing:
tensor([[-0.0706,  0.8817,  0.1850,  ..., -0.0875,  0.0828,  0.6999],
        [ 0.9073,  0.2710,  0.9453,  ..., -0.1008, -0.8766, -1.6422],
        [ 0.1462,  0.1366,  0.5575,  ..., -0.7116, -0.1804, -0.7887],
        ...,
        [-0.7479, -0.2158,  0.9840,  ...,  0.0507,  0.1247,  0.4779],
        [-0.0898, -0.0664,  0.1271,  ..., -0.3905,  0.2266,  0.6929],
        [-0.1669, -0.0281, -0.6965,  ...,  0.7204,  1.1612, -0.3478]],
       device='cuda:0', requires_grad=True)
Layer 2
torch.Size([16, 48])
Parameter containing:
tensor([[-6.4465e-02, -1.0213e+00,  9.9517e-01,  4.1901e-01, -3.5492e-01,
         -1.1138e+00, -4.2743e-01,  5.3006e-01, -1.8814e+00,  3.3486e-01,
          4.3346e-01,  5.5507e-01,  2.1036e-01, -1.5811e-01, -1.6214e-01,
          4.8606e-01, -8.9539e-01, -1.2283e+00, -3.0397e-01, -1.6822e+00,
         -2.2543e+00, -1.4987e+00, -1.9769e+00,  1.3467e+00,  9.5415e-02,
         -1.3498e-01, -3.2551e+00,  3.0033e+00, -8

## Save Model

In [17]:
model_dir = "Recurrent ANN Models/"
model_name = "RANN_13.pth"

torch.save(model.state_dict(), model_dir + model_name)
print("Saved PyTorch Model State to " + model_dir + model_name)

Saved PyTorch Model State to Recurrent ANN Models/RANN_12.pth
