In [38]:
import pickle

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torch.utils.data import Dataset, DataLoader
from torch.nn import Embedding, RNN
from sklearn.model_selection import train_test_split
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence

# Set any global values

In [39]:
torch.manual_seed(1)

<torch._C.Generator at 0x7f572a5c7810>

In [40]:
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

Using cuda device


# Load the training data

In [41]:
with open('training_data.pickle','rb') as f:
    training_data = pickle.load(f)

In [None]:
class_to_ix = {"Found":0, "Unfound":1}
all_letters = "()ab~&|>"

vocab = ['<pad>'] + sorted(set([char for seq in all_letters for char in seq]))


n_classes = len(class_to_ix)
n_letters = len(all_letters)

In [43]:
X = []
Y = []
embed_dim = len(vocab)
embed = Embedding(len(vocab), embed_dim) # embedding_dim = len(vocab)
for element in training_data:
    input = [vocab.index(token) for token in element[0]]
    input_tensor = torch.tensor(input, dtype=torch.int)
    classification = class_to_ix[element[1]]
    X.append(input_tensor)
    output_tensor = torch.tensor(classification, dtype=torch.int)
    Y.append(output_tensor)

In [44]:
class TTPStyleDataset(Dataset):
    def __init__(self, X,Y):
        self.X = X
        self.Y = Y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        sample = [self.X[idx], self.Y[idx]]
        return sample

In [45]:
input_dataset = TTPStyleDataset(X,Y)

def my_collate_fn(data):
    (xx,yy) = zip(*data)
    x_lens = [len(x) for x in xx]
    y_lens = [1 for y in yy]

    xx_pad = pad_sequence(xx, batch_first=False, padding_value=0)
    yy_pad = torch.tensor(yy)

    return xx_pad, yy_pad, x_lens, y_lens
    

dataloader = DataLoader(input_dataset, shuffle=True, batch_size=16, num_workers=0, collate_fn=my_collate_fn)

# Neural Network

In [46]:
class SoftmaxRNN(nn.RNN):
    def __init_subclass__(cls):
        return super().__init_subclass__()

In [47]:
#input_size - The number of expected features in the input x
rnn = SoftmaxRNN(input_size=embed_dim, hidden_size=2, num_layers=1, nonlinearity='relu', bias=True, batch_first=False, dropout=0.0, bidirectional=False)

In [None]:
num_epochs = 5
learning_rate = 0.005 # If you set this too high, it might explode. If too low, it might not learn
optimizer = torch.optim.SGD(rnn.parameters(), lr=learning_rate)

criterion = nn.CrossEntropyLoss() 
size = len(dataloader.dataset)
for epoch in range(num_epochs):
    for batch,(x_padded, y_padded, x_lens, y_lens) in enumerate(dataloader):
        
        rnn.zero_grad()
        x_embed = embed(x_padded)
        x_packed = pack_padded_sequence(x_embed, x_lens, batch_first=False, enforce_sorted=False)
        output_packed, hidden = rnn(x_packed)
        output, seq_len = pad_packed_sequence(output_packed, batch_first=False)
        hidden = torch.reshape(hidden, (16,2))
        y_padded = torch.reshape(y_padded, (16,))
        y_padded = y_padded.long()
        loss = criterion(hidden, y_padded)
        
        loss.backward()
        optimizer.step()
        

        if batch % 10000 == 0:
            loss, current = loss.item(), (batch + 1) * len(x_padded)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

loss: 0.813728  [   28/176400]
loss: 0.558837  [ 2929/176400]
loss: 0.843796  [ 5829/176400]
loss: 0.581815  [ 8428/176400]
loss: 0.571478  [12030/176400]
loss: 0.445114  [14028/176400]
loss: 0.666364  [17429/176400]
loss: 0.658583  [18927/176400]
loss: 0.495002  [24030/176400]
loss: 0.555339  [25228/176400]
loss: 0.711224  [28028/176400]
loss: 0.492482  [33030/176400]
loss: 0.558448  [33628/176400]
loss: 0.443641  [39030/176400]
loss: 0.738062  [40629/176400]
loss: 0.547933  [43529/176400]
loss: 0.615398  [44828/176400]
loss: 0.722951  [52731/176400]
loss: 0.696647  [48627/176400]
loss: 0.612042  [55129/176400]
loss: 0.380701  [56028/176400]
loss: 0.675674  [60929/176400]
loss: 0.662487  [63829/176400]
loss: 0.656744  [69030/176400]
loss: 0.708699  [69629/176400]
loss: 0.623604  [70028/176400]
loss: 0.653996  [75429/176400]
loss: 0.397277  [78329/176400]
loss: 0.729692  [81229/176400]
loss: 0.456908  [84129/176400]
loss: 0.678236  [87029/176400]
loss: 0.506785  [86828/176400]
loss: 0.

In [None]:
# Example of target with class indices
loss = nn.CrossEntropyLoss()
input = torch.randn(3, 5, requires_grad=True)
target = torch.empty(3, dtype=torch.long).random_(5)
output = loss(input, target)

print(np.shape(input))
print(np.shape(target))
output.backward()
# Example of target with class probabilities
input = torch.randn(3, 5, requires_grad=True)
target = torch.randn(3, 5).softmax(dim=1)
output = loss(input, target)
output.backward()

torch.Size([3, 5])
torch.Size([3])
