In [8]:
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import numpy as np
from IPython import embed
import pickle
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm

import torch.nn.functional as F #functions
from torchvision import datasets, transforms

import sklearn.metrics as sk_m
from sklearn.metrics import plot_confusion_matrix

import os
import shutil

import sys
sys.path.append(os.path.abspath("/Users/andyvarner/Documents/NN_Spring2023/project_2"))
import dataset_methods

%matplotlib notebook

  Referenced from: <E7E99FB4-837B-39DF-9112-617A7DBD769D> /Users/andyvarner/mambaforge/envs/dev/lib/python3.8/site-packages/torchvision/image.so
  warn(


In [1]:
text = ['hey how are you','good i am fine','have a nice day']

# Join all the sentences together and extract the unique characters from the combined sentences
chars = set(''.join(text))

# Creating a dictionary that maps integers to the characters
int2char = dict(enumerate(chars))

# Creating another dictionary that maps characters to integers
char2int = {char: ind for ind, char in int2char.items()}

char2int

{' ': 0,
 'd': 1,
 'g': 2,
 'e': 3,
 'a': 4,
 'n': 5,
 'v': 6,
 'h': 7,
 'm': 8,
 'o': 9,
 'f': 10,
 'c': 11,
 'u': 12,
 'w': 13,
 'i': 14,
 'r': 15,
 'y': 16}

In [2]:
maxlen = len(max(text, key=len))
print("The longest string has {} characters".format(maxlen))

The longest string has 15 characters


In [3]:

# Padding

# A simple loop that loops through the list of sentences and adds a ' ' whitespace until the length of the sentence matches
# the length of the longest sentence
for i in range(len(text)):
    while len(text[i])<maxlen:
        text[i] += ' '

In [4]:

# Creating lists that will hold our input and target sequences
input_seq = []
target_seq = []

for i in range(len(text)):
    # Remove last character for input sequence
    input_seq.append(text[i][:-1])
    
    # Remove firsts character for target sequence
    target_seq.append(text[i][1:])
    print("Input Sequence: {}\nTarget Sequence: {}".format(input_seq[i], target_seq[i]))

Input Sequence: hey how are yo
Target Sequence: ey how are you
Input Sequence: good i am fine
Target Sequence: ood i am fine 
Input Sequence: have a nice da
Target Sequence: ave a nice day


In [5]:
for i in range(len(text)):
    input_seq[i] = [char2int[character] for character in input_seq[i]]
    target_seq[i] = [char2int[character] for character in target_seq[i]]

In [6]:

dict_size = len(char2int)
seq_len = maxlen - 1
batch_size = len(text)

def one_hot_encode(sequence, dict_size, seq_len, batch_size):
    # Creating a multi-dimensional array of zeros with the desired output shape
    features = np.zeros((batch_size, seq_len, dict_size), dtype=np.float32)
    
    # Replacing the 0 at the relevant character index with a 1 to represent that character
    for i in range(batch_size):
        for u in range(seq_len):
            features[i, u, sequence[i][u]] = 1
    return features

In [9]:
input_seq = one_hot_encode(input_seq, dict_size, seq_len, batch_size)
print("Input shape: {} --> (Batch Size, Sequence Length, One-Hot Encoding Size)".format(input_seq.shape))

Input shape: (3, 14, 17) --> (Batch Size, Sequence Length, One-Hot Encoding Size)


In [10]:
input_seq = torch.from_numpy(input_seq)
target_seq = torch.Tensor(target_seq)

In [20]:
class ChessRNN(torch.nn.Module): #inherits from nn.Module
    
    def __init__(self, input_size, hidden_size, num_layers, lr, output_size):
        
        super(ChessRNN, self).__init__() #initialize nn.Module
        
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True, num_layers=num_layers)
        self.fc = nn.Linear(hidden_size, output_size)
        self.lr = lr
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        
        
    def forward(self, x):
        
        batch_size = x.size(0)
        
        hidden = self.init_hidden(batch_size)
        
        out, hidden = self.rnn(x, hidden)
        out = self.fc(hidden[-1])
        
        return out, hidden
        
    def init_optimizer(self):
        
        self.optimizer = torch.optim.Adam(self.parameters(), lr = self.lr)


    def objective(self, outputs, labels): # this is the loss function

        loss = torch.nn.CrossEntropyLoss(outputs, labels)
        
        return loss
    
    def init_hidden(self, batch_size):
    
        hidden = torch.zeros(self.num_layers, batch_size, self.hidden_size)
        
        return hidden
    
def train_rnn(model, train_dataset, test_dataset, num_epochs = 10, rate = 5):
    
    criterion = nn.CrossEntropyLoss()
    
    training_loss, cf_matrices = [], []

    model.init_optimizer()
    
    for epoch in range(num_epochs):
        epoch_loss = 0
    
        # Set model to train mode
        model.train()

        # Loop over the train data
        
        for i, data, in enumerate(tqdm(train_dataset, desc = "Train Epoch %s" % epoch)):

            turns = data[0]  #inputs
            winner = data[1] #labels
            

            
            # Forward pass
            outputs, hidden = model(turns)

            #loss = torch.nn.functional.mse_loss(outputs, winner)
            loss = criterion(outputs,winner.view(-1).long())
            
            # Zero out gradients
            model.optimizer.zero_grad()
            
            # Backward pass
            loss.backward()
            
            model.optimizer.step()

            # Update metrics

            epoch_loss += loss.item()

        epoch_loss = epoch_loss / (i + 1)

        print(f"epoch {epoch}, epoch_loss={epoch_loss}")

        training_loss.append(epoch_loss)

#         # Validate network
        
        if(epoch % rate == 0):
            
            model.eval()
            
            acc = 0
            all_labels, all_preds = [], []

            for i, (turns, winner) in enumerate(tqdm(test_dataset, desc = "Test Epoch %s" % epoch)):

#                 turns = data[0]  #inputs
#                 winner = data[1] #labels
                #sample = sample.view(-1, 784)

                logits = model(turns)
                pred = torch.argmax(logits)

                label = np.argmax(winner.numpy())
                
                #all_preds.append(int(pred.detach().cpu().numpy()))
                all_preds.append(pred.numpy())
                all_labels.append(label)
                
                if(pred == label):
                    acc += 1
                    
            acc = acc / (i + 1)
            
            print("Valid Accuracy %s" % acc)
                
        ##get metrics
        training_metrics = {}
        cf_matrix = sk_m.confusion_matrix(all_labels, all_preds)

        #epoch_accuracy = calculate_accuracy(np.asarray(all_preds), np.asarray(all_labels))

        cf_matrices.append(cf_matrix)
        print(f"confusion matrix appended. epoch {epoch}")
        model.train()
            
        training_metrics = {}
        training_metrics["labels"] = all_labels
        training_metrics["preds"] = all_preds
        training_metrics["mats"] = cf_matrices

    return training_loss, training_metrics

In [21]:
# Set hyperparameters
input_size = dict_size    # number of features in the input (one-hot encoding of moves)
hidden_size = 12  # number of hidden units in the RNN
output_size = dict_size   # number of output classes (1 for each player)
num_layers = 1

learning_rate = 0.01
num_epochs = 20

model = ChessRNN(input_size, hidden_size, num_layers, learning_rate, output_size)

#train, test = get_dataset()
# embed()

train_loss, train_metrics = train_rnn(model, input_seq, target_seq, num_epochs=num_epochs)

Train Epoch 0:   0%|                                                                                                                                                     | 0/3 [00:00<?, ?it/s]


RuntimeError: For unbatched 2-D input, hx should also be 2-D but got 3-D tensor

In [None]:
exp_name = "dataset_20"

results = {}
results["loss"] = train_loss
results["validation"] = train_metrics # 1 confusion matrix per validation run
    
path_save = f"/Users/andyvarner/Documents/NN_Spring2023/project_2/Results"

# if(os.path.join(path_save)):
#     print("creating folder")
#     create_folder(path_save)  

title = "%s.pkl" % (str(exp_name).zfill(3))
filename = os.path.join(path_save, title)
print(filename)

pickle.dump(results, open(filename, "wb"))

# torch.save(model.state_dict(), "kernel_size_1.pt")