In [170]:
import torch
import torch.nn as nn
import re
import torch.nn.functional as F
import numpy as np

# For M1 macs
# TODO: Why is it 10x slower than cpu?
# if torch.backends.mps.is_available():
#     print("Using M1 GPU")
#     torch.set_default_device('mps')
#     mps_device = torch.device("mps")

torch.set_default_device('cpu')
with open("./data.txt", "r") as f:
    data = f.read().split("\n")

    data = [line.lower() for line in data]
    punctuations = re.compile(r"[^A-Za-z\s]+")
    data = [re.sub(punctuations, '', line) for line in data]
    data = [line for line in data if line != ""]
    print("Number of sentences:", len(data))


words = {word for sentence in data for word in sentence.split()}
# words = {'it', 'weddingring', 'room', 'spot'}
num_of_words = len(words)
print("Number of words:", num_of_words)
word2index = {word: i for i, word in enumerate(words)}
index2word = {i: word for i, word in enumerate(words)}

chars = set(char for line in data for char in line)
num_of_chars = len(chars)
print("Number of chars", num_of_chars)
batch_size = 32

def word2tensor(word):
    # That extra 1 dimension is because PyTorch assumes everything is in batches - we’re just using a batch size of 1 here.
    # TODO: Check if we can remove the extra dimension or embed more words in the same tensor
    tensor = torch.zeros(batch_size, 1, num_of_words)
    tensor[0][0][word2index[word]] = 1
    return tensor

def tensor2word(tensor):
    return index2word[tensor.argmax().item()]

# Creating data for training 
HIDDEN_LAYER_SIZE = 2
CONTEXT_WINDOW = 3
data_context = []

for line in data:
    line = line.split()
    n = len(line)
    for i in range(n - CONTEXT_WINDOW):
        data_context.append((line[i: i + CONTEXT_WINDOW], line[i + CONTEXT_WINDOW]))

Number of sentences: 9633
Number of words: 8600
Number of chars 27


In [168]:
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size) -> None:
        super().__init__()

        self.hidden_size = hidden_size
        
        self.input2hidden = nn.Linear(input_size, hidden_size)
        self.hidden2hidden = nn.Linear(hidden_size, hidden_size)
        self.hidden2output = nn.Linear(hidden_size, output_size)
        self.log_softmax = nn.LogSoftmax(dim=2)

    def forward(self, input, hidden):
        # print("Start", np.nonzero(input.detach().numpy())[-1], hidden)
        input = self.input2hidden(input)
        # print("After first layer", np.nonzero(input.detach().numpy())[-1], hidden)
        # print("1) After input to hidden", input.shape, hidden.shape)

        hidden = self.hidden2hidden(hidden)
        # print("2) After hidden to hidden", hidden.shape)

        hidden = F.tanh(input + hidden)
        # print("3) After tanh", input.shape, hidden.shape)

        output = self.hidden2output(hidden)
        # print("4) After hidden to output", output.shape)

        output = self.log_softmax(output)
        # print("5) After softmax", output.shape)
        return output, hidden

    def init_hidden(self, batch_size):
        return torch.zeros(batch_size, self.hidden_size)


In [162]:
rnn = RNN(num_of_words, HIDDEN_LAYER_SIZE, num_of_words)
lr = 0.01
criterion = nn.NLLLoss()
optimizer = torch.optim.SGD(rnn.parameters(), lr=lr)
epochs = 10

for epoch in range(epochs):
    total_loss = 0

    for context, target in data_context: 
        hidden = rnn.init_hidden()
        rnn.zero_grad()
        for word in context:
            output, hidden = rnn(word2tensor(word), hidden)
        
        # print("Predicted:", tensor2word(output), ", actual:", target, ", context:", context)
        # Get rid of the batch size dimension
        output = output.squeeze(0)
        target_tensor = torch.tensor([word2index[target]], dtype=torch.long)
        # Why the fuck do we need to convert it to LongTensor?
        loss = criterion(output, target_tensor)

        loss.backward()
        # print(loss.item())
        optimizer.step()
        total_loss += loss.item()

        # For debugging purposes: Dump output and target tensor to file output.txt

        # import sys
        # np.set_printoptions(threshold=sys.maxsize)
        # with open("output.txt", "a") as f:
        #     f.write("Predicted tensor: ")
        #     output_array = output.detach().numpy()
        #     # Find indices where values are not 0
        #     # nonzero_indices = np.nonzero(output_array)[-1]
        #     print("Output array", output_array)
        #     f.write(str(output_array) + "\n")

        #     print("Target tensor", target_tensor.detach().numpy())
        #     f.write("Target tensor: " + str(target_tensor.detach().numpy()) + " " + str(target) + "\n")

        #     print("Loss:", loss.item())
        #     f.write("Loss: " + str(loss.item()) + "\n\n")

    print(f"Epoch {epoch + 1}/{epochs} Loss: {total_loss}")

Epoch 1/10 Loss: 564788.5739929676
Epoch 2/10 Loss: 521980.39820051193
Epoch 3/10 Loss: 511599.02045464516
Epoch 4/10 Loss: 504765.9905807972
Epoch 5/10 Loss: 499933.83950662613
Epoch 6/10 Loss: 496379.2070118189
Epoch 7/10 Loss: 493662.4143565893
Epoch 8/10 Loss: 491526.34502995014
Epoch 9/10 Loss: 489797.94599330425
Epoch 10/10 Loss: 488364.6461007595


In [171]:
from torch.utils.data import Dataset, DataLoader

class CustomDataset(Dataset):
    def __init__(self, data_context):
        self.data = data_context

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

# Assuming data_context is a list of tuples (context, target)
dataset = CustomDataset(data_context)
batch_size = 32  # Specify your desired batch size
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

rnn2 = RNN(num_of_words, HIDDEN_LAYER_SIZE, num_of_words)
lr = 0.02
criterion = nn.NLLLoss()
optimizer = torch.optim.SGD(rnn2.parameters(), lr=lr)
epochs = 100

for epoch in range(epochs):
    total_loss = 0

    for batch_idx, (contexts, targets) in enumerate(dataloader):
        optimizer.zero_grad()
        batch_loss = 0

        for context, target in zip(contexts, targets):
            hidden = rnn2.init_hidden(batch_size=batch_size)
            for word in context:
                output, hidden = rnn2(word2tensor(word), hidden)

            output = output.squeeze(0)
            target_tensor = torch.tensor([word2index[target]], dtype=torch.long)
            loss = criterion(output, target_tensor)
            batch_loss += loss

        batch_loss.backward()
        optimizer.step()
        total_loss += batch_loss.item()

    print(f"Epoch {epoch + 1}/{epochs} Loss: {total_loss}")


ValueError: Expected input batch_size (32) to match target batch_size (1).

In [None]:
print(words)
hello_encodded = word2tensor("junior")
output, hidden = rnn(hello_encodded, hidden)



In [None]:
torch.set_default_device('mps')
a = torch.tensor([1, 2, 3, 4, 5])
print(a.device)

mps:0


In [None]:
b = torch.tensor([1, 2, 3, 4, 5], device='cpu')
print(b.device)

cpu
