In [206]:
import torch
import torch.nn as nn
import re
import torch.nn.functional as F
import numpy as np

with open("./data.txt", "r") as f:
    data = f.read().split("\n")

    data = [line.lower() for line in data]
    punctuations = re.compile(r"[^A-Za-z\s]+")
    data = [re.sub(punctuations, '', line) for line in data]
    data = [line for line in data if line != ""]
    print("Number of sentences:", len(data))


words = {word for sentence in data for word in sentence.split()}
# words = {'it', 'weddingring', 'room', 'spot'}
num_of_words = len(words)
print("Number of words:", num_of_words)
word2index = {word: i for i, word in enumerate(words)}
index2word = {i: word for i, word in enumerate(words)}

chars = set(char for line in data for char in line)
num_of_chars = len(chars)
print("Number of chars", num_of_chars)
batch_size = 1

def word2tensor(word):
    # That extra 1 dimension is because PyTorch assumes everything is in batches - we’re just using a batch size of 1 here.
    # TODO: Check if we can remove the extra dimension or embed more words in the same tensor
    tensor = torch.zeros(batch_size, 1, num_of_words)
    tensor[0][0][word2index[word]] = 1
    return tensor

def tensor2word(tensor):
    return index2word[tensor.argmax().item()]

# def categoryFromOutput(output):
#     _, top_i = output.topk(1)
#     category_i = top_i[0][0].item()
#     return num_of_words[category_i], category_i


Number of sentences: 9633
Number of words: 8600
Number of chars 27


In [211]:
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size) -> None:
        super().__init__()

        self.hidden_size = hidden_size
        
        self.input2hidden = nn.Linear(input_size, hidden_size)
        self.hidden2hidden = nn.Linear(hidden_size, hidden_size)
        self.hidden2output = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        print("Start", np.nonzero(input.detach().numpy())[-1], hidden)
        input = self.input2hidden(input)
        print("After first layer", np.nonzero(input.detach().numpy())[-1], hidden)
        print("1) After input to hidden", input.shape, hidden.shape)

        hidden = self.hidden2hidden(hidden)
        print("2) After hidden to hidden", hidden.shape)

        hidden = F.tanh(input + hidden)
        print("3) After tanh", input.shape, hidden.shape)

        output = self.hidden2output(hidden)
        print("4) After hidden to output", output.shape)

        output = self.softmax(output)
        print("5) After softmax", output.shape)
        return output, hidden

    def init_hidden(self, batch_size=batch_size):
        return torch.zeros(batch_size, self.hidden_size)


# Creating data for training 
HIDDEN_LAYER_SIZE = 2
CONTEXT_WINDOW = 3
data_context = []

for line in data:
    line = line.split()
    n = len(line)
    for i in range(n - CONTEXT_WINDOW):
        data_context.append((line[i: i + CONTEXT_WINDOW], line[i + CONTEXT_WINDOW]))


In [212]:

rnn = RNN(num_of_words, HIDDEN_LAYER_SIZE, num_of_words)
lr = 0.01
criterion = nn.NLLLoss()
optimizer = torch.optim.SGD(rnn.parameters(), lr=lr)
epochs = 1

data_context = data_context[:1]
for epoch in range(epochs):
    total_loss = 0

    for context, target in data_context: 
        hidden = rnn.init_hidden()
        rnn.zero_grad()
        # print(context, target)
        for word in context:
            output, hidden = rnn(word2tensor(word), hidden)
        
        # print("Predicted:", tensor2word(output), ", actual:", target, ", context:", context)
        # Get rid of the batch size dimension
        output = output.squeeze(0)
        target_tensor = torch.tensor([word2index[target]], dtype=torch.long)
        loss = criterion(output, target_tensor)

        loss.backward()
        optimizer.step()
        total_loss += loss.item()

        # Dump output and target tensor to a file output.txt
        with open("output.txt", "a") as f:
            f.write("Predicted tensor: ")
            output_array = output.detach().numpy()
            # Find indices where values are not 0
            nonzero_indices = np.nonzero(output_array)[-1]
            f.write(str(nonzero_indices) + "\n")

            f.write("Target tensor: " + str(target_tensor.detach().numpy()) + " " + str(target) + "\n")

            f.write("Loss: " + str(loss.item()) + "\n\n")

    print(f"Epoch {epoch + 1}/{epochs} Loss: {total_loss}")

Start [4390] tensor([[0., 0.]])
After first layer [0 1] tensor([[0., 0.]])
1) After input to hidden torch.Size([1, 1, 2]) torch.Size([1, 2])
2) After hidden to hidden torch.Size([1, 2])
3) After tanh torch.Size([1, 1, 2]) torch.Size([1, 1, 2])
4) After hidden to output torch.Size([1, 1, 8600])
5) After softmax torch.Size([1, 1, 8600])
Start [4993] tensor([[[0.4916, 0.1534]]], grad_fn=<TanhBackward0>)
After first layer [0 1] tensor([[[0.4916, 0.1534]]], grad_fn=<TanhBackward0>)
1) After input to hidden torch.Size([1, 1, 2]) torch.Size([1, 1, 2])
2) After hidden to hidden torch.Size([1, 1, 2])
3) After tanh torch.Size([1, 1, 2]) torch.Size([1, 1, 2])
4) After hidden to output torch.Size([1, 1, 8600])
5) After softmax torch.Size([1, 1, 8600])
Start [6012] tensor([[[0.2191, 0.1146]]], grad_fn=<TanhBackward0>)
After first layer [0 1] tensor([[[0.2191, 0.1146]]], grad_fn=<TanhBackward0>)
1) After input to hidden torch.Size([1, 1, 2]) torch.Size([1, 1, 2])
2) After hidden to hidden torch.Size

In [193]:
m = nn.LogSoftmax(dim=1)
loss = nn.NLLLoss()
# input is of size N x C = 3 x 5
input = torch.zeros(1, 5, requires_grad=True)
print(input)
input = m(input)
print(input)
print(input.sum())
# each element in target has to have 0 <= value < C
target = torch.tensor([2])
print(target)
print(input)
output = loss(input, target)
output.backward()
print(output)

sample = torch.randn(1, 1, 5)
print(sample)
# Print non-zero indices

tensor([[0., 0., 0., 0., 0.]], requires_grad=True)
tensor([[-1.6094, -1.6094, -1.6094, -1.6094, -1.6094]],
       grad_fn=<LogSoftmaxBackward0>)
tensor(-8.0472, grad_fn=<SumBackward0>)
tensor([2])
tensor([[-1.6094, -1.6094, -1.6094, -1.6094, -1.6094]],
       grad_fn=<LogSoftmaxBackward0>)
tensor(1.6094, grad_fn=<NllLossBackward0>)
tensor([[[-1.1418,  0.4812,  0.1088,  0.0671,  0.2627]]])
[0 1 2 3 4]
3
