RNN

In [1]:
from google.colab import drive

# Attach Google Drive to the Colab VM's filesystem so the project files
# stored there become reachable through ordinary paths.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Mounted at /content/drive


In [2]:
import os

# Make the project folder the working directory so the relative paths used
# further down (data files, local modules) resolve against it.
project_dir = "/content/drive/Othercomputers/My Laptop/nlp_scratch"
os.chdir(project_dir)

In [1]:
import torch
from torch import nn
import torch.functional as F
import numpy as np

In [2]:
# Load the corpus: one list entry per line of the file. Line terminators are
# kept on purpose -- the trailing "\n" is later treated as the end-of-name
# character when sampling.
with open("data/names.txt") as names_file:
    names = list(names_file)

In [3]:
# Vocabulary: every distinct character that occurs anywhere in the corpus
# (including the line terminator), in sorted order.
chars = sorted({ch for line in names for ch in line})
vocab_size = len(chars)

In [4]:
# Two lookup tables over the vocabulary: position -> character, and its inverse.
idx_to_char = dict(enumerate(chars))
char_to_idx = {symbol: position for position, symbol in idx_to_char.items()}

In [5]:
class NameDataset(torch.utils.data.Dataset):
    """Map-style dataset that serves the stored names unchanged, one per index."""

    def __init__(self, names):
        """Keep a reference to ``names`` (an indexable, sized collection)."""
        super().__init__()
        self.names = names

    def __len__(self):
        """Number of stored names."""
        return len(self.names)

    def __getitem__(self, index):
        """Return the entry at position ``index`` exactly as it was stored."""
        return self.names[index]

In [6]:
# Dataloader: reshuffles the names every epoch and hands them out in groups
# of 64. The items are plain strings, so each batch should arrive as a
# collection of strings rather than a tensor (PyTorch default collation).
name_dataset = NameDataset(names=names)
dataloader = torch.utils.data.DataLoader(
    dataset=name_dataset,
    batch_size=64,
    shuffle=True,
)

In [7]:
# multi-layer RNN
from models.attention.rnn import RNNv2

In [8]:
# Model: RNNv2 is a project-local module; it is constructed with the
# vocabulary size, the value 8 (presumably the hidden width -- confirm
# against models/attention/rnn.py) and 4 stacked layers.
rnn = RNNv2(vocab_size,8,num_layers=4)
learning_rate = 0.001
# Mean cross-entropy over the positions of one sequence; expects raw logits.
criterion = nn.CrossEntropyLoss(reduction="mean")
optim = torch.optim.Adam(rnn.parameters(),learning_rate)
# Row i is the one-hot encoding of character index i. Sized from the
# vocabulary instead of a hard-coded 27 so it stays in sync with the model's
# input width if the corpus (and therefore the character set) changes.
onehot_tensor = torch.eye(vocab_size)
# Turns logits into a probability distribution over the last dimension.
softmax= nn.Softmax(dim=-1)

In [9]:
# training
# Anomaly detection is a debugging aid: it adds checks to every backward pass
# and slows training noticeably. Switch it off once the model trains cleanly.
torch.autograd.set_detect_anomaly(True)
low_loss = 0  # not used by this loop; kept so other cells that read it still work
n_epochs = 10
for j in range(n_epochs):
    total_loss = 0.0   # running sum of per-step mean losses (plain Python float)
    n_steps = 0        # optimiser steps actually taken this epoch
    for batch in dataloader:
        # The loader yields a group of name strings. Every name in the group
        # contributes to the update (previously only the first one was used
        # and the remaining names of each batch were silently dropped).
        # Entries shorter than two characters have no (input, target) pair.
        usable = [name for name in batch if len(name) >= 2]
        if not usable:
            continue
        rnn.zero_grad()
        step_loss = 0.0
        for name in usable:
            # Next-character prediction: the input is the name without its
            # last character, the target is the name shifted left by one.
            input_idx = torch.tensor([char_to_idx[ch] for ch in name[:-1]])
            target_idx = torch.tensor([char_to_idx[ch] for ch in name[1:]])
            input_onehot = onehot_tensor[input_idx]
            logits,_ = rnn(input_onehot)
            # Scale so the accumulated gradient is the mean over the batch;
            # backward per name keeps only one graph alive at a time.
            loss = criterion(logits,target_idx) / len(usable)
            loss.backward()
            # .item() detaches the value; summing the tensor itself would
            # keep a grad_fn attached to the running total.
            step_loss += loss.item()
        optim.step()
        total_loss += step_loss
        n_steps += 1
    print(f"Epoch {j+1}, Loss: {total_loss / max(n_steps, 1):.4f}")

tensor([-0.8777, -0.3559,  0.3949,  0.7247, -0.9587,  0.2751, -0.5884, -0.5607],
       grad_fn=<TanhBackward0>)
tensor([ 0.7408,  0.3656, -0.6715, -0.9002, -0.9963,  0.3132,  0.9787, -0.9964],
       grad_fn=<TanhBackward0>)
tensor([-0.9911, -0.9999, -0.9826, -0.2418,  0.2683, -0.2813,  0.6620, -0.8598],
       grad_fn=<TanhBackward0>)
tensor([-0.9070,  0.9660, -0.8133, -0.1285,  0.9724,  0.5443, -0.6722,  0.9852],
       grad_fn=<TanhBackward0>)
[tensor([-0.8777, -0.3559,  0.3949,  0.7247, -0.9587,  0.2751, -0.5884, -0.5607],
       grad_fn=<TanhBackward0>), tensor([ 0.7408,  0.3656, -0.6715, -0.9002, -0.9963,  0.3132,  0.9787, -0.9964],
       grad_fn=<TanhBackward0>), tensor([-0.9911, -0.9999, -0.9826, -0.2418,  0.2683, -0.2813,  0.6620, -0.8598],
       grad_fn=<TanhBackward0>), tensor([-0.9070,  0.9660, -0.8133, -0.1285,  0.9724,  0.5443, -0.6722,  0.9852],
       grad_fn=<TanhBackward0>)]
tensor([-0.7776,  0.7231,  0.8350,  0.8251, -1.0000,  0.7421,  0.9888,  0.9996],
       gra

KeyboardInterrupt: 

In [None]:
# Write the model's current parameters to disk ...
weights_file = "rnn_ln_weights.pth"
current_state = rnn.state_dict()
torch.save(current_state, weights_file)

# ... and read them straight back into the same model instance.
restored_state = torch.load(weights_file)
rnn.load_state_dict(restored_state)

In [None]:
# sampling
# Draw a random first character. The terminator "\n" is excluded: starting
# on it would produce a name that begins with the end-of-name marker.
start = str(np.random.choice([ch for ch in chars if ch != "\n"]))
print(start)
with torch.no_grad():
    output_name = start
    for _step in range(20):  # hard cap on the number of generated characters
        # Feed the whole prefix generated so far, in the same (seq_len, vocab)
        # one-hot layout the training cell uses, and read the prediction at
        # the last position. This avoids relying on how RNNv2 expects a
        # carried hidden state to be passed back in.
        prefix_idx = torch.tensor([char_to_idx[ch] for ch in output_name])
        logits, _ = rnn(onehot_tensor[prefix_idx])
        # The model emits raw logits (it is trained with CrossEntropyLoss);
        # torch.multinomial needs non-negative weights, so normalise first.
        probs = torch.softmax(logits[-1], dim=-1)
        idx = torch.multinomial(probs,1).item()
        letter = idx_to_char[idx]
        if letter == "\n":
            # "\n" terminates every training name, so it marks end-of-name.
            break
        output_name += letter
print(output_name)


### LSTM

### Seq2Seq

In [None]:
import torch
