RNN

In [1]:
import torch
from torch import nn
import torch.functional as F
import numpy as np

In [27]:
class RNN(nn.Module):

    def __init__(self,embed_dim,hidden_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.W_xh= nn.Parameter(torch.randn(hidden_dim,embed_dim))
        self.W_hh = nn.Parameter(torch.randn(hidden_dim,hidden_dim))
        self.W_hy = nn.Parameter(torch.randn(embed_dim,hidden_dim))
        self.B_h = nn.Parameter(torch.randn(hidden_dim))
        self.B_y = nn.Parameter(torch.randn(embed_dim))
        self.h = None
        self.tanh = nn.Tanh()
        self.softmax = nn.Softmax(dim=-1)
        self.init_hidden()
    def forward(self,x):
        self.h= self.tanh((self.W_hh@self.h) + (self.W_xh@x) + self.B_h)
        return self.softmax((self.W_hy@self.h)+self.B_y)
    
    def init_hidden(self):
        device = torch.device("cpu")
        self.h = torch.zeros((self.hidden_dim),requires_grad=False).to(device)

In [28]:
with open("data/names.txt") as f:
    names = f.readlines()

In [29]:
chars = sorted(set("".join(names)))
vocab_size=len(chars)

In [30]:
char_to_idx = {ch:i for i, ch in enumerate(chars)}
idx_to_char = {i:ch for i,ch in enumerate(chars)}

In [31]:
device = torch.device("cpu")

In [None]:
# initialize
n_epochs = 20
print_every = 1000
learning_rate = 0.99
rnn = RNN(vocab_size,256).to(device)
onehot_tensor = torch.eye(vocab_size)
criterion = nn.CrossEntropyLoss(reduction="mean")
optim = torch.optim.Adam(rnn.parameters(),learning_rate)
onehot_tensor = torch.eye(vocab_size) # look up table based on index of char

In [62]:
# Dataset
class NameDataset(torch.utils.data.Dataset):

    def __init__(self,names: list[str]):
        super().__init__()
        self.names = names

    def __len__(self):
        return len(self.names)
    
    def __getitem__(self, index):
        return self.names[index]

In [63]:
# Dataloader
dataloader = torch.utils.data.DataLoader(dataset=NameDataset(names=names),shuffle=True)

In [64]:
# training
for j in range(n_epochs):
    total_loss= 0
    for name in dataloader:
        input_idx = torch.tensor([char_to_idx[ch] for ch in name[0][:-1]])
        target_idx = torch.tensor([char_to_idx[ch] for ch in name[0][1:]]).to(device)
        input_onehot = onehot_tensor[input_idx].to(device)
        loss =0
        rnn.zero_grad()
        rnn.init_hidden()
        for i in range(len(name[0])-1):
            output = rnn(input_onehot[i])
            loss += criterion(output,target_idx[i])
        loss/=len(name[0])-1
        loss.backward()
        optim.step()
        total_loss+=loss
        # if n_epochs % print_every == 0:
        #     print(f"Epoch {n_epochs}, Loss: {loss.item() / input_idx.size(0):.4f}")
    print(f"Epoch {j+1}, Loss: {total_loss.item() / len(dataloader):.4f}")

Epoch 1, Loss: 3.2215
Epoch 2, Loss: 3.2330
Epoch 3, Loss: 3.3193
Epoch 4, Loss: 3.3091
Epoch 5, Loss: 3.3104


KeyboardInterrupt: 

In [None]:




# sampling
start = np.random.choice(chars)
print(start)
with torch.no_grad():
    input = onehot_tensor[char_to_idx[start]]
    rnn.init_hidden()
    output_name = start
    for _ in range(20):
        output = rnn(input)
        idx = torch.multinomial(output,1)
        idx = idx.item()
        if idx_to_char[idx]=="\n":
            break
        else:
            letter = idx_to_char[idx] 
            output_name+=letter
            input = onehot_tensor[char_to_idx[letter]]


j


In [16]:
print(output_name)

jscqkzfgzykqfofakcyzu


### Seq2Seq

In [None]:
import torch
