In [1]:
f = open("./TinyShakespeare/input.txt")
text = f.read()
print(text[:100])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You


In [2]:
chars = sorted(list(set(text)))
vocab_size = len(chars)
vocab_size

65

In [3]:
char_to_idx = {ch:i for i, ch in enumerate(chars)}
idx_to_char = {i:ch for i, ch in enumerate(chars)}

def encode(text):
    return [char_to_idx[ch] for ch in text]

def decode(idx):
    return [idx_to_char[i] for i in idx]

In [4]:
import torch
data = torch.tensor(encode(text), dtype=torch.long)
data.shape

torch.Size([1115394])

In [5]:
n = int(0.9 * len(data))
train_data = data[:n]
test_data = data[n:]

In [6]:
sequence_length = 8
x = train_data[:sequence_length]
y = train_data[1:sequence_length + 1]
for t in range(sequence_length):
    context = x[:t + 1]
    target = y[t]
    print(context, target)
x.shape

tensor([18]) tensor(47)
tensor([18, 47]) tensor(56)
tensor([18, 47, 56]) tensor(57)
tensor([18, 47, 56, 57]) tensor(58)
tensor([18, 47, 56, 57, 58]) tensor(1)
tensor([18, 47, 56, 57, 58,  1]) tensor(15)
tensor([18, 47, 56, 57, 58,  1, 15]) tensor(47)
tensor([18, 47, 56, 57, 58,  1, 15, 47]) tensor(58)


torch.Size([8])

In [7]:
import torch
from torch.utils.data import Dataset
import random

class ContextTargetDataset(Dataset):
    def __init__(self, data, window_size):
        self.data = data
        self.window_size = window_size

    def __len__(self):
        return len(self.data) - self.window_size

    def __getitem__(self, idx):
        start_idx = random.randint(0, len(self.data) - self.window_size - 1)
        x = self.data[start_idx : start_idx + self.window_size]
        y = self.data[start_idx + 1 : start_idx + self.window_size + 1]
        
        return x, y

    def collate_fn(batch):
        xs, ys = zip(*batch)
        return torch.stack(xs), torch.stack(ys)



In [8]:
from torch.utils.data import DataLoader
train_dataset = ContextTargetDataset(train_data, window_size=8)
train_loader = DataLoader(train_dataset, batch_size=2)

test_dataset = ContextTargetDataset(test_data, window_size=8)
test_loader = DataLoader(test_dataset, batch_size=2)
for batch_x, batch_y in test_loader:
    print("X: ", batch_x)
    print("Y: ", batch_y)
    break

X:  tensor([[61, 47, 50, 50,  1, 40, 43,  1],
        [59, 58,  1, 58, 46, 56, 43, 43]])
Y:  tensor([[47, 50, 50,  1, 40, 43,  1, 58],
        [58,  1, 58, 46, 56, 43, 43,  1]])


In [9]:
embed_dim = 32
hidden_size = 64
output_size = vocab_size
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# RNN

In [10]:
import torch
import torch.nn as nn

class RNN(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_size, output_size):
        super(RNN, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        
        self.hidden_size = hidden_size
        self.W_xh = nn.Parameter(torch.randn(hidden_size, embed_dim))
        self.W_hh = nn.Parameter(torch.randn(hidden_size, hidden_size))
        self.b_h = nn.Parameter(torch.zeros(hidden_size))
        
        self.W_hy = nn.Parameter(torch.randn(output_size, hidden_size))
        self.b_y = nn.Parameter(torch.zeros(output_size))

    def step(self, x_t, h_t):
        h_t = torch.tanh(x_t @ self.W_xh.T + h_t @ self.W_hh.T + self.b_h)
        y_t = h_t @ self.W_hy.T + self.b_y
        return y_t, h_t

    def forward(self, x):
        batch_size, seq_len = x.shape
        x_embed = self.embedding(x)
        h_t = torch.zeros(batch_size, self.hidden_size).to(device)

        outputs = []
        
        for t in range(seq_len):
            x_t = x_embed[:,t, :]

            y_t, h_t = self.step(x_t, h_t)
            outputs.append(y_t)
            
        outputs = torch.stack(outputs, dim=1)
        return outputs

    def generate(self, start_token, sample_size = 100):
        self.eval()
    
        input_seq = torch.tensor([[char_to_idx[start_token]]], dtype=torch.long).to(device)
        generated = [start_token]
        
        h_t = torch.zeros(1, model.hidden_size).to(device)
    
        with torch.no_grad():
            for _ in range(sample_size):
                x_embed = model.embedding(input_seq[:, -1])
                y_t, h_t = self.step(x_embed, h_t)
                
                probs = torch.softmax(y_t, dim=-1)
                next_token_id = torch.multinomial(probs, num_samples=1).item()
                
                next_token = idx_to_char[next_token_id]
                generated.append(next_token)
                
                input_seq = torch.cat([input_seq, torch.tensor([[next_token_id]]).to(device)], dim=1)
        
        return ''.join(generated)

In [11]:
model = RNN(vocab_size, embed_dim, hidden_size, output_size)

In [12]:
from tqdm.notebook import tqdm
train_loader = DataLoader(train_dataset, batch_size=32)
epochs = 5
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
criterion = torch.nn.CrossEntropyLoss()

model.to(device)

for epoch in range(epochs):
    model.train()
    total_loss = 0

    loop = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")

    for  i, (batch_x, batch_y) in enumerate(loop, 1):
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        outputs = model(batch_x)
        loss = criterion(outputs.view(-1, vocab_size), batch_y.view(-1))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

        avg_loss = total_loss / i
        loop.set_postfix(batch_loss=loss.item(), avg_loss=avg_loss)

Epoch 1/5:   0%|          | 0/31371 [00:00<?, ?it/s]

Epoch 2/5:   0%|          | 0/31371 [00:00<?, ?it/s]

Epoch 3/5:   0%|          | 0/31371 [00:00<?, ?it/s]

Epoch 4/5:   0%|          | 0/31371 [00:00<?, ?it/s]

Epoch 5/5:   0%|          | 0/31371 [00:00<?, ?it/s]

In [13]:
print(model.generate(start_token='\n', sample_size = 500))


The led so fdition, guither.' I save.
O, a more in it griter, tir, thy gatenty houset
Bramen purgel I will you gore ats in hork,
Thou fist, proo if!
What on they -

You foltor on to the folt pringhall's poonery
selfollo, the prace,
To you tie thou should gry a loak'd we it whfances do boman's nation
Resing in be my gracuetes-liveich rik too cum mailt.

TIO:
He-losectirs the Mowisback it jo: thou!
And stare no muct our papear my made, poor their Marcan mutore bUtreet.

Sear.

BRUTUS:
As we shall 


# Torch RNN

In [14]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from tqdm.notebook import tqdm

class TorchRNN(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_size, output_size):
        super(TorchRNN, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.rnn = nn.RNN(embed_dim, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x_embed = self.embedding(x)
        out, _ = self.rnn(x_embed)
        logits = self.fc(out)
        return logits

    def generate(self, start_token, sample_size = 100):
        self.eval()
        generated = [start_token]
        
        input_token = torch.tensor([[char_to_idx.get(start_token)]]).to(device)
        h_t = None
        with torch.no_grad():
            for _ in range(sample_size):
                logits = self.forward(input_token)
                logits = logits[:, -1, :] 
                probs = torch.softmax(logits, dim=-1)
                
                next_token_id = torch.multinomial(probs, num_samples=1).item()
                next_token = idx_to_char[next_token_id]
                generated.append(next_token)
                
                input_token = torch.tensor([[next_token_id]]).to(device)
    
        return ''.join(generated)


In [15]:
torch_model = TorchRNN(vocab_size, embed_dim, hidden_size, output_size).to(device)

In [16]:
torch_optimizer = torch.optim.AdamW(torch_model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

for epoch in range(5):
    torch_model.train()
    total_loss = 0
    loop = tqdm(train_loader, desc=f"Epoch {epoch+1}/5")

    for i, (batch_x, batch_y) in enumerate(loop, 1):
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)
        outputs = torch_model(batch_x)

        loss = criterion(outputs.view(-1, vocab_size), batch_y.view(-1))

        torch_optimizer.zero_grad()
        loss.backward()
        torch_optimizer.step()

        total_loss += loss.item()
        avg_loss = total_loss / i
        loop.set_postfix(batch_loss=loss.item(), avg_loss=avg_loss)

Epoch 1/5:   0%|          | 0/31371 [00:00<?, ?it/s]

Epoch 2/5:   0%|          | 0/31371 [00:00<?, ?it/s]

Epoch 3/5:   0%|          | 0/31371 [00:00<?, ?it/s]

Epoch 4/5:   0%|          | 0/31371 [00:00<?, ?it/s]

Epoch 5/5:   0%|          | 0/31371 [00:00<?, ?it/s]

In [17]:
output = torch_model.generate(start_token = '\n', sample_size = 500)
print(output)


Thoury?
Theageser the wed ameayoorrd wo'sthe oth wherepe esthothand t. olt tr yoppplames l, isea ik thig sthepond:
Ar gioug ugeear t prbarel,-w tht thereas ishararouthe rd oou at fe paindr y bey ghoourin'lor llfint to,

MI t l l
HAce peme blswout loothe alt he shir Sond y

LAUSent. bol.

COPERIfe t
GLAMyome lfouererorowinsthe.
GUSe hasthend ighally ot mowo, woun ourd te wid ong:
Thit s sakeds hot mer s l lk Gofthmorifistlleanols h bed lere domy prr!

Torrorway iend plo thin.
Bucemu.
Wieeall heth


# LSTM

In [18]:
import torch
import torch.nn as nn

class LSTM(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_size, output_size):
        super(LSTM, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)

        self.hidden_size = hidden_size
        self.W_ii = nn.Parameter(torch.randn(hidden_size, embed_dim))
        self.W_hi = nn.Parameter(torch.randn(hidden_size, hidden_size))

        self.W_if = nn.Parameter(torch.randn(hidden_size, embed_dim))
        self.W_hf = nn.Parameter(torch.randn(hidden_size, hidden_size))

        self.W_ig = nn.Parameter(torch.randn(hidden_size, embed_dim))
        self.W_hg = nn.Parameter(torch.randn(hidden_size, hidden_size))

        self.W_io = nn.Parameter(torch.randn(hidden_size, embed_dim))
        self.W_ho = nn.Parameter(torch.randn(hidden_size, hidden_size))

        self.b_ii = nn.Parameter(torch.zeros(hidden_size))
        self.b_hi = nn.Parameter(torch.zeros(hidden_size))
        self.b_if = nn.Parameter(torch.zeros(hidden_size))
        self.b_hf = nn.Parameter(torch.zeros(hidden_size))
        self.b_ig = nn.Parameter(torch.zeros(hidden_size))
        self.b_hg = nn.Parameter(torch.zeros(hidden_size))
        self.b_io = nn.Parameter(torch.zeros(hidden_size))
        self.b_ho = nn.Parameter(torch.zeros(hidden_size))

        self.fc_out = nn.Linear(hidden_size, output_size)

    def step(self, x_t, h_t, c_t):
        i_t = torch.sigmoid(x_t @ self.W_ii.T + self.b_ii + h_t @ self.W_hi.T + self.b_hi)
        f_t = torch.sigmoid(x_t @ self.W_if.T + self.b_if + h_t @ self.W_hf.T + self.b_hf)
        g_t = torch.tanh(x_t @ self.W_ig.T + self.b_ig + h_t @ self.W_hg.T + self.b_hg)
        o_t = torch.sigmoid(x_t @ self.W_io.T + self.b_io + h_t @ self.W_ho.T + self.b_ho)

        c_t = f_t * c_t + i_t * g_t
        h_t = o_t * torch.tanh(c_t)

        return h_t, c_t
    
    def forward(self, x):
        batch_size, seq_len = x.shape
        x_embed = self.embedding(x)
        h_t = torch.zeros(batch_size, self.hidden_size).to(device)
        c_t = torch.zeros(batch_size, self.hidden_size).to(device)
        
        outputs = []
        for t in range(seq_len):
            x_t = x_embed[:,t, :]
            h_t, c_t = self.step(x_t, h_t, c_t)
            
            outputs.append(h_t)
            
        outputs = torch.stack(outputs, dim=1)
        outputs = self.fc_out(outputs)
        return outputs

    def generate(self, start_token, sample_size=100):
        self.eval()
        input_seq = torch.tensor([[char_to_idx[start_token]]], dtype=torch.long).to(device)
        generated = [start_token]

        h_t = torch.zeros(1, self.hidden_size).to(device)
        c_t = torch.zeros(1, self.hidden_size).to(device)

        with torch.no_grad():
            for _ in range(sample_size):
                x_embed = self.embedding(input_seq[:, -1])
                h_t, c_t = self.step(x_embed, h_t, c_t)
                y_t = self.fc_out(h_t)
                probs = torch.softmax(y_t, dim=-1)
                next_token_id = torch.multinomial(probs, num_samples=1).item()
                next_token = idx_to_char[next_token_id]
                generated.append(next_token)

                input_seq = torch.cat([input_seq, torch.tensor([[next_token_id]]).to(device)], dim=1)

        return ''.join(generated)

In [19]:
lstm_model = LSTM(vocab_size, embed_dim, hidden_size, output_size)

In [20]:
from tqdm.notebook import tqdm
train_loader = DataLoader(train_dataset, batch_size=32)
epochs = 5
optimizer = torch.optim.AdamW(lstm_model.parameters(), lr=1e-3)
criterion = torch.nn.CrossEntropyLoss()

lstm_model.to(device)

for epoch in range(epochs):
    lstm_model.train()
    total_loss = 0

    loop = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")

    for  i, (batch_x, batch_y) in enumerate(loop, 1):
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        outputs = lstm_model(batch_x)
        loss = criterion(outputs.view(-1, vocab_size), batch_y.view(-1))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

        avg_loss = total_loss / i
        loop.set_postfix(batch_loss=loss.item(), avg_loss=avg_loss)

Epoch 1/5:   0%|          | 0/31371 [00:00<?, ?it/s]

Epoch 2/5:   0%|          | 0/31371 [00:00<?, ?it/s]

Epoch 3/5:   0%|          | 0/31371 [00:00<?, ?it/s]

Epoch 4/5:   0%|          | 0/31371 [00:00<?, ?it/s]

Epoch 5/5:   0%|          | 0/31371 [00:00<?, ?it/s]

In [21]:
output = lstm_model.generate("\n", 500)
print(output)


No, pins, his dead?

PERIVA:
So shalt run is full the come to disnorn sorry. Come,
For no bardon thou'resited, where the very
Tough demiest o', and that Rebet, me now woman granks at you fraizens.
NBEONTIFF YORK:
I come
'Tis confest rever me my grach ourou a patch
Forself it refer her his sine!
I truly here I,
And as to the of it habt of anse not only.

LEONTES:
On I'll slails: theraminess ob'd frame such live sworn bowent:
I save to hand, Pace, of spire etturs:
Granly
givn to in old may hence a
