# Starting code

In [2]:
import numpy as np
import lightning as L
import torch
import torch.nn.functional as F
from torch import nn
from torch.utils.data import Dataset, DataLoader

# Data

In [3]:
# Load the corpus and lower-case it so the vocabulary stays small.
# The context manager closes the file handle as soon as the text is read.
with open("input.txt", "r", encoding="UTF-8") as corpus_file:
    data = corpus_file.read().lower()

# Sort the vocabulary: the iteration order of a set of strings can change
# between interpreter runs (string hash randomization), which would silently
# change every char <-> index mapping after a kernel restart.
chars = sorted(set(data))
data_size, vocab_size = len(data), len(chars)
print("data has %d characters, %d unique." % (data_size, vocab_size))

char_to_idx = {ch: i for i, ch in enumerate(chars)}
idx_to_char = {i: ch for i, ch in enumerate(chars)}


def str_to_idx(x):
    """Map a string to the list of vocabulary indices of its characters."""
    return [char_to_idx[ch] for ch in x]


def idx_to_str(x):
    """Map an iterable of vocabulary indices back to the string they encode."""
    return "".join(idx_to_char[i] for i in x)


def tensor_to_str(x):
    """Map an iterable of one-element tensors (each read via ``.item()``) to a string."""
    return "".join(idx_to_char[i.item()] for i in x)


def str_to_tensor(x, device="cpu"):
    """Encode a string as a 1-D tensor of vocabulary indices.

    Args:
        x: String whose characters are all keys of ``char_to_idx``.
        device: Device on which the tensor is created.

    Returns:
        An int64 tensor holding one index per character of ``x``.

    Raises:
        KeyError: If a character of ``x`` is missing from ``char_to_idx``.
    """
    # The explicit dtype keeps the result integral even for an empty string;
    # without it torch.tensor([]) falls back to the default float dtype.
    return torch.tensor(
        [char_to_idx[ch] for ch in x], dtype=torch.long, device=device
    )

data has 1115394 characters, 39 unique.


In [4]:
class TextSampler(Dataset):
    """Dataset of consecutive, non-overlapping character windows.

    Sample ``idx`` covers ``seq_size + 1`` characters of ``data``. The first
    ``seq_size`` characters are the input and the same window shifted by one
    character is the target. A trailing remainder shorter than one full
    window is never returned.
    """

    def __init__(self, data, seq_size):
        """Store the text and the window length.

        Args:
            data: The full text to sample from.
            seq_size: Number of input characters per sample.
        """
        # Stored as the full window length: the inputs plus one look-ahead
        # character needed to build the shifted target.
        self.seq_size = seq_size + 1
        self.data = data

    def __len__(self):
        """Return the number of complete windows that fit in the text."""
        return len(self.data) // self.seq_size

    def __getitem__(self, idx):
        """Return the ``(x, y)`` index arrays of window ``idx``.

        Args:
            idx: Window number; negative values count from the end.

        Returns:
            Two int64 arrays of length ``seq_size``: the encoded input
            characters and the encoded characters one position later.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        count = len(self)
        if idx < 0:
            idx = idx + count
        # Raising IndexError is what lets plain iteration over the dataset
        # terminate; without it out-of-range windows come back empty forever.
        if not 0 <= idx < count:
            raise IndexError("TextSampler index out of range")

        start = idx * self.seq_size
        gen = str_to_idx(self.data[start : start + self.seq_size])
        # int64 explicitly: the builtin ``int`` dtype is platform dependent,
        # while one-hot encoding and cross-entropy targets need 64-bit indices.
        x = np.array(gen[:-1], dtype=np.int64)
        y = np.array(gen[1:], dtype=np.int64)

        return x, y

# Vanilla RNN layer

In [5]:
class VanillaRNN(L.LightningModule):
    """Single recurrent layer with a LeakyReLU activation.

    The layer processes one unbatched sequence at a time and keeps its hidden
    state as a column vector of shape ``(hidden_size, 1)``.
    """

    def __init__(self, hidden_size, vocab_size):
        """Create the input, recurrent and bias parameters.

        Args:
            hidden_size: Number of hidden units.
            vocab_size: Length of each input vector.
        """
        super().__init__()
        self.hidden_size = hidden_size
        # Weights start as N(0, 1) samples scaled by 0.01; the bias starts at 0.
        self.Wxh = nn.Parameter(
            torch.normal(0, 1, size=(hidden_size, vocab_size)) * 0.01
        )
        self.Whh = nn.Parameter(
            torch.normal(0, 1, size=(hidden_size, hidden_size)) * 0.01
        )
        self.bh = nn.Parameter(torch.zeros(size=(hidden_size, 1), dtype=torch.float32))

    def forward(self, x, h_prev=None):
        """Run the recurrence over one sequence.

        Args:
            x: 2-D tensor whose first dimension is time; each row is
                multiplied by ``Wxh``, so rows have ``vocab_size`` entries.
            h_prev: Initial hidden state of shape ``(hidden_size, 1)``.
                ``None`` starts from an all-zero state.

        Returns:
            ``(hiddens, last)`` where ``hiddens`` has shape
            ``(seq_length, hidden_size, 1)`` and ``last`` is the hidden state
            after the final step (the initial state for an empty sequence).
        """
        seq_length, _ = x.shape
        # Turn every time step into a column vector for the matrix products.
        x = torch.unsqueeze(x, dim=-1)

        if h_prev is None:
            # Honour the documented default instead of failing on `Whh @ None`.
            h_prev = torch.zeros_like(self.bh)

        hiddens = torch.zeros(
            size=(seq_length, self.hidden_size, 1),
            dtype=torch.float32,
            device=self.device,
        )

        for t in range(seq_length):
            h_prev = F.leaky_relu(self.Wxh @ x[t] + self.Whh @ h_prev + self.bh)
            hiddens[t] = h_prev

        # h_prev equals hiddens[-1] after at least one step and, unlike the
        # index expression, is also valid when the sequence is empty.
        return hiddens, h_prev

# LSTM layer

In [6]:
class LSTM(L.LightningModule):
    """Single LSTM layer written out gate by gate.

    ``U`` holds the four input weight matrices, ``W`` the four recurrent
    weight matrices and ``b`` the four biases, each in the order
    input gate, forget gate, output gate, cell candidate. One unbatched
    sequence is processed at a time with column-vector states.
    """

    def __init__(self, hidden_size, vocab_size):
        """Create the gate parameters.

        Args:
            hidden_size: Number of hidden units.
            vocab_size: Length of each input vector.
        """
        super().__init__()
        self.hidden_size = hidden_size
        # Each tensor is wrapped in nn.Parameter explicitly so registration
        # does not depend on ParameterList converting raw tensors implicitly.
        self.U = nn.ParameterList(
            [
                nn.Parameter(torch.normal(0, 1, size=(hidden_size, vocab_size)) * 0.01)
                for _ in range(4)
            ]
        )
        self.W = nn.ParameterList(
            [
                nn.Parameter(torch.normal(0, 1, size=(hidden_size, hidden_size)) * 0.01)
                for _ in range(4)
            ]
        )
        self.b = nn.ParameterList(
            [
                nn.Parameter(torch.zeros(size=(hidden_size, 1), dtype=torch.float32))
                for _ in range(4)
            ]
        )

    def forward(self, x, state):
        """Run the LSTM over one sequence.

        Args:
            x: 2-D tensor whose first dimension is time; each row is
                multiplied by the ``U`` matrices, so rows have ``vocab_size``
                entries.
            state: Pair ``(h, c)`` of initial hidden and cell states, each of
                shape ``(hidden_size, 1)``; a stacked tensor also unpacks.

        Returns:
            ``(hiddens, state)`` where ``hiddens`` has shape
            ``(seq_length, hidden_size, 1)`` and ``state`` stacks the final
            hidden and cell states into shape ``(2, hidden_size, 1)``.
        """
        Wi, Wf, Wo, Wc = self.W
        Ui, Uf, Uo, Uc = self.U
        bi, bf, bo, bc = self.b

        seq_length, _ = x.shape
        # Turn every time step into a column vector for the matrix products.
        x = torch.unsqueeze(x, dim=-1)

        hiddens = torch.zeros(
            size=(seq_length, self.hidden_size, 1),
            dtype=torch.float32,
            device=self.device,
        )

        h_prev, c_prev = state

        for t in range(seq_length):
            # Descriptive gate names: a local called `F` would shadow the
            # file-level `torch.nn.functional` alias inside this method.
            forget_gate = torch.sigmoid(Uf @ x[t] + Wf @ h_prev + bf)
            input_gate = torch.sigmoid(Ui @ x[t] + Wi @ h_prev + bi)
            output_gate = torch.sigmoid(Uo @ x[t] + Wo @ h_prev + bo)
            candidate = torch.tanh(Uc @ x[t] + Wc @ h_prev + bc)
            c_prev = forget_gate * c_prev + input_gate * candidate
            h_prev = output_gate * torch.tanh(c_prev)
            hiddens[t] = h_prev

        # The running states equal the last stored step and remain valid for
        # an empty sequence, where indexing with -1 would raise.
        return hiddens, torch.stack([h_prev, c_prev])

# Generator Module

In [7]:
class ShakespheareGenerator(L.LightningModule):
    def __init__(self, recurrent_layer, hidden_size, vocab_size):
        super().__init__()
        self.train_loss = []

        self.hidden_size = hidden_size
        self.vocab_size = vocab_size

        self.rnn = recurrent_layer(hidden_size, vocab_size)
        self.dense = nn.Linear(hidden_size, vocab_size, bias=True)

        self.hidden_state = 0
        self.reset_hidden()

    def forward(self, x, state):
        hiddens, state = self.rnn(x, state)
        out = self.dense(hiddens.squeeze(dim=-1))

        return out, state

    def training_step(self, batch, batch_idx):
        x, y = batch
        x, y = x[0], y[0]  # remove batch
        x = F.one_hot(x, self.vocab_size).float()
        out, h_prev = self.forward(x, self.hidden_state.clone().detach())
        self.hidden_state = h_prev.clone().detach()
        loss = nn.CrossEntropyLoss()(out, y)

        self.train_loss.append(loss.item())
        return loss

    def reset_hidden(self):
        if type(self.rnn) == VanillaRNN:
            self.hidden_state = torch.zeros(
                size=(self.hidden_size, 1), dtype=torch.float32, device=self.device
            )
        elif type(self.rnn) == LSTM:
            self.hidden_state = torch.stack(
                [
                    torch.zeros(
                        size=(self.hidden_size, 1),
                        dtype=torch.float32,
                        device=self.device,
                    ),
                    torch.zeros(
                        size=(self.hidden_size, 1),
                        dtype=torch.float32,
                        device=self.device,
                    ),
                ]
            )

    def on_train_epoch_start(self):
        self.reset_hidden()

    def on_train_epoch_end(self):
        avg_train_acc = sum(self.train_loss) / len(self.train_loss)
        self.train_loss.clear()
        self.reset_hidden()
        self.print(f"Epoch {self.current_epoch} Training Loss: {avg_train_acc:.5f}")
        print("------------------------------------")
        with torch.no_grad():
            print(self.generate("duke vincentio:"))
        print("------------------------------------")

    def generate(self, start="d", max_len=100):
        ret = start

        self.reset_hidden()
        last_hidden = self.hidden_state

        # accelerate the generation with starting sequence
        input = F.one_hot(
            str_to_tensor(start, device=self.device), self.vocab_size
        ).float()
        out, last_hidden = self.forward(input, last_hidden)
        out = torch.softmax(out, dim=-1)[-1]
        next = torch.multinomial(out, num_samples=1).squeeze()
        input = F.one_hot(next, vocab_size).float()
        ret = ret + idx_to_char[next.item()]

        # then generate the rest 1 char by 1 char using previous hiddens
        for t in range(max_len):
            input = F.one_hot(
                str_to_tensor(ret[-1], device=self.device), self.vocab_size
            ).float()
            out, last_hidden = self.forward(input, last_hidden)
            out = torch.softmax(out, dim=-1)
            next = torch.multinomial(out, num_samples=1).squeeze()
            input = F.one_hot(next, vocab_size).float()
            ret = ret + idx_to_char[next.item()]

        return ret

    def configure_optimizers(self):
        opt = torch.optim.Adam(self.parameters(), lr=1e-3)
        return opt

# Vanilla RNN training

In [8]:
hidden_size = 128
seq_size = 40

# Seed PyTorch before the model is built so weight initialisation and the
# sampling below start from the same random state on every run.
torch.manual_seed(0)

model = ShakespheareGenerator(VanillaRNN, hidden_size, vocab_size)

train_dataset = TextSampler(data, seq_size=seq_size)
# shuffle=False keeps the windows in text order, which the model relies on
# when it carries its hidden state from one training step to the next.
train_loader = DataLoader(
    train_dataset, batch_size=1, shuffle=False, num_workers=2, pin_memory=True
)

# "auto" selects an available accelerator instead of requiring a GPU.
trainer = L.Trainer(max_epochs=25, accelerator="auto")
trainer.fit(model, train_loader)

separator = "---------------------------------------------"
prompts = ("duke vincentio:", "romeo:", "juliet:", "a")

# One 1000-step sample per prompt, each framed by separator lines.
with open("rnn_output.txt", "w", encoding="UTF-8") as f:
    print(separator, file=f)
    for prompt in prompts:
        print(model.generate(start=prompt, max_len=1000), file=f)
        print(separator, file=f)
    print("---------------------------------------------", file=f)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type       | Params
-------------------------------------
0 | rnn   | VanillaRNN | 21.5 K
1 | dense | Linear     | 5.0 K 
-------------------------------------
26.5 K    Trainable params
0         Non-trainable params
26.5 K    Total params
0.106     Total estimated model params size (MB)
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Epoch 0 Training Loss: 1.90169
------------------------------------
duke vincentio:
i dout poen othe met'st; died? woulh non cone to are fargh rans, acome, which grief you asthing lild
------------------------------------
Epoch 1 Training Loss: 1.70002
------------------------------------
duke vincentio:
thanks autist?

sebastian:
no dobless they's. i weaps. with being not more more ishid dell ol eldew'
------------------------------------
Epoch 2 Training Loss: 1.66014
------------------------------------
duke vincentio:
to thancolm it them off, count the provirningst you dishormorat; and wathsef
a son.

antonio:
sappon
------------------------------------
Epoch 3 Training Loss: 1.64097
------------------------------------
duke vincentio: to the broke?

fortoyor wron all'd, whencinightly
out they home to remofinast'd genslifuy got by rub
------------------------------------
Epoch 4 Training Loss: 1.62817
------------------------------------
duke vincentio: that?

sebastian:
they time 

`Trainer.fit` stopped: `max_epochs=25` reached.


Epoch 24 Training Loss: 1.58567
------------------------------------
duke vincentio:
then do i we say, companched thoo, his will wed
are: and my father the son, whiloul she is to cravin
------------------------------------


# LSTM training

In [9]:
hidden_size = 128
seq_size = 40

# Seed PyTorch before the model is built so weight initialisation and the
# sampling below start from the same random state on every run.
torch.manual_seed(0)

model = ShakespheareGenerator(LSTM, hidden_size, vocab_size)

train_dataset = TextSampler(data, seq_size=seq_size)
# shuffle=False keeps the windows in text order, which the model relies on
# when it carries its hidden state from one training step to the next.
train_loader = DataLoader(
    train_dataset, batch_size=1, shuffle=False, num_workers=2, pin_memory=False
)

# "auto" selects an available accelerator instead of requiring a GPU.
trainer = L.Trainer(max_epochs=25, accelerator="auto")
trainer.fit(model, train_loader)

separator = "---------------------------------------------"
prompts = ("duke vincentio:", "romeo:", "juliet:", "a")

# One 1000-step sample per prompt, each framed by separator lines. The former
# extra generate() call was dropped: its result was neither shown nor stored.
with open("lstm_output.txt", "w", encoding="UTF-8") as f:
    print(separator, file=f)
    for prompt in prompts:
        print(model.generate(start=prompt, max_len=1000), file=f)
        print(separator, file=f)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type   | Params
---------------------------------
0 | rnn   | LSTM   | 86.0 K
1 | dense | Linear | 5.0 K 
---------------------------------
91.0 K    Trainable params
0         Non-trainable params
91.0 K    Total params
0.364     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Epoch 0 Training Loss: 1.83613
------------------------------------
duke vincentio: there my lough
urmosterly pleastily as
thay soke i'll breaken!' thou. not be
vingi
age old less, as 
------------------------------------
Epoch 1 Training Loss: 1.57221
------------------------------------
duke vincentio:
good life she dith wanworl men, to-katishes.

gonzalo:
indoth! what? you wam so persent?

alonso:
yo
------------------------------------
Epoch 2 Training Loss: 1.50511
------------------------------------
duke vincentio:
o honourn contuban; besevives it
jest om enour but offends'
so so, let my maps'dle him!

aliustio:
a
------------------------------------
Epoch 3 Training Loss: 1.47096
------------------------------------
duke vincentio:
he gentlewonest embrace;
and person of the down'd awad me; my book.

gonzalo:
now i dread the world 
------------------------------------
Epoch 4 Training Loss: 1.44941
------------------------------------
duke vincentio:
to proverse been spirits the

`Trainer.fit` stopped: `max_epochs=25` reached.


Epoch 24 Training Loss: 1.36831
------------------------------------
duke vincentio:
nothing brother of a dogat out sluck words!

petruchio:
or thine sister?

lucento:
hast thou hear th
------------------------------------
