In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import tqdm
import lightning.pytorch as pl
import pandas as pd
import random


In [2]:
# Read the whole corpus into memory as a single string.
# The encoding is pinned to UTF-8 so the file decodes the same way on every
# platform instead of depending on the locale's default encoding.
with open("data/tiny-shakespeare.txt", encoding="utf-8") as f:
    text = f.read()


In [3]:
# Chunk the text into non-overlapping sequences of exactly seq_length characters.
# The "+ 1" on the stop bound keeps the final full chunk when len(text) is an
# exact multiple of seq_length; a trailing partial chunk is still dropped.
seq_length = 512
sequences = [
    text[start:start + seq_length]
    for start in range(0, len(text) - seq_length + 1, seq_length)
]


In [4]:
# Sanity check: the number of chunks, then the text of the first chunk.
print(f"{len(sequences)} {sequences[0]}")


2178 First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.

All:
We know't, we know't.

First Citizen:
Let us kill him, and we'll have corn at our own price.
Is't a verdict?

All:
No more talking on't; let it be done: away, away!

Second Citizen:
One word, good citizens.

First Citizen:
We are accounted poor citizens, t


In [5]:
@torch.no_grad()
def encode(string: str) -> torch.Tensor:
    """Convert text into a 1-D tensor of its UTF-8 byte values.

    Args:
        string: Text to encode.

    Returns:
        An int64 tensor holding one value in 0-255 per UTF-8 byte of
        ``string``. Non-ASCII characters occupy more than one element.
    """
    byte_values = list(string.encode('utf-8'))
    # Pin the dtype: inferring it from an empty list would give float32,
    # which cannot be used as an index tensor for an embedding lookup.
    return torch.as_tensor(byte_values, dtype=torch.long)

@torch.no_grad()
def decode(arr: torch.Tensor) -> str:
    """Convert a tensor of UTF-8 byte values back into text.

    Args:
        arr: Integer tensor of byte values in 0-255. Any shape is accepted;
            it is flattened before decoding.

    Returns:
        The decoded string. Byte runs that are not valid UTF-8 (e.g. sampled
        from a model) are replaced with U+FFFD instead of raising.

    Raises:
        ValueError: If any value lies outside 0-255.
    """
    # Decode the bytes as UTF-8 rather than mapping each byte through chr(),
    # which would turn every multi-byte character into Latin-1 mojibake.
    raw = bytes(arr.flatten().tolist())
    return raw.decode('utf-8', errors='replace')


In [6]:
print(encode('hello'))
print(decode(encode('hello')))


tensor([104, 101, 108, 108, 111])
hello


In [7]:
import torch
import torch.nn as nn

class Model(nn.Module):
    """Sequence model: token embedding -> multi-layer GRU -> linear head.

    Args:
        vocab_size: Number of distinct token ids (size of the embedding table
            and of the output logit vector).
        embed_dim: Dimension of each token embedding.
        hidden_dim: Dimension of the GRU hidden state.
        num_layers: Number of stacked GRU layers.
    """

    def __init__(self, vocab_size, embed_dim, hidden_dim, num_layers):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers

        # Token id -> dense vector
        self.embedding = nn.Embedding(vocab_size, embed_dim)

        # Recurrent core; batch_first=True means inputs are (batch, seq, feature)
        self.gru = nn.GRU(embed_dim, hidden_dim, num_layers, batch_first=True)

        # Projects every GRU output step to logits over the vocabulary
        self.fc = nn.Linear(hidden_dim, vocab_size)

    def forward(self, x, hidden=None):
        """Run the model over a batch of token-id sequences.

        Args:
            x: Integer tensor of token ids, shaped (batch, seq).
            hidden: Optional GRU state of shape (num_layers, batch, hidden_dim)
                to continue from. ``None`` starts from an all-zero state.

        Returns:
            A tuple ``(logits, hidden)``: ``logits`` has shape
            (batch * seq, vocab_size), with all time steps flattened into the
            first dimension; ``hidden`` is the GRU state after the last step.
        """
        x = self.embedding(x)

        # With hidden=None, nn.GRU itself creates the zero initial state using
        # the input's dtype and device. A hand-built torch.zeros state would be
        # float32 regardless of the module's parameter dtype.
        out, hidden = self.gru(x, hidden)

        # Flatten (batch, seq, hidden_dim) -> (batch * seq, hidden_dim) so the
        # linear head yields one row of logits per time step.
        out = out.reshape(-1, self.hidden_dim)
        out = self.fc(out)
        return out, hidden

# Hyperparameters for the model instance used in the rest of the notebook.
vocab_size = 256   # size of the token vocabulary / output logits
embed_dim = 128    # width of each token embedding
hidden_dim = 256   # width of the GRU hidden state
num_layers = 2     # number of stacked GRU layers

model = Model(
    vocab_size=vocab_size,
    embed_dim=embed_dim,
    hidden_dim=hidden_dim,
    num_layers=num_layers,
)


In [16]:
# Training objective and optimizer: cross-entropy on the model's logits,
# minimized with Adam at a learning rate of 1e-3.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)


In [21]:
import torch.nn.functional as F

def temperature_sampling(logits, temperature=0.7):
    """Sample a class index from logits after temperature scaling.

    Args:
        logits: Unnormalized scores with classes on the last dimension
            (a 1-D vector, or a 2-D batch of vectors).
        temperature: Values below 1 sharpen the distribution, values above 1
            flatten it. ``0`` selects the highest-scoring class
            deterministically (greedy decoding).

    Returns:
        An integer tensor of sampled indices with a trailing dimension of
        size 1 (shape ``(1,)`` for 1-D logits).

    Raises:
        ValueError: If ``temperature`` is negative.
    """
    if temperature < 0:
        raise ValueError(f"temperature must be non-negative, got {temperature}")
    if temperature == 0:
        # Dividing by zero would produce inf/NaN probabilities that
        # torch.multinomial rejects; the limit of T -> 0 is plain argmax.
        return torch.argmax(logits, dim=-1, keepdim=True)

    # Scale logits by temperature, then normalize to probabilities
    probs = F.softmax(logits / temperature, dim=-1)
    # Draw one index per distribution
    return torch.multinomial(probs, num_samples=1)

# Text generation with temperature sampling.
# The prompt is fed through the model once; after that, each sampled byte is
# fed back in as the next single-step input while the hidden state is carried.
prompt = 'First Citizen:'
num_generated = 512  # number of bytes to sample after the prompt

model.eval()
device = next(model.parameters()).device  # keep inputs on the model's device
initial_input = encode(prompt).unsqueeze(0).to(device)  # shape (1, prompt_len)
generated_text = []
hidden = None  # no state yet; the model is called with hidden=None on step one

with torch.no_grad():
    for _ in range(num_generated):
        # The model accepts the previous hidden state and returns the new one
        output, hidden = model(initial_input, hidden)
        # output has one row of logits per input position; the last row is
        # the prediction for the byte that follows the input.
        predicted = temperature_sampling(output[-1], temperature=0.8)
        generated_text.append(predicted)
        initial_input = predicted.unsqueeze(0)  # shape (1, 1)

# Reuse the prompt variable so the printed prefix cannot drift from the seed text
print(prompt + decode(torch.stack(generated_text).flatten()))


First Citizen:
erars
Efl mot ofd Ela hi drat thto thy llodedt y dingesie, ildomOeng fated haerde!e al'sor chat huv watlede dil theithers iensed mett lormead defers'
Toy frarten that y thee!
Dhe, thy acat dorg yiof tereurtly shas metheso worllmO thase foofl it arion urog
O'd!''A my soreaermy lethoseer, ate looret y merbalin ans! iodedn or sou, thack,
Fhaus my aeadt cusle. mu hee mal margat thor cat hrord themeance rert th urithatF me that kon'dedse haved dit ofl drut men yold that du rallll, kuoos deve ordadss!'':
AQrErT:


In [22]:
epochs = 20

# Encode every chunk once up front. The byte tensors are identical in every
# epoch, so re-encoding inside the epoch loop only repeats Python-level work.
# Each entry is shaped (1, n_bytes): a batch containing a single sequence.
encoded_sequences = [encode(seq).view(1, -1) for seq in sequences]

for epoch in range(epochs):
    model.train()
    total_loss = 0
    with tqdm.tqdm(enumerate(encoded_sequences), total=len(encoded_sequences)) as pbar:
        for i, story in pbar:
            optimizer.zero_grad()

            # Next-byte prediction: the target is the input shifted by one
            inputs = story[:, :-1]            # all bytes except the last
            targets = story[:, 1:].flatten()  # all bytes except the first, flattened for the loss

            # Forward pass; the returned hidden state is not needed because
            # every chunk is processed independently during training.
            outputs, _ = model(inputs)
            loss = criterion(outputs, targets)

            # Backward pass and parameter update
            loss.backward()
            optimizer.step()

            # Show the running mean loss for the current epoch
            total_loss += loss.item()
            pbar.set_description(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss / (i + 1):.4f}")


Epoch 1/20, Loss: 2.5915:   2%|▏         | 39/2178 [00:25<23:32,  1.51it/s]


KeyboardInterrupt: 