In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import tqdm
import lightning.pytorch as pl
import pandas as pd
import random


In [2]:
with open("data/tiny-shakespeare.txt") as f:
    text = f.read()


In [3]:
# chunk the text into sequences of length seq_length
seq_length = 1024
sequences = []
for i in range(0, len(text) - seq_length, seq_length):
    sequences.append(text[i:i + seq_length])


In [4]:
print(len(sequences), sequences[0])


1089 First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.

All:
We know't, we know't.

First Citizen:
Let us kill him, and we'll have corn at our own price.
Is't a verdict?

All:
No more talking on't; let it be done: away, away!

Second Citizen:
One word, good citizens.

First Citizen:
We are accounted poor citizens, the patricians good.
What authority surfeits on would relieve us: if they
would yield us but the superfluity, while it were
wholesome, we might guess they relieved us humanely;
but they think we are too dear: the leanness that
afflicts us, the object of our misery, is as an
inventory to particularise their abundance; our
sufferance is a gain to them Let us revenge this with
our pikes, ere we become rakes: for the gods know I
speak this in hunger for bread, not in thirst for reven

In [5]:
@torch.no_grad()
def encode(string: str) -> torch.Tensor:
    string = string.encode('utf-8')
    return torch.as_tensor([int(c) for c in string])

@torch.no_grad()
def decode(arr: torch.Tensor) -> str:
    arr = arr.tolist()
    return ''.join([chr(c) for c in arr])


In [6]:
print(encode('hello'))
print(decode(encode('hello')))


tensor([104, 101, 108, 108, 111])
hello


In [7]:
import torch
import torch.nn as nn

class Model(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_dim, num_layers):
        super(Model, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        
        # Embedding layer
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        
        # LSTM layer
        self.lstm = nn.LSTM(embed_dim, hidden_dim, num_layers, batch_first=True)
        
        # Fully connected layer to predict each character
        self.fc = nn.Linear(hidden_dim, vocab_size)

    def forward(self, x, hidden=None):
        # Embedding
        x = self.embedding(x)
        
        # Initialize hidden state and cell state if not provided
        if hidden is None:
            h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim, device=x.device)
            c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim, device=x.device)
            hidden = (h0, c0)
        
        # LSTM output along with new hidden state
        out, hidden = self.lstm(x, hidden)
        
        # Reshape output for the fully connected layer
        out = out.reshape(-1, self.hidden_dim)
        out = self.fc(out)
        return out, hidden

# Create an instance of the updated model
vocab_size = 256  # number of unique characters
embed_dim = 128   # embedding dimension
hidden_dim = 512  # LSTM hidden dimensions
num_layers = 4  # number of LSTM layers

model = Model(vocab_size, embed_dim, hidden_dim, num_layers)


In [8]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=0.01)


In [9]:
import torch.nn.functional as F

def temperature_sampling(logits, temperature=0.7):
    # Scale logits by temperature
    scaled_logits = logits / temperature
    # Convert logits to probabilities
    probs = F.softmax(scaled_logits, dim=-1)
    # Sample from the probabilities
    return torch.multinomial(probs, num_samples=1)

# Validation Loop with Temperature Sampling
model.eval()
initial_input = encode('First Citizen:').unsqueeze(0)
generated_text = []
hidden = None  # Hidden state initialization

with torch.no_grad():
    for _ in range(512):  # Generate 64 characters
        output, hidden = model(initial_input, hidden)  # Ensure model accepts and returns hidden state
        predicted = temperature_sampling(output[-1], temperature=0.8)
        generated_text.append(predicted)
        initial_input = predicted.unsqueeze(0)

print("First Citizen:"+decode(torch.stack(generated_text).flatten()))


Ù°;åtì6ÁÜèæ[¡û6x³ÎèYT¦sÏ@`a­ÒTeÙ0Á~+s8[èj)z×4¡¨0ÌJH¬²æ)þ*ª4PâC°VQéåf¨4ÝÃ±[÷¸+5mxhïÀD1x$nµ²ªv­úEàe°5×~~%\ã(V¯ÇµM&Fy¨­¤Í_Ï{¡ùÿÎÜÍ¿ÙûRøá8Or¯O\ì+!²4ÊÒ÷c£y ÑsjVµõ³¡Û)BQ>ÖþL£¢}â³÷aÚUeÐj"ÙËAóý5´Ì$}¦#Å:Ïèã¹³ºÖK|


In [10]:
epochs = 20
for epoch in range(epochs):
    model.train()
    total_loss = 0
    with tqdm.tqdm(enumerate(sequences), total=len(sequences)) as pbar:
        for i, seq in pbar:
            optimizer.zero_grad()

            # Encode sequence and prepare inputs and targets
            story = encode(seq).view(1, -1)
            inputs = story[:, :-1]  # All characters except the last
            targets = story[:, 1:].flatten()  # All characters except the first, flattened for loss calculation

            # Forward pass
            outputs, _ = model(inputs)  # Outputs now includes hidden states which are ignored during training
            loss = criterion(outputs, targets)  # Loss calculation between outputs and shifted targets

            # Backward pass and optimize
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            pbar.set_description(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss / (i + 1):.4f}")


Epoch 1/20, Loss: 3.3516:  23%|██▎       | 247/1089 [19:32<1:06:38,  4.75s/it]


: 