In [1]:
import torch
import torch.nn.functional as F
from torch import nn
import pandas as pd
import matplotlib.pyplot as plt # for making figures
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
from pprint import pprint

In [2]:
torch.__version__

'2.4.1+cpu'

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cpu')

In [10]:
file_path = 'text_data.txt'  # Update this to the name of your text file

# Try reading the file with a specified encoding
try:
    with open(file_path, 'r', encoding='utf-8') as file:
        paras = file.read()
except UnicodeDecodeError:
    print("Error: Could not decode the file. Trying a different encoding.")
    with open(file_path, 'r', encoding='latin-1') as file:
        paras = file.read()
    
import re
paras = re.sub('[^a-zA-Z0-9 \.]', ' ', paras)

paras = paras.lower()
paras = paras.strip()

paras = list(paras.split("\n\n")) # This will help in training the data.

paras = [para.strip() for para in paras]


# # Randomly shuffle the lines
import random
random.shuffle(paras)


  paras = re.sub('[^a-zA-Z0-9 \.]', ' ', paras)


In [11]:
# build the vocabulary of words and mappings to/from integers
words = sorted(set(list(' '.join(paras).split(" "))))
words.remove("")
stoi = {s:i for i,s in enumerate(words)}
stoi['.'] = 0
itos = {i:s for s,i in stoi.items()}
print(itos)



In [12]:
block_size = 10 # context length: how many characters do we take to predict the next one?
X, Y = [], []
for w in paras:
    
  
  #print(w)
    w = w.split(" ")
    w.append(".")
    context = [0] * block_size
    for ch in w:
        if ch == "":
            continue
        ix = stoi[ch]
        X.append(context)
        Y.append(ix)
#         print(' '.join(itos[i] for i in context), '--->', itos[ix])
        context = context[1:] + [ix] # crop and append    


X = torch.tensor(X).to(device)
Y = torch.tensor(Y).to(device)

In [13]:
X.shape, X.dtype, Y.shape, Y.dtype

(torch.Size([109072, 10]), torch.int64, torch.Size([109072]), torch.int64)

In [14]:
# Embedding layer for the context

emb_dim = 64
emb = torch.nn.Embedding(len(stoi), emb_dim)
emb.weight

Parameter containing:
tensor([[ 1.2345, -0.0838, -1.3284,  ...,  0.3621,  1.0709,  0.1232],
        [-0.3838, -0.9779, -0.3184,  ..., -1.7644, -0.1187, -0.3270],
        [-0.5450,  1.7308, -0.2248,  ..., -0.5736, -0.8149, -0.3271],
        ...,
        [ 0.1761, -0.5393,  0.1353,  ..., -0.1084, -1.6816, -0.5334],
        [ 0.7977,  0.0664, -0.5760,  ..., -0.3912,  0.1443, -2.2000],
        [ 0.3022, -0.8053, -1.0026,  ..., -0.0357, -0.8692, -0.7600]],
       requires_grad=True)

In [34]:
emb.weight.shape

torch.Size([9959, 64])

In [35]:
class NextWord(nn.Module):
    def __init__(self, block_size, vocab_size, emb_dim, hidden_size):
        super().__init__()
        self.emb = nn.Embedding(vocab_size, emb_dim)
        self.lin1 = nn.Linear(block_size * emb_dim, hidden_size)
        self.lin2 = nn.Linear(hidden_size, vocab_size)

    def forward(self, x):
        x = self.emb(x)
        x = x.view(x.shape[0], -1)
        x = torch.relu(self.lin1(x))
        x = self.lin2(x)
        return x

In [36]:
# Generate names from untrained model


model = NextWord(block_size, len(stoi), emb_dim, 10).to(device)
# model = torch.compile(model)
# model = model.to(device)

g = torch.Generator()
g.manual_seed(4000002)
def generate_sentence(model, itos, stoi, block_size, max_len=10):
    context = [0] * block_size
    sentence = ''
    for i in range(max_len):
        x = torch.tensor(context).view(1, -1).to(device)
        y_pred = model(x)
        ix = torch.distributions.categorical.Categorical(logits=y_pred).sample().item()
        word = itos[ix]
        if word == '.':
            break
        sentence += word
        sentence += " "
        context = context[1:] + [ix]
    return sentence

for i in range(10):
    print(generate_sentence(model, itos, stoi, block_size))

raising murmured braving married english cabby since. heavens indicating anywhere 
who altogether swift wandering subtle luxurious cumbrous. wouldn lain augustine. 
attain govern congratulate meadow. arat yourselves scores motion perfection skirmishes 
deposed engines ledger hastened nervous knew chuckling. class. cheerless geniality 
slide notorious nonentity. angry seize square. timid sheets ray knock 
colour. tangible saturday knelt scrawl eightpence virtue. freemason difficulty hair 
rug. hercules. 4th. sacrificing relative unclaspings communicative silk landau slung 
willows meet ankles splash enlarged curse weigh incoherent dangerously swindon 
cal. cabinet myth. hardy enlarged station. valley. bag forced knowledge 
inherit harris books beds. golden labyrinth unmarried deductions warmth. vacancies 


#### Ask question about how compilation work because this was the output when torch.compile(model) was not used.


In [37]:
# Train the model

loss_fn = nn.CrossEntropyLoss()
opt = torch.optim.AdamW(model.parameters(), lr=0.01)
import time
# Mini-batch training
batch_size = 1000
print_every = 100
elapsed_time = []
for epoch in range(1001):
    start_time = time.time()
    for i in range(0, X.shape[0], batch_size):
        x = X[i:i+batch_size]
        y = Y[i:i+batch_size]
        y_pred = model(x)
        loss = loss_fn(y_pred, y)
        loss.backward()
        opt.step()
        opt.zero_grad()
    end_time = time.time()
    elapsed_time.append(end_time - start_time)
    if epoch % print_every == 0:
        print(epoch, loss.item())

0 8.326860427856445
100 0.6354828476905823
200 0.40594133734703064
300 0.19073447585105896
400 0.45135122537612915
500 0.8067220449447632
600 0.3872928023338318
700 0.15210774540901184
800 0.2474779337644577
900 0.6655786037445068
1000 0.46604204177856445


In [38]:
# Generate names from trained model

for i in range(10):
    print(generate_sentence(model, itos, stoi, block_size))

the project gutenberg project gutenberg electronic electronic works i do 
the project gutenberg electronic ebook there will not only the 
the project gutenberg ebook of the strange with holder you 
the project gutenberg project gutenberg electronic license to not founded 
the project gutenberg project gutenberg electronic electronic works there then 
the copyright project gutenberg project gutenberg electronic works with you 
the project is or how mr. rucastle were a two 
the project gutenberg project gutenberg electronic electronic works there dad 
the copyright project became equally volunteers and it will charming 
the project gutenberg project gutenberg electronic works if you horrify 


#### Creating Streamlit application

In [None]:
!streamlit run app.py

import streamlit as st
import torch

# Load your pre-trained PyTorch models
model_5_64_relu = torch.load('5_64_relu.pth', weights_only=True)
model_5_64_tanh = torch.load('5_64_tanh.pth', weights_only=True)
model_5_128_relu = torch.load('5_128_relu.pth', weights_only=True)
model_5_128_tanh = torch.load('5_128_tanh.pth',weights_only=True)
model_10_64_relu = torch.load('10_64_relu.pth', weights_only=True)
model_10_64_tanh = torch.load('10_64_tanh.pth', weights_only=True)
model_10_128_relu = torch.load('10_128_relu.pth', weights_only=True)
model_10_128_tanh =torch.load('10_128_tanh.pth', weights_only=True)

g = torch.Generator()
g.manual_seed(4000002)
def generate_sentence(model, itos, stoi, block_size, k, word):
    context = [0] * block_size
    context[-1] = stoi[word]
    sentence = word + " "
    for i in range(k):
        x = torch.tensor(context).view(1, -1).to(device)
        y_pred = model(x)
        ix = torch.distributions.categorical.Categorical(logits=y_pred).sample().item()
        word = itos[ix]
        if word == '.':
            break
        sentence += word
        sentence += " "
        context = context[1:] + [ix]
    return sentence

# Streamlit app
st.title("Text Generator")

seed_text = st.text_input("Enter a seed text:")
num_generate = st.number_input("Number of words to generate:", min_value=1, max_value=30)

# Parameter selection (adjust based on your model's parameters)
embedding_size = st.selectbox("Embedding Size:", [64, 128])
context_length = st.selectbox("Context Length:", [5, 10])
activation_function = st.selectbox("Activation Function:", ["ReLU", "tanh"])

# Model selection
if embedding_size == 64 and context_length == 5 and activation_function == "ReLU":
  model = model_5_64_relu
elif embedding_size == 64 and context_length == 5 and activation_function == "tanh":
  model = model_5_64_tanh
elif embedding_size == 128 and context_length == 5 and activation_function == "ReLU":
  model = model_5_128_relu
elif embedding_size == 128 and context_length == 5 and activation_function == "tanh":
  model = model_5_128_tanh
elif embedding_size == 64 and context_length == 10 and activation_function == "ReLU":
  model = model_10_64_relu
elif embedding_size == 64 and context_length == 10 and activation_function == "tanh":
  model = model_10_64_tanh
elif embedding_size == 128 and context_length == 10 and activation_function == "ReLU":
  model = model_10_128_relu
elif embedding_size == 128 and context_length == 10 and activation_function == "tanh":
  model = model_10_128_tanh



if st.button("Generate"):
  generated_text = generate_sentence(model, itos, stoi, context_length, num_generate, seed_text)
  st.write(generated_text)