In [1]:
#defining the configuration for the model
from types import SimpleNamespace
import torch
# All hyperparameters live in one namespace so they can be passed around
# (e.g. to the model constructor) as a single object.
my_config = SimpleNamespace(
    vocab_size=90,
    batch_size=64,
    block_size=256,
    max_iters=6500,
    eval_interval=500,
    lr=3e-4,
    eval_iters=200,
    embed_size=384,
    num_heads=6,
    head_size=64,
    n_blocks=6,
    dropout=0.2,
)

# Use CUDA when torch reports it as available, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Module-level shortcuts for the config fields.
# -- data / batching
batch_size, block_size, vocab_size = (
    my_config.batch_size,
    my_config.block_size,
    my_config.vocab_size,
)
# -- model
embed_size, num_heads, head_size, n_blocks, dropout = (
    my_config.embed_size,
    my_config.num_heads,
    my_config.head_size,
    my_config.n_blocks,
    my_config.dropout,
)
# -- optimisation / evaluation schedule
lr, max_iters, eval_interval, eval_iters = (
    my_config.lr,
    my_config.max_iters,
    my_config.eval_interval,
    my_config.eval_iters,
)

In [2]:
# Seed torch's random number generator.
torch.manual_seed(1337)

# Read the whole corpus file into memory as a single string.
with open('output.txt', mode='r', encoding='utf-8') as f:
    complete_text = f.read()

# Sorted list of the distinct characters that occur in the corpus.
chars = sorted(set(complete_text))

In [3]:
# Number of distinct characters actually present in the corpus.
vocab_size = len(chars)

# Keep the config namespace in step with the measured vocabulary. The model is
# constructed from my_config, so a stale hard-coded vocab_size there could
# disagree with the ids produced by the lookup tables below
# (presumably the model sizes its embedding/output layers from it — confirm in model.py).
my_config.vocab_size = vocab_size

# Lookup tables between characters and integer ids; an id is the character's
# index in the sorted `chars` list. The loop variable is `idx`, not `id`, so
# the built-in id() is not shadowed.
char_to_id = {ch: idx for idx, ch in enumerate(chars)}
id_to_char = {idx: ch for idx, ch in enumerate(chars)}

In [4]:
#encoding the text to a list of integers
def encode_text(text):
    """Map every character of `text` to its integer id using `char_to_id`.

    Returns a list with one id per character, in the same order. A character
    that is not a key of `char_to_id` raises KeyError.
    """
    return [char_to_id[symbol] for symbol in text]

def decode_text(encoded_vector):
    """Turn an iterable of integer ids back into a string using `id_to_char`.

    Args:
        encoded_vector: iterable of ids that are keys of `id_to_char`.

    Returns:
        The concatenation of the characters for each id, in order
        (an empty string for an empty input).

    Raises:
        KeyError: if an id is not a key of `id_to_char`.
    """
    # str.join assembles the result in one pass instead of growing a string
    # with repeated `+=`; the loop variable is not named `id`, so the
    # built-in id() stays usable inside this function.
    return "".join(id_to_char[token_id] for token_id in encoded_vector)


In [5]:
# Small demo of the encoder on a sample sentence: one id per character.
text = 'This is a Zoom meeting'
print('The number of characters in given text is', len(text))

encoded_vector = encode_text(text)
print('encoded vector - ', encoded_vector)
print('The number of characters in encoded text is', len(encoded_vector))

The number of characters in given text is 22
encoded vector -  [51, 68, 69, 79, 2, 69, 79, 2, 61, 2, 57, 75, 75, 73, 2, 73, 65, 65, 80, 69, 74, 67]
The number of characters in encoded text is 22


In [6]:
# Round-trip check: decoding the ids produced by encode_text should print the sample sentence again.
print(decode_text(encoded_vector))

This is a Zoom meeting


In [7]:
# Encode the entire corpus and hold the ids in a 1-D tensor of dtype torch.long.
data=torch.tensor(encode_text(complete_text),dtype=torch.long)

In [8]:
# Sequential split: the first 90% of the encoded corpus is used for training,
# the remaining tail for validation.
n = int(0.9 * len(data))
train_data, val_data = data[:n], data[n:]

In [9]:
#batching the data
def get_batch(split):
    """Sample a random batch of input/target windows from one data split.

    `split == 'train'` draws from `train_data`; any other value draws from
    `val_data`. Returns `(x, y)`, each of shape (batch_size, block_size) and
    moved to `device`. `y` holds the same windows as `x` shifted one position
    forward in the source data.
    """
    source = train_data if split == 'train' else val_data

    # One random start offset per batch row. The exclusive upper bound leaves
    # room for the extra target position at the end of every window.
    starts = torch.randint(0, len(source) - block_size, (batch_size,))

    inputs, targets = [], []
    for start in starts:
        inputs.append(source[start:start + block_size])
        targets.append(source[start + 1:start + block_size + 1])

    return torch.stack(inputs).to(device), torch.stack(targets).to(device)

In [10]:
from model import SmallLanguageModel
# Build the model from the config namespace and move it to the selected device.
model=SmallLanguageModel(my_config).to(device)

Number of parameters: 10.808154 M


In [11]:
# AdamW optimizer over all model parameters, using the learning rate `lr` taken from the config.
optimizer=torch.optim.AdamW(model.parameters(),lr=lr)

  from .autonotebook import tqdm as notebook_tqdm


In [12]:
#getting the loss for a 300 random batches and then taking their mean
@torch.no_grad()
def estimte_loss():
    out={}
    model.eval()
    for split in ['train','val']:
        losses=torch.zeros(eval_iters)
        for k in range(eval_iters):
            x,y=get_batch(split)
            logits,loss=model(x,y)
            losses[k]=loss.item()
        out[split+'_loss']=losses.mean()
    model.train()
    return out

In [13]:
# Training loop: every `eval_interval` steps print the estimated train/val
# loss, then take one optimizer step on a fresh training batch.
# The counter is named `step` rather than `iter` so that the built-in iter()
# is not overwritten in the notebook's global namespace once this cell runs.
for step in range(max_iters):
    if step % eval_interval == 0:
        losses = estimte_loss()
        print(f'Iter {step:5d} Train loss {losses["train_loss"]:.4f} Val loss {losses["val_loss"]:.4f}')

    x, y = get_batch('train')
    logits, loss = model(x, y)

    # Clear old gradients, backpropagate the new loss, update the parameters.
    optimizer.zero_grad(set_to_none=True)
    loss.backward()
    optimizer.step()

KeyboardInterrupt: 