In [1]:
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data
import torch.optim as optim

In [2]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
# Bare expression so the notebook displays the chosen device.
device

device(type='cuda')

In [4]:
# Read the whole corpus into memory. The encoding is given explicitly because
# the default used by open() depends on the platform locale.
with open('shaytonat 1-3.txt', 'r', encoding='utf-8') as f:
    text = f.read()

# Character vocabulary. It is sorted so that every character gets the same id
# on every kernel restart; iterating a bare set of strings yields a different
# order from run to run because string hashing is randomised per process.
bag = sorted(set(text))
n_bag = len(bag)
print(f'Beliglar soni: {n_bag}')

# Reverse lookup table so encoding costs O(1) per character instead of a
# linear scan of `bag`.
_char_to_id = {ch: i for i, ch in enumerate(bag)}


def encode(s):
    """Map each character of `s` to its integer id in `bag`.

    Returns a list of ints, one per character.
    Raises ValueError if `s` contains a character that is not in the corpus
    vocabulary.
    """
    try:
        return [_char_to_id[ch] for ch in s]
    except KeyError as exc:
        raise ValueError(f"{exc.args[0]!r} is not in the vocabulary") from None


def decode(ids):
    """Join the characters of `bag` selected by the integer ids in `ids`."""
    return "".join([bag[i] for i in ids])

Beliglar soni: 93


In [21]:
# Sequential split: the first 90% of the characters are used for training and
# the remaining tail is held out for validation.
val_size = 0.1
n_data = len(text)
n_train = int(n_data * (1 - val_size))
n_val = n_data - n_train

# Each split is encoded to integer ids and stored as its own int32 tensor.
train_data, val_data = (
    torch.tensor(encode(chunk), dtype=torch.int32)
    for chunk in (text[:n_train], text[n_train:])
)

print("O'rgatuvchida: ", n_train)
print("Sinovda: ", n_val)

O'rgatuvchida:  1848219
Sinovda:  205358


In [24]:
block_size = 8   # number of context tokens in one training example
batch_size = 4   # number of examples drawn per call to get_batch


def get_batch(split='train'):
    """Sample a random mini-batch of (input, target) windows.

    Args:
        split: 'train' selects `train_data`; any other value selects
            `val_data`.

    Returns:
        (xb, yb), both of shape (batch_size, block_size). `xb` keeps the dtype
        of the source tensor; `yb` is `xb` shifted one position to the right
        and is cast to int64.

    Raises:
        ValueError: if the selected split has fewer than block_size + 1 tokens.
    """
    source = train_data if split == 'train' else val_data

    # A start index is valid when start + block_size + 1 <= len(source).
    # np.random.randint excludes its upper bound, so the bound is the count of
    # valid starts; subtracting a further 1 would make the final token
    # unreachable as a target.
    n_starts = len(source) - block_size
    if n_starts <= 0:
        raise ValueError(
            f"split {split!r} has {len(source)} tokens; "
            f"need at least {block_size + 1}"
        )

    starts = np.random.randint(0, n_starts, size=batch_size).tolist()
    xb = torch.stack([source[s:s + block_size] for s in starts])
    yb = torch.stack([source[s + 1:s + block_size + 1] for s in starts])

    return xb, yb.to(torch.int64)

In [None]:
# Draw one batch and list every (growing context -> next token) pair in it.
xb, yb = get_batch()
for b in range(batch_size):
    for c in range(block_size):
        context = xb[b, :c+1]
        target = yb[b, c].item()
        print("Context:", context, "Target:", target)

In [None]:
class BigramLM(nn.Module):
    """Bigram language model: next-token logits depend only on the current token.

    The single parameter is a (vocab_size, vocab_size) embedding table whose
    row `i` holds the unnormalised scores of every token following token `i`.
    """

    def __init__(self,
                 vocab_size):
        super().__init__()
        self.token_embedding_table = nn.Embedding(vocab_size,
                                                  vocab_size)

    def forward(self, xb, yb=None):
        """Compute next-token logits and, when targets are given, the loss.

        Args:
            xb: integer token ids of shape (B, T).
            yb: optional integer target ids of shape (B, T).

        Returns:
            (logits, loss). Without targets, logits has shape (B, T, C) and
            loss is None. With targets, logits is flattened to (B*T, C) and
            loss is the mean cross-entropy over all B*T positions.
        """
        # One table lookup per position: (B, T) -> (B, T, C).
        logits = self.token_embedding_table(xb)

        if yb is None:
            return logits, None

        # F.cross_entropy expects (N, C) scores against (N,) class indices.
        B, T, C = logits.shape
        logits = logits.view(B*T, C)
        loss = F.cross_entropy(logits, yb.view(B*T))
        return logits, loss

    @torch.no_grad()  # sampling is never back-propagated through
    def generate(self, idx, max_new_token):
        """Autoregressively sample `max_new_token` tokens after `idx`.

        Args:
            idx: integer token ids of shape (B, T) used as the prompt.
            max_new_token: number of tokens to append.

        Returns:
            Integer tensor of shape (B, T + max_new_token): the prompt
            followed by the sampled continuation.
        """
        for _ in range(max_new_token):
            # Only the last token influences a bigram prediction, so feed just
            # that column instead of re-embedding the whole growing sequence.
            logits, _loss = self(idx[:, -1:])
            probs = F.softmax(logits[:, -1, :], dim=-1)

            # Draw one next token per batch row: (B, 1).
            idx_next = torch.multinomial(probs, num_samples=1)

            idx = torch.cat((idx, idx_next), dim=1)
        return idx

In [65]:
# Build the model on the selected device (Module.to returns the module itself)
# and attach an AdamW optimizer over all of its parameters.
model = BigramLM(n_bag).to(device)
optimizer = optim.AdamW(params=model.parameters(), lr=1e-3)

In [None]:
n_steps = 1_000_000
for step in range(n_steps):
    # Fresh random training mini-batch, moved to the model's device.
    xb, yb = (t.to(device) for t in get_batch('train'))

    # Clear old gradients, then forward pass, backward pass and weight update.
    optimizer.zero_grad()
    logits, loss = model(xb, yb)
    loss.backward()
    optimizer.step()

    # Report the current mini-batch loss on steps 0, 100, 200, ...
    # (printed with a 1-based step number).
    if not step % 100:
        print(f"Step {step+1}: {loss:.4f}")

In [103]:
# Prompt is a single space character, shaped (1, 1): one sequence, one token.
idx = torch.tensor(
    [encode(' ')],
    dtype=torch.int64,
    device=device,
)
gen_idx = model.generate(idx, max_new_token=100)
# Turn the first (only) generated sequence into plain ints and print its text.
print(decode(gen_idx[0].tolist()))

 чода киберг ўйиб. сачоа мамайванг» ҳулила, бонади.
Ангамл йнг. Унлч изажоар Зоридами:
— Кўта км Мезл
