<a href="https://colab.research.google.com/github/safaet/mufti-llm/blob/main/LLM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import random

In [None]:
# Example training text; the vocabulary and the encoded training data below
# are both derived from this string.
text = "hello world"

In [None]:
# Vocabulary: each distinct character of the text, in sorted order, is
# assigned one integer id.
chars = sorted(set(text))
vocab_size = len(chars)
itos = dict(enumerate(chars))                 # id -> character
stoi = {ch: idx for idx, ch in itos.items()}  # character -> id

In [None]:
# Show both lookup tables: character -> id (stoi) and id -> character (itos).
print("stoi = ", stoi)
print("itos = ", itos)

stoi =  {' ': 0, 'd': 1, 'e': 2, 'h': 3, 'l': 4, 'o': 5, 'r': 6, 'w': 7}
itos =  {0: ' ', 1: 'd', 2: 'e', 3: 'h', 4: 'l', 5: 'o', 6: 'r', 7: 'w'}


In [None]:
# Encode text as integers
def encode(s):
    """Translate each character of ``s`` into its integer id using ``stoi``.

    Returns a list with one id per character, in order. A character that is
    not a key of ``stoi`` raises ``KeyError``.
    """
    ids = []
    for ch in s:
        ids.append(stoi[ch])
    return ids
def decode(l):
    """Translate a sequence of integer ids back into a string using ``itos``.

    An id that is not a key of ``itos`` raises ``KeyError``.
    """
    return ''.join(itos[idx] for idx in l)

# The whole training text as a 1-D tensor of integer (int64) character ids.
data = torch.tensor(encode(text), dtype=torch.long)

In [None]:
# Inspect the encoded text: one id per character of `text`.
print(data)

tensor([3, 2, 4, 4, 5, 0, 7, 5, 6, 4, 1])


# 2. Tiny Dataset Loader

In [None]:
# Generate training samples
def get_batch(seq_len=4):
    """Draw one random (input, target) window from the encoded text ``data``.

    The target window is the input window shifted one position to the right,
    so ``y[0, t]`` is the id that follows ``x[0, t]`` in the text.

    Args:
        seq_len: Number of ids in each window.

    Returns:
        Tuple ``(x, y)``; each is the selected slice of ``data`` with a
        leading batch dimension of size 1 added.
    """
    # Largest start offset that still leaves room for the shifted target
    # window (random.randint includes both end points).
    last_start = len(data) - seq_len - 1
    start = random.randint(0, last_start)
    stop = start + seq_len

    inputs = data[start:stop]
    targets = data[start + 1:stop + 1]
    return inputs.unsqueeze(0), targets.unsqueeze(0)


# 3. Mini GPT-like Model

In [None]:
class TinyGPT(nn.Module):
    """Minimal next-token model: an embedding lookup followed by a linear head.

    Each position is processed independently of the others, so the logits at
    a position depend only on the token id at that same position.
    """

    def __init__(self, vocab_size, embed_dim=16):
        """Create the embedding table and the output projection.

        Args:
            vocab_size: Number of distinct token ids; also the width of the
                logits produced by ``forward``.
            embed_dim: Size of each token embedding vector.
        """
        super().__init__()
        self.token_embedding = nn.Embedding(
            num_embeddings=vocab_size, embedding_dim=embed_dim
        )
        self.linear = nn.Linear(in_features=embed_dim, out_features=vocab_size)

    def forward(self, x):
        """Return vocabulary logits for every token id in ``x``.

        Args:
            x: Integer tensor of token ids, e.g. of shape (B, T).

        Returns:
            Tensor with one extra trailing dimension of size ``vocab_size``,
            e.g. (B, T, vocab_size).
        """
        embedded = self.token_embedding(x)  # (B, T, C)
        logits = self.linear(embedded)      # (B, T, vocab_size)
        return logits


# 4. Training the Model

In [None]:
# Seed both RNGs so that weight initialisation, batch sampling and the later
# text sampling produce the same results on every "Restart & Run All".
SEED = 42
random.seed(SEED)        # used by get_batch() via random.randint
torch.manual_seed(SEED)  # used for parameter init and torch.multinomial

NUM_STEPS = 1000  # number of optimizer updates
LOG_EVERY = 100   # print the loss every this many steps

model = TinyGPT(vocab_size)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

# Train loop: each step uses a single randomly chosen window (batch size 1),
# so the printed per-step loss is noisy rather than steadily decreasing.
for step in range(NUM_STEPS):
    x, y = get_batch()
    logits = model(x)  # (B, T, vocab_size)
    # Flatten batch and time together so the loss sees (N, vocab_size) logits
    # and (N,) target ids.
    loss = loss_fn(logits.view(-1, vocab_size), y.view(-1))

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if step % LOG_EVERY == 0:
        print(f"Step {step}, loss {loss.item():.4f}")


Step 0, loss 2.5056
Step 100, loss 0.5925
Step 200, loss 0.7400
Step 300, loss 0.3668
Step 400, loss 0.6539
Step 500, loss 0.3085
Step 600, loss 0.4347
Step 700, loss 0.3061
Step 800, loss 0.8848
Step 900, loss 0.6265


# 5. Generate Text

In [None]:
def generate(model, start_text='h', length=20):
    """Sample ``length`` new characters from ``model``, continuing ``start_text``.

    At every step the full context so far is fed to the model and the next
    character is drawn from the softmax of the logits at the last position.

    Args:
        model: Module mapping a (1, T) tensor of token ids to logits indexed
            as ``[batch, position, vocab]``. It is put into eval mode and
            left in that mode.
        start_text: Non-empty prompt of one or more characters; each
            character must be a key of ``stoi``.
        length: Number of characters to sample and append.

    Returns:
        ``start_text`` followed by the ``length`` sampled characters.

    Raises:
        ValueError: If ``start_text`` is empty.
        KeyError: If ``start_text`` contains a character missing from ``stoi``.
    """
    if not start_text:
        raise ValueError("start_text must contain at least one character")

    model.eval()
    # Encode every character of the prompt (not the prompt as one dictionary
    # key) so that multi-character prompts work as well.
    prompt_ids = [stoi[ch] for ch in start_text]
    context = torch.tensor(prompt_ids, dtype=torch.long).unsqueeze(0)  # (1, T)
    result = [start_text]

    # Sampling never calls backward(), so skip autograd bookkeeping for the
    # forward passes.
    with torch.no_grad():
        for _ in range(length):
            logits = model(context)
            probs = F.softmax(logits[:, -1, :], dim=-1)
            next_id = torch.multinomial(probs, num_samples=1).item()
            result.append(itos[next_id])

            # Append the sampled id so it is part of the next step's context.
            context = torch.cat([context, torch.tensor([[next_id]])], dim=1)

    return ''.join(result)

# Sample a continuation of the prompt 'h' from the model and print it.
print(generate(model, start_text='h'))


helllorlo wo wo wo wo


In [None]:
# References and further reading (kept as comments so this cell runs cleanly):
# https://github.com/karpathy/nanoGPT/blob/master/data/shakespeare/prepare.py
#
# https://github.com/berkerdemirel/GPT-from-scratch?utm_source=chatgpt.com
#
# https://github.com/karpathy/minGPT
#
# https://github.com/Hannibal046/nanoRWKV?utm_source=chatgpt.com
#
# https://github.com/endlessreform/nanogpt-candle