<a href="https://colab.research.google.com/github/snpsuen/Deep_Learning_Data/blob/main/script/MiniGPT_example02.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
!wget https://www.gutenberg.org/cache/epub/1504/pg1504.txt -O corpus.txt
import torch
import torch.nn as nn
import torch.nn.functional as F
import re

# --- Config ---
device = 'cuda' if torch.cuda.is_available() else 'cpu'
batch_size, block_size = 16, 64
max_iters, eval_interval = 500, 100
lr, n_embd, n_head, n_layer = 1e-3, 128, 4, 2
dropout = 0.1

--2025-07-19 19:43:26--  https://www.gutenberg.org/cache/epub/1504/pg1504.txt
Resolving www.gutenberg.org (www.gutenberg.org)... 152.19.134.47, 2610:28:3090:3000:0:bad:cafe:47
Connecting to www.gutenberg.org (www.gutenberg.org)|152.19.134.47|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 112380 (110K) [text/plain]
Saving to: ‘corpus.txt’


2025-07-19 19:43:27 (2.00 MB/s) - ‘corpus.txt’ saved [112380/112380]



In [8]:
# --- Load and tokenize corpus (word-level) ---
with open('corpus.txt', 'r', encoding='utf-8') as f:
    text = f.read()

# Tokenize into words and punctuation
words = re.findall(r"\b\w+\b|[^\w\s]", text)
vocab = sorted(set(words))
vocab_size = len(vocab)
stoi = {w:i for i,w in enumerate(vocab)}
itos = {i:w for w,i in stoi.items()}
encode = lambda s: [stoi[w] for w in re.findall(r"\b\w+\b|[^\w\s]", s) if w in stoi]
decode = lambda idxs: ' '.join([itos[i] for i in idxs])

print(f"Vocab size (word-level): {vocab_size}")

# Convert full corpus to token IDs
data = torch.tensor(encode(text), dtype=torch.long)
x, y = data[:-1], data[1:]

# --- Batching ---
def get_batch():
    ix = torch.randint(len(x) - block_size, (batch_size,))
    xb = torch.stack([x[i:i+block_size] for i in ix]).to(device)
    yb = torch.stack([y[i:i+block_size] for i in ix]).to(device)
    return xb, yb

Vocab size (word-level): 3387


In [9]:
# --- Model Components ---
class Head(nn.Module):
    def __init__(self, head_size):
        super().__init__()
        self.key = nn.Linear(n_embd, head_size, bias=False)
        self.query = nn.Linear(n_embd, head_size, bias=False)
        self.value = nn.Linear(n_embd, head_size, bias=False)
        self.register_buffer('tril', torch.tril(torch.ones(block_size, block_size)))
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        B, T, C = x.shape
        k = self.key(x)
        q = self.query(x)
        wei = q @ k.transpose(-2, -1) * C**-0.5
        wei = wei.masked_fill(self.tril[:T, :T] == 0, float('-inf'))
        wei = F.softmax(wei, dim=-1)
        wei = self.dropout(wei)
        v = self.value(x)
        return wei @ v

class MultiHead(nn.Module):
    def __init__(self):
        super().__init__()
        head_size = n_embd // n_head
        self.heads = nn.ModuleList([Head(head_size) for _ in range(n_head)])
        self.proj = nn.Linear(n_embd, n_embd)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        return self.dropout(self.proj(torch.cat([h(x) for h in self.heads], dim=-1)))

class FeedForward(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(n_embd, 4 * n_embd),
            nn.ReLU(),
            nn.Linear(4 * n_embd, n_embd),
            nn.Dropout(dropout)
        )

    def forward(self, x):
        return self.net(x)

class Block(nn.Module):
    def __init__(self):
        super().__init__()
        self.ln1 = nn.LayerNorm(n_embd)
        self.ln2 = nn.LayerNorm(n_embd)
        self.sa = MultiHead()
        self.ff = FeedForward()

    def forward(self, x):
        x = x + self.sa(self.ln1(x))
        x = x + self.ff(self.ln2(x))
        return x

class MiniLLM(nn.Module):
    def __init__(self):
        super().__init__()
        self.tok_emb = nn.Embedding(vocab_size, n_embd)
        self.pos_emb = nn.Embedding(block_size, n_embd)
        self.blocks = nn.Sequential(*[Block() for _ in range(n_layer)])
        self.ln_f = nn.LayerNorm(n_embd)
        self.head = nn.Linear(n_embd, vocab_size)

    def forward(self, idx, targets=None):
        B, T = idx.size()
        tok = self.tok_emb(idx)
        pos = self.pos_emb(torch.arange(T, device=device))
        x = tok + pos
        x = self.blocks(x)
        x = self.ln_f(x)
        logits = self.head(x)
        loss = F.cross_entropy(logits.view(-1, vocab_size), targets.view(-1)) if targets is not None else None
        return logits, loss

    def generate(self, idx, max_new):
        for _ in range(max_new):
            idx_cond = idx[:, -block_size:]
            logits, _ = self(idx_cond)
            probs = F.softmax(logits[:, -1], dim=-1)
            idx_next = torch.multinomial(probs, num_samples=1)
            idx = torch.cat([idx, idx_next], dim=1)
        return idx

# --- Initialize model ---
model = MiniLLM().to(device)
opt = torch.optim.AdamW(model.parameters(), lr=lr)

In [10]:
# --- Training ---
for it in range(max_iters):
    xb, yb = get_batch()
    _, loss = model(xb, yb)
    opt.zero_grad()
    loss.backward()
    opt.step()
    if it % eval_interval == 0:
        print(f"Iter {it} | Loss: {loss.item():.4f}")


Iter 0 | Loss: 8.2971
Iter 100 | Loss: 5.2861
Iter 200 | Loss: 4.6306
Iter 300 | Loss: 4.1066
Iter 400 | Loss: 3.7800


In [11]:
# --- Interactive Prompt ---
print("\n🎭 MiniLLM Interactive Mode (word-level) — type 'exit' to quit.")
while True:
    prompt = input("\nYou > ").strip()
    if prompt.lower() in ['exit', 'quit']:
        print("Goodbye!")
        break
    if not prompt:
        continue
    try:
        context = torch.tensor([encode(prompt)], dtype=torch.long).to(device)
    except Exception as e:
        print(f"⚠️ Error: {e}")
        continue
    out = model.generate(context, max_new=50)[0]
    result = decode(out.tolist())
    print("\nMiniLLM >", result[len(prompt.split()):])



🎭 MiniLLM Interactive Mode (word-level) — type 'exit' to quit.

You > DUKE:

MiniLLM > UKE : lifeless distracted thy brawls , warm incorporate , and beast . Patience charge a streets a deep who evil lost wreck of these witches ’ d calm , my sword than for arrival here and splitted return and again , and truce : English links provide league and not fetch

You > What is the matter, sir? I know you not

MiniLLM > he matter , sir ? I know you not . Quoth age no additional woman me be post for the Duke from wayward home . ANTIPHOLUS OF SYRACUSE . ANTIPHOLUS OF SYRACUSE . Thither told it under our small staff there , Which . Marry , and a charge saffron creature , who easy mayst , Would , worse

You > Good sir, where is Antipholus?

MiniLLM > sir , where is Antipholus ? DROMIO OF SYRACUSE . one ’ s man , that I am beaten ? then , Courtesan here you have him ? DROMIO OF SYRACUSE . She idly ’ s the abbey here , worse , that we thought home ? DROMIO OF SYRACUSE . And in flouting , the

You > A