<a href="https://colab.research.google.com/github/snpsuen/Deep_Learning_Data/blob/main/script/MiniGPT_example01.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
# — Read corpus —
!wget https://www.gutenberg.org/cache/epub/1504/pg1504.txt -O corpus.txt

--2025-07-19 18:34:57--  https://www.gutenberg.org/cache/epub/1504/pg1504.txt
Resolving www.gutenberg.org (www.gutenberg.org)... 152.19.134.47, 2610:28:3090:3000:0:bad:cafe:47
Connecting to www.gutenberg.org (www.gutenberg.org)|152.19.134.47|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 112380 (110K) [text/plain]
Saving to: ‘corpus.txt’


2025-07-19 18:34:57 (719 KB/s) - ‘corpus.txt’ saved [112380/112380]



In [11]:
import torch, torch.nn as nn, torch.nn.functional as F

# — Config —
device = 'cuda' if torch.cuda.is_available() else 'cpu'
batch_size, block_size = 16, 64
max_iters, eval_interval = 500, 100
lr, n_embd, n_head, n_layer = 1e-3, 128, 4, 2
dropout = 0.1


In [12]:
# — Load corpus —
with open('corpus.txt', 'r', encoding='utf-8') as f:
    text = f.read()
chars = sorted(set(text))
vocab_size = len(chars)
stoi = {ch:i for i,ch in enumerate(chars)}
itos = {i:ch for ch,i in stoi.items()}
encode = lambda s: [stoi[ch] for ch in s if ch in stoi]
decode = lambda idxs: ''.join(itos[i] for i in idxs)

data = torch.tensor(encode(text), dtype=torch.long)
x, y = data[:-1], data[1:]

def get_batch():
    ix = torch.randint(len(x)-block_size, (batch_size,))
    xb = torch.stack([x[i:i+block_size] for i in ix]).to(device)
    yb = torch.stack([y[i:i+block_size] for i in ix]).to(device)
    return xb, yb

In [13]:
# — Model definition —
class Head(nn.Module):
    def __init__(self, head_size):
        super().__init__()
        self.key = nn.Linear(n_embd, head_size, bias=False)
        self.query = nn.Linear(n_embd, head_size, bias=False)
        self.value = nn.Linear(n_embd, head_size, bias=False)
        self.register_buffer('tril', torch.tril(torch.ones(block_size, block_size)))
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        B,T,C = x.size()
        k = self.key(x); q = self.query(x)
        wei = q @ k.transpose(-2,-1) * C**-0.5
        wei = wei.masked_fill(self.tril[:T,:T]==0, float('-inf'))
        wei = F.softmax(wei, dim=-1)
        wei = self.dropout(wei)
        v = self.value(x)
        return wei @ v

class MultiHead(nn.Module):
    def __init__(self):
        super().__init__()
        head_size = n_embd // n_head
        self.heads = nn.ModuleList([Head(head_size) for _ in range(n_head)])
        self.proj = nn.Linear(n_embd, n_embd)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        return self.dropout(self.proj(torch.cat([h(x) for h in self.heads], dim=-1)))

class FeedForward(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(n_embd, 4*n_embd), nn.ReLU(),
            nn.Linear(4*n_embd, n_embd), nn.Dropout(dropout))

    def forward(self, x):
        return self.net(x)

class Block(nn.Module):
    def __init__(self):
        super().__init__()
        self.ln1 = nn.LayerNorm(n_embd)
        self.ln2 = nn.LayerNorm(n_embd)
        self.sa = MultiHead()
        self.ff = FeedForward()
    def forward(self, x):
        x = x + self.sa(self.ln1(x))
        x = x + self.ff(self.ln2(x))
        return x

class MiniLLM(nn.Module):
    def __init__(self):
        super().__init__()
        self.tok_emb = nn.Embedding(vocab_size, n_embd)
        self.pos_emb = nn.Embedding(block_size, n_embd)
        self.blocks = nn.Sequential(*[Block() for _ in range(n_layer)])
        self.ln_f = nn.LayerNorm(n_embd)
        self.head = nn.Linear(n_embd, vocab_size)

    def forward(self, idx, targets=None):
        B,T = idx.size()
        tok = self.tok_emb(idx)
        pos = self.pos_emb(torch.arange(T, device=device))
        x = tok + pos
        x = self.blocks(x)
        x = self.ln_f(x)
        logits = self.head(x)
        loss = None
        if targets is not None:
            loss = F.cross_entropy(logits.view(-1, vocab_size), targets.view(-1))
        return logits, loss

    def generate(self, idx, max_new):
        for _ in range(max_new):
            idx_cond = idx[:, -block_size:]
            logits, _ = self(idx_cond)
            probs = F.softmax(logits[:, -1], dim=-1)
            idx_next = torch.multinomial(probs, num_samples=1)
            idx = torch.cat([idx, idx_next], dim=1)
        return idx


In [14]:
# — Train & sample —
model = MiniLLM().to(device)
opt = torch.optim.AdamW(model.parameters(), lr=lr)

for it in range(max_iters):
    xb, yb = get_batch()
    logits, loss = model(xb, yb)
    opt.zero_grad(); loss.backward(); opt.step()
    if it % eval_interval == 0:
        print(f"It {it} | Loss {loss.item():.4f}")

It 0 | Loss 4.6514
It 100 | Loss 2.6050
It 200 | Loss 2.2844
It 300 | Loss 2.1604
It 400 | Loss 2.3240


In [15]:
# — Generate some text —
context = torch.zeros((1,1), dtype=torch.long, device=device)
out = model.generate(context, max_new=300)
print(decode(out[0].tolist()))


 OF RACEMISE.
And lever ware se, and mon will, ge me ar wond cove, ing walff a jald airk th
le thanve napt thes alaven my in ar ine to sow.

ANThain Af cor E.
Tor i .

Haicend he yo’so wat Fort way,lok; fful wwh.t, sher I ingar
of if an eve mageNe.

ANTIPHOLUS OF 9 nand I EPHESUK.
 nont I.
Tor ver c


In [16]:
# --- Interactive generation loop ---
print("\n🎭 MiniLLM Interactive Mode (type 'exit' to quit)")
while True:
    prompt = input("\nYou > ").strip()
    if prompt.lower() in ['exit', 'quit']:
        print("Goodbye!")
        break
    if not prompt:
        continue
    try:
        context = torch.tensor([encode(prompt)], dtype=torch.long).to(device)
    except KeyError as e:
        print(f"⚠️ Unknown character in input: {e}")
        continue
    out = model.generate(context, max_new=300)[0]
    result = decode(out.tolist())
    print("\nMiniLLM > " + result[len(prompt):])  # Show only generated part



🎭 MiniLLM Interactive Mode (type 'exit' to quit)

You > DUKE:

MiniLLM > 
EN.

DRONHESE.
My Istre ics, hantus and ouiked,

Dan, tio [_Enasthe co hak.Exun of anc?
Whepear hob? And that se be and’y.


Thesm Terestannbe dach thanout  el he.

Tho EPHOLUSy I To ingont, Un the ondea,
I yo car yot may youghy, shve ilct?

DROMIO Oll SERYRmat the st gourke fe rorke fors.

Dy End 

You > What is the matter, sir? I know you not:

MiniLLM > 
I wanthe hayner mping hobe and eve in.
Whestior th you co muskee min,
 hald   on foree (mse ronngoordest; tot’ to f you alritfin a fardd he uct.
Thy wod murgh  And sof wold jevand bonka dide.
It be, he wit wist patence in ang ove,
Thernd fond of me me to thago Ep. Lastacaver hour. Or and cait?

We 

You > Good sir, where is Antipholus?

MiniLLM > 
Bunc ch meed
Tis, a and it and And alt wer me with me,
Epy thery, rown wI’ser bre Tit hif o grighiste, ge yed me,
Wely nowal will wor colly hasearn to nordibkern was hy burky’d-buvim’st if,
comir gonds my canson Ifg