In [13]:
# Mini-GPT visual con Gradio en Colab

# Paso 1: Instalación
!pip install torch sentencepiece gradio --quiet

In [14]:
# Paso 2: Importación
import torch
import torch.nn as nn
import torch.nn.functional as F
import sentencepiece as spm
import gradio as gr


In [15]:
# Step 3: Configuration
# Model shape
BLOCK_SIZE, EMBED_DIM = 8, 64   # context length (positions) and embedding width
N_HEADS, N_LAYERS = 2, 2        # attention heads per block and number of blocks

# Optimisation
LEARNING_RATE = 1e-3            # learning rate handed to the optimizer

# Tokenizer
MODEL_PREFIX = "bpe"            # file prefix of the SentencePiece model ("<prefix>.model")

In [21]:
import os

# Step 4: Corpus and tokenizer
CORPUS = "corpus.txt"
texto_entrenamiento = """
La luna se asoma detrás del monte,
como un ojo que espía el silencio.
El viento arrastra hojas secas,
dibujando círculos en el aire.

La noche canta con voz de agua,
y las estrellas tiemblan en su lecho.
Un farol parpadea en la distancia,
como un suspiro que no quiere irse.

Bajo un cielo bordado de sombra,
las palabras se ocultan en el pecho.
Todo calla, todo espera,
como si el mundo respirara lento.

Los árboles murmuran viejas historias,
y el río sueña con su nacimiento.
Una flor se abre en plena oscuridad,
y el universo guarda el secreto.
"""

# Path of the trained SentencePiece model, computed once and reused below.
TOKENIZER_MODEL = f"{MODEL_PREFIX}.model"

# Write the corpus to disk only if the file is missing
# (an existing corpus.txt is reused as-is, even if the text above changed).
if not os.path.exists(CORPUS):
    with open(CORPUS, "w", encoding="utf-8") as f:
        f.write(texto_entrenamiento)

# Train the BPE tokenizer only if no trained model file is present.
if not os.path.exists(TOKENIZER_MODEL):
    spm.SentencePieceTrainer.Train(
        input=CORPUS,
        model_prefix=MODEL_PREFIX,
        vocab_size=100,
        model_type="bpe",
    )

# Load the tokenizer once; vocab_size sizes the model's embedding and output layers.
sp = spm.SentencePieceProcessor(model_file=TOKENIZER_MODEL)
vocab_size = sp.get_piece_size()

In [22]:
# Step 5: Tokenization helpers
def encode(s):
    """Return the list of integer token ids that the tokenizer `sp` produces for `s`."""
    return sp.encode(s, out_type=int)


def decode(l):
    """Return the text that the tokenizer `sp` decodes from the token ids `l`."""
    return sp.decode(l)

In [23]:
# Paso 6: Modelo MiniGPT
class SelfAttentionHead(nn.Module):
    """One head of causal (masked) scaled dot-product self-attention.

    Args:
        head_size: Output width of the key, query and value projections.
    """

    def __init__(self, head_size):
        super().__init__()
        self.key = nn.Linear(EMBED_DIM, head_size, bias=False)
        self.query = nn.Linear(EMBED_DIM, head_size, bias=False)
        self.value = nn.Linear(EMBED_DIM, head_size, bias=False)
        # Lower-triangular matrix used to hide future positions; stored as a
        # buffer so it follows the module but is not a trainable parameter.
        self.register_buffer("tril", torch.tril(torch.ones(BLOCK_SIZE, BLOCK_SIZE)))

    def forward(self, x):
        """Attend over `x` of shape (B, T, EMBED_DIM) and return (B, T, head_size).

        T must not exceed BLOCK_SIZE, the side length of the causal mask.
        """
        _, T, _ = x.shape
        k = self.key(x)
        q = self.query(x)
        # Scale by sqrt(d_k), the width of the keys (head_size), not by the
        # embedding width of the input.
        wei = q @ k.transpose(-2, -1) / (k.shape[-1] ** 0.5)
        # Positions above the diagonal (the future) get -inf, i.e. zero weight
        # after the softmax.
        wei = wei.masked_fill(self.tril[:T, :T] == 0, float('-inf'))
        wei = F.softmax(wei, dim=-1)
        v = self.value(x)
        return wei @ v

class MultiHeadAttention(nn.Module):
    """Runs several attention heads side by side and projects their concatenation.

    Args:
        num_heads: Number of SelfAttentionHead modules to create.
        head_size: Output width of each head.
    """

    def __init__(self, num_heads, head_size):
        super().__init__()
        head_modules = []
        for _ in range(num_heads):
            head_modules.append(SelfAttentionHead(head_size))
        self.heads = nn.ModuleList(head_modules)
        # Maps the concatenated head outputs back to the embedding width.
        concat_width = num_heads * head_size
        self.proj = nn.Linear(concat_width, EMBED_DIM)

    def forward(self, x):
        """Apply every head to `x`, concatenate on the last axis, then project."""
        per_head = [head(x) for head in self.heads]
        merged = torch.cat(per_head, dim=-1)
        return self.proj(merged)

class FeedForward(nn.Module):
    """Position-wise MLP: Linear -> ReLU -> Linear, with a 4x wider hidden layer."""

    def __init__(self):
        super().__init__()
        hidden_dim = 4 * EMBED_DIM
        layers = [
            nn.Linear(EMBED_DIM, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, EMBED_DIM),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the MLP to the last dimension of `x`."""
        out = self.net(x)
        return out

class Block(nn.Module):
    """Transformer block: pre-LayerNorm attention and MLP, each with a residual add."""

    def __init__(self):
        super().__init__()
        # The embedding width is split evenly across the heads.
        per_head_width = EMBED_DIM // N_HEADS
        self.sa = MultiHeadAttention(N_HEADS, per_head_width)
        self.ffwd = FeedForward()
        self.ln1 = nn.LayerNorm(EMBED_DIM)
        self.ln2 = nn.LayerNorm(EMBED_DIM)

    def forward(self, x):
        """Return `x` after the attention and feed-forward residual sub-layers."""
        attended = self.sa(self.ln1(x))
        x = x + attended
        transformed = self.ffwd(self.ln2(x))
        return x + transformed

class MiniGPT(nn.Module):
    """Small decoder-only transformer language model.

    Token and learned positional embeddings are summed, passed through
    N_LAYERS Block modules and a final LayerNorm, then projected to
    vocabulary logits.
    """

    def __init__(self):
        super().__init__()
        self.token_emb = nn.Embedding(vocab_size, EMBED_DIM)
        self.pos_emb = nn.Embedding(BLOCK_SIZE, EMBED_DIM)
        self.blocks = nn.Sequential(*[Block() for _ in range(N_LAYERS)])
        self.ln_f = nn.LayerNorm(EMBED_DIM)
        self.head = nn.Linear(EMBED_DIM, vocab_size)

    def forward(self, idx):
        """Return logits of shape (B, T, vocab_size) for token ids `idx` of shape (B, T).

        T must not exceed BLOCK_SIZE, the size of the positional embedding table.
        """
        _, T = idx.shape
        tok_emb = self.token_emb(idx)
        pos_emb = self.pos_emb(torch.arange(T, device=idx.device))
        x = tok_emb + pos_emb  # (T, E) broadcasts over the batch dimension
        x = self.blocks(x)
        x = self.ln_f(x)
        logits = self.head(x)
        return logits

    @torch.no_grad()  # sampling needs no gradients; avoids building a graph
    def generate(self, idx, max_new_tokens, temperature=1.0, top_k=10):
        """Autoregressively sample `max_new_tokens` tokens after `idx`.

        Args:
            idx: Long tensor of token ids, shape (B, T).
            max_new_tokens: Number of tokens to append (coerced to int).
            temperature: Positive divisor applied to the logits before softmax.
            top_k: Number of most likely tokens to sample from (coerced to
                int and clamped to the range [1, vocabulary size]).

        Returns:
            Long tensor of shape (B, T + max_new_tokens).

        Raises:
            ValueError: If `temperature` is not strictly positive.
        """
        if temperature <= 0:
            raise ValueError("temperature must be > 0")
        for _ in range(int(max_new_tokens)):
            # The model only has BLOCK_SIZE positions, so keep the most recent ones.
            idx_cond = idx[:, -BLOCK_SIZE:]
            logits = self(idx_cond)[:, -1, :] / temperature
            probs = F.softmax(logits, dim=-1)
            # torch.topk raises if k exceeds the last dimension, so clamp it.
            k = max(1, min(int(top_k), probs.size(-1)))
            topk_probs, topk_idx = torch.topk(probs, k)
            # Zero every probability outside the top-k set and renormalise.
            probs = torch.zeros_like(probs).scatter_(1, topk_idx, topk_probs)
            probs = probs / probs.sum(dim=-1, keepdim=True)
            next_id = torch.multinomial(probs, num_samples=1)
            idx = torch.cat((idx, next_id), dim=1)
        return idx


In [24]:
# Step 7: Load the MiniGPT checkpoint if present, otherwise train and save it
CHECKPOINT_PATH = "minigpt_model.pt"
BATCH_SIZE = 4      # sequences per training step
TRAIN_STEPS = 300   # optimisation steps
LOG_EVERY = 50      # print the loss every this many steps
SEED = 1337         # fixes weight initialisation and batch sampling across runs

torch.manual_seed(SEED)
model = MiniGPT()

if not os.path.exists(CHECKPOINT_PATH):
    print("Entrenando modelo MiniGPT...")
    optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE)
    data = torch.tensor(encode(texto_entrenamiento), dtype=torch.long)

    def get_batch(data):
        """Sample BATCH_SIZE random windows; `y` is `x` shifted one token ahead."""
        ix = torch.randint(len(data) - BLOCK_SIZE, (BATCH_SIZE,))
        x = torch.stack([data[i:i+BLOCK_SIZE] for i in ix])
        y = torch.stack([data[i+1:i+BLOCK_SIZE+1] for i in ix])
        return x, y

    model.train()
    for step in range(TRAIN_STEPS):
        x, y = get_batch(data)
        logits = model(x)
        # Flatten (B, T, vocab) and (B, T) so every position is one classification.
        loss = F.cross_entropy(logits.view(-1, vocab_size), y.view(-1))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if step % LOG_EVERY == 0:
            print(f"Paso {step} - Pérdida: {loss.item():.4f}")
    torch.save(model.state_dict(), CHECKPOINT_PATH)
    print(f"Modelo entrenado y guardado como {CHECKPOINT_PATH}")
else:
    # map_location lets a checkpoint saved on another device load onto the CPU.
    # NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
    model.load_state_dict(torch.load(CHECKPOINT_PATH, map_location="cpu"))
    print(f"Modelo cargado desde {CHECKPOINT_PATH}")

# `model` already holds the trained or loaded weights, so it is not rebuilt
# and reloaded here. Left as the last expression so the cell shows the model.
model.eval()


Modelo cargado desde minigpt_model.pt


MiniGPT(
  (token_emb): Embedding(100, 64)
  (pos_emb): Embedding(8, 64)
  (blocks): Sequential(
    (0): Block(
      (sa): MultiHeadAttention(
        (heads): ModuleList(
          (0-1): 2 x SelfAttentionHead(
            (key): Linear(in_features=64, out_features=32, bias=False)
            (query): Linear(in_features=64, out_features=32, bias=False)
            (value): Linear(in_features=64, out_features=32, bias=False)
          )
        )
        (proj): Linear(in_features=64, out_features=64, bias=True)
      )
      (ffwd): FeedForward(
        (net): Sequential(
          (0): Linear(in_features=64, out_features=256, bias=True)
          (1): ReLU()
          (2): Linear(in_features=256, out_features=64, bias=True)
        )
      )
      (ln1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
      (ln2): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
    )
    (1): Block(
      (sa): MultiHeadAttention(
        (heads): ModuleList(
          (0-1): 2 x SelfAtte

In [25]:
def generar(input_text, tokens, temp, topk):
    """Generate a continuation of `input_text` and format it for display.

    Args:
        input_text: Prompt text typed by the user.
        tokens: Number of new tokens to generate (coerced to int).
        temp: Sampling temperature (coerced to float).
        topk: Top-k cutoff used while sampling (coerced to int).

    Returns:
        A string listing the token ids, their subword pieces and the decoded
        text, or a short hint when the prompt encodes to no tokens.
    """
    prompt_ids = encode(input_text or "")
    if not prompt_ids:
        # An empty context has no last position to predict from, so the model
        # cannot be called; ask for a prompt instead of raising.
        return "Escribe un texto inicial para generar."
    context = torch.tensor([prompt_ids], dtype=torch.long)
    # UI sliders may deliver floats, while range() and torch.topk need ints.
    with torch.no_grad():
        generated = model.generate(
            context,
            max_new_tokens=int(tokens),
            temperature=float(temp),
            top_k=int(topk),
        )
    out = generated[0].tolist()
    token_str = ', '.join(str(t) for t in out)
    texto = decode(out)
    token_pieces = ', '.join(sp.id_to_piece(t) for t in out)
    return f"Tokens: [{token_str}]\nSubpalabras: [{token_pieces}]\n\nTexto generado:\n{texto}"

# Input widgets, created in the order of generar's positional parameters.
prompt_box = gr.Textbox(label="Texto inicial")
length_slider = gr.Slider(5, 100, value=20, step=1, label="Cantidad de tokens a generar")
temperature_slider = gr.Slider(0.1, 2.0, value=1.0, step=0.1, label="Creatividad (Temperature)")
topk_slider = gr.Slider(1, 50, value=10, step=1, label="Top-K (limita opciones por paso)")

# Text-in / text-out interface around generar.
iface = gr.Interface(
    title="Mini-GPT Generador Poético",
    fn=generar,
    inputs=[prompt_box, length_slider, temperature_slider, topk_slider],
    outputs="text",
)

# share=True asks Gradio for a public URL in addition to serving locally.
iface.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://e6961eca9fb3b39e03.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


