# 💬 Treinamento de Chatbot Transformer
Este notebook treina um mini modelo Transformer para responder perguntas simples com base em um dataset estilo FAQ.

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import numpy as np
import json
from model import SimpleTransformer
import random

In [None]:
def top_k_top_p_filtering(logits, top_k=0, top_p=0.0):
    """Mask logits that fall outside the top-k and/or nucleus (top-p) sets.

    Filtering is applied independently to every row along the last
    dimension, so batched input is handled correctly.

    Args:
        logits: Float tensor of unnormalized scores; the last dimension
            indexes the candidates. Any number of leading dimensions is
            accepted.
        top_k: Keep only the ``top_k`` highest-scoring entries per row
            (entries tied with the k-th value are kept). Values ``<= 0``
            disable this filter; values above the row size are clamped.
        top_p: Keep the shortest prefix of the descending-sorted
            distribution whose cumulative probability exceeds ``top_p``;
            the single most likely entry is always kept. ``0.0`` disables
            this filter.

    Returns:
        A tensor shaped like ``logits`` with every filtered entry set to
        ``-inf``. The input tensor is never modified; when both filters are
        disabled the input itself is returned.
    """
    top_k = min(top_k, logits.size(-1))
    if top_k > 0:
        # Smallest value that still belongs to the top-k of each row.
        kth_value = torch.topk(logits, top_k, dim=-1).values[..., -1, None]
        logits = logits.masked_fill(logits < kth_value, float('-inf'))
    if top_p > 0.0:
        sorted_logits, sorted_indices = torch.sort(logits, descending=True, dim=-1)
        cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
        sorted_remove = cumulative_probs > top_p
        # Shift right so the first entry that crosses the threshold is kept.
        sorted_remove[..., 1:] = sorted_remove[..., :-1].clone()
        sorted_remove[..., 0] = False
        # Map the mask from sorted order back to the original positions,
        # row by row, instead of flattening indices across the batch.
        remove = torch.zeros_like(sorted_remove).scatter(-1, sorted_indices, sorted_remove)
        logits = logits.masked_fill(remove, float('-inf'))
    return logits

In [None]:
class QADataset(Dataset):
    """Sliding-window next-token dataset built from a plain-text file.

    Every non-blank line is stripped, lower-cased, split on whitespace and
    terminated with an ``'<eos>'`` token. All lines are concatenated into one
    token stream, and each sample is a window of ``seq_len`` token ids paired
    with the same window shifted one position to the right.

    Attributes set by ``__init__``:
        token_to_id: dict mapping each distinct token to an integer id
            (ids follow the sorted order of the tokens).
        id_to_token: inverse mapping of ``token_to_id``.
        data: the whole corpus as a list of token ids.
        seq_len: number of tokens per sample.
    """

    def __init__(self, filepath, seq_len=20):
        """Read ``filepath`` (UTF-8) and build the vocabulary and id stream.

        Args:
            filepath: Path of the text file to load.
            seq_len: Length of each input/target window.
        """
        tokens = []
        with open(filepath, "r", encoding="utf-8") as f:
            # Stream the file line by line instead of loading it all at once.
            for line in f:
                line = line.strip().lower()
                if not line:
                    continue
                tokens.extend(line.split())
                tokens.append('<eos>')
        vocab = sorted(set(tokens))
        self.token_to_id = {tok: i for i, tok in enumerate(vocab)}
        self.id_to_token = {i: tok for tok, i in self.token_to_id.items()}
        self.data = [self.token_to_id[tok] for tok in tokens]
        self.seq_len = seq_len

    def __len__(self):
        """Return the number of complete (input, target) windows.

        Clamped at zero so a corpus shorter than ``seq_len`` yields an empty
        dataset instead of a negative length, which ``len()`` rejects.
        """
        return max(0, len(self.data) - self.seq_len)

    def __getitem__(self, idx):
        """Return the ``idx``-th window and its one-step-shifted target.

        Args:
            idx: Sample index; negative values count from the end.

        Returns:
            Tuple ``(x, y)`` of 1-D tensors with ``seq_len`` token ids each,
            where ``y`` is ``x`` shifted by one position in the corpus.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        size = len(self)
        if idx < 0:
            idx += size
        # Without this check an out-of-range index would silently return
        # truncated tensors, and plain iteration would never terminate.
        if not 0 <= idx < size:
            raise IndexError("QADataset index out of range")
        x = torch.tensor(self.data[idx:idx + self.seq_len])
        y = torch.tensor(self.data[idx + 1:idx + self.seq_len + 1])
        return x, y

In [None]:
# Load the corpus as windows of 10 tokens and serve them in shuffled
# mini-batches of 4 samples.
dataset = QADataset(filepath="textDataset.txt", seq_len=10)
dataloader = DataLoader(dataset=dataset, shuffle=True, batch_size=4)

In [None]:
# Build the Transformer with one vocabulary entry per distinct dataset token.
model = SimpleTransformer(vocab_size=len(dataset.token_to_id), embed_dim=128, num_heads=8, num_layers=4)

# Report the total number of parameters.
total_params = sum(map(torch.numel, model.parameters()))
print(f"Modelo criado com {total_params:,} parâmetros.")

In [None]:
# Pick the device and move the model onto it BEFORE constructing the
# optimizer, which is the ordering the torch.optim documentation recommends.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.train()
optimizer = torch.optim.Adam(model.parameters(), lr=5e-4)

for epoch in range(15):
    total_loss = 0.0
    for x, y in dataloader:
        x, y = x.to(device), y.to(device)
        # The model returns a pair; only its first element (the logits) is
        # needed for training.
        logits, _ = model(x)
        # Flatten every position into one row so cross-entropy compares each
        # predicted distribution with its target id. reshape() also works if
        # the logits happen to be non-contiguous.
        loss = F.cross_entropy(logits.reshape(-1, logits.size(-1)), y.reshape(-1))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # Mean loss per batch for this epoch.
    print(f"Época {epoch+1}, Loss: {total_loss/len(dataloader):.4f}")

In [None]:
# Persist the trained weights, then the token -> id table as JSON so the
# same vocabulary can be reloaded alongside the weights.
torch.save(model.state_dict(), "modelo_transformer.pt")
with open("vocab_transformer.json", mode="w", encoding="utf-8") as f:
    f.write(json.dumps(dataset.token_to_id))
print("Modelo e vocabulário salvos com sucesso.")

In [None]:
model.eval()
prompt = "pergunta: qual a capital do brasil? resposta:"
tokens = prompt.lower().split()

# Fail early with a readable message when the prompt uses words the training
# vocabulary does not contain (the lookup below would raise a bare KeyError).
unknown_tokens = [t for t in tokens if t not in dataset.token_to_id]
if unknown_tokens:
    raise KeyError(f"Tokens do prompt fora do vocabulário: {unknown_tokens}")
input_ids = torch.tensor([[dataset.token_to_id[t] for t in tokens]]).to(device)

generated = input_ids
temperature = 1.0
top_k = 5
top_p = 0.9
max_tokens = 10
# '<eos>' closes every line of the training corpus; None if it is absent.
eos_id = dataset.token_to_id.get('<eos>')

# Sampling needs no gradients; disabling autograd avoids building a graph
# for every forward pass.
with torch.no_grad():
    for _ in range(max_tokens):
        logits, _ = model(generated)
        # Keep only the scores for the last position and apply temperature.
        logits = logits[:, -1, :] / temperature
        # Pass a copy so the filter can never alter `logits` itself.
        filtered_logits = top_k_top_p_filtering(logits.clone(), top_k=top_k, top_p=top_p)
        probs = F.softmax(filtered_logits, dim=-1)
        next_token = torch.multinomial(probs, num_samples=1)
        generated = torch.cat([generated, next_token], dim=1)
        # Stop at the end-of-entry marker rather than running into the text
        # of the following corpus entry.
        if next_token.item() == eos_id:
            break

texto_final = " ".join([dataset.id_to_token[i] for i in generated[0].tolist()])
print("\nTexto gerado:")
print(texto_final)