In [1]:
%pip install torch torchvision





In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import numpy as np


In [3]:
# Configuración inicial
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)


In [4]:
# Datos simulados
class TextDataset(Dataset):
    def __init__(self, texts, labels, vocab_size=1000, seq_length=10):
        self.texts = texts
        self.labels = labels
        self.vocab_size = vocab_size
        self.seq_length = seq_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]
        # Relleno o truncado de la secuencia
        text = text[:self.seq_length] + [0] * max(0, self.seq_length - len(text))
        return torch.tensor(text, dtype=torch.long), torch.tensor(label, dtype=torch.long)


In [5]:
# Generar datos aleatorios
def generate_data(num_samples=1000, vocab_size=1000, seq_length=10):
    texts = [np.random.randint(1, vocab_size, size=np.random.randint(5, seq_length + 1)).tolist() for _ in range(num_samples)]
    labels = np.random.randint(0, 2, size=num_samples).tolist()  # Etiquetas binarias
    return texts, labels


In [6]:
# Datos de entrenamiento y validación
train_texts, train_labels = generate_data(800)
val_texts, val_labels = generate_data(200)

train_dataset = TextDataset(train_texts, train_labels)
val_dataset = TextDataset(val_texts, val_labels)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)


In [7]:
# Modelo Transformer
class TransformerModel(nn.Module):
    def __init__(self, vocab_size, emb_dim, n_heads, num_layers, ff_dim, num_classes, max_seq_len):
        super(TransformerModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, emb_dim)
        self.positional_encoding = nn.Parameter(torch.zeros(1, max_seq_len, emb_dim))

        encoder_layer = nn.TransformerEncoderLayer(d_model=emb_dim, nhead=n_heads, dim_feedforward=ff_dim)
        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)

        self.fc = nn.Linear(emb_dim, num_classes)

    def forward(self, x):
        embedded = self.embedding(x) + self.positional_encoding[:, :x.size(1), :]
        encoded = self.encoder(embedded)
        output = self.fc(encoded.mean(dim=1))  # Promedio sobre la secuencia
        return output


In [9]:
# Instanciar el modelo
vocab_size = 1000
emb_dim = 128
n_heads = 4
num_layers = 2
ff_dim = 256
num_classes = 2
max_seq_len = 10
model = TransformerModel(vocab_size, emb_dim, n_heads, num_layers, ff_dim, num_classes, max_seq_len).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)




In [10]:
# Entrenamiento
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=5):
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0
        for texts, labels in train_loader:
            texts, labels = texts.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(texts)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        # Validación
        model.eval()
        val_loss = 0
        correct = 0
        total = 0
        with torch.no_grad():
            for texts, labels in val_loader:
                texts, labels = texts.to(device), labels.to(device)
                outputs = model(texts)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                correct += (predicted == labels).sum().item()
                total += labels.size(0)

        print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss/len(train_loader):.4f}, "
              f"Val Loss: {val_loss/len(val_loader):.4f}, Val Accuracy: {correct/total:.4f}")


In [11]:
# Entrenar el modelo
train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10)


Epoch 1/10, Train Loss: 0.7222, Val Loss: 0.7006, Val Accuracy: 0.4700
Epoch 2/10, Train Loss: 0.6863, Val Loss: 0.6912, Val Accuracy: 0.5400
Epoch 3/10, Train Loss: 0.6448, Val Loss: 0.6904, Val Accuracy: 0.5850
Epoch 4/10, Train Loss: 0.5797, Val Loss: 0.7794, Val Accuracy: 0.4900
Epoch 5/10, Train Loss: 0.4920, Val Loss: 0.9459, Val Accuracy: 0.4750
Epoch 6/10, Train Loss: 0.3834, Val Loss: 1.0476, Val Accuracy: 0.5350
Epoch 7/10, Train Loss: 0.3156, Val Loss: 1.1766, Val Accuracy: 0.5050
Epoch 8/10, Train Loss: 0.2701, Val Loss: 1.4347, Val Accuracy: 0.5200
Epoch 9/10, Train Loss: 0.2143, Val Loss: 1.4707, Val Accuracy: 0.5050
Epoch 10/10, Train Loss: 0.1910, Val Loss: 1.5714, Val Accuracy: 0.5100
