In [8]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder

In [9]:

# Cargar datos de entrenamiento
df = pd.read_csv("train.csv")
df = df.drop("id", axis=1)  # Eliminar columna id de entrenamiento

# Cargar datos de prueba
test_df = pd.read_csv("test.csv")
test_ids = test_df["id"]  # Guardar los IDs de prueba

# Manejar valores faltantes en categóricas
categorical_cols = ["Brand", "Material", "Size", "Style", "Color"]
for col in categorical_cols:
    # Aplicar a ambos conjuntos de datos
    df[col] = df[col].replace("", "Missing").fillna("Missing")
    test_df[col] = test_df[col].replace("", "Missing").fillna("Missing")

# Convertir variables binarias a 0/1
binary_cols = ["Laptop Compartment", "Waterproof"]
df[binary_cols] = df[binary_cols].replace({"Yes": 1, "No": 0})
test_df[binary_cols] = test_df[binary_cols].replace({"Yes": 1, "No": 0})

In [10]:
# Separar features y target de entrenamiento
X = df.drop("Price", axis=1)
y = df["Price"].values

# Dividir en train y val
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Eliminar columna id de test
test_df_processed = test_df.drop("id", axis=1)

In [11]:
# One-Hot Encoding para categóricas
encoder = OneHotEncoder(handle_unknown="ignore", sparse_output=False)
X_train_cat = encoder.fit_transform(X_train[categorical_cols])
X_val_cat = encoder.transform(X_val[categorical_cols])
X_test_cat = encoder.transform(test_df_processed[categorical_cols])

# Normalizar numéricas
numerical_cols = ["Compartments", "Weight Capacity (kg)"]
scaler = StandardScaler()
X_train_num = scaler.fit_transform(X_train[numerical_cols])
X_val_num = scaler.transform(X_val[numerical_cols])
X_test_num = scaler.transform(test_df_processed[numerical_cols])

# Combinar features procesadas
binary_train = X_train[binary_cols].values
binary_val = X_val[binary_cols].values
binary_test = test_df_processed[binary_cols].values

X_train_processed = np.hstack([X_train_cat, X_train_num, binary_train])
X_val_processed = np.hstack([X_val_cat, X_val_num, binary_val])
X_test_processed = np.hstack([X_test_cat, X_test_num, binary_test])


In [13]:
X_train_processed

array([[ 0.        ,  0.        ,  0.        , ...,  1.3296494 ,
         1.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        , ..., -1.3580505 ,
         1.        ,  1.        ],
       [ 1.        ,  0.        ,  0.        , ..., -0.93768987,
         1.        ,  0.        ],
       ...,
       [ 0.        ,  0.        ,  0.        , ..., -0.46501675,
         0.        ,  1.        ],
       [ 1.        ,  0.        ,  0.        , ...,  0.43560247,
         0.        ,  0.        ],
       [ 1.        ,  0.        ,  0.        , ..., -0.61712947,
         0.        ,  1.        ]])

In [16]:
# Dataset y DataLoader
class BagDataset(Dataset):
    def __init__(self, X, y=None):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32) if y is not None else None

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        if self.y is not None:
            return self.X[idx], self.y[idx]
        return self.X[idx]

# Crear datasets
train_dataset = BagDataset(X_train_processed, y_train)
val_dataset = BagDataset(X_val_processed, y_val)
test_dataset = BagDataset(X_test_processed)  # Sin etiquetas para predicción

# DataLoaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [17]:
# Modelo (mismo que antes)
class NeuralNetwork(nn.Module):
    def __init__(self, input_size):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(input_size, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, 32),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(32, 1)
        )

    def forward(self, x):
        return self.layers(x).squeeze()

# Inicializar el modelo
input_size = X_train_processed.shape[1]
model = NeuralNetwork(input_size)

# Pérdida y optimizador
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)

In [18]:

# Entrenamiento
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        y_pred = model(X_batch)
        loss = criterion(y_pred, y_batch)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    
    # Validación
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for X_val_batch, y_val_batch in val_loader:
            y_val_pred = model(X_val_batch)
            val_loss += criterion(y_val_pred, y_val_batch).item()
    
    print(f"Epoch {epoch+1}, Train Loss: {train_loss/len(train_loader):.4f}, Val Loss: {val_loss/len(val_loader):.4f}")


Epoch 1, Train Loss: nan, Val Loss: nan
Epoch 2, Train Loss: nan, Val Loss: nan


KeyboardInterrupt: 