In [None]:
import numpy as np
import pandas as pd
import gc
import os
import matplotlib.pyplot as plt
import polars as pl
from sklearn.metrics import mean_squared_error, mean_absolute_error
from joblib import Parallel, delayed
from more_itertools import chunked
from functools import reduce
from typing import List
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
import pandas as pd
import joblib
import os
import torch.nn as nn

In [None]:
# Load the combined train/validation/prediction table with the fastparquet engine.
parquet_path = './data/train_val_NN_TORCH.parquet'
df_full = pd.read_parquet(parquet_path, engine='fastparquet')

In [None]:
# Name of the regression target column.
target_col = 'CLASE_DELTA_LOG1P_Z'

# Categorical columns that are fed to embedding layers.
cat_cols = [
    'ID_CAT1', 'ID_CAT2', 'ID_CAT3', 'ID_BRAND',
    'SKU_SIZE', 'CUSTOMER_RANK_BIN', 'PRODUCT_RANK_BIN',
]

# Overwrite each categorical column of df_full, in place, with its integer category code.
# NOTE(review): pandas encodes missing values as code -1, which nn.Embedding cannot
# index -- confirm these columns contain no NaN before training.
for col in cat_cols:
    as_category = df_full[col].astype('category')
    df_full[col] = as_category.cat.codes

# Identifier / bookkeeping columns plus the target: these must never be model inputs.
excluir = ['PERIODO', 'CUSTOMER_ID', 'PRODUCT_ID', 'CLASE_DELTA_LOG1P_Z', 'ORDINAL']

# Numeric features: every column whose name ends in '_Z' (presumably z-scored -- TODO
# confirm) and that is not in the exclusion list.
feature_cols = list(
    filter(lambda name: name.endswith('_Z') and name not in excluir, df_full.columns)
)


In [None]:
# Identifiers, the period, the ordinal and the target must appear in neither input list.
for forbidden in ('CUSTOMER_ID', 'PRODUCT_ID', 'PERIODO', 'CLASE_DELTA_LOG1P_Z', 'ORDINAL'):
    assert forbidden not in feature_cols
    assert forbidden not in cat_cols


In [None]:
# Time-based split on PERIODO: train up to 201908, validate on 201909-201910,
# and keep 201912 as the rows to predict.
periodo = df_full['PERIODO']
df_train = df_full.loc[periodo <= 201908].copy()
df_val = df_full.loc[periodo.between(201909, 201910)].copy()
df_pred = df_full.loc[periodo == 201912].copy()

In [None]:
from torch.utils.data import Dataset
import torch

class TabularDataset(Dataset):
    """In-memory dataset of categorical codes, numeric features and an optional target.

    Args:
        df: DataFrame holding all requested columns.
        cat_cols: columns converted to one ``torch.long`` tensor.
        num_cols: columns converted to one ``torch.float32`` tensor.
        target_col: optional target column; when given it is stored as a
            ``float32`` tensor with a trailing dimension of size 1.

    Items are ``(cat, num, y)`` when a target column was given, else ``(cat, num)``.
    """

    def __init__(self, df, cat_cols, num_cols, target_col=None):
        self.cat_data = torch.tensor(df[cat_cols].values, dtype=torch.long)
        self.num_data = torch.tensor(df[num_cols].values, dtype=torch.float32)
        self.has_target = target_col is not None
        self.y = (
            torch.tensor(df[target_col].values, dtype=torch.float32).unsqueeze(1)
            if self.has_target
            else None
        )

    def __len__(self):
        # One row of categorical codes per sample.
        return self.cat_data.shape[0]

    def __getitem__(self, idx):
        inputs = (self.cat_data[idx], self.num_data[idx])
        if not self.has_target:
            return inputs
        return inputs + (self.y[idx],)


In [None]:
""" import torch.nn as nn

class WeightedMAELoss(nn.Module):
    def __init__(self, epsilon=1e-3):
        super(WeightedMAELoss, self).__init__()
        self.epsilon = epsilon

    def forward(self, y_pred, y_true):
        weights = 1.0 + torch.abs(y_true)
        loss = weights * torch.abs(y_pred - y_true)
        return loss.mean() """


In [None]:
""" class NonlinearWeightedMSELoss(nn.Module):
    def __init__(self, alpha=0.5):
        super().__init__()
        self.alpha = alpha

    def forward(self, pred, target):
        error = pred - target
        weights = 1.0 + self.alpha * torch.abs(target)
        return torch.mean(weights * error ** 2)
 """

In [None]:
""" import torch.nn.functional as F

class CustomWeightedLoss(nn.Module):
    def __init__(self, tn_index: int, alpha: float = 0.5):
        super().__init__()
        self.tn_index = tn_index
        self.alpha = alpha

    def forward(self, preds, targets, x_num):
        tn_values = x_num[:, self.tn_index]
        weights = 1 + self.alpha * tn_values.abs()
        loss = (weights * (preds - targets).pow(2)).mean()
        return loss """

In [None]:
class WeightedMSELossMulti(nn.Module):
    """Squared error weighted per sample by the magnitude of selected numeric inputs.

    The weight of a sample is ``1 + alpha * sum(|x_num[:, i]| for i in penalty_indices)``,
    and the loss is the mean of ``weight * (pred - target) ** 2`` over the batch.

    Args:
        penalty_indices: positions (column indices into ``x_num``) of the features
            that drive the weight. ``None`` is accepted and means "no penalty
            columns", which reduces the loss to a plain MSE.
        alpha: strength of the penalty term.
    """

    def __init__(self, penalty_indices, alpha=0.5):
        super().__init__()
        # train_model forwards its own default of None; iterating None in forward()
        # would raise TypeError, so normalise it to an empty list here.
        self.penalty_indices = list(penalty_indices) if penalty_indices is not None else []
        self.alpha = alpha

    def forward(self, preds, targets, x_num):
        """Return the weighted MSE for one batch.

        ``preds`` and ``targets`` are squeezed before subtraction, so ``(B, 1)`` and
        ``(B,)`` inputs are both accepted; ``x_num`` is indexed as ``x_num[:, i]``.
        """
        # With no penalty columns the sum is 0 and the weight is the scalar 1.
        penalty = 1 + self.alpha * sum(x_num[:, i].abs() for i in self.penalty_indices)
        error = (preds.squeeze() - targets.squeeze()) ** 2
        return (penalty * error).mean()


In [None]:
# Numeric columns whose absolute value scales the loss weight: TN_Z plus its
# twelve lags TN_LAG_01_Z ... TN_LAG_12_Z.
penalty_cols = ['TN_Z'] + [f'TN_LAG_{lag:02d}_Z' for lag in range(1, 13)]

# The loss indexes the numeric tensor by position, so translate names to positions
# inside feature_cols (the column order used to build that tensor).
missing_penalty_cols = [col for col in penalty_cols if col not in feature_cols]
if missing_penalty_cols:
    raise ValueError(f"penalty columns not present in feature_cols: {missing_penalty_cols}")
penalty_indices = [feature_cols.index(col) for col in penalty_cols]
print(penalty_cols)
print(penalty_indices)

# Fixed: the loss needs integer positions, not column names (passing penalty_cols
# would fail at the first forward pass on x_num[:, 'TN_Z']).
loss_fn = WeightedMSELossMulti(penalty_indices=penalty_indices, alpha=0.5)

In [None]:
# Every column the datasets will read must exist in the training frame.
assert set(cat_cols).issubset(df_train.columns), "Faltan columnas categóricas"
assert set(feature_cols).issubset(df_train.columns), "Faltan columnas numéricas"
assert target_col in df_train.columns, "Falta la variable objetivo"


In [None]:
# Mini-batch size shared by the training and validation loaders.
batch_size = 1024

# Tensorise the train and validation frames with the same column lists and target.
train_dataset, val_dataset = (
    TabularDataset(frame, cat_cols, feature_cols, target_col)
    for frame in (df_train, df_val)
)

# Only the training loader shuffles; validation keeps row order.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)


In [None]:
import torch.nn.functional as F

class TabularNNImproved(nn.Module):
    """MLP regressor over embedded categorical codes concatenated with numeric features.

    Args:
        embedding_sizes: iterable of ``(num_embeddings, embedding_dim)`` pairs, one
            per categorical column, in the column order of ``x_cat``.
        num_numerical: number of numeric input features.
        hidden_sizes: width of each hidden layer; every hidden layer is
            Linear -> BatchNorm1d -> GELU -> Dropout.
        dropout: dropout probability applied to the concatenated embeddings and
            after every hidden layer.

    The network ends in a single-unit linear layer, so ``forward`` returns one value
    per input row.
    """

    def __init__(self, embedding_sizes, num_numerical, hidden_sizes=[512, 512, 256, 128], dropout=0.1):
        super().__init__()

        # One lookup table per categorical column.
        tables = [nn.Embedding(n_codes, width) for n_codes, width in embedding_sizes]
        self.embeddings = nn.ModuleList(tables)
        self.embedding_dropout = nn.Dropout(dropout)

        # Layer widths: [embeddings + numeric inputs, hidden_1, ..., hidden_k].
        first_width = sum(width for _, width in embedding_sizes) + num_numerical
        widths = [first_width, *hidden_sizes]

        stack = []
        for fan_in, fan_out in zip(widths, widths[1:]):
            stack.extend((
                nn.Linear(fan_in, fan_out),
                nn.BatchNorm1d(fan_out),
                nn.GELU(),
                nn.Dropout(dropout),
            ))
        # Regression head.
        stack.append(nn.Linear(widths[-1], 1))
        self.model = nn.Sequential(*stack)

    def forward(self, x_cat, x_num):
        """Embed column ``i`` of ``x_cat`` with table ``i``, append ``x_num``, run the MLP."""
        embedded = torch.cat(
            [table(x_cat[:, position]) for position, table in enumerate(self.embeddings)],
            dim=1,
        )
        features = torch.cat([self.embedding_dropout(embedded), x_num], dim=1)
        return self.model(features)


In [None]:
import torch

# Use the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# One (num_embeddings, embedding_dim) pair per categorical column:
# num_embeddings = distinct codes + 1, embedding_dim = half of that, capped at 50.
embedding_sizes = []
for col in cat_cols:
    n_codes = df_full[col].nunique() + 1
    embedding_sizes.append((n_codes, min(50, n_codes // 2)))

# Build the network on the selected device and print its layer structure.
model = TabularNNImproved(
    embedding_sizes=embedding_sizes,
    num_numerical=len(feature_cols),
    hidden_sizes=[4096, 2048, 1024, 512, 512, 256, 128],
    dropout=0.3,
).to(device)
print(model)


In [None]:
from torchinfo import summary
# Single-row dummy inputs with the dtypes the model expects (long codes, float features).
dummy_cat = torch.zeros(1, len(cat_cols), dtype=torch.long).to(device)
dummy_num = torch.zeros(1, len(feature_cols)).to(device)
summary(model, input_data=[dummy_cat, dummy_num])


In [None]:
from sklearn.metrics import mean_absolute_error, r2_score
import torch
import numpy as np

def train_model(model, train_loader, val_loader, n_epochs=20, lr=1e-3, alpha=0.5, patience=3, penalty_indices=None):
    """Train ``model`` with Adam and early stopping on the weighted validation loss.

    Batches are moved to the module-level ``device``. The loss is
    ``WeightedMSELossMulti(penalty_indices, alpha)``, evaluated on both loaders.

    Args:
        model: network called as ``model(cats, conts)``.
        train_loader: yields ``(cats, conts, y)`` training batches.
        val_loader: yields ``(cats, conts, y)`` validation batches.
        n_epochs: maximum number of epochs.
        lr: Adam learning rate.
        alpha: strength of the penalty term of the loss.
        patience: stop after this many consecutive epochs without a lower
            validation loss.
        penalty_indices: column positions in ``conts`` that drive the loss weight;
            ``None`` means no penalty columns (plain MSE).

    Returns:
        ``(y_true, y_pred)`` numpy arrays with the validation targets and the
        predictions of the best epoch (lowest validation loss), i.e. of the weights
        the model holds when this function returns. If no epoch ever improved on
        the initial ``inf`` (for example a NaN loss), the last epoch's arrays are
        returned; with ``n_epochs=0`` both are ``None``.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    # Fixed: alpha used to be hard-coded to 0.5 here, so the caller's value was ignored.
    criterion = WeightedMSELossMulti(
        penalty_indices=[] if penalty_indices is None else penalty_indices,
        alpha=alpha,
    )

    best_val_loss = float('inf')
    best_model_state = None
    best_y_true = best_y_pred = None
    y_true = y_pred = None
    epochs_without_improvement = 0

    for epoch in range(n_epochs):
        # Training pass
        model.train()
        train_loss = 0.0
        for cats, conts, y in train_loader:
            cats, conts, y = cats.to(device), conts.to(device), y.to(device)
            optimizer.zero_grad()
            y_pred = model(cats, conts)
            loss = criterion(y_pred, y, conts)
            loss.backward()
            optimizer.step()
            # Weight each batch mean by its size so the epoch figure is a per-sample mean.
            train_loss += loss.item() * y.size(0)

        train_loss /= len(train_loader.dataset)

        # Validation pass
        model.eval()
        val_loss = 0.0
        y_true_list = []
        y_pred_list = []

        with torch.no_grad():
            for cats, conts, y in val_loader:
                cats, conts, y = cats.to(device), conts.to(device), y.to(device)
                y_pred = model(cats, conts)
                loss = criterion(y_pred, y, conts)
                val_loss += loss.item() * y.size(0)

                y_true_list.append(y.cpu().numpy())
                y_pred_list.append(y_pred.cpu().numpy())

        val_loss /= len(val_loader.dataset)
        y_true = np.concatenate(y_true_list)
        y_pred = np.concatenate(y_pred_list)

        mae = mean_absolute_error(y_true, y_pred)
        r2 = r2_score(y_true, y_pred)

        print(f"Epoch {epoch+1}/{n_epochs} | Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | MAE: {mae:.4f} | R²: {r2:.4f}")

        # Early stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            # state_dict() returns references to the live parameters, which later
            # optimizer steps overwrite; clone them to get a real snapshot.
            best_model_state = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }
            best_y_true, best_y_pred = y_true, y_pred
            epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1
            if epochs_without_improvement >= patience:
                print("🔴 Early stopping triggered")
                break

    # Restore the best weights and report the predictions that belong to them.
    if best_model_state is not None:
        model.load_state_dict(best_model_state)
        return best_y_true, best_y_pred

    return y_true, y_pred


# Búsqueda de hiperparámetros (Grid Search)
Probamos distintas combinaciones de hiperparámetros y seleccionamos la que da mejor MAE en validación.


In [None]:
""" from itertools import product

# Definir el espacio de búsqueda
param_grid = {
    'lr': [1e-3, 5e-4],
    'dropout': [0.2, 0.3],
    'hidden_sizes': [
        [1024, 512, 256],
        [2048, 1024, 512, 256]
    ],
    'alpha': [0.3, 0.5, 0.7]
}

# Generar todas las combinaciones
param_combinations = list(product(
    param_grid['lr'],
    param_grid['dropout'],
    param_grid['hidden_sizes'],
    param_grid['alpha']
))

results = []
for lr, dropout, hidden_sizes, alpha in param_combinations:
    # Crear modelo
    model = TabularNNImproved(
        embedding_sizes=embedding_sizes,
        num_numerical=len(feature_cols),
        hidden_sizes=hidden_sizes,
        dropout=dropout
    ).to(device)
    # Entrenar modelo (menos épocas para grid search)
    y_true_gs, y_pred_gs = train_model(
        model, train_loader, val_loader,
        n_epochs=8, lr=lr, alpha=alpha, patience=2
    )
    mae = mean_absolute_error(y_true_gs, y_pred_gs)
    results.append({
        'lr': lr,
        'dropout': dropout,
        'hidden_sizes': hidden_sizes,
        'alpha': alpha,
        'mae': mae
    })
    print(f"Params: lr={lr}, dropout={dropout}, hidden_sizes={hidden_sizes}, alpha={alpha} -> MAE={mae:.4f}")
 """

In [None]:
import sys
# Show the path of the Python interpreter that is running this kernel.
print(sys.executable)

In [None]:
import torch
# Print, in order: whether CUDA is usable, the CUDA version torch reports,
# and whether the cuDNN backend is enabled.
for cuda_fact in (torch.cuda.is_available(), torch.version.cuda, torch.backends.cudnn.enabled):
    print(cuda_fact)



In [None]:
from itertools import product
import torch
import gc
import pandas as pd
from sklearn.metrics import mean_absolute_error

# Hyper-parameter search space; every combination below is trained once.
param_grid = {
    'lr': [1e-3, 5e-4],
    'dropout': [0.2, 0.3],
    'hidden_sizes': [
        [1024, 512, 256],
        [2048, 1024, 512, 256],
    ],
    'alpha': [0.3, 0.5, 0.7],
}

# Cartesian product, always unpacked in (lr, dropout, hidden_sizes, alpha) order.
param_combinations = list(product(
    *(param_grid[key] for key in ('lr', 'dropout', 'hidden_sizes', 'alpha'))
))

results = []
best_mae = float('inf')

# One short training run per combination.
for lr, dropout, hidden_sizes, alpha in param_combinations:
    print(f"\n🔧 Entrenando con: lr={lr}, dropout={dropout}, hidden_sizes={hidden_sizes}, alpha={alpha}")

    # Fresh network for this combination, placed on the active device.
    model = TabularNNImproved(
        embedding_sizes=embedding_sizes,
        num_numerical=len(feature_cols),
        hidden_sizes=hidden_sizes,
        dropout=dropout,
    ).to(device)

    # Shortened schedule (8 epochs, patience 2) to keep the search affordable.
    y_true_gs, y_pred_gs = train_model(
        model,
        train_loader,
        val_loader,
        n_epochs=8,
        lr=lr,
        alpha=alpha,
        patience=2,
        penalty_indices=penalty_indices,
    )
    mae = mean_absolute_error(y_true_gs, y_pred_gs)

    # Record this run.
    results.append(dict(lr=lr, dropout=dropout, hidden_sizes=hidden_sizes, alpha=alpha, mae=mae))
    print(f"✅ MAE = {mae:.4f}")

    # Persist the weights whenever this run beats every previous one.
    if mae < best_mae:
        best_mae = mae
        torch.save(model.state_dict(), f"best_model_mae{mae:.4f}_lr{lr}_do{dropout}_a{alpha}.pth")
        print("💾 Modelo guardado (mejor hasta ahora)")

    # Drop the model and release cached GPU memory before the next run.
    del model
    torch.cuda.empty_cache()
    gc.collect()

# Tabulate the runs, best MAE first, and show the top five.
results_df = pd.DataFrame(results).sort_values(by='mae')
print("\n📊 Mejores combinaciones:")
print(results_df.head())

# Persist the full results table.
results_df.to_csv("gridsearch_results.csv", index=False)


In [None]:

# Rank every tried combination by validation MAE (best first) so the top three
# can be picked for an ensemble. The list is reordered in place.
results[:] = sorted(results, key=lambda entry: entry['mae'])
print("\nResultados ordenados por MAE:")
for res in results:
    print(f"Params: lr={res['lr']}, dropout={res['dropout']}, hidden_sizes={res['hidden_sizes']}, alpha={res['alpha']} -> MAE={res['mae']:.4f}")
#best_params = min(results, key=lambda x: x['mae'])
#print("Mejores hiperparámetros encontrados:", best_params)


In [None]:
import torch
from torch.utils.data import DataLoader

# 🔧 Tus parámetros finales para los 3 mejores modelos
# Final hyper-parameters of the three ensemble members: output file stem,
# hidden-layer widths, dropout, learning rate and loss alpha.
model_configs = [
    dict(name="modelo_m1", hidden_sizes=[1024, 512, 256], dropout=0.2, lr=0.001, alpha=0.7),
    dict(name="modelo_m2", hidden_sizes=[2048, 1024, 512, 256], dropout=0.2, lr=0.0005, alpha=0.3),
    dict(name="modelo_m3", hidden_sizes=[1024, 512, 256], dropout=0.2, lr=0.0005, alpha=0.3),
]


In [None]:
from torch.utils.data import ConcatDataset


# 📦 Dataset completo ya procesado
# Reuse the training and validation datasets that were already built.
# Concatenate train_dataset and val_dataset into one dataset for the final fits.
train_val_dataset = ConcatDataset([train_dataset, val_dataset])


In [None]:
# Shuffled loader over train + validation; 1024 is the same batch size as the earlier loaders.
train_loader_full = DataLoader(train_val_dataset, batch_size=1024, shuffle=True)


In [None]:

# 🧠 Función de pérdida personalizada
class CustomLoss(torch.nn.Module):
    """Blend of the mean absolute error with the mean absolute target value.

    ``loss = (1 - alpha) * MAE(preds, targets) + alpha * mean(|targets|)``

    NOTE(review): the second term does not depend on ``preds``, so it adds a
    constant to the loss and contributes no gradient; ``alpha`` therefore only
    rescales the MAE gradient. Confirm whether a prediction-dependent penalty
    was intended.

    Args:
        alpha: blend factor between the two terms.
    """

    def __init__(self, alpha=0.3):
        super().__init__()
        self.alpha = alpha
        self.mae = torch.nn.L1Loss()

    def forward(self, preds, targets):
        """Return the blended scalar loss for one batch."""
        mae_term = self.mae(preds, targets)
        target_magnitude = targets.abs().mean()
        return self.alpha * target_magnitude + (1 - self.alpha) * mae_term

# 🚂 Función de entrenamiento final sin validación
def train_final_model(model, train_loader, n_epochs=20, lr=0.001, alpha=0.3):
    """Fit ``model`` for a fixed number of epochs with no validation or early stopping.

    Uses Adam and ``CustomLoss(alpha)``; batches are moved to the module-level
    ``device``. After each epoch the mean of the per-batch losses is printed.

    Args:
        model: network called as ``model(X_cat, X_num)``.
        train_loader: yields ``(X_cat, X_num, y_batch)`` batches.
        n_epochs: number of passes over ``train_loader``.
        lr: Adam learning rate.
        alpha: forwarded to ``CustomLoss``.

    Returns:
        The same ``model`` object, trained in place.
    """
    loss_fn = CustomLoss(alpha=alpha)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    model.train()

    for epoch in range(n_epochs):
        batch_losses = []
        for batch in train_loader:
            X_cat, X_num, y_batch = (tensor.to(device) for tensor in batch)
            optimizer.zero_grad()
            # Both sides are squeezed so predictions and targets have matching shapes.
            loss = loss_fn(model(X_cat, X_num).squeeze(), y_batch.squeeze())
            loss.backward()
            optimizer.step()
            batch_losses.append(loss.item())
        running_loss = sum(batch_losses, 0.0)
        print(f"[{model.__class__.__name__}] Epoch {epoch+1}/{n_epochs} | Train Loss: {running_loss / len(train_loader):.4f}")

    return model

# ⚙️ Entrenamiento de los 3 modelos
for cfg in model_configs:
    print(f"\n🔵 Entrenando {cfg['name']}...")

    # Fresh, untrained network with this configuration's architecture.
    model = TabularNNImproved(
        embedding_sizes,
        len(feature_cols),
        hidden_sizes=cfg["hidden_sizes"],
        dropout=cfg["dropout"],
    ).to(device)

    # 20 epochs on train + validation with the configuration's lr and alpha.
    model = train_final_model(model, train_loader_full, n_epochs=20, lr=cfg["lr"], alpha=cfg["alpha"])

    # Weights are written to "<name>.pth" in the working directory.
    torch.save(model.state_dict(), f"{cfg['name']}.pth")
    print(f"✅ Modelo {cfg['name']} guardado.\n")


In [None]:
import torch
from torch.utils.data import DataLoader, TensorDataset
import numpy as np

# 📌 Filtrar periodo 201912
# Rows of period 201912 are the ones to predict; copy so new columns can be added safely.
is_pred_period = df_full["PERIODO"] == 201912
df_pred = df_full.loc[is_pred_period].copy()
print(df_pred.shape)


In [None]:

# 🚧 Preparar inputs
X_cat_pred = torch.tensor(df_pred[cat_cols].values, dtype=torch.long)
# Fixed: `numerical_cols` is never defined in this notebook; the models were built
# and trained with `feature_cols`, so the same column list must be used here.
X_num_pred = torch.tensor(df_pred[feature_cols].values, dtype=torch.float)

# Dataset and DataLoader without a target; row order is preserved (no shuffling).
pred_dataset = TensorDataset(X_cat_pred, X_num_pred)
pred_loader = DataLoader(pred_dataset, batch_size=1024, shuffle=False)

# Saved models to load: (file stem, hidden-layer widths used at training time).
model_paths = [
    ("modelo_m1", [1024, 512, 256]),
    ("modelo_m2", [2048, 1024, 512, 256]),
    ("modelo_m3", [1024, 512, 256]),
]


def predict_model(path, hidden_sizes):
    """Load ``<path>.pth`` into a ``TabularNNImproved`` and predict every row of ``pred_loader``.

    Args:
        path: file stem of the saved state dict (``.pth`` is appended).
        hidden_sizes: hidden-layer widths of the saved model; they must match the
            architecture that produced the file.

    Returns:
        1-D numpy array with one prediction per row, in loader order.
    """
    model = TabularNNImproved(
        embedding_sizes=embedding_sizes,
        num_numerical=len(feature_cols),
        hidden_sizes=hidden_sizes,
        dropout=0.2
    ).to(device)

    # map_location lets weights saved on a GPU be loaded on a CPU-only machine.
    model.load_state_dict(torch.load(f"{path}.pth", map_location=device))
    model.eval()

    preds = []
    with torch.no_grad():
        for X_cat_batch, X_num_batch in pred_loader:
            outputs = model(X_cat_batch.to(device), X_num_batch.to(device))
            # reshape(-1) keeps a 1-D array even when the last batch has one row
            # (a bare squeeze() would yield a 0-d array that cannot be extended).
            preds.append(outputs.reshape(-1).cpu().numpy())

    if not preds:
        return np.empty(0, dtype=np.float32)
    return np.concatenate(preds)


# Predict with each of the three models.
preds_dict = {}
for name, h_sizes in model_paths:
    print(f"📡 Prediciendo con {name}...")
    preds_dict[name] = predict_model(name, h_sizes)

# Ensemble: unweighted mean of the per-model predictions.
ensemble_pred = np.mean(np.stack(list(preds_dict.values()), axis=0), axis=0)

# Store the ensemble prediction next to the input rows.
df_pred["PRED_LOG1P_Z"] = ensemble_pred

# Optional: inspect the distribution of the predictions.
import matplotlib.pyplot as plt
plt.hist(ensemble_pred, bins=100)
plt.title("Distribución de predicciones (log1p z-score)")
plt.show()
