In [None]:
import pandas as pd
import numpy as np
from catboost import CatBoostRegressor

# Train a CatBoost regressor on train.csv and write integer "Production"
# predictions for test.csv to predicciones_finales.csv.
#
# The same cleaning and feature preparation is applied to both frames so the
# model sees identically shaped inputs at fit and predict time.

# Columns deliberately excluded from the feature set.
cols_to_drop = ["image_embedding", "num_stores", "num_sizes", "weekly_demand"]


def drop_unused_columns(df, unused):
    """Return *df* without nameless ("Unnamed...") columns and without *unused*.

    Args:
        df: Frame as read from CSV (column labels are expected to be strings).
        unused: Column names to remove; names absent from *df* are ignored.

    Returns:
        A new DataFrame; *df* itself is not modified.
    """
    named_only = df.loc[:, ~df.columns.str.contains("^Unnamed")]
    return named_only.drop(columns=unused, errors="ignore")


def prepare_features(df, feature_cols, categorical_cols):
    """Build the model input: exactly *feature_cols*, in order, with no NaN.

    Columns missing from *df* are created. Numeric gaps are filled with 0;
    categorical gaps are filled with the string "0" and every categorical
    value is cast to ``str``.

    Args:
        df: Cleaned source frame.
        feature_cols: Feature names, in the order the model expects.
        categorical_cols: Subset of *feature_cols* treated as categorical.

    Returns:
        A new DataFrame with one column per entry of *feature_cols*.
    """
    categorical = set(categorical_cols)
    # reindex both fixes the column order and adds absent columns (as NaN).
    features = df.reindex(columns=feature_cols)
    for col in feature_cols:
        if col in categorical:
            # Keep categorical columns homogeneous strings: CatBoost rejects
            # float/NaN values in cat_features, and a column can be parsed as
            # float in one CSV and as text in the other.
            features[col] = features[col].astype(object).fillna("0").astype(str)
        else:
            features[col] = features[col].fillna(0)
    return features


def to_integer_units(preds):
    """Convert raw regression outputs to whole, non-negative unit counts.

    Values are rounded to the nearest integer (instead of being truncated
    toward zero) and negatives are clipped to 0.

    NOTE(review): clipping assumes "Production" is a non-negative quantity;
    confirm against the data definition.
    """
    return np.clip(np.rint(preds), 0, None).astype(int)


# True both when run as a script and inside a notebook cell, so the variables
# below remain available as globals exactly as before.
if __name__ == "__main__":
    # 1. Load historical data and drop the columns we do not use.
    train_df = drop_unused_columns(pd.read_csv("train.csv", sep=";"), cols_to_drop)

    # 2. Identify categorical (object-typed) feature columns.
    categorical_cols = train_df.select_dtypes(include=["object"]).columns.tolist()
    categorical_cols = [c for c in categorical_cols if c not in ["ID", "Production"]]

    # 3. Split features and target.
    feature_cols = train_df.drop(columns=["ID", "Production"]).columns.tolist()
    X_train = prepare_features(train_df, feature_cols, categorical_cols)
    y_train = train_df["Production"]

    # 4. Train the CatBoost model.
    model = CatBoostRegressor(
        iterations=600,
        learning_rate=0.05,
        depth=8,
        loss_function="RMSE",
        verbose=200
    )
    model.fit(X_train, y_train, cat_features=categorical_cols)

    # 5. Load the test CSV and give it the same cleaning as the training data.
    test_df = drop_unused_columns(pd.read_csv("test.csv", sep=";"), cols_to_drop)

    # 6. Align test features with the training columns (order and presence).
    X_test = prepare_features(test_df, feature_cols, categorical_cols)

    # 7. Predict production.
    preds = model.predict(X_test)

    # 8. Write the predictions CSV (whole units).
    output_df = pd.DataFrame({
        "ID": test_df["ID"],
        "Production": to_integer_units(preds)
    })

    output_df.to_csv("predicciones_finales.csv", index=False)
    print("Archivo 'predicciones_finales.csv' generado con √©xito")

0:	learn: 33529.1671874	total: 118ms	remaining: 1m 10s
200:	learn: 9594.7610794	total: 22.7s	remaining: 45.1s
400:	learn: 7707.1968637	total: 46.1s	remaining: 22.9s
599:	learn: 6762.2804778	total: 1m 9s	remaining: 0us
Archivo 'predicciones_finales.csv' generado con √©xito
