# Construcción del dataset

In [1]:
import pandas as pd
from mlforecast import MLForecast
from lightgbm import LGBMRegressor

from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score
import numpy as np

from window_ops.rolling import rolling_mean
import optuna
from sklearn.model_selection import TimeSeriesSplit
from utilsforecast.feature_engineering import fourier
from sklearn.preprocessing import StandardScaler

import joblib

  from .autonotebook import tqdm as notebook_tqdm


In [2]:

df = pd.read_csv('../data/sell-in.txt', sep='\t', encoding='utf-8')
df.head()

Unnamed: 0,periodo,customer_id,product_id,plan_precios_cuidados,cust_request_qty,cust_request_tn,tn
0,201701,10234,20524,0,2,0.053,0.053
1,201701,10032,20524,0,1,0.13628,0.13628
2,201701,10217,20524,0,1,0.03028,0.03028
3,201701,10125,20524,0,1,0.02271,0.02271
4,201701,10012,20524,0,11,1.54452,1.54452


In [3]:
df_productos_predecir = pd.read_csv('../data/product_id_apredecir201912.txt', sep='\t', encoding='utf-8')
df_productos_predecir.head()

Unnamed: 0,product_id
0,20001
1,20002
2,20003
3,20004
4,20005


In [4]:
df['periodo'].sort_values().unique()

array([201701, 201702, 201703, 201704, 201705, 201706, 201707, 201708,
       201709, 201710, 201711, 201712, 201801, 201802, 201803, 201804,
       201805, 201806, 201807, 201808, 201809, 201810, 201811, 201812,
       201901, 201902, 201903, 201904, 201905, 201906, 201907, 201908,
       201909, 201910, 201911, 201912])

In [5]:
df_pivot = df.pivot_table(
    index=['product_id', 'customer_id'],
    columns='periodo',
    values='tn',
    aggfunc='sum',
    fill_value=None
)
df_pivot = df_pivot.reset_index()
df_pivot.columns.name = None
df_pivot.head()

Unnamed: 0,product_id,customer_id,201701,201702,201703,201704,201705,201706,201707,201708,...,201903,201904,201905,201906,201907,201908,201909,201910,201911,201912
0,20001,10001,99.43861,198.84365,92.46537,13.29728,101.00563,128.04792,101.20711,43.3393,...,130.54927,364.37071,439.90647,65.92436,144.78714,33.63991,109.05244,176.0298,236.65556,180.21938
1,20001,10002,35.72806,6.79415,29.94128,22.81133,31.22847,47.57025,21.84874,17.08052,...,31.97079,55.41679,30.87299,144.07021,37.14616,,72.08551,17.40806,45.61495,113.33165
2,20001,10003,143.49426,20.48319,137.87537,68.89292,135.1219,171.01785,64.66196,83.6341,...,170.89924,230.00152,1.84835,,138.23391,162.07198,233.20532,76.00625,86.14415,102.27517
3,20001,10004,184.72927,104.03894,295.43924,247.65632,188.37819,195.02683,379.4427,237.16848,...,102.64484,91.67799,389.02653,66.71971,228.62366,96.11402,288.34205,324.96172,195.67828,34.6481
4,20001,10005,19.08407,5.17117,5.17117,0.86186,37.95546,19.08407,43.35049,67.53856,...,6.90049,22.18016,15.89578,,8.25595,,12.804,17.13921,12.22149,19.60368


In [6]:
# Remove from df_pivot the products that are not in df_productos_predecir
df_pivot = df_pivot[df_pivot['product_id'].isin(df_productos_predecir['product_id'])]

In [7]:
# df_mlforecast = df_pivot[df_pivot['customer_id'] == 10001].copy()
df_mlforecast = df_pivot.copy()

In [8]:
df_mlforecast.head()

Unnamed: 0,product_id,customer_id,201701,201702,201703,201704,201705,201706,201707,201708,...,201903,201904,201905,201906,201907,201908,201909,201910,201911,201912
0,20001,10001,99.43861,198.84365,92.46537,13.29728,101.00563,128.04792,101.20711,43.3393,...,130.54927,364.37071,439.90647,65.92436,144.78714,33.63991,109.05244,176.0298,236.65556,180.21938
1,20001,10002,35.72806,6.79415,29.94128,22.81133,31.22847,47.57025,21.84874,17.08052,...,31.97079,55.41679,30.87299,144.07021,37.14616,,72.08551,17.40806,45.61495,113.33165
2,20001,10003,143.49426,20.48319,137.87537,68.89292,135.1219,171.01785,64.66196,83.6341,...,170.89924,230.00152,1.84835,,138.23391,162.07198,233.20532,76.00625,86.14415,102.27517
3,20001,10004,184.72927,104.03894,295.43924,247.65632,188.37819,195.02683,379.4427,237.16848,...,102.64484,91.67799,389.02653,66.71971,228.62366,96.11402,288.34205,324.96172,195.67828,34.6481
4,20001,10005,19.08407,5.17117,5.17117,0.86186,37.95546,19.08407,43.35049,67.53856,...,6.90049,22.18016,15.89578,,8.25595,,12.804,17.13921,12.22149,19.60368


In [9]:
df_mlforecast.shape

(262805, 38)

In [10]:
# Filtrar todos los DataFrames para conservar solo los registros con customer_id 10001
# df_long = df_long[df_long['customer_id'] == 10001]

In [11]:
# --- PASO 1: TRANSFORMACIÓN DE DATOS A FORMATO LARGO ---
# Este es el formato conveniente que usaremos en ambos casos.
print("\n--- 1. Transformando datos a formato largo ---")
df_long = df_mlforecast.melt(
    id_vars=['product_id', 'customer_id'],
    var_name='periodo',
    value_name='y' # MLForecast usa 'y' como nombre de la variable objetivo
)

df_long.head()


--- 1. Transformando datos a formato largo ---


Unnamed: 0,product_id,customer_id,periodo,y
0,20001,10001,201701,99.43861
1,20001,10002,201701,35.72806
2,20001,10003,201701,143.49426
3,20001,10004,201701,184.72927
4,20001,10005,201701,19.08407


In [12]:
df_long.head()

Unnamed: 0,product_id,customer_id,periodo,y
0,20001,10001,201701,99.43861
1,20001,10002,201701,35.72806
2,20001,10003,201701,143.49426
3,20001,10004,201701,184.72927
4,20001,10005,201701,19.08407


In [13]:
# import joblib

# def objective(trial, df_entrenamiento_cust):
#     n_estimators = trial.suggest_int('n_estimators', 50, 300)
#     max_depth = trial.suggest_int('max_depth', 3, 12)
#     learning_rate = trial.suggest_float('learning_rate', 0.01, 0.3)
#     num_leaves = trial.suggest_int('num_leaves', 10, 100)

#     model = LGBMRegressor(
#         random_state=42,
#         n_estimators=n_estimators,
#         max_depth=max_depth,
#         learning_rate=learning_rate,
#         num_leaves=num_leaves
#     )

#     fcst = MLForecast(
#         models=model,
#         freq='MS',
#         lags=[1, 2, 3, 6, 12],
#         date_features=['month', 'year']
#     )
    
#     fcst.fit(df_entrenamiento_cust, static_features=[])
#     pred = fcst.predict(h=2)
#     y_true = df_entrenamiento_cust['y'][-2:]
#     y_pred = pred['LGBMRegressor'][:2]
#     return mean_squared_error(y_true, y_pred)

# FECHA_CORTE = '2019-10-01'
# horizonte_prediccion = 2
# customer_ids = df_mlforecast['customer_id'].unique()
# df_pred_final = pd.DataFrame()


# for cid in customer_ids:
#     df_pivot_cust = df_pivot[df_pivot['customer_id'] == cid]
#     if df_pivot_cust.empty:
#         continue

#     df_long_cust = df_pivot_cust.melt(
#         id_vars=['product_id', 'customer_id'],
#         var_name='periodo',
#         value_name='y'
#     )
#     df_long_cust = df_long_cust.fillna(0)
#     df_long_cust['unique_id'] = df_long_cust['product_id'].astype(str) + "_" + df_long_cust['customer_id'].astype(str)
#     df_long_cust['ds'] = pd.to_datetime(df_long_cust['periodo'], format='%Y%m')
#     df_final_cust = df_long_cust[['unique_id', 'ds', 'y']].sort_values(by=['unique_id', 'ds']).reset_index(drop=True)
#     df_final_cust = df_final_cust.loc[:, ~df_final_cust.columns.duplicated()]
#     df_entrenamiento_cust = df_final_cust[df_final_cust['ds'] <= FECHA_CORTE].copy()

#     # Optuna optimization
#     study = optuna.create_study(direction='minimize')
#     study.optimize(lambda trial: objective(trial, df_entrenamiento_cust), n_trials=20, show_progress_bar=False)
#     best_params = study.best_params

#     best_model = LGBMRegressor(
#         random_state=42,
#         n_estimators=best_params['n_estimators'],
#         max_depth=best_params['max_depth'],
#         learning_rate=best_params['learning_rate'],
#         num_leaves=best_params['num_leaves']
#     )

#     fcst_cust = MLForecast(
#         models=best_model,
#         freq='MS',
#         lags=[1, 2, 3, 6, 12],
#         date_features=['month', 'year']
#     )

#     fcst_cust.fit(df_entrenamiento_cust, static_features=[])

#     # Guardar el modelo
#     joblib.dump(fcst_cust, f'modelo_fcst_cliente_{cid}.pkl')

#     pred_cust = fcst_cust.predict(h=horizonte_prediccion)
#     pred_cust['customer_id'] = cid
#     pred_cust['y'] = pred_cust['LGBMRegressor']

#     pred_cust_201912 = pred_cust[pred_cust['ds'] == '2019-12-01'].copy()
#     pred_cust_201912['product_id'] = pred_cust_201912['unique_id'].str.split('_').str[0].astype(int)
#     pred_cust_201912.rename(columns={'y': 'tn'}, inplace=True)

#     df_pred_final = pd.concat([df_pred_final, pred_cust_201912[['product_id', 'tn']]], ignore_index=True)

# df_pred_sum = df_pred_final.groupby('product_id', as_index=False)['tn'].sum()
# print(df_pred_sum)


In [15]:
import joblib

df_pred_final = pd.DataFrame()
pred_cust_202002 = pd.DataFrame()
FECHA_CORTE = '2019-12-01'
horizonte_prediccion = 2
customer_ids = df_long['customer_id'].unique()

for cid in customer_ids:
    # Cargar el modelo guardado
    fcst_cust = joblib.load(f'modelo_fcst_cliente_{cid}.pkl')
    
    # Filtrar los datos para el cliente actual
    df_long_cust = df_long[df_long['customer_id'] == cid].copy()
    if df_long_cust.empty:
        continue

    df_long_cust = df_long_cust.fillna(0)
    df_long_cust['unique_id'] = df_long_cust['product_id'].astype(str) + "_" + df_long_cust['customer_id'].astype(str)
    df_long_cust['ds'] = pd.to_datetime(df_long_cust['periodo'].astype(str), format='%Y%m')
    df_final_cust = df_long_cust[['unique_id', 'ds', 'y']].sort_values(by=['unique_id', 'ds']).reset_index(drop=True)
    df_final_cust = df_final_cust.loc[:, ~df_final_cust.columns.duplicated()]
    df_entrenamiento_cust = df_final_cust[df_final_cust['ds'] <= FECHA_CORTE].copy()

    # Generar predicciones usando el modelo cargado
    pred_cust = fcst_cust.predict(h=horizonte_prediccion)
    pred_cust['customer_id'] = cid
    pred_cust['y'] = pred_cust['LGBMRegressor']

    pred_cust_202002 = pred_cust[pred_cust['ds'] == '2020-02-01'].copy()
    pred_cust_202002['product_id'] = pred_cust_202002['unique_id'].str.split('_').str[0].astype(int)
    pred_cust_202002.rename(columns={'y': 'tn'}, inplace=True)

    df_pred_final = pd.concat([df_pred_final, pred_cust_202002[['product_id', 'tn']]], ignore_index=True)

df_pred_sum_202002 = df_pred_final.groupby('product_id', as_index=False)['tn'].sum()
print(df_pred_sum_202002)


  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)


Empty DataFrame
Columns: [product_id, tn]
Index: []


  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)


In [None]:
import joblib

df_pred_final = pd.DataFrame()
FECHA_CORTE = '2019-10-01'
horizonte_prediccion = 2
customer_ids = df_mlforecast['customer_id'].unique()

for cid in customer_ids:
    # Cargar el modelo guardado
    fcst_cust = joblib.load(f'modelo_fcst_cliente_{cid}.pkl')
    
    # Preparar los datos de entrada para la predicción
    df_pivot_cust = df_pivot[df_pivot['customer_id'] == cid]
    if df_pivot_cust.empty:
        continue

    df_long_cust = df_pivot_cust.melt(
        id_vars=['product_id', 'customer_id'],
        var_name='periodo',
        value_name='y'
    )
    df_long_cust = df_long_cust.fillna(0)
    df_long_cust['unique_id'] = df_long_cust['product_id'].astype(str) + "_" + df_long_cust['customer_id'].astype(str)
    df_long_cust['ds'] = pd.to_datetime(df_long_cust['periodo'], format='%Y%m')
    df_final_cust = df_long_cust[['unique_id', 'ds', 'y']].sort_values(by=['unique_id', 'ds']).reset_index(drop=True)
    df_final_cust = df_final_cust.loc[:, ~df_final_cust.columns.duplicated()]
    df_entrenamiento_cust = df_final_cust[df_final_cust['ds'] <= FECHA_CORTE].copy()

    # Generar predicciones usando el modelo cargado
    pred_cust = fcst_cust.predict(h=horizonte_prediccion)
    pred_cust['customer_id'] = cid
    pred_cust['y'] = pred_cust['LGBMRegressor']

    pred_cust_201912 = pred_cust[pred_cust['ds'] == '2019-12-01'].copy()
    pred_cust_201912['product_id'] = pred_cust_201912['unique_id'].str.split('_').str[0].astype(int)
    pred_cust_201912.rename(columns={'y': 'tn'}, inplace=True)

    df_pred_final = pd.concat([df_pred_final, pred_cust_201912[['product_id', 'tn']]], ignore_index=True)

df_pred_sum = df_pred_final.groupby('product_id', as_index=False)['tn'].sum()
print(df_pred_sum)

  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)


     product_id          tn
0         20001  634.226702
1         20002  711.901500
2         20003  545.409694
3         20004  500.076066
4         20005  425.803136
..          ...         ...
775       21263    1.962647
776       21265    3.978385
777       21266    3.985516
778       21267    0.457560
779       21276    0.379534

[780 rows x 2 columns]


  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)


In [14]:
df_pred_sum.head()

Unnamed: 0,product_id,tn
0,20001,634.226702
1,20002,711.9015
2,20003,545.409694
3,20004,500.076066
4,20005,425.803136


In [15]:
# 1. Extraer los valores reales de df_long para 201912 y sumarlos por producto
df_validacion = (
    df_long[df_long['periodo'] == 201912]  # sin comillas, como int
    .groupby('product_id', as_index=False)['y']
    .sum()
    .rename(columns={'y': 'tn_real'})
)

# 2. Unir con las predicciones
df_eval = df_validacion.merge(df_pred_sum, on='product_id', how='inner')

y_real = df_eval['tn_real']
y_pred = df_eval['tn']

# 3. Calcular las métricas
mae = mean_absolute_error(y_real, y_pred)
mse = mean_squared_error(y_real, y_pred)
rmse = np.sqrt(mse)
mape = mean_absolute_percentage_error(y_real, y_pred)
r2 = r2_score(y_real, y_pred)

# 4. Imprimir resultados
print("\n" + "="*40)
print(" MÉTRICAS DE RENDIMIENTO DEL MODELO")
print("="*40)
print(f"Error Absoluto Medio (MAE):       {mae:.2f} unidades")
print(f"Raíz del Error Cuadrático (RMSE): {rmse:.2f} unidades")
print(f"Error Porcentual Absoluto (MAPE): {mape:.2%}")
print(f"Coeficiente de Determinación (R²): {r2:.2f}")
print("="*40)

print("\nInterpretación:")
print(f"- En promedio, el modelo se equivoca en {mae:.2f} toneladas (o la unidad que estés usando).")
print(f"- El error porcentual promedio es de {mape:.2%}.")
print(f"- Un R² de {r2:.2f} indica qué proporción de la varianza de los datos es explicada por el modelo (más cercano a 1 es mejor).")



 MÉTRICAS DE RENDIMIENTO DEL MODELO
Error Absoluto Medio (MAE):       17.28 unidades
Raíz del Error Cuadrático (RMSE): 43.10 unidades
Error Porcentual Absoluto (MAPE): 1696.93%
Coeficiente de Determinación (R²): 0.80

Interpretación:
- En promedio, el modelo se equivoca en 17.28 toneladas (o la unidad que estés usando).
- El error porcentual promedio es de 1696.93%.
- Un R² de 0.80 indica qué proporción de la varianza de los datos es explicada por el modelo (más cercano a 1 es mejor).


In [21]:
df_pred_final_202002 = pd.DataFrame()

for cid in customer_ids:
    fcst_cust = joblib.load(f'modelo_fcst_cliente_{cid}.pkl')
    df_pivot_cust = df_pivot[df_pivot['customer_id'] == cid]
    if df_pivot_cust.empty:
        continue

    df_long_cust = df_pivot_cust.melt(
        id_vars=['product_id', 'customer_id'],
        var_name='periodo',
        value_name='y'
    )
    df_long_cust = df_long_cust.fillna(0)
    df_long_cust['unique_id'] = df_long_cust['product_id'].astype(str) + "_" + df_long_cust['customer_id'].astype(str)
    df_long_cust['ds'] = pd.to_datetime(df_long_cust['periodo'], format='%Y%m')
    df_final_cust = df_long_cust[['unique_id', 'ds', 'y']].sort_values(by=['unique_id', 'ds']).reset_index(drop=True)
    df_final_cust = df_final_cust.loc[:, ~df_final_cust.columns.duplicated()]
    df_entrenamiento_cust = df_final_cust[df_final_cust['ds'] <= FECHA_CORTE_FINAL].copy()

    pred_cust = fcst_cust.predict(h=2)
    pred_cust['customer_id'] = cid
    pred_cust['y'] = pred_cust['LGBMRegressor']

    pred_cust_202002 = pred_cust[pred_cust['ds'] == '2020-02-01'].copy()
    pred_cust_202002['product_id'] = pred_cust_202002['unique_id'].str.split('_').str[0].astype(int)
    pred_cust_202002.rename(columns={'y': 'tn'}, inplace=True)

    df_pred_final_202002 = pd.concat([df_pred_final_202002, pred_cust_202002[['product_id', 'tn']]], ignore_index=True)

df_pred_sum_202002 = df_pred_final_202002.groupby('product_id', as_index=False)['tn'].sum()
print(df_pred_sum_202002)


  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)


Empty DataFrame
Columns: [product_id, tn]
Index: []


  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)
  df_long_cust = df_long_cust.fillna(0)


In [24]:
df_pred_final_202002.head()

Unnamed: 0,product_id,tn


In [22]:
df_pred_sum_202002.head()

Unnamed: 0,product_id,tn


In [23]:
df_pred_sum_202002.shape

(0, 2)

In [None]:
df_pred_sum_202002.to_csv('prediccion_tn_por_cliente.csv', index=False)

In [None]:
# Separar product_id de unique_id y sumar LGBMRegressor por product_id
df_pred_sum = prediccion_target.copy()
df_pred_sum['product_id'] = df_pred_sum['unique_id'].str.split('_').str[0].astype(int)
df_pred_sum_grouped = df_pred_sum.groupby('product_id', as_index=False)['LGBMRegressor'].sum()
df_pred_sum_grouped.head()

In [None]:
# Renombrar la columna y exportar a CSV
df_pred_sum_grouped.rename(columns={'LGBMRegressor': 'tn'}, inplace=True)
df_pred_sum_grouped.to_csv('prediccion_tn_por_cliente.csv', index=False)