In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from feature_bagging import FeatureBaggingWithHyperparamTuning
import statsmodels.api as sm


In [2]:
df_tb_productos = pd.read_csv('../data/tb_productos.txt', sep='\t')
df_sell_in = pd.read_csv('../data/sell-in.txt', sep='\t')
df_predecir = pd.read_csv('../data/productos_a_predecir.txt', sep='\t')
df_tb_stocks = pd.read_csv('../data/tb_stocks.txt', sep='\t')


In [3]:

df_tb_stocks['periodo'] = pd.to_datetime(df_tb_stocks['periodo'], format='%Y%m')
df_tb_stocks['product_id'] = df_tb_stocks['product_id'].astype(int)
df_tb_stocks['stock_final'] = df_tb_stocks['stock_final'].astype(float)
df_tb_productos['product_id'] = df_tb_productos['product_id'].astype(int)
df_tb_productos['sku_size'] = df_tb_productos['sku_size'].astype(int)
df_sell_in['periodo'] = pd.to_datetime(df_sell_in['periodo'], format='%Y%m')
df_sell_in['product_id'] = df_sell_in['product_id'].astype(int)
df_sell_in['customer_id'] = df_sell_in['customer_id'].astype(int)
df_sell_in['cust_request_qty'] = df_sell_in['cust_request_qty'].astype(int)
df_sell_in['cust_request_tn'] = df_sell_in['cust_request_tn'].astype(float)
df_sell_in['tn'] = df_sell_in['tn'].astype(float)
df_sell_in['plan_precios_cuidados'] = df_sell_in['plan_precios_cuidados'].astype(bool)



# Join tb_productos to sell_in on product_id
df_sell_in_merged = pd.merge(df_sell_in, df_tb_productos, on='product_id', how='left')
# Join tb_stocks to sell_in_merged on both product_id and periodo
df_final = pd.merge(df_sell_in_merged, df_tb_stocks, on=['product_id', 'periodo'], how='left')

df_final['fecha'] = pd.to_datetime(df_final['periodo'], format='%Y%m')
#all_periods = pd.DataFrame()
#all_periods['fecha'] = pd.date_range(start='2017-01-01', periods=36, freq='MS')
#df_final = pd.merge(all_periods, df_final, on=['fecha'], how='left')

df_final = df_final[df_final['product_id'].isin(df_predecir['product_id'])]
df_final.set_index('fecha', inplace=True)
df_final.index = df_final.index.to_period('M')
df_final.fillna(0, inplace=True)


In [4]:
# Create lag variables for 'cust_request_qty', 'cust_request_tn', and 'tn' from lag -1 to -12
for lag in range(1, 13):
    df_final[f'cust_request_tn_lag_{lag}'] = df_final['cust_request_tn'].shift(lag)
    df_final[f'stock_final_lag_{lag}'] = df_final['stock_final'].shift(lag)
    df_final[f'tn_lag_{lag}'] = df_final['tn'].shift(lag)



In [5]:
df_final = df_final.groupby(['fecha','product_id'])[['tn', 'cust_request_tn_lag_1',
       'stock_final_lag_1', 'tn_lag_1', 'cust_request_tn_lag_2',
       'stock_final_lag_2', 'tn_lag_2', 'cust_request_tn_lag_3',
       'stock_final_lag_3', 'tn_lag_3', 'cust_request_tn_lag_4',
       'stock_final_lag_4', 'tn_lag_4', 'cust_request_tn_lag_5',
       'stock_final_lag_5', 'tn_lag_5', 'cust_request_tn_lag_6',
       'stock_final_lag_6', 'tn_lag_6', 'cust_request_tn_lag_7',
       'stock_final_lag_7', 'tn_lag_7', 'cust_request_tn_lag_8',
       'stock_final_lag_8', 'tn_lag_8', 'cust_request_tn_lag_9',
       'stock_final_lag_9', 'tn_lag_9', 'cust_request_tn_lag_10',
       'stock_final_lag_10', 'tn_lag_10', 'cust_request_tn_lag_11',
       'stock_final_lag_11', 'tn_lag_11', 'cust_request_tn_lag_12',
       'stock_final_lag_12', 'tn_lag_12']].sum().reset_index()
df_final.set_index('fecha', inplace=True)


In [6]:
data = df_final.loc['2018-01-01':'2020-11-01']
#data = data[data['product_id'] == 20001] 
data = data.copy()
data.fillna(0, inplace=True)


#TEST
true =df_final.loc['2019-11-01':'2019-12-01']
true = true[true['product_id'] == 20001] 


In [43]:
predictions = []
# Define parameter bounds for Bayesian optimization
param_bounds = {
    'num_leaves': (20, 40),
    'learning_rate': (0.01, 0.1),
    'n_estimators': (50, 200),
    'min_child_samples': (5, 30),
    'subsample': (0.5, 1.0),
    'colsample_bytree': (0.5, 1.0),
    'max_depth': (5, 15)
}
# Custom optimization target function
def multinacional_metric(y_true, y_pred):
     return abs(sum(y_true - y_pred)) / sum(y_true)

products = data['product_id'].unique()
tot = len(products)
nro = 0
for producto in products:
    print(f'Fitting and predicting for product_id: {producto}')
    # Filtrar los datos del producto
    df_producto = data[data['product_id'] == producto]
    X = df_producto.drop(columns=['tn'])
    X = X.copy()
    y = df_producto['tn']
    y= y.copy()
    # Create the feature bagging model
    feature_bagging_model = FeatureBaggingWithHyperparamTuning(
        X, y, n_models=10, feature_fraction=0.5, sample_fraction=0.8, param_bounds=param_bounds, random_state=30000841,optimization_target=multinacional_metric)

    # Fit the model with a single seed
    feature_bagging_model.fit()
    single_seed_predictions = feature_bagging_model.predict(X)

    # Define multiple seeds
    #seeds = [10000019, 20000379, 30000841, 40001387, 50001863]
     # Fit the model with multiple seeds and get combined predictions
    #combined_predictions = feature_bagging_model.fit_multiple_seeds(seeds)
   
    # Realizar el pronóstico para 2 meses adelante
    forecast = feature_bagging_model.forecast(X, n_periods=2)
    
    # Obtener la predicción del segundo mes
    second_month_prediction = forecast[1]  # .iloc[1] obtiene el segundo valor predicho

    # Almacenar el producto_id y la predicción en la lista
    predictions.append({'product_id': producto, 'tn': second_month_prediction})
    print({'product_id': producto, 'tn': second_month_prediction})
    print(true[true['product_id'] == producto]['tn'])
    nro = nro + 1 
    print(str(nro) +' / '+ str(tot))

Fitting and predicting for product_id: 21159
|   iter    |  target   | colsam... | learni... | max_depth | min_ch... | n_esti... | num_le... | subsample |
-------------------------------------------------------------------------------------------------------------




| [0m1        [0m | [0mnan      [0m | [0m0.8139   [0m | [0m0.0454   [0m | [0m7.984    [0m | [0m7.189    [0m | [0m66.62    [0m | [0m24.07    [0m | [0m0.7891   [0m |




| [0m2        [0m | [0mnan      [0m | [0m0.54     [0m | [0m0.06031  [0m | [0m13.12    [0m | [0m28.45    [0m | [0m193.0    [0m | [0m34.26    [0m | [0m0.5443   [0m |




| [0m3        [0m | [0mnan      [0m | [0m0.8948   [0m | [0m0.03978  [0m | [0m11.17    [0m | [0m14.75    [0m | [0m149.0    [0m | [0m33.42    [0m | [0m0.7108   [0m |




| [0m4        [0m | [0mnan      [0m | [0m0.5371   [0m | [0m0.02088  [0m | [0m9.178    [0m | [0m11.53    [0m | [0m94.31    [0m | [0m27.47    [0m | [0m0.7426   [0m |




| [0m5        [0m | [0mnan      [0m | [0m0.6106   [0m | [0m0.0693   [0m | [0m12.67    [0m | [0m17.06    [0m | [0m114.1    [0m | [0m30.35    [0m | [0m0.909    [0m |


ValueError: Input y contains NaN.

In [58]:
# Convertir la lista a un DataFrame
df_predictions = pd.DataFrame(predictions)
df_predictions.to_csv('../data/predicciones.csv', index=False,header=True)
df_predictions.head(10)

Unnamed: 0,product_id,tn
0,20032,590.33366
1,21153,0.589564
2,21159,0.513512
3,21168,0.488598
4,20286,49.778057
5,20442,31.269903
6,20491,15.661783
7,20548,20.247307
8,20620,15.088071
9,20623,26.421832
