#### Imports

La idea de esta variante, es predecir solamente los productos que tengan mas de 12 meses de datos (usando los ultimos 12 solamente), y aquellos con menos datos, predecir la media

In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from skopt import BayesSearchCV
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from scikeras.wrappers import KerasRegressor
from keras.callbacks import EarlyStopping
import numpy as np
from tensorflow.keras.optimizers import Adam, RMSprop, SGD
from keras.metrics import MeanSquaredError
from keras.regularizers import l2
from keras.layers import BatchNormalization
from skopt.space import Categorical
from sklearn.metrics import mean_squared_error
import json

In [None]:
final_dataset = pd.read_csv('../../Datasets/final_dataset.csv', sep='\t')

In [None]:
final_dataset.head()

#### Pre procesamiento

In [None]:
final_dataset['periodo'] = pd.to_datetime(final_dataset['periodo'], format='%Y%m')


In [None]:
# Promediamos las vtas de agosto, como las de julio y setpiembre
july_september_data = final_dataset[
    final_dataset['periodo'].isin([pd.Timestamp('2019-07-01'), pd.Timestamp('2019-09-01')])
]

# Agrupar por producto y calcular el promedio
august_average = july_september_data.groupby('product_id')['y'].mean().reset_index()
august_average['periodo'] = pd.Timestamp('2019-08-01')

# Reemplazar los valores de agosto en el DataFrame original
final_dataset = final_dataset.set_index(['product_id', 'periodo'])
final_dataset.update(august_average.set_index(['product_id', 'periodo']))
final_dataset = final_dataset.reset_index()

In [None]:
# Agreog el MES del quarter, en vez de si es close_quarter
final_dataset['month'] = final_dataset['periodo'].dt.month

def month_in_quarter(month):
    if month in [1, 4, 7, 10]:
        return 1  # Primer mes del trimestre
    elif month in [2, 5, 8, 11]:
        return 2  # Segundo mes del trimestre
    elif month in [3, 6, 9, 12]:
        return 3  # Tercer mes del trimestre

final_dataset['month_in_quarter'] = final_dataset['month'].apply(month_in_quarter)

# Remover la columna 'month' si no es necesaria
final_dataset = final_dataset.drop(columns=['month'])

In [None]:
columns = ['plan_precios_cuidados', 'cust_request_qty', 'cust_request_tn', 'month_in_quarter', 'brand', 'sku_size', 'cat1', 'cat2', 'cat3','y'] #'close_quarter'
non_scalable_columns = ['cat1', 'cat2', 'cat3', 'brand', 'sku_size', 'plan_precios_cuidados', 'month_in_quarter'] # 'close_quarter'
n_features = len(columns)

Valido el promedio de agosto (este es el mes en que la empresa decidio no vender por unos dias)

In [None]:
# Filtrar los datos para los productos en julio, agosto y septiembre de 2020
august_data = final_dataset[
    final_dataset['periodo'] == pd.Timestamp('2019-08-01')
]

july_data = final_dataset[
    final_dataset['periodo'] == pd.Timestamp('2019-07-01')
]

september_data = final_dataset[
    final_dataset['periodo'] == pd.Timestamp('2019-09-01')
]

# Mostrar algunos datos de agosto para revisar que el promedio se haya aplicado
print("Datos de agosto de 2019:")
display(august_data.head())

# Mostrar algunos datos de julio para comparar
print("\nDatos de julio de 2019:")
display(july_data.head())

# Mostrar algunos datos de septiembre para comparar
print("\nDatos de septiembre de 2019:")
display(september_data.head())

# Verificar el cálculo del promedio manualmente para algunos productos
product_ids = august_data['product_id'].unique()

for product_id in product_ids[:5]:  # Limitar a los primeros 5 productos para revisar
    july_sales = july_data[july_data['product_id'] == product_id]['y'].values
    september_sales = september_data[september_data['product_id'] == product_id]['y'].values
    august_sales = august_data[august_data['product_id'] == product_id]['y'].values

    print(f"\nProducto ID: {product_id}")
    print(f"Ventas de julio: {july_sales}")
    print(f"Ventas de septiembre: {september_sales}")
    print(f"Ventas de agosto (calculadas): {august_sales}")
    if len(july_sales) > 0 and len(september_sales) > 0:
        calculated_average = (july_sales[0] + september_sales[0]) / 2
        print(f"Promedio calculado: {calculated_average}")


Muetro cual es el primer mes de venta de cada producto, para debug

In [None]:
first_months = final_dataset.groupby('product_id')['periodo'].min().reset_index()
first_months.columns = ['product_id', 'first_month']

# Mostrar los resultados
print("Primer mes de cada producto:")
display(first_months.sort_values(by=['first_month', 'product_id']).tail(150))

In [None]:
categorical_columns = ['cat1', 'cat2', 'cat3', 'brand', 'sku_size']

# Transformar las columnas categóricas en numéricas
label_encoders = {}
for col in categorical_columns:
    le = LabelEncoder()
    final_dataset[col] = le.fit_transform(final_dataset[col])
    label_encoders[col] = le

In [None]:
grouped = final_dataset.groupby(['product_id', 'cat1', 'cat2', 'cat3'])

for name, group in grouped:
    print(f"Group name: {name}")
    print(group.head())
    print("\n")
    # break 

In [None]:
scalers = {col: {} for col in columns}
scaled_data_list = []

for (product_id, _, _, _ ), group in grouped:
    scaled_group = group.copy()
    for col in columns:
        if col not in non_scalable_columns:
            scaler = StandardScaler()
            # print(col)
            scaled_group[col] = scaler.fit_transform(group[[col]])
            scalers[col][product_id] = scaler
    scaled_data_list.append(scaled_group)

# Combinar todos los datos escalados en un solo DataFrame
scaled_data = pd.concat(scaled_data_list)

In [None]:
display(scaled_data[columns])

In [None]:
# scaled_data[scaled_data['cat1'] != 0]

#### Funcion para preparar los datos y crear el modelo

El objetivo es predecir 2 dias en el futuro, por lo que la idea es re-armar el dataset. Donde el valor de X sera el conjunto de datos hasta N-2 e Y va a ser N (siendo N la cantidad de ventas para ese producto)

In [None]:
timeframe = 14

In [None]:
def prepare_data(data,validation=False):
    X, y = [], []
    unique_product_ids = data['product_id'].unique()
    predicciones = {}

    for product_id in unique_product_ids:
        product_data = data[data['product_id'] == product_id].copy()
        product_values = product_data[columns].values
        
        if len(product_values) < timeframe + 2:
            if validation:
                predicciones[product_id] = np.mean(product_values)
            continue
        
        
        if validation:
            X.append(product_values[-timeframe:]) # Los ultimos 14 meses para re-entrenar el modelo antes de las predicciones finales
            y.append(product_id)
        else:
            # Todas las observaciones menos las últimas dos. Como despues necesito agregar esos 2 registros
            # para predecir el future, agrego dos registros previos
            X.append(product_values[-(timeframe + 2):-2])
            y.append(product_values[-1, -1])      # Última observación
    
    if validation:
        return np.array(X), np.array(y), predicciones
    else:
        return np.array(X), np.array(y), predicciones


Definimos la funcion para crear el modelo LSTM, sobre este se ejecutara la optimizacion bayesiana

In [None]:
n_steps = 2  # número de pasos de tiempo
epochs = 100
batch_size = 32
predictions = []

In [None]:
data = scaled_data[columns].values
X, y, _ = prepare_data(scaled_data)

In [None]:
display(X.shape)
display(y.shape)

#### Revisamos que la funcion genere registros coherentes

In [None]:
# Muestro todos los registros del ultimo producto
unique_product_ids = scaled_data['product_id'].unique()
product_data = scaled_data[scaled_data['product_id'] == unique_product_ids[0]] #<= cambiar el indice entre 0 y -1 para ver la diferencia agregada por el padleft
product_values = product_data[columns].values
display(product_values)

In [None]:
display(X[0]) #<= cambiar el indice entre 0 y -1 para ver la diferencia agregada por el padleft

In [None]:
display(y[0]) #<= cambiar el indice entre 0 y -1

In [None]:
display(len(X[0]))

#### Entrenamiento del modelo

In [None]:
from keras.regularizers import l2

def create_model(units, learning_rate, optimizer='adam', dropout=0.1, activation="tanh", depth=2, l2_penalty=1e-6):
    display(f"Creating model with parameters: units={units}, learning_rate={learning_rate}, "
                f"optimizer={optimizer}, dropout={dropout}, activation={activation}, "
                f"depth={depth}, l2_penalty={l2_penalty}")
    
    model = Sequential()
    
    for i in range(depth):
        if i == 0:
            model.add(LSTM(units=units, return_sequences=True, input_shape=(timeframe, n_features), activation=activation, kernel_regularizer=l2(l2_penalty)))
            model.add(BatchNormalization())
        else:
            model.add(LSTM(units=units, return_sequences=True, activation=activation, kernel_regularizer=l2(l2_penalty)))
            model.add(Dropout(dropout))
            model.add(BatchNormalization())
    
    model.add(LSTM(units=units, return_sequences=False, activation=activation, kernel_regularizer=l2(l2_penalty)))
    model.add(Dropout(dropout))
    
    model.add(Dense(1))
    
    if optimizer == 'adam':
        optimizer = Adam(learning_rate=learning_rate)
    elif optimizer == 'rmsprop':
        optimizer = RMSprop(learning_rate=learning_rate)
    elif optimizer == 'sgd':
        optimizer = SGD(learning_rate=learning_rate)
    
    # Usamos el error cuadratico medio, porque penaliza errores grandes. Como tenemos ciertos
    # productos "estrella", para los cuales hay mas ventas, y tener un error en lo mismos
    # puede implicar una gran diferencia de las estimaciones, decidimos probar con esta funcion de perdida.
    model.compile(optimizer=optimizer, loss='mean_squared_error', metrics=[MeanSquaredError()])
    return model

#### Armado de los modelos

In [None]:
param_space = {
    'depth': [1, 2, 3, 4],#, 5, 6],
    'activation': ['relu', 'tanh'], #'selu', 'swish'
    'units': [64, 128, 256, 512],
    'dropout': [0.1, 0.2, 0.3],
    'learning_rate': Categorical(categories=(0.0001, 0.001), prior=None),#0.01
    'epochs': [10, 20, 30, 50, 100],
    'batch_size': Categorical([32, 128], prior=None),#[32, 64, 128],
    'optimizer': ['adam', 'rmsprop', 'sgd'],
    'l2_penalty': [1e-6, 1e-2, 1e-1]
}

In [None]:
# early_stopping = EarlyStopping(monitor='val_loss', patience=5) mean_squared_error
early_stopping = EarlyStopping(monitor='mean_squared_error', patience=5)
callbacks = [early_stopping]

model = KerasRegressor(
    build_fn=create_model,
    verbose=1,
    units=64,
    learning_rate=0.01,
    dropout=0.1,
    activation="tanh",
    depth=1,
    l2_penalty=1e-6,
    callbacks = callbacks
)

# Como ahora cada observacion, representa la serie de un producto, no hace falta usar 
# un cv especifico de time series, con el cv normal deberia ser suficiente.
# cv = TimeSeriesSplit(n_splits=5).split(X)

bayes_search = BayesSearchCV(
    model,
    param_space,
    # n_iter=50,
    cv=5,
    verbose=1,
    # n_jobs=-1,
    scoring='neg_mean_squared_error',
)

In [None]:
bayes_search.fit(X, y, verbose=1)

#### Guardo los mejores hiperparametros

In [254]:
best_params = bayes_search.cv_results_['params'][bayes_search.best_index_]
display(best_params)

best_model = create_model(
    units=best_params['units'],
    learning_rate=best_params['learning_rate'],
    optimizer=best_params['optimizer'],
    dropout=best_params['dropout'],
    activation=best_params['activation'],
    depth=best_params['depth'],
    l2_penalty=best_params['l2_penalty'],
)
best_model.fit(X, y, epochs=best_params['epochs'], batch_size=best_params['batch_size'], verbose=1)

display(best_params)
display(best_model)

OrderedDict([('activation', 'tanh'),
             ('batch_size', 32),
             ('depth', 2),
             ('dropout', 0.1),
             ('epochs', 50),
             ('l2_penalty', 0.01),
             ('learning_rate', 0.001),
             ('optimizer', 'sgd'),
             ('units', 128)])

'Creating model with parameters: units=128, learning_rate=0.001, optimizer=sgd, dropout=0.1, activation=tanh, depth=2, l2_penalty=0.01'

  super().__init__(**kwargs)


Epoch 1/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 41ms/step - loss: 5.5061 - mean_squared_error: 1.2081
Epoch 2/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 33ms/step - loss: 5.1077 - mean_squared_error: 0.8130
Epoch 3/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 35ms/step - loss: 4.9761 - mean_squared_error: 0.6848
Epoch 4/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 34ms/step - loss: 4.9742 - mean_squared_error: 0.6863
Epoch 5/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step - loss: 4.9485 - mean_squared_error: 0.6640
Epoch 6/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 30ms/step - loss: 4.9107 - mean_squared_error: 0.6296
Epoch 7/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 35ms/step - loss: 4.8580 - mean_squared_error: 0.5803
Epoch 8/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step - loss: 4.7

OrderedDict([('activation', 'tanh'),
             ('batch_size', 32),
             ('depth', 2),
             ('dropout', 0.1),
             ('epochs', 50),
             ('l2_penalty', 0.01),
             ('learning_rate', 0.001),
             ('optimizer', 'sgd'),
             ('units', 128)])

<Sequential name=sequential_757, built=True>

In [245]:
y_pred = best_model.predict(X)
mean_squared_error(y, y_pred)

[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 46ms/step


0.4681141331008797

In [246]:
# Obtener los mejores hiperparámetros
best_params = bayes_search.cv_results_['params'][bayes_search.best_index_]

# Guardar los mejores hiperparámetros en un archivo JSON
with open('best_params.json', 'w') as file:
    json.dump(best_params, file)

print('Los mejores hiperparámetros han sido guardados en best_params.json.')


Los mejores hiperparámetros han sido guardados en best_params.json.


In [247]:
with open('best_params.json', 'r') as file:
    best_params = json.load(file)

# Reconstruir el modelo con los mejores hiperparámetros
mejor_modelo = create_model(units=best_params['units'], learning_rate=best_params['learning_rate'], optimizer=best_params['optimizer'], dropout=best_params['dropout'], activation=best_params['activation'], depth=best_params['depth'], l2_penalty=best_params['l2_penalty'])
mejor_modelo.fit(X, y, epochs=best_params['epochs'], batch_size=best_params['batch_size'], verbose=1)
# Verificar la configuración del nuevo modelo
print("Configuración del nuevo modelo:")
print(mejor_modelo.summary())

'Creating model with parameters: units=128, learning_rate=0.001, optimizer=sgd, dropout=0.1, activation=tanh, depth=2, l2_penalty=0.01'

  super().__init__(**kwargs)


Epoch 1/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 27ms/step - loss: 5.5248 - mean_squared_error: 1.2432
Epoch 2/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 5.1021 - mean_squared_error: 0.8238
Epoch 3/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - loss: 4.9799 - mean_squared_error: 0.7049
Epoch 4/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step - loss: 4.9951 - mean_squared_error: 0.7235
Epoch 5/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step - loss: 4.9216 - mean_squared_error: 0.6533
Epoch 6/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 29ms/step - loss: 4.8556 - mean_squared_error: 0.5907
Epoch 7/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step - loss: 4.8449 - mean_squared_error: 0.5834
Epoch 8/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 29ms/step - loss: 4.93

None


#### Hago las predicciones

In [248]:
# best_params = BayesSearchCV.best_params_

# best_units = best_params['build_fn__units']
# best_dropout_rate = best_params['build_fn__dropout_rate']
# best_optimizer = best_params['build_fn__optimizer']

# best_model = create_model(units=best_units, dropout_rate=best_dropout_rate, optimizer=best_optimizer)
# best_model.fit(X, y)

In [249]:
X_f, product_ids, predictions = prepare_data(scaled_data, validation=True)

In [250]:
scaled_predictions = mejor_modelo.predict(X_f)
display(scaled_predictions)

[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 55ms/step


array([[-0.5522946 ],
       [-0.02378039],
       [-0.8571388 ],
       [-0.8028174 ],
       [-1.1526424 ],
       [-0.8424837 ],
       [-1.1430153 ],
       [-0.92409587],
       [-0.4799639 ],
       [-0.21263538],
       [-0.32411158],
       [-1.1567881 ],
       [-1.1697465 ],
       [-1.007281  ],
       [-0.9369471 ],
       [-0.5025943 ],
       [-0.91128397],
       [-0.83229166],
       [-0.776789  ],
       [-0.4455114 ],
       [-0.27961433],
       [-0.27254945],
       [-0.76956034],
       [-0.41575664],
       [-0.35105467],
       [-0.42303056],
       [-1.2769738 ],
       [-0.9180758 ],
       [-0.8619456 ],
       [-0.43105364],
       [-0.4817462 ],
       [-1.0778322 ],
       [-0.27950776],
       [-1.0575445 ],
       [-0.549862  ],
       [-0.24492672],
       [-0.2829991 ],
       [-0.5391534 ],
       [-0.5314253 ],
       [-0.75191194],
       [-0.8885122 ],
       [-0.8488881 ],
       [-0.7904655 ],
       [-0.9987295 ],
       [-0.48027986],
       [-1

In [251]:
scaled_predictions_1d =  scaled_predictions.reshape(-1)
display(scaled_predictions_1d)

array([-0.5522946 , -0.02378039, -0.8571388 , -0.8028174 , -1.1526424 ,
       -0.8424837 , -1.1430153 , -0.92409587, -0.4799639 , -0.21263538,
       -0.32411158, -1.1567881 , -1.1697465 , -1.007281  , -0.9369471 ,
       -0.5025943 , -0.91128397, -0.83229166, -0.776789  , -0.4455114 ,
       -0.27961433, -0.27254945, -0.76956034, -0.41575664, -0.35105467,
       -0.42303056, -1.2769738 , -0.9180758 , -0.8619456 , -0.43105364,
       -0.4817462 , -1.0778322 , -0.27950776, -1.0575445 , -0.549862  ,
       -0.24492672, -0.2829991 , -0.5391534 , -0.5314253 , -0.75191194,
       -0.8885122 , -0.8488881 , -0.7904655 , -0.9987295 , -0.48027986,
       -1.0969629 , -0.39811164, -0.5114534 , -0.78024423, -1.0618914 ,
       -1.2282729 , -0.93727726, -1.2807448 , -1.2120043 , -1.2143723 ,
       -0.95463705, -0.62277794, -1.1669822 , -0.5551786 , -0.5639812 ,
       -1.1035061 , -0.07911017, -0.9545539 , -0.08043619, -1.0029228 ,
       -0.4916042 , -1.1251634 , -0.78448427, -1.1170795 , -0.80

In [252]:
predictions_df = pd.DataFrame({
    'product_id': product_ids,
    'predicted_y': scaled_predictions_1d
})

# Iterar sobre cada fila del DataFrame predictions_df
for index, row in predictions_df.iterrows():
    # Obtener el ID del producto y la predicción escalada para este producto
    product_id = row['product_id']
    scaled_prediction = row['predicted_y']
    
    # Obtener el escalador correspondiente a 'predicted_y' para este producto
    scaler = scalers['y'][(product_id)]
    
    # Aplicar la inversa del escalador a la predicción 'predicted_y' para este producto
    inverse_scaled_prediction = scaler.inverse_transform([[scaled_prediction]])[0][0]
    
    # Reemplazar la predicción escalada con la predicción invertida en el DataFrame final
    predictions_df.at[index, 'predicted_y'] = inverse_scaled_prediction

  predictions_df.at[index, 'predicted_y'] = inverse_scaled_prediction


In [253]:
prediction_list = []
for product_id, pronostico in predictions.items():
    prediction_list.append({'product_id': product_id, 'predicted_y': pronostico})

# Convert the list of dictionaries to a DataFrame
predictions_df = pd.concat([predictions_df, pd.DataFrame(prediction_list)], ignore_index=True)

# Save the final DataFrame to a CSV file
predictions_df.to_csv('predictions.csv', index=False)

print('Todas las predicciones han sido generadas y guardadas en predictions.csv después de aplicar la inversa de los scalers.')
display(predictions_df.shape)

Todas las predicciones han sido generadas y guardadas en predictions.csv después de aplicar la inversa de los scalers.


(780, 2)