In [1]:
import pandas as pd
from pathlib import Path
from typing import List, Dict, Any, Optional, Tuple
import os
from datetime import datetime
import pdb;
from grocery_ml_tensorflow import GroceryML
from grocery_ml_core import GroceryMLCore
from hidden_layer_param_builder import HiddenLayerParamSetBuilder
import tensorflow as tf

# Notebook-wide pandas display settings: show every row/column untruncated and
# render floats with six decimal places. Applied in the order listed.
_DISPLAY_OPTIONS = {
    "display.max_rows": None,
    "display.max_colwidth": None,
    "display.float_format": lambda value: f"{value:.6f}",
    "display.max_columns": None,
    "display.width": 2000,
}
for _option_name, _option_value in _DISPLAY_OPTIONS.items():
    pd.set_option(_option_name, _option_value)

# Echo the working directory; the output paths used in this notebook are relative.
print(os.getcwd())
# Optional TensorFlow diagnostics (left disabled):
# print("GPUs Available:", tf.config.list_physical_devices('GPU'))
#tf.debugging.set_log_device_placement(True)

# def run_all_experiments(df, model_param_sets, output_dir):
#     total = len(model_param_sets)
#     print(f"run_all_experiments() when: {datetime.now()}  output_dir: {output_dir}");
#     for index, params in enumerate(model_param_sets, 1):
#         print(f"Running Exp {index}/{total}...")
#         groceryML.run_experiment(df,  params["buildParams"], params["trainParams"], output_dir)


# Build the training DataFrame once for the whole notebook and snapshot it to a
# timestamped CSV so every run leaves an inspectable artifact on disk.
groceryML = None  # pre-bound so the except handler can tell whether construction succeeded
try:
    groceryML = GroceryML()
    groceryMLCore = GroceryMLCore()
    groceryML.build_training_df()
    if groceryML.training_df is None:
        # A bare `raise()` evaluates to `raise ()`, which itself fails with
        # "TypeError: exceptions must derive from BaseException"; raise a real,
        # descriptive error instead.
        raise RuntimeError("build_training_df() did not produce a training_df")
    ts = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
    groceryML.training_df.to_csv(f"training_df-{ts}.csv")
except Exception as ex:
    print(ex)
    # Best-effort debug snapshot: only possible if the object was constructed and
    # a (possibly partial) frame exists. Guarding here keeps a secondary
    # AttributeError/NameError from masking the original failure.
    partial_training_df = getattr(groceryML, "training_df", None)
    if partial_training_df is not None:
        ts = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
        partial_training_df.to_csv(f"training_df-{ts}-exception.csv")
    # Re-raise so the cell fails visibly (with a traceback) instead of letting
    # later cells run against a missing or incomplete training_df.
    raise


C:\Users\steve\source\repos\grocery-ml
_build_combined_df()
_build_sources()
creating target col: didBuy_target
insert_negative_samples()
compute_expected_gap_ewma()
create_item_supply_level_feat()
add_item_total_purchase_count_feat()
build_trip_interveral_feautres()
drop_rare_purchases()
validate_no_empty_columns()
self._build_combined_df() done


In [2]:
import optuna 
def objective(trial):
    """Optuna objective: sample one model configuration, train it, return its score.

    Hyperparameters are drawn from ``trial`` in a fixed order (learning rate,
    depth, units, embedding dim, epochs, output activation). The hidden stack is
    ``depth`` identical ReLU layers of ``units`` units each. The loss follows the
    output activation: binary cross-entropy for ``"sigmoid"``, MSE otherwise.

    The experiment is run through the notebook-level ``groceryML`` object on
    ``groceryML.training_df`` with ``"exp/keras/optuna"`` passed as the output
    location.

    Returns:
        ``groceryML.last_val_auc`` when the output activation is ``"sigmoid"``,
        otherwise the negated ``groceryML.last_val_mse`` (so that larger is
        better in both cases).

    NOTE(review): the captured output of this cell shows trial 0 failing with
    ``TypeError: Object of type EarlyStopping is not JSON serializable`` raised
    from ``save_experiment`` in grocery_ml_tensorflow.py, i.e. the train params
    (which carry the callback object) are being JSON-dumped. That needs a fix in
    the library's save path; it is not addressed here.

    NOTE(review): the two branches return values on different scales (an AUC vs.
    a negated MSE), so trials with different output activations are not directly
    comparable within one study -- confirm this is intended.
    """
    # Keep the suggest_* calls in this exact order; the sampler consumes them sequentially.
    learning_rate = trial.suggest_float("learning_rate", 0.00001, 0.01, step=0.00001)
    n_hidden_layers = trial.suggest_int("depth", 1, 50)
    layer_width = trial.suggest_int("units", 5, 2048)
    embedding_size = trial.suggest_int("embedding_dim", 5, 2048)
    max_epochs = trial.suggest_int("epochs", 10, 500)
    output_activation = trial.suggest_categorical("output_activation", ["sigmoid", "linear"])

    tracked_metrics = [
        "Accuracy",
        "MAE",
        "MSE",
        "MAPE",
        "MSLE",
        "Precision",
        "Recall",
        "AUC",
        "BinaryCrossentropy",
        "RootMeanSquaredError",
    ]

    # Stop once val_loss has not improved for 10 epochs and roll back to the best weights.
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor="val_loss",
        patience=10,
        restore_best_weights=True,
    )

    # One independent config dict per hidden layer, all with the same width.
    hidden_layers = [
        {"units": layer_width, "activation": "relu"}
        for _ in range(n_hidden_layers)
    ]

    is_sigmoid = output_activation == "sigmoid"
    if is_sigmoid:
        loss_name = "binary_crossentropy"
    else:
        loss_name = "mse"

    build_params = {
        "embedding_dim": embedding_size,
        "layers": hidden_layers,
        "output_activation": output_activation,
        "optimizer": "adam",
        "learning_rate": learning_rate,
        "loss": loss_name,
        "metrics": tracked_metrics,
    }
    train_params = {
        "epochs": max_epochs,
        "batch_size": 32,
        "callbacks": [early_stopping],
    }

    groceryML.run_experiment(
        groceryML.training_df,
        build_params,
        train_params,
        "exp/keras/optuna",
    )

    if is_sigmoid:
        return groceryML.last_val_auc
    return -groceryML.last_val_mse
############################################################################


# Tree-structured Parzen Estimator sampler with Optuna's default settings.
sampler = optuna.samplers.TPESampler()

# The study name carries a start timestamp, so each execution of this cell
# normally creates a brand-new study in the shared SQLite file;
# load_if_exists only takes effect if the same name is produced again.
run_stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
study_name = f"grocery_ml_tuning_{run_stamp}"

study = optuna.create_study(
    storage="sqlite:///optuna_grocery.db",
    study_name=study_name,
    sampler=sampler,
    # objective() returns either an AUC or a negated MSE, so larger is better.
    direction="maximize",
    load_if_exists=True,
)

# No `catch=` is passed, so an exception raised inside objective() propagates
# and ends the optimization (as seen in this cell's captured output).
study.optimize(objective, n_trials=1000)

[I 2026-01-11 23:45:38,180] A new study created in RDB with name: grocery_ml_tuning_20260111_234537


Creating dir: exp/keras/optuna\e371_l579-579-579-579-579-579-5
run_experiment()  exp_dir: exp/keras/optuna\e371_l579-579-579-579-579-579-5  
run_experiment()  when: 2026-01-11 23:45:38.235948 params: {'epochs': 420, 'batch_size': 32, 'callbacks': [<keras.callbacks.EarlyStopping object at 0x000001C6368E3EB0>]}  
normalize_features()
train_model()
build_prediction_input() prediction_date=2026-01-11 23:58:33.977394
_build_combined_df()
_build_sources()
creating target col: didBuy_target
insert_negative_samples()


  additional_rcpts_df["date"] = pd.to_datetime(additional_rcpts_df["date"])


compute_expected_gap_ewma()
create_item_supply_level_feat()
add_item_total_purchase_count_feat()
build_trip_interveral_feautres()
drop_rare_purchases()
validate_no_empty_columns()
self._build_combined_df() done
create_item_supply_level_feat()
normalize_features()
build_prediction_input() is done
Running Model.Predict()
Exporting extra_dataframes:
grocery_ml_tensorflow.export_dataframes_to_excel()
Writing XLSX: exp/keras/optuna\e371_l579-579-579-579-579-579-5\normalized_training_df-e371_l579-579-579-579-579-579-5.xlsx
   XLSX Done: exp/keras/optuna\e371_l579-579-579-579-579-579-5\normalized_training_df-e371_l579-579-579-579-579-579-5.xlsx
Writing XLSX: exp/keras/optuna\e371_l579-579-579-579-579-579-5\predictions-e371_l579-579-579-579-579-579-5.xlsx
   XLSX Done: exp/keras/optuna\e371_l579-579-579-579-579-579-5\predictions-e371_l579-579-579-579-579-579-5.xlsx
[save_model] starting artifact save → exp/keras/optuna\e371_l579-579-579-579-579-579-5
[save_model] writing training_df snapshot (



INFO:tensorflow:Assets written to: exp/keras/optuna\e371_l579-579-579-579-579-579-5\model\assets


INFO:tensorflow:Assets written to: exp/keras/optuna\e371_l579-579-579-579-579-579-5\model\assets
[W 2026-01-11 23:59:44,786] Trial 0 failed with parameters: {'learning_rate': 0.00542, 'depth': 38, 'units': 579, 'embedding_dim': 371, 'epochs': 420, 'output_activation': 'linear'} because of the following error: TypeError('Object of type EarlyStopping is not JSON serializable').
Traceback (most recent call last):
  File "C:\ProgramData\miniconda3\envs\grocery-ml-keras\lib\site-packages\optuna\study\_optimize.py", line 205, in _run_trial
    value_or_values = func(trial)
  File "C:\Users\steve\AppData\Local\Temp\ipykernel_12460\1679849619.py", line 34, in objective
    groceryML.run_experiment(
  File "C:\Users\steve\source\repos\grocery-ml\grocery_ml_tensorflow.py", line 409, in run_experiment
    self.save_experiment(model, training_df, dataframes, history,  modelBuildParams, modelTrainParams, exp_dir_path)
  File "C:\Users\steve\source\repos\grocery-ml\grocery_ml_tensorflow.py", line 47

[save_model] saving model weights (separate file)
[save_model] all artifacts saved successfully → exp/keras/optuna\e371_l579-579-579-579-579-579-5\model


TypeError: Object of type EarlyStopping is not JSON serializable