In [1]:
import pandas as pd
import numpy as np

# Seed NumPy's legacy global RNG so anything drawing from np.random repeats
# across runs.
np.random.seed(0)

# Open the workbook once and keep the handle; test_sheet() reads individual
# sheets through `excel_file` rather than re-opening the file each time.
excel_file = pd.ExcelFile('data/store_sales.xlsx')
sheet_names = excel_file.sheet_names
# Column names used as model inputs (`features`) and as the regression
# target (`target`) in every sheet.
features = ["F", "D", "Unit.Price"]
target = "Total.Volume"

# Echo the configuration as the cell output for a quick sanity check.
sheet_names, len(sheet_names), features, target

(['StoreWeekSalescarbbev_modify',
  'StoreWeekSalescigets',
  'StoreWeekSalescoffee',
  'StoreWeekSalescoldcer',
  'StoreWeekSalesdeod',
  'StoreWeekSalesdiapers',
  'StoreWeekSalesfactiss',
  'StoreWeekSalesfzdinent',
  'StoreWeekSalesfzpizza',
  'StoreWeekSaleshotdog',
  'StoreWeekSaleslaundet',
  'StoreWeekSalesmargbutr',
  'StoreWeekSalesmayo',
  'StoreWeekSalesmustketc',
  'StoreWeekSalespaptowl',
  'StoreWeekSalespeanbutr',
  'StoreWeekSalesshamp',
  'StoreWeekSalessoup',
  'StoreWeekSalesspagsauc',
  'StoreWeekSalessugarsub',
  'StoreWeekSalestoitisu',
  'StoreWeekSalestoothpa',
  'StoreWeekSalesyogurt',
  'StoreWeekSalesbeer_modify'],
 24,
 ['F', 'D', 'Unit.Price'],
 'Total.Volume')

In [2]:
import pandas as pd
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
import time


def evaluate_model(model, X, y, n=1, agg=True):
    """Fit and score ``model`` on ``n`` random 80/20 train/test splits.

    Parameters
    ----------
    model : object
        Estimator exposing ``fit(X, y)`` and ``predict(X)``. The same object
        is re-fitted on every split, so after the call it holds the fit from
        the last split.
    X, y : array-like
        Inputs and targets, passed straight to ``train_test_split``.
    n : int, default=1
        Number of splits. Split ``i`` uses ``random_state=i``, so the splits
        are reproducible and identical for every model evaluated with the
        same ``X``, ``y`` and ``n``.
    agg : bool, default=True
        If True, return the column-wise mean over the splits; otherwise
        return one row per split.

    Returns
    -------
    pandas.Series or pandas.DataFrame
        Columns/keys are ``r2``, ``mae``, ``mse``, ``train_time`` (seconds
        spent in ``fit``) and ``comp_time`` (seconds spent in ``predict``).
    """
    columns = ['r2', 'mae', 'mse', 'train_time', 'comp_time']
    rows = []

    for random_state in range(n):
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=random_state)

        # perf_counter() is a monotonic, high-resolution clock meant for
        # measuring intervals; time.time() follows the wall clock, which can
        # be adjusted mid-run and may have coarse resolution.
        fit_start = time.perf_counter()
        model.fit(X_train, y_train)
        train_time = time.perf_counter() - fit_start

        predict_start = time.perf_counter()
        predictions = model.predict(X_test)
        comp_time = time.perf_counter() - predict_start

        r2 = r2_score(y_test, predictions)
        mae = mean_absolute_error(y_test, predictions)
        mse = mean_squared_error(y_test, predictions)

        rows.append([r2, mae, mse, train_time, comp_time])

    df = pd.DataFrame(rows, columns=columns)

    return df.mean() if agg else df

In [3]:
from sklearn.experimental import enable_halving_search_cv
from sklearn.model_selection import GridSearchCV


def best_estimator(model, param_grid, X, y):
    """Grid-search ``model`` over ``param_grid`` and return the winning estimator.

    Runs ``GridSearchCV`` with its default settings on the full ``X``/``y``
    and returns the search's ``best_estimator_`` attribute.
    """
    return GridSearchCV(model, param_grid).fit(X, y).best_estimator_

In [4]:
from sklearn.experimental import enable_halving_search_cv
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor, BaggingRegressor, ExtraTreesRegressor, HistGradientBoostingRegressor, StackingRegressor, VotingRegressor
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet, BayesianRidge, ARDRegression, SGDRegressor, PassiveAggressiveRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from xgboost import XGBRegressor

from models_sklearn import MLP, FONN1, FONN2, TREENN1, TREENN2


def test_models(X, y):
    """Benchmark the baseline and custom regressors on one dataset.

    Every model is scored with ``evaluate_model(..., n=5)``, i.e. the mean
    over five train/test splits.

    Returns
    -------
    pandas.DataFrame
        Indexed by model label (index name ``"model"``), with columns
        ``r2``, ``mae``, ``mse``, ``train_time`` and ``comp_time``.
    """
    # Off-the-shelf baselines; stochastic ones are pinned to random_state=42.
    models = {
        "Linear Regression": LinearRegression(),
        "Ridge Regression": Ridge(),
        "Lasso Regression": Lasso(random_state=42),
        "ElasticNet Regression": ElasticNet(random_state=42),
        "Bayesian Ridge Regression": BayesianRidge(),
        "ARD Regression": ARDRegression(),
        "SGD Regressor": SGDRegressor(random_state=42),
        "Passive Aggressive Regressor": PassiveAggressiveRegressor(random_state=42),
        "Support Vector Regression": SVR(),
        "MLP Regressor": MLPRegressor(
            hidden_layer_sizes=(100,), max_iter=10000, random_state=42),
        "Random Forest Regressor": RandomForestRegressor(
            n_estimators=100, random_state=42),
        "Gradient Boosting Regressor": GradientBoostingRegressor(random_state=42),
        "XGBoost Regressor": XGBRegressor(random_state=42),
        "AdaBoost Regressor": AdaBoostRegressor(random_state=42),
        "Bagging Regressor": BaggingRegressor(random_state=42),
        "ExtraTrees Regressor": ExtraTreesRegressor(random_state=42),
        "HistGradientBoosting Regressor": HistGradientBoostingRegressor(
            random_state=42),
        "Stacking Regressor": StackingRegressor(
            estimators=[
                ('lr', LinearRegression()),
                ('rf', RandomForestRegressor(n_estimators=10, random_state=42)),
            ],
            final_estimator=Ridge(random_state=42),
        ),
        "Voting Regressor": VotingRegressor(
            estimators=[
                ('lr', LinearRegression()),
                ('rf', RandomForestRegressor(n_estimators=10, random_state=42)),
                ('gb', GradientBoostingRegressor(random_state=42)),
            ],
        ),
    }

    # Each key has exactly one candidate value, so the grid search in
    # best_estimator() has a single configuration to choose from.
    param_grid = {
        'max_iter': [5000],
        'learning_rate': ['constant'],
        'learning_rate_init': [1e-2],
        'tol': [1e-4],
        'early_stopping': [True]
    }

    # (label, estimator class, positional constructor arguments).
    # NOTE(review): the trailing number in the FONN*/TREENN* labels differs
    # from the tuple passed to the constructor (e.g. label "5 10" -> (15,)),
    # presumably a fixed offset -- confirm against models_sklearn.
    custom_specs = [
        ("Custom MLP 5", MLP, (5,)),
        ("Custom MLP 10", MLP, (10,)),
        ("Custom MLP 40", MLP, (40,)),
        ("FONN1 5 5", FONN1, (5, (10,))),
        ("FONN1 5 10", FONN1, (5, (15,))),
        ("FONN1 5 40", FONN1, (5, (45,))),
        ("FONN2 5 5", FONN2, (5, (10,))),
        ("FONN2 5 10", FONN2, (5, (15,))),
        ("FONN2 5 40", FONN2, (5, (45,))),
        ("TREENN1 5", TREENN1, ((6,),)),
        ("TREENN1 10", TREENN1, ((11,),)),
        ("TREENN1 40", TREENN1, ((41,),)),
        ("TREENN2 5", TREENN2, ((6,),)),
        ("TREENN2 10", TREENN2, ((11,),)),
        ("TREENN2 40", TREENN2, ((41,),)),
    ]
    # Construct and search one estimator at a time, in table order.
    for label, estimator_cls, ctor_args in custom_specs:
        models[label] = best_estimator(
            estimator_cls(*ctor_args), param_grid, X, y)

    # One row per model: label followed by the five averaged metrics.
    metric_columns = ["r2", "mae", "mse", "train_time", "comp_time"]
    rows = [
        [label, *evaluate_model(fitted, X, y, n=5).values]
        for label, fitted in models.items()
    ]

    return pd.DataFrame(rows, columns=["model", *metric_columns]).set_index("model")

In [5]:
from sklearn.preprocessing import StandardScaler


def test_sheet(sheet_name):
    """Run the model benchmark for every frequently-observed store in a sheet.

    Reads ``sheet_name`` from the notebook-level ``excel_file``, keeps the
    ``IRI_KEY`` values that occur in more than 300 rows, and for each of them
    standardises the ``features`` columns and the ``target`` column before
    handing them to ``test_models``.

    Returns
    -------
    pandas.DataFrame
        One row per kept ``IRI_KEY`` (row label
        ``mse_<sheet_name>_<iri_key>``), one column per model, holding the
        ``mse`` column returned by ``test_models``.
    """
    sheet_frame = pd.read_excel(excel_file, sheet_name=sheet_name)

    rows_per_key = sheet_frame["IRI_KEY"].value_counts()
    frequent_keys = [int(key) for key in rows_per_key[rows_per_key > 300].index]
    print(sheet_name, len(frequent_keys))

    mse_rows = []
    for iri_key in frequent_keys:
        key_rows = sheet_frame[sheet_frame["IRI_KEY"] == iri_key]

        # NOTE(review): both scalers are fitted on all rows of this key, i.e.
        # before evaluate_model() makes its train/test splits -- confirm that
        # this is intended.
        X = StandardScaler().fit_transform(key_rows[features])
        # StandardScaler expects 2-D input: reshape to a column, then flatten.
        y = StandardScaler().fit_transform(
            key_rows[target].values.reshape(-1, 1)).ravel()

        mse_rows.append(
            test_models(X, y)["mse"].rename(f"mse_{sheet_name}_{iri_key}"))

    return pd.DataFrame(mse_rows)

In [6]:
from pathlib import Path

from sklearn.utils import shuffle

# Draw the 10-sheet sample from the workbook's own sheet list rather than from
# `sheet_names` itself, so re-running this cell always gives the same sample
# instead of reshuffling the result of the previous run.
sheet_names = shuffle(excel_file.sheet_names, random_state=0, n_samples=10)

# One frame per sheet, as returned by test_sheet().
sheet_results = []

for sheet_name in sheet_names:  # type: ignore
    results = test_sheet(sheet_name)
    sheet_results.append(results)

# Stack the per-sheet frames, then transpose so each row is a model and each
# column is one mse_<sheet>_<store> series.
dataset_mse = pd.concat(sheet_results).T

# Put the per-model average across all series in the first column.
dataset_mse.insert(0, "Avg mse", dataset_mse.mean(axis=1))

# to_csv() does not create missing directories, so make sure the target exists.
output_path = Path("output/dataset_mse.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
dataset_mse.to_csv(output_path)
dataset_mse

StoreWeekSalesmargbutr 16
StoreWeekSaleslaundet 16
StoreWeekSalesyogurt 16
StoreWeekSalespaptowl 16
StoreWeekSalestoitisu 16
StoreWeekSalescigets 16
StoreWeekSalesmustketc 16
StoreWeekSalesbeer_modify 16
StoreWeekSalesshamp 16
StoreWeekSalesfzpizza 16


Unnamed: 0_level_0,Avg mse,mse_StoreWeekSalesmargbutr_6001821,mse_StoreWeekSalesmargbutr_648368,mse_StoreWeekSalesmargbutr_279300,mse_StoreWeekSalesmargbutr_400003,mse_StoreWeekSalesmargbutr_270862,mse_StoreWeekSalesmargbutr_231720,mse_StoreWeekSalesmargbutr_252570,mse_StoreWeekSalesmargbutr_659827,mse_StoreWeekSalesmargbutr_241565,...,mse_StoreWeekSalesfzpizza_252570,mse_StoreWeekSalesfzpizza_659827,mse_StoreWeekSalesfzpizza_241565,mse_StoreWeekSalesfzpizza_237277,mse_StoreWeekSalesfzpizza_291276,mse_StoreWeekSalesfzpizza_273920,mse_StoreWeekSalesfzpizza_232633,mse_StoreWeekSalesfzpizza_233246,mse_StoreWeekSalesfzpizza_532639,mse_StoreWeekSalesfzpizza_533864
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Linear Regression,0.548602,0.355567,0.484945,0.520159,0.378966,0.316732,0.290991,0.284322,0.256639,0.320135,...,0.722451,0.411698,0.687117,0.691498,0.551589,0.344872,0.383916,0.875491,0.758739,0.373452
Ridge Regression,0.548028,0.355327,0.427111,0.51978,0.378686,0.316533,0.291032,0.28392,0.254793,0.319829,...,0.722363,0.411111,0.686883,0.690716,0.551285,0.344576,0.38377,0.875489,0.758829,0.372672
Lasso Regression,0.974918,0.888471,0.927499,0.870215,0.847519,0.915595,0.986919,0.924782,0.703821,0.944144,...,1.104965,0.788848,0.828385,1.080876,0.922579,1.029197,1.012589,1.157299,1.073932,0.789398
ElasticNet Regression,0.860812,0.719238,0.708975,0.754125,0.709708,0.672113,0.747656,0.680342,0.448074,0.727572,...,1.104965,0.665217,0.828255,1.054212,0.8932,0.751267,0.81584,1.157299,1.073932,0.5791
Bayesian Ridge Regression,0.54711,0.354828,0.403679,0.518439,0.378008,0.316311,0.291131,0.283368,0.254748,0.319477,...,0.722029,0.410199,0.684457,0.687523,0.550523,0.344274,0.383578,0.875932,0.76012,0.371755
ARD Regression,0.548626,0.356751,0.368726,0.524197,0.380377,0.317033,0.292752,0.283093,0.254538,0.321645,...,0.724757,0.406461,0.686922,0.704816,0.551072,0.344359,0.384141,0.877077,0.757179,0.3733
SGD Regressor,0.546061,0.352409,0.36613,0.516831,0.376937,0.317468,0.292839,0.278072,0.245936,0.319559,...,0.720894,0.410237,0.685106,0.680448,0.551947,0.344941,0.384328,0.874388,0.759179,0.370523
Passive Aggressive Regressor,1.603998,0.783293,1.252662,1.922411,0.944816,1.822582,0.850152,0.927511,0.883649,1.01612,...,1.388543,2.242455,1.412354,3.227107,2.516483,1.61978,0.814784,1.658131,2.576677,1.19483
Support Vector Regression,0.542697,0.327081,0.331024,0.456361,0.358456,0.340349,0.348847,0.356293,0.193621,0.31948,...,0.62551,0.40059,0.621565,0.679358,0.555546,0.319346,0.411374,0.960605,0.801345,0.354202
MLP Regressor,0.500179,0.292458,0.357969,0.439745,0.320362,0.292278,0.172948,0.266801,0.210045,0.263986,...,0.572415,0.330353,0.679459,0.59179,0.537986,0.303753,0.380198,0.928383,0.752324,0.337058


In [9]:
# Re-display the combined per-model MSE table assembled by the benchmark loop.
dataset_mse

Unnamed: 0_level_0,Avg mse,mse_StoreWeekSalesmargbutr_6001821,mse_StoreWeekSalesmargbutr_648368,mse_StoreWeekSalesmargbutr_279300,mse_StoreWeekSalesmargbutr_400003,mse_StoreWeekSalesmargbutr_270862,mse_StoreWeekSalesmargbutr_231720,mse_StoreWeekSalesmargbutr_252570,mse_StoreWeekSalesmargbutr_659827,mse_StoreWeekSalesmargbutr_241565,...,mse_StoreWeekSalesfzpizza_252570,mse_StoreWeekSalesfzpizza_659827,mse_StoreWeekSalesfzpizza_241565,mse_StoreWeekSalesfzpizza_237277,mse_StoreWeekSalesfzpizza_291276,mse_StoreWeekSalesfzpizza_273920,mse_StoreWeekSalesfzpizza_232633,mse_StoreWeekSalesfzpizza_233246,mse_StoreWeekSalesfzpizza_532639,mse_StoreWeekSalesfzpizza_533864
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Linear Regression,0.548602,0.355567,0.484945,0.520159,0.378966,0.316732,0.290991,0.284322,0.256639,0.320135,...,0.722451,0.411698,0.687117,0.691498,0.551589,0.344872,0.383916,0.875491,0.758739,0.373452
Ridge Regression,0.548028,0.355327,0.427111,0.51978,0.378686,0.316533,0.291032,0.28392,0.254793,0.319829,...,0.722363,0.411111,0.686883,0.690716,0.551285,0.344576,0.38377,0.875489,0.758829,0.372672
Lasso Regression,0.974918,0.888471,0.927499,0.870215,0.847519,0.915595,0.986919,0.924782,0.703821,0.944144,...,1.104965,0.788848,0.828385,1.080876,0.922579,1.029197,1.012589,1.157299,1.073932,0.789398
ElasticNet Regression,0.860812,0.719238,0.708975,0.754125,0.709708,0.672113,0.747656,0.680342,0.448074,0.727572,...,1.104965,0.665217,0.828255,1.054212,0.8932,0.751267,0.81584,1.157299,1.073932,0.5791
Bayesian Ridge Regression,0.54711,0.354828,0.403679,0.518439,0.378008,0.316311,0.291131,0.283368,0.254748,0.319477,...,0.722029,0.410199,0.684457,0.687523,0.550523,0.344274,0.383578,0.875932,0.76012,0.371755
ARD Regression,0.548626,0.356751,0.368726,0.524197,0.380377,0.317033,0.292752,0.283093,0.254538,0.321645,...,0.724757,0.406461,0.686922,0.704816,0.551072,0.344359,0.384141,0.877077,0.757179,0.3733
SGD Regressor,0.546061,0.352409,0.36613,0.516831,0.376937,0.317468,0.292839,0.278072,0.245936,0.319559,...,0.720894,0.410237,0.685106,0.680448,0.551947,0.344941,0.384328,0.874388,0.759179,0.370523
Passive Aggressive Regressor,1.603998,0.783293,1.252662,1.922411,0.944816,1.822582,0.850152,0.927511,0.883649,1.01612,...,1.388543,2.242455,1.412354,3.227107,2.516483,1.61978,0.814784,1.658131,2.576677,1.19483
Support Vector Regression,0.542697,0.327081,0.331024,0.456361,0.358456,0.340349,0.348847,0.356293,0.193621,0.31948,...,0.62551,0.40059,0.621565,0.679358,0.555546,0.319346,0.411374,0.960605,0.801345,0.354202
MLP Regressor,0.500179,0.292458,0.357969,0.439745,0.320362,0.292278,0.172948,0.266801,0.210045,0.263986,...,0.572415,0.330353,0.679459,0.59179,0.537986,0.303753,0.380198,0.928383,0.752324,0.337058


In [7]:
# import seaborn as sns
# import matplotlib.pyplot as plt
# import pandas as pd

# # Convert cv_results_ to a DataFrame
# results = pd.DataFrame(search.cv_results_)

# # Pivot the DataFrame to create a matrix for the heatmap
# heatmap_data = results.sort_values("iter").pivot_table(
#             index="param_learning_rate",
#             columns="param_epochs",
#             values="mean_test_score",
#             aggfunc="last",
#         )

# # Plot the heatmap
# plt.figure(figsize=(10, 6))
# sns.heatmap(heatmap_data, annot=True, cmap='viridis')
# plt.title('HalvingGridSearchCV Results')
# plt.xlabel('Number of Estimators')
# plt.ylabel('Learning Rate')
# plt.show()


In [8]:
# NOTE(review): `results` is not defined in this cell. It appears to be the
# loop variable left over from the sheet loop, i.e. the frame returned by
# test_sheet() for the last sheet processed. This relies on leftover kernel
# state -- confirm before depending on this output.
results

model,Linear Regression,Ridge Regression,Lasso Regression,ElasticNet Regression,Bayesian Ridge Regression,ARD Regression,SGD Regressor,Passive Aggressive Regressor,Support Vector Regression,MLP Regressor,...,FONN1 5 40,FONN2 5 5,FONN2 5 10,FONN2 5 40,TREENN1 5,TREENN1 10,TREENN1 40,TREENN2 5,TREENN2 10,TREENN2 40
mse_StoreWeekSalesfzpizza_6001821,0.832112,0.832764,1.195429,1.195429,0.839103,0.839518,0.837842,1.927508,0.798625,0.708725,...,0.856051,0.827643,0.873153,0.846287,0.925741,1.011545,1.036716,0.924225,0.965875,0.972675
mse_StoreWeekSalesfzpizza_648368,0.41871,0.41857,1.099785,0.856469,0.418505,0.424027,0.416177,1.6729,0.417378,0.373155,...,0.543038,0.427801,0.398206,0.474546,0.526624,0.651182,0.531517,0.402802,0.418292,0.404016
mse_StoreWeekSalesfzpizza_279300,0.601383,0.601604,1.127962,1.076503,0.602718,0.603212,0.601971,1.90953,0.609198,0.599168,...,0.83515,0.644613,0.667811,0.597478,0.807809,0.77923,0.842911,0.616653,0.603429,0.58087
mse_StoreWeekSalesfzpizza_400003,0.615842,0.616041,0.966194,0.966194,0.618851,0.61926,0.617068,1.075662,0.626619,0.572011,...,0.722769,0.621074,0.765832,0.592468,0.762946,0.721731,0.879163,0.628546,0.648808,0.702201
mse_StoreWeekSalesfzpizza_270862,0.771595,0.771566,1.084358,1.082112,0.771936,0.780082,0.771314,1.585301,0.727869,0.736852,...,1.014647,0.78485,0.804308,0.729081,0.917553,0.916481,0.979046,0.945572,0.792381,0.732292
mse_StoreWeekSalesfzpizza_231720,0.823944,0.82399,1.184465,1.184465,0.825204,0.824138,0.822959,1.704419,0.746829,0.657655,...,0.873784,0.798837,0.85691,0.732439,1.0097,0.99961,0.972565,0.804445,0.813119,0.710332
mse_StoreWeekSalesfzpizza_252570,0.722451,0.722363,1.104965,1.104965,0.722029,0.724757,0.720894,1.388543,0.62551,0.572415,...,0.672554,0.694609,0.612159,0.567756,0.906006,0.879627,0.929473,0.732937,0.626158,0.65266
mse_StoreWeekSalesfzpizza_659827,0.411698,0.411111,0.788848,0.665217,0.410199,0.406461,0.410237,2.242455,0.40059,0.330353,...,0.558305,0.531981,0.423327,0.451025,0.587671,0.521485,0.809603,0.407455,0.437452,0.395624
mse_StoreWeekSalesfzpizza_241565,0.687117,0.686883,0.828385,0.828255,0.684457,0.686922,0.685106,1.412354,0.621565,0.679459,...,0.873056,0.808906,0.72367,0.738346,0.973113,0.882891,0.927118,0.721211,0.683749,0.720705
mse_StoreWeekSalesfzpizza_237277,0.691498,0.690716,1.080876,1.054212,0.687523,0.704816,0.680448,3.227107,0.679358,0.59179,...,0.901209,0.630778,0.644022,0.58848,0.795235,0.852738,0.766873,0.6106,0.703248,0.585401
