In [1]:
import pandas as pd
import numpy as np
import time
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet, BayesianRidge, ARDRegression, SGDRegressor, PassiveAggressiveRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor, BaggingRegressor, ExtraTreesRegressor, HistGradientBoostingRegressor, StackingRegressor, VotingRegressor
from xgboost import XGBRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

from models import MLP, Ensemble, FONN1, FONN2, TREENN1, TREENN2

In [2]:
# Open the store sales workbook and record the names of its sheets
excel_file = pd.ExcelFile('data/store_sales.xlsx')
sheet_names = excel_file.sheet_names

# Input columns and the column to predict
features = ["F", "D", "Unit.Price"]
target = "Total.Volume"

# Only the first sheet is read; keep the IRI_KEY values that occur in more than 300 rows
# (IRI_KEY is presumably a store identifier - TODO confirm against the data dictionary)
df = pd.read_excel(excel_file, sheet_name=sheet_names[0])
iri_key_counts = df["IRI_KEY"].value_counts()
iri_keys = iri_key_counts.loc[iri_key_counts > 300].index

# value_counts() orders by descending frequency, so iri_keys[0] is the key with the most rows
df = df.loc[df["IRI_KEY"] == iri_keys[0]]
X = df[features]
y = df[target].to_numpy().reshape(-1, 1)  # column vector: the scaler needs 2-D input

# Standardise features and target with separate scalers
scaler_X, scaler_y = StandardScaler(), StandardScaler()
X = scaler_X.fit_transform(X)
y = scaler_y.fit_transform(y)

sheet_names, iri_keys, X.shape, y.shape

(['StoreWeekSalescarbbev_modify',
  'StoreWeekSalescigets',
  'StoreWeekSalescoffee',
  'StoreWeekSalescoldcer',
  'StoreWeekSalesdeod',
  'StoreWeekSalesdiapers',
  'StoreWeekSalesfactiss',
  'StoreWeekSalesfzdinent',
  'StoreWeekSalesfzpizza',
  'StoreWeekSaleshotdog',
  'StoreWeekSaleslaundet',
  'StoreWeekSalesmargbutr',
  'StoreWeekSalesmayo',
  'StoreWeekSalesmustketc',
  'StoreWeekSalespaptowl',
  'StoreWeekSalespeanbutr',
  'StoreWeekSalesshamp',
  'StoreWeekSalessoup',
  'StoreWeekSalesspagsauc',
  'StoreWeekSalessugarsub',
  'StoreWeekSalestoitisu',
  'StoreWeekSalestoothpa',
  'StoreWeekSalesyogurt',
  'StoreWeekSalesbeer_modify'],
 Index([6001821.0,  648368.0,  279300.0,  400003.0,  270862.0,  231720.0,
         252570.0,  659827.0,  241565.0,  237277.0,  291276.0,  273920.0,
         232633.0,  233246.0,  532639.0,  533864.0],
       dtype='float64', name='IRI_KEY'),
 (313, 3),
 (313, 1))

In [3]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the partition repeatable.
# NOTE(review): X and y were standardised on the full dataset before this split, so the
# scalers have seen the test rows - confirm whether that is acceptable for this comparison.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts
tuple(part.shape for part in split_parts)

((250, 3), (63, 3), (250, 1), (63, 1))

In [4]:
# Function to train and evaluate a model
def train_evaluate_model(model, X_train, X_test, y_train, y_test):
    """Fit ``model``, predict on the test split and score the predictions.

    Args:
        model: Object exposing ``fit(X, y)`` and ``predict(X)``.
        X_train: Training features passed to ``model.fit``.
        X_test: Test features passed to ``model.predict``.
        y_train: Training targets passed to ``model.fit``.
        y_test: True targets the predictions are scored against.

    Returns:
        tuple: ``(r2, mae, mse, train_time, comp_time)`` - the R² score, mean
        absolute error and mean squared error of the test predictions,
        followed by the elapsed seconds spent in ``fit`` and in ``predict``.
    """
    # perf_counter() is a monotonic, high-resolution clock meant for measuring
    # short intervals; time.time() is a wall clock that can be coarse or jump.
    fit_started = time.perf_counter()
    model.fit(X_train, y_train)
    train_time = time.perf_counter() - fit_started

    predict_started = time.perf_counter()
    predictions = model.predict(X_test)
    comp_time = time.perf_counter() - predict_started

    r2 = r2_score(y_test, predictions)
    mae = mean_absolute_error(y_test, predictions)
    mse = mean_squared_error(y_test, predictions)

    return r2, mae, mse, train_time, comp_time


# Registry of baseline regressors to compare, keyed by the display name used in the results.
# SGDRegressor and PassiveAggressiveRegressor shuffle the training data while fitting, so they
# are seeded with random_state=42 like the other stochastic estimators; without a seed their
# scores change from run to run.
models = {
    "Linear Regression": LinearRegression(),
    "Ridge Regression": Ridge(),
    "Lasso Regression": Lasso(),
    "ElasticNet Regression": ElasticNet(),
    "Bayesian Ridge Regression": BayesianRidge(),
    "ARD Regression": ARDRegression(),
    "SGD Regressor": SGDRegressor(random_state=42),
    "Passive Aggressive Regressor": PassiveAggressiveRegressor(random_state=42),
    "Support Vector Regression": SVR(),
    "MLP Regressor": MLPRegressor(hidden_layer_sizes=(100,), max_iter=10000, random_state=42),
    "Random Forest Regressor": RandomForestRegressor(n_estimators=100, random_state=42),
    "Gradient Boosting Regressor": GradientBoostingRegressor(random_state=42),
    "XGBoost Regressor": XGBRegressor(random_state=42),
    "AdaBoost Regressor": AdaBoostRegressor(random_state=42),
    "Bagging Regressor": BaggingRegressor(random_state=42),
    "ExtraTrees Regressor": ExtraTreesRegressor(random_state=42),
    "HistGradientBoosting Regressor": HistGradientBoostingRegressor(random_state=42),
    # Stacking: a linear model and a small forest feed a Ridge meta-learner
    "Stacking Regressor": StackingRegressor(estimators=[
        ('lr', LinearRegression()),
        ('rf', RandomForestRegressor(n_estimators=10, random_state=42))
    ], final_estimator=Ridge()),
    # Voting: combines the predictions of three differently-biased regressors
    "Voting Regressor": VotingRegressor(estimators=[
        ('lr', LinearRegression()),
        ('rf', RandomForestRegressor(n_estimators=10, random_state=42)),
        ('gb', GradientBoostingRegressor(random_state=42))
    ])
}


In [5]:
# Hyperparameters shared by the custom networks from the project's `models` module
input_dim = X_train.shape[1]
hidden_dim = 10
output_dim = 1
learning_rate = 0.0001
epochs = 5000
# Tree counts handed to FONN1 and FONN2 respectively
num_trees_input = 10
num_trees_hidden = 10
# Alternative settings previously noted next to these calls: learning_rate=0.01 with
# epochs=1000 (Custom MLP, FONN1) or epochs=40000 (TREENN1).

models["Custom MLP"] = MLP(input_dim, hidden_dim, output_dim,
                           learning_rate=learning_rate, epochs=epochs)

# Register each hybrid network immediately followed by the object held in its `.trees`
# attribute, so the evaluation loop also fits and scores that tree component on its own.
for network_key, trees_key, network in (
    ("FONN1", "Tree-based Predictions (FONN1)",
     FONN1(input_dim, hidden_dim, output_dim, num_trees_input,
           learning_rate=learning_rate, epochs=epochs)),
    ("TREENN1", "Tree-based Predictions (TREENN1)",
     TREENN1(input_dim, hidden_dim, output_dim,
             learning_rate=learning_rate, epochs=epochs)),
    ("FONN2", "Tree-based Predictions (FONN2)",
     FONN2(input_dim, hidden_dim, output_dim, num_trees_hidden,
           learning_rate=learning_rate, epochs=epochs)),
    ("TREENN2", "Tree-based Predictions (TREENN2)",
     TREENN2(input_dim, hidden_dim, output_dim,
             learning_rate=learning_rate, epochs=epochs)),
):
    models[network_key] = network
    models[trees_key] = network.trees

models["Ensemble of 10 Trees"] = Ensemble(10)

In [6]:
# Fit and score every registered model, collecting one record of metrics per model.
# The labels follow the order of the tuple returned by train_evaluate_model.
metric_labels = ("R² Score", "MAE", "MSE", "Train Time (s)", "Comp Time (s)")
results = {}
for name, model in models.items():
    print(name)  # progress marker: shows which model is currently being fitted
    scores = train_evaluate_model(model, X_train, X_test, y_train, y_test)
    results[name] = dict(zip(metric_labels, scores))

# Transpose so each model is a row and each metric a column
results_df = pd.DataFrame(results).T
results_df

Linear Regression
Ridge Regression
Lasso Regression
ElasticNet Regression
Bayesian Ridge Regression
ARD Regression
SGD Regressor
Passive Aggressive Regressor
Support Vector Regression
MLP Regressor
Random Forest Regressor


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  return fit_method(estimator, *args, **kwargs)


Gradient Boosting Regressor
XGBoost Regressor
AdaBoost Regressor
Bagging Regressor
ExtraTrees Regressor


  y = column_or_1d(y, warn=True)  # TODO: Is this still required?
  y = column_or_1d(y, warn=True)
  return column_or_1d(y, warn=True)
  return fit_method(estimator, *args, **kwargs)


HistGradientBoosting Regressor
Stacking Regressor
Voting Regressor
Custom MLP
Epoch 0, Loss: 2.332253297947431


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Epoch 200, Loss: 2.039656515571925
Epoch 400, Loss: 1.7695310984913744
Epoch 600, Loss: 1.5228322346979315
Epoch 800, Loss: 1.3011573019062648
Epoch 1000, Loss: 1.105607303388324
Epoch 1200, Loss: 0.9371089580624984
Epoch 1400, Loss: 0.7972627418352238
Epoch 1600, Loss: 0.6868182764293276
Epoch 1800, Loss: 0.6059237012570642
Epoch 2000, Loss: 0.5553057052935357
Epoch 2200, Loss: 0.5267683409072498
Epoch 2400, Loss: 0.506213596716881
Epoch 2600, Loss: 0.4891975572070979
Epoch 2800, Loss: 0.47411277321401774
Epoch 3000, Loss: 0.4606441987814585
Epoch 3200, Loss: 0.4485326725880083
Epoch 3400, Loss: 0.4375890861100146
Epoch 3600, Loss: 0.42770240116006425
Epoch 3800, Loss: 0.4187450539265366
Epoch 4000, Loss: 0.4104768670938656
Epoch 4200, Loss: 0.4027012377637639
Epoch 4400, Loss: 0.3953460406795414
Epoch 4600, Loss: 0.3883465830002699
Epoch 4800, Loss: 0.3816997035417579
FONN1
Epoch 0, Loss: 4.101260590626675
Epoch 200, Loss: 2.905295631524287
Epoch 400, Loss: 2.126013431444516
Epoch 60

Unnamed: 0,R² Score,MAE,MSE,Train Time (s),Comp Time (s)
Linear Regression,0.529052,0.377791,0.213412,0.006936,0.000164
Ridge Regression,0.533819,0.375965,0.211252,0.000772,9e-05
Lasso Regression,-0.080442,0.614635,0.489608,0.000456,6.7e-05
ElasticNet Regression,0.407711,0.434701,0.268399,0.000369,6.2e-05
Bayesian Ridge Regression,0.536318,0.374995,0.21012,0.001589,8.7e-05
ARD Regression,0.53722,0.37564,0.209711,0.001158,6.9e-05
SGD Regressor,0.558037,0.366831,0.200278,0.000514,8.1e-05
Passive Aggressive Regressor,-0.838104,0.737453,0.832947,0.00038,5.4e-05
Support Vector Regression,0.645601,0.304216,0.160598,0.002666,0.000538
MLP Regressor,0.555846,0.327371,0.201271,0.180774,0.00015


In [7]:
# Get and print tree importances
# tree_importances = models["FONN2"].trees.get_tree_importances()