In [46]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

from pycaret.regression import *

In [47]:
# Directory holding the imputed datasets, and the imputation variant to load.
root = os.path.join('..', 'data', 'imputed')
folder = 'mice'

# The file is named after the variant: <root>/<folder>_df.csv
imputed_csv_path = os.path.join(root, f'{folder}_df.csv')
df = pd.read_csv(imputed_csv_path)

In [48]:
# Convert the 'date' column from its CSV representation to pandas datetimes.
df = df.assign(date=pd.to_datetime(df['date']))

In [49]:
# Columns kept for modelling: the two calendar fields plus the zone_a_mwr_* measurements.
zone_a_columns = [
    'day',
    'date',
    'zone_a_mwr_patients',
    'zone_a_mwr_cat_3',
    'zone_a_mwr_cat_4',
    'zone_a_mwr_sets_of_emews',
    'zone_a_mwr_deescalations',
    'zone_a_mwr_escalations',
]
df = df[zone_a_columns]

In [50]:
# Display the column labels currently held by `df` as a sanity check.
df.columns

Index(['day', 'date', 'zone_a_mwr_patients', 'zone_a_mwr_cat_3',
       'zone_a_mwr_cat_4', 'zone_a_mwr_sets_of_emews',
       'zone_a_mwr_deescalations', 'zone_a_mwr_escalations'],
      dtype='object')

## Load Model (optional)

In [51]:
# pipeline_path = '../pipelines/base_models/'
# if not os.path.exists(pipeline_path):
#     print("Path to pipeline does not exist")
# else:
#     model_names_to_load = ['br', 'lasso', 'llar', 'en', 'huber']

#     created_models = {}

#     for name in model_names_to_load:
#         created_models[name] = load_model(os.path.join(pipeline_path, name)).named_steps['trained_model']
    
#     print(created_models)

## Pycaret Setup

In [52]:
# Initialise the PyCaret regression experiment on `df`.
s = setup(
    df,
    target='zone_a_mwr_sets_of_emews',  # column to predict
    train_size=0.8,                     # 80% train / 20% hold-out
    session_id=123,                     # fixed session id so reruns are comparable
)

Unnamed: 0,Description,Value
0,Session id,123
1,Target,zone_a_mwr_sets_of_emews
2,Target type,Regression
3,Original data shape,"(599, 8)"
4,Transformed data shape,"(599, 16)"
5,Transformed train set shape,"(479, 16)"
6,Transformed test set shape,"(120, 16)"
7,Numeric features,5
8,Date features,1
9,Categorical features,1


In [53]:
# Estimators left out of the comparison run.
excluded_models = ['lightgbm']

# errors='raise' is passed so that a failing estimator is reported rather than skipped
# (per the PyCaret docs for this argument).
compare_models(exclude=excluded_models, errors='raise')

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
en,Elastic Net,11.4296,295.5266,16.8348,0.5566,0.3593,0.1563,0.017
lasso,Lasso Regression,11.4418,297.8158,16.8968,0.5534,0.3588,0.1565,0.016
llar,Lasso Least Angle Regression,11.4421,297.8347,16.8973,0.5534,0.3588,0.1565,0.015
huber,Huber Regressor,11.4794,299.8378,16.973,0.5489,0.3199,0.1568,0.019
br,Bayesian Ridge,11.5602,300.9829,17.011,0.5483,0.3635,0.1581,0.015
omp,Orthogonal Matching Pursuit,11.6805,308.1476,17.2126,0.5409,0.3575,0.1587,0.015
ridge,Ridge Regression,11.8303,310.8433,17.3414,0.5315,0.3723,0.1615,0.015
lar,Least Angle Regression,11.8385,311.1054,17.3498,0.531,0.3725,0.1616,0.015
lr,Linear Regression,11.8385,311.1054,17.3498,0.531,0.3725,0.1616,0.02
catboost,CatBoost Regressor,11.5134,329.5835,17.735,0.5177,0.2972,0.161,0.297


## Custom Metrics

In [54]:
from sklearn.metrics import r2_score, mean_squared_error

def r2_rounded(y_true, y_pred):
    """Return the R2 score of the predictions after applying ``np.round``.

    Args:
        y_true: Observed target values.
        y_pred: Model predictions; rounded with ``np.round`` before scoring.

    Returns:
        The value of ``r2_score(y_true, rounded predictions)``.
    """
    whole_number_pred = np.round(y_pred)
    return r2_score(y_true, whole_number_pred)

def rmse_rounded(y_true, y_pred):
    """Return the RMSE of the predictions after applying ``np.round``.

    Args:
        y_true: Observed target values.
        y_pred: Model predictions; rounded with ``np.round`` before scoring.

    Returns:
        The square root of ``mean_squared_error(y_true, rounded predictions)``.
    """
    whole_number_pred = np.round(y_pred)
    mse = mean_squared_error(y_true, whole_number_pred)
    return np.sqrt(mse)

def r2_ceil(y_true, y_pred):
    """Return the R2 score of the predictions after applying ``np.ceil``.

    Args:
        y_true: Observed target values.
        y_pred: Model predictions; rounded up with ``np.ceil`` before scoring.

    Returns:
        The value of ``r2_score(y_true, ceiled predictions)``.
    """
    ceiled_pred = np.ceil(y_pred)
    return r2_score(y_true, ceiled_pred)

def rmse_ceil(y_true, y_pred):
    """Return the RMSE of the predictions after applying ``np.ceil``.

    Args:
        y_true: Observed target values.
        y_pred: Model predictions; rounded up with ``np.ceil`` before scoring.

    Returns:
        The square root of ``mean_squared_error(y_true, ceiled predictions)``.
    """
    ceiled_pred = np.ceil(y_pred)
    mse = mean_squared_error(y_true, ceiled_pred)
    return np.sqrt(mse)

In [55]:
# Register the rounded / ceiled variants of R2 and RMSE with the active experiment.
# Each entry is (metric id, display name, scoring function, greater_is_better).
#
# NOTE(review): the cross-validation tables printed by tune_model in this notebook
# show 0.0 / -0.0 for all four of these metrics on every fold, while the hold-out
# table reports plausible values. The scorers may be failing silently during CV;
# confirm before relying on the CV columns.
custom_metrics = [
    ('R2_Rounded', 'R2_RND', r2_rounded, True),
    ('RMSE_Rounded', 'RMSE_RND', rmse_rounded, False),
    ('R2_Ceil', 'R2_CEIL', r2_ceil, True),
    ('RMSE_Ceil', 'RMSE_CEIL', rmse_ceil, False),
]

for metric_id, metric_label, metric_func, higher_is_better in custom_metrics:
    add_metric(metric_id, metric_label, metric_func, greater_is_better=higher_is_better)

## Analyze Model

In [56]:
# PyCaret model ids to train with default hyperparameters.
model_names_to_create = ['br', 'lasso', 'llar', 'en', 'huber']

# Map each model id to its trained estimator; verbose=False keeps the cell output quiet.
created_models = {
    name: create_model(name, verbose=False)
    for name in model_names_to_create
}

### Bayesian Ridge

In [57]:
# Open PyCaret's interactive evaluation view for the Bayesian Ridge model.
bayesian_ridge_model = created_models['br']
evaluate_model(bayesian_ridge_model)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Pipeline Plot', 'pipelin…

### Lasso Regression

In [58]:
# evaluate_model(created_models['lasso'])

### Lasso Least Angle Regression

In [59]:
# evaluate_model(created_models['llar'])

### Elastic Net

In [60]:
# evaluate_model(created_models['en'])

### Huber Regressor

In [61]:
# evaluate_model(created_models['huber'])

## Prediction

In [62]:
# Metrics table for each base model, keyed by model id.
holdout_predictions_metric = {}

for model_name in created_models:
    model_object = created_models[model_name]
    # Score the model (no data passed, so PyCaret's default prediction set is used);
    # pull() then retrieves the metrics table produced by that call.
    predict_model(model_object, verbose=False)
    holdout_predictions_metric[model_name] = pull()

In [63]:
# pipeline_path = '../pipelines/base_models/'
# os.makedirs(pipeline_path, exist_ok=True)

# for model_name, model_object in created_models.items():
#     save_model(model_object, os.path.join(pipeline_path, model_name));

## Tuning

In [64]:
# param_grids = {
#     'br': {
#         'max_iter': np.arange(50, 301, 50),
#         'alpha_1': np.logspace(-8, -3, 6),
#         'lambda_1': np.logspace(-8, -3, 6),
#         'alpha_2': np.logspace(-8, -3, 6),
#         'lambda_2': np.logspace(-8, -3, 6)
#     },
#     'lasso': {
#         'alpha': np.logspace(-4, 1, 6),
#         'selection': ['cyclic', 'random']
#     },
#     'llar': {
#         'alpha': np.logspace(-4, 1, 6),
#         'jitter': list(np.logspace(-7, -4, 4)) + [None]
#     },
#     'en': {
#         'alpha': np.logspace(-4, 1, 6),
#         'l1_ratio': np.linspace(0, 1, 9),
#         'max_iter': np.arange(100, 1001, 100),
#         'selection': ['cyclic', 'random']
#     },
#     'huber': {
#         'epsilon': [1.0, 1.1, 1.2, 1.25, 1.3, 1.35, 1.4, 1.5, 2.0],
#         'max_iter': np.arange(10, 101, 10),
#         'alpha': np.logspace(-6, -2, 5)
#     }
# }

In [65]:
# Tuned estimator for each base model, keyed by model id.
tuned_models = {}

for model_name, model_object in created_models.items():
    print(f"Tuning model: {model_name}")
    tuned_models[model_name] = tune_model(
        model_object,
        search_library='scikit-optimize',
        n_iter=20,
        # NOTE(review): PyCaret's docs describe early_stopping as ignored for some
        # search libraries; confirm it has any effect with scikit-optimize.
        early_stopping=True,
        # Silence the tuner's per-iteration "Fitting 10 folds for each of 1 candidates"
        # log, which otherwise floods (and truncates) the cell output. The CV score
        # table for each model is still displayed.
        tuner_verbose=False,
    )
    print("\r")

Tuning model: br


Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE,R2_RND,RMSE_RND,R2_CEIL,RMSE_CEIL
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,10.5224,214.8553,14.6579,0.7133,0.5811,0.1748,0.0,-0.0,0.0,-0.0
1,11.7092,297.121,17.2372,0.6466,0.2764,0.1929,0.0,-0.0,0.0,-0.0
2,15.3663,569.0316,23.8544,0.4197,0.4858,0.1958,0.0,-0.0,0.0,-0.0
3,10.0138,199.0476,14.1084,0.6713,0.4018,0.1096,0.0,-0.0,0.0,-0.0
4,12.586,327.053,18.0846,0.6777,0.2038,0.1518,0.0,-0.0,0.0,-0.0
5,12.4921,291.182,17.0641,0.2597,0.1989,0.1539,0.0,-0.0,0.0,-0.0
6,12.1715,282.6883,16.8133,0.6379,0.4293,0.1573,0.0,-0.0,0.0,-0.0
7,7.2558,112.8456,10.6229,0.699,0.3808,0.0932,0.0,-0.0,0.0,-0.0
8,11.7842,437.2937,20.9116,0.1884,0.2635,0.1828,0.0,-0.0,0.0,-0.0
9,11.4157,266.5891,16.3276,0.5922,0.4045,0.164,0.0,-0.0,0.0,-0.0


Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for eac

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE,R2_RND,RMSE_RND,R2_CEIL,RMSE_CEIL
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,9.9525,193.9179,13.9254,0.7412,0.5643,0.1691,0.0,-0.0,0.0,-0.0
1,11.7547,300.8501,17.345,0.6421,0.2741,0.19,0.0,-0.0,0.0,-0.0
2,15.5538,585.5116,24.1973,0.4029,0.4823,0.1965,0.0,-0.0,0.0,-0.0
3,9.9466,198.7801,14.0989,0.6717,0.4011,0.1085,0.0,-0.0,0.0,-0.0
4,12.6425,316.7639,17.7979,0.6878,0.2044,0.1533,0.0,-0.0,0.0,-0.0
5,12.4288,290.2013,17.0353,0.2622,0.1982,0.1531,0.0,-0.0,0.0,-0.0
6,12.1448,284.7478,16.8745,0.6353,0.4293,0.1572,0.0,-0.0,0.0,-0.0
7,7.1784,112.8317,10.6222,0.699,0.3738,0.0921,0.0,-0.0,0.0,-0.0
8,11.399,427.5585,20.6775,0.2065,0.2608,0.1776,0.0,-0.0,0.0,-0.0
9,11.3353,262.7252,16.2088,0.5981,0.3996,0.1636,0.0,-0.0,0.0,-0.0


Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for eac

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE,R2_RND,RMSE_RND,R2_CEIL,RMSE_CEIL
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,10.1941,201.6088,14.1989,0.7309,0.5691,0.1715,0.0,-0.0,0.0,-0.0
1,11.7353,299.8234,17.3154,0.6434,0.2756,0.1919,0.0,-0.0,0.0,-0.0
2,15.4446,576.4397,24.0092,0.4121,0.4826,0.1963,0.0,-0.0,0.0,-0.0
3,9.8524,194.4009,13.9428,0.6789,0.3999,0.1078,0.0,-0.0,0.0,-0.0
4,12.4767,311.1516,17.6395,0.6933,0.2023,0.1512,0.0,-0.0,0.0,-0.0
5,12.4279,291.3689,17.0695,0.2592,0.1983,0.1528,0.0,-0.0,0.0,-0.0
6,12.1816,288.5449,16.9866,0.6304,0.429,0.1577,0.0,-0.0,0.0,-0.0
7,7.1554,111.1567,10.5431,0.7035,0.3707,0.0917,0.0,-0.0,0.0,-0.0
8,11.4554,432.6876,20.8011,0.1969,0.2625,0.1792,0.0,-0.0,0.0,-0.0
9,11.4972,271.1647,16.4671,0.5852,0.3983,0.1651,0.0,-0.0,0.0,-0.0


Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for eac

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE,R2_RND,RMSE_RND,R2_CEIL,RMSE_CEIL
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,10.1199,198.2761,14.0811,0.7354,0.5717,0.171,0.0,-0.0,0.0,-0.0
1,11.7186,298.4728,17.2764,0.645,0.2735,0.19,0.0,-0.0,0.0,-0.0
2,15.4287,571.5592,23.9073,0.4171,0.484,0.1961,0.0,-0.0,0.0,-0.0
3,9.9852,198.2408,14.0798,0.6726,0.4032,0.1092,0.0,-0.0,0.0,-0.0
4,12.571,319.4824,17.8741,0.6851,0.2041,0.1524,0.0,-0.0,0.0,-0.0
5,12.4486,290.7762,17.0522,0.2607,0.1987,0.1534,0.0,-0.0,0.0,-0.0
6,12.1549,281.7981,16.7868,0.6391,0.4299,0.1574,0.0,-0.0,0.0,-0.0
7,7.2145,113.0373,10.6319,0.6985,0.3755,0.0926,0.0,-0.0,0.0,-0.0
8,11.5422,429.0484,20.7135,0.2037,0.2609,0.179,0.0,-0.0,0.0,-0.0
9,11.1822,256.3148,16.0098,0.6079,0.3983,0.1615,0.0,-0.0,0.0,-0.0


Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for eac

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE,R2_RND,RMSE_RND,R2_CEIL,RMSE_CEIL
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,9.9036,193.8363,13.9225,0.7413,0.515,0.1638,0.0,-0.0,0.0,-0.0
1,12.285,306.962,17.5203,0.6349,0.2902,0.2126,0.0,-0.0,0.0,-0.0
2,14.6654,508.5266,22.5505,0.4814,0.4492,0.1894,0.0,-0.0,0.0,-0.0
3,10.3932,210.6811,14.5149,0.652,0.3493,0.115,0.0,-0.0,0.0,-0.0
4,12.3938,296.5953,17.2219,0.7077,0.2067,0.1532,0.0,-0.0,0.0,-0.0
5,12.5739,300.5407,17.3361,0.2359,0.1995,0.1534,0.0,-0.0,0.0,-0.0
6,11.9817,277.3241,16.6531,0.6448,0.377,0.1558,0.0,-0.0,0.0,-0.0
7,7.0117,102.5824,10.1283,0.7264,0.3163,0.09,0.0,-0.0,0.0,-0.0
8,11.2686,409.0657,20.2254,0.2408,0.2565,0.1757,0.0,-0.0,0.0,-0.0
9,11.6391,286.1125,16.9149,0.5623,0.351,0.1648,0.0,-0.0,0.0,-0.0


Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for eac

In [66]:
# pipeline_path = '../pipelines/tuned_models/'
# if not os.path.exists(pipeline_path):
#     print("Path to pipeline does not exist")
# else:
#     model_names_to_load = ['br', 'lasso', 'llar', 'en', 'huber']

#     tuned_models = {}

#     for name in model_names_to_load:
#         tuned_models[name] = load_model(os.path.join(pipeline_path, name)).named_steps['trained_model']
    
#     print(tuned_models)

In [67]:
# pipeline_path = '../pipelines/tuned_models/'
# os.makedirs(pipeline_path, exist_ok=True)

# for model_name, model_object in tuned_models.items():
#     save_model(model_object, os.path.join(pipeline_path, model_name));

In [68]:
# Metrics table for each tuned model, keyed by model id.
tuning_predictions_metric = {}

for model_name in tuned_models:
    model_object = tuned_models[model_name]
    # Score the tuned model, then retrieve the metrics table produced by that call.
    predict_model(model_object, verbose=False)
    tuning_predictions_metric[model_name] = pull()

In [69]:
# Workbook that receives one sheet of metrics for the base models and one for the tuned models.
file_name = 'temp_excel.xlsx'

# Stack the per-model metric frames into a single table for the base models.
print("--- Holdout Predictions DataFrame ---", end='\n\n')
holdout_results_df = pd.concat(list(holdout_predictions_metric.values()), ignore_index=True)
print(holdout_results_df.to_string())

# Same for the tuned models.
print("\n--- Tuned Predictions DataFrame ---", end='\n\n')
tuned_results_df = pd.concat(list(tuning_predictions_metric.values()), ignore_index=True)
print(tuned_results_df.to_string())

# Write both sheets through one writer opened in the default write mode.
# Re-opening the file with mode='a' only works with engines that support appending,
# and fails when pandas selects xlsxwriter for .xlsx files.
with pd.ExcelWriter(file_name) as writer:
    holdout_results_df.to_excel(writer, sheet_name='Base Model Metrics', index=False)
    tuned_results_df.to_excel(writer, sheet_name='Tuned Model Metrics', index=False)

# Kept for compatibility: this name previously ended the cell bound to the tuned-model table.
results_df = tuned_results_df

--- Holdout Predictions DataFrame ---

                          Model      MAE       MSE     RMSE      R2   RMSLE    MAPE  R2_RND  RMSE_RND  R2_CEIL  RMSE_CEIL
0                Bayesian Ridge  10.2324  222.7322  14.9242  0.7350  0.3492  0.1247  0.7352   14.9189   0.7372    14.8621
1              Lasso Regression  10.1560  222.6730  14.9222  0.7351  0.3455  0.1237  0.7348   14.9315   0.7388    14.8158
2  Lasso Least Angle Regression  10.1556  222.6626  14.9219  0.7351  0.3454  0.1237  0.7348   14.9315   0.7388    14.8158
3                   Elastic Net  10.1909  222.9116  14.9302  0.7348  0.3456  0.1241  0.7350   14.9234   0.7362    14.8896
4               Huber Regressor   9.8681  215.9528  14.6953  0.7431  0.2657  0.1211  0.7418   14.7312   0.7462    14.6065

--- Tuned Predictions DataFrame ---

                          Model      MAE       MSE     RMSE      R2   RMSLE    MAPE  R2_RND  RMSE_RND  R2_CEIL  RMSE_CEIL
0                Bayesian Ridge  10.2210  222.7694  14.9255  0.7350  