In [10]:
from engine import LassoRegressor, HoltWintersAtomic, HoltWintersPooled
from utils import generate_synthetic_data
import joblib

In [13]:
# Build one synthetic dataset per model family; each config dict is handed to
# `generate_synthetic_data` as-is. The two time-series frames get their index
# moved back into a regular column via `reset_index()`.
# NOTE(review): the recorded outputs further down show only model ids 0 and 1
# for the pooled model while this cell requests n_models=7 -- the outputs look
# stale; confirm with Restart Kernel -> Run All.
lasso_df = generate_synthetic_data(
    model_name="lasso",
    synthetic_data_config={"m": 100, "n": 20, "sigma": 5, "density": 0.2},
)
atomic_df = generate_synthetic_data(
    model_name="atomic",
    synthetic_data_config={"n": 20, "time_period": 750},
).reset_index()
pooled_df = generate_synthetic_data(
    model_name="pooled",
    synthetic_data_config={"n_models": 7, "n": 20, "time_period": 750},
).reset_index()

# Export every frame as Parquet first ...
for _frame, _path in (
    (lasso_df, "lasso_df.parquet"),
    (atomic_df, "atomic_df.parquet"),
    (pooled_df, "pooled_df.parquet"),
):
    _frame.to_parquet(_path, index=False)

# ... and then as CSV, in the same order and without the index column.
for _frame, _path in (
    (lasso_df, "lasso_df.csv"),
    (atomic_df, "atomic_df.csv"),
    (pooled_df, "pooled_df.csv"),
):
    _frame.to_csv(_path, index=False)

2023-03-15 20:17:55,397 : MainThread : INFO : epoch: 0
2023-03-15 20:17:55,514 : MainThread : INFO : epoch: 1
2023-03-15 20:17:55,581 : MainThread : INFO : epoch: 2
2023-03-15 20:17:55,641 : MainThread : INFO : epoch: 3
2023-03-15 20:17:55,700 : MainThread : INFO : epoch: 4
2023-03-15 20:17:55,759 : MainThread : INFO : epoch: 5
2023-03-15 20:17:55,817 : MainThread : INFO : epoch: 6
2023-03-15 20:17:55,876 : MainThread : INFO : epoch: 7
2023-03-15 20:17:55,930 : MainThread : INFO : epoch: 8
2023-03-15 20:17:55,984 : MainThread : INFO : epoch: 9
2023-03-15 20:17:56,639 : MainThread : INFO : epoch: 0
2023-03-15 20:17:56,746 : MainThread : INFO : epoch: 1
2023-03-15 20:17:56,799 : MainThread : INFO : epoch: 2
2023-03-15 20:17:56,854 : MainThread : INFO : epoch: 3
2023-03-15 20:17:56,911 : MainThread : INFO : epoch: 4
2023-03-15 20:17:56,964 : MainThread : INFO : epoch: 5
2023-03-15 20:17:57,021 : MainThread : INFO : epoch: 6
2023-03-15 20:17:57,090 : MainThread : INFO : epoch: 7
2023-03-15

In [3]:
# Non-negative Lasso Regression

In [4]:
# Fit the Lasso regressor on every column except the target "y", then store the
# in-sample predictions back on the frame as "y_hat".
#
# The feature frame also excludes any existing "y_hat": this cell writes that
# column into `lasso_df`, so without the extra drop a second run of the cell
# would train and predict on the previous predictions (leakage, and a
# different feature count than the first run). `errors="ignore"` keeps the
# first run identical to the original behaviour, when "y_hat" does not exist yet.
lasso_features = lasso_df.drop(columns=["y", "y_hat"], errors="ignore")

lasso_model = LassoRegressor(alpha=1)
lasso_model.fit(lasso_features, lasso_df["y"])
lasso_df["y_hat"] = lasso_model.predict(lasso_features)



In [5]:
# Atomic Holt-Winters

In [6]:
# Configure a single Holt-Winters model with an additive trend, reading the
# timestamp from "date" (frequency code "D") and the target from "yt".
atomic_model = HoltWintersAtomic(
    trend="add",
    date_col="date",
    date_freq="D",
    target_col="yt",
)

# Fit on the (date, target) columns and predict over the same rows.
atomic_model.fit(atomic_df[["date", "yt"]])
predicted_df = atomic_model.predict(atomic_df[["date", "yt"]])

# Make "date" the index, join the predictions onto it, and display the actual
# and predicted columns next to each other.
atomic_df.set_index("date").join(predicted_df)[["yt", "yt_hat"]]

Unnamed: 0_level_0,yt,yt_hat
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-01-01,0.918352,0.514614
2022-01-02,0.561445,0.514585
2022-01-03,0.507681,0.514556
2022-01-04,0.252311,0.514527
2022-01-05,0.822078,0.514498
...,...,...
2024-01-16,0.529256,0.493039
2024-01-17,0.084518,0.493010
2024-01-18,0.634704,0.492981
2024-01-19,0.143717,0.492952


In [7]:
# Pooled Holt-Winters

In [8]:
# Configure the pooled Holt-Winters wrapper: same settings as the atomic model
# plus the column that identifies which series a row belongs to.
holt_winters_pooled_model = HoltWintersPooled(
    trend="add",
    date_col="date",
    date_freq="D",
    model_id_col="model_id",
    target_col="yt",
)

# The target travels inside the frame, so `y` is passed explicitly as None.
holt_winters_pooled_model.fit(pooled_df[["date", "yt", "model_id"]], y=None)
predicted_df = holt_winters_pooled_model.predict(pooled_df[["date", "yt", "model_id"]])

# Left-merge the predictions back onto the inputs by (date, model_id) and
# display the result.
pooled_df[["date", "yt", "model_id"]].merge(
    predicted_df,
    how="left",
    on=["date", "model_id"],
)

Unnamed: 0,date,yt,model_id,yt_hat
0,2022-01-01,0.233470,0,0.500518
1,2022-01-02,0.588021,0,0.500496
2,2022-01-03,0.924158,0,0.500475
3,2022-01-04,0.142893,0,0.500453
4,2022-01-05,0.308549,0,0.500432
...,...,...,...,...
1495,2024-01-16,0.343628,1,0.484244
1496,2024-01-17,0.283204,1,0.484193
1497,2024-01-18,0.502999,1,0.484142
1498,2024-01-19,0.844140,1,0.484092


In [9]:
# Display the `models` attribute of the pooled wrapper. In the recorded output
# this is a dict whose values are HoltWintersAtomic instances; the integer keys
# appear to be the model_id values -- confirm against the engine module.
holt_winters_pooled_model.models
# Related reading (Databricks blog) on managing model ensembles with MLflow:
# https://www.databricks.com/blog/2021/09/21/managing-model-ensembles-with-mlflow.html

{0: HoltWintersAtomic(trend='add'), 1: HoltWintersAtomic(trend='add')}

In [None]:
# Saving all the models using joblib

In [10]:
# Write each model object to its own pickle file (relative paths).
for _model, _path in (
    (lasso_model, "lasso_model.pkl"),
    (atomic_model, "atomic_model.pkl"),
):
    joblib.dump(_model, _path)

# Kept as the cell's final expression so the list of written file names
# returned by joblib.dump is still shown as the cell output.
joblib.dump(holt_winters_pooled_model, "holt_winters_pooled_model.pkl")

['holt_winters_pooled_model.pkl']