# <font color="red">**4. NO FS - High-freq Forecast**</font>

**Author:** Osmar Bolivar

In [2]:
#%pip install kaleido

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
import statsmodels.api as sm
import plotly.graph_objects as go
import plotly.express as px

In [4]:
# List the matplotlib style sheets available in this environment.
print(plt.style.available)

['Solarize_Light2', '_classic_test_patch', '_mpl-gallery', '_mpl-gallery-nogrid', 'bmh', 'classic', 'dark_background', 'fast', 'fivethirtyeight', 'ggplot', 'grayscale', 'petroff10', 'seaborn-v0_8', 'seaborn-v0_8-bright', 'seaborn-v0_8-colorblind', 'seaborn-v0_8-dark', 'seaborn-v0_8-dark-palette', 'seaborn-v0_8-darkgrid', 'seaborn-v0_8-deep', 'seaborn-v0_8-muted', 'seaborn-v0_8-notebook', 'seaborn-v0_8-paper', 'seaborn-v0_8-pastel', 'seaborn-v0_8-poster', 'seaborn-v0_8-talk', 'seaborn-v0_8-ticks', 'seaborn-v0_8-white', 'seaborn-v0_8-whitegrid', 'tableau-colorblind10']


## **1. Data**

In [36]:
# Daily panel: later used only to generate high-frequency predictions.
df_d = pd.read_csv('./IPC_forecast/daily_test_set.csv', index_col=0)

# Monthly panel for training/validation; rows with any missing value are discarded.
df = pd.read_csv('./IPC_forecast/monthly_train_val_sets.csv', index_col=0).dropna(axis=0)

In [37]:
# Per-column count of missing values in the monthly set; show only columns that have any.
isna = df.isna().sum()
isna.loc[isna.gt(0)]

Series([], dtype: int64)

In [38]:
# Per-column count of missing values in the daily set; show only columns that have any.
isna_d = df_d.isna().sum()
isna_d.loc[isna_d.gt(0)]

ipc_all    11
dtype: int64

In [39]:
# (rows, columns) of the monthly and the daily sets, in that order.
(df.shape, df_d.shape)

((183, 132), (5580, 132))

## **2. Train and Val sets**

In [40]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [41]:
# Hold out 20% of the monthly observations for validation (seeded for reproducibility).
# NOTE(review): train_test_split shuffles rows by default, so validation months are
# interleaved with training months. The feature set includes columns such as lag_1
# (presumably lags of the target), so this may leak temporal information into the
# validation scores — confirm that a chronological split is not required.
train, validation = train_test_split(df, test_size=0.2, random_state=5)

# Report the size of each split.
print(f'Obs in train set: {train.shape[0]}; variables in train set: {train.shape[1]}')
print(f'Obs in validation set: {validation.shape[0]}; variables in validation set: {validation.shape[1]}')

Obs in train set: 146; variables in train set: 132
Obs in validation set: 37; variables in validation set: 132


In [42]:
# Work on a copy of the daily set so the frame loaded from disk is left untouched.
test_d = df_d.copy()
print(f'Obs in daily test set: {test_d.shape[0]}; variables in daily test set: {test_d.shape[1]}')

Obs in daily test set: 5580; variables in daily test set: 132


In [43]:
# Standardise every column with statistics estimated on the training set only;
# the same fitted scaler is then applied to the validation and daily sets.
scaler = StandardScaler()


def _as_frame(values, like):
    """Wrap a scaled array in a DataFrame that reuses ``like``'s columns and index."""
    return pd.DataFrame(values, columns=like.columns, index=like.index)


train_scaled = _as_frame(scaler.fit_transform(train), train)
validation_scaled = _as_frame(scaler.transform(validation), validation)
test_scaled_d = _as_frame(scaler.transform(test_d), test_d)

# Features are taken from the scaled frames; the target is read from the
# unscaled frames, so it stays in its original units.
X_train, y_train = train_scaled.drop('ipc_all', axis=1), train['ipc_all']
X_validation, y_validation = validation_scaled.drop('ipc_all', axis=1), validation['ipc_all']

# Only the features of the daily set are needed for prediction.
X_test_d = test_scaled_d.drop('ipc_all', axis=1)

## **3. Algorithms**

In [45]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import TimeSeriesSplit
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import ExtraTreesRegressor

### **3.1. Ridge**   

**Without tuning (Validation):**
MSE:  2.209; R2:  0.001; MAE:  1.256

In [46]:
# Ridge regression with default hyperparameters; an alternative configuration is
# kept below (commented out) for reference.
ridge = Ridge(random_state=0)
#ridge = Ridge(alpha=5.3, fit_intercept=True, positive=False, random_state=0)
ridge.fit(X_train, y_train)

# Predictions of the fitted model on the train and validation sets.
ridge_train_pred = ridge.predict(X_train)
ridge_val_pred = ridge.predict(X_validation)

# Forecast metrics on the train set (MSE, R2, MAE).
mse_train_ridge, r2_train_ridge, mae_train_ridge = (
    metric(y_train, ridge_train_pred)
    for metric in (mean_squared_error, r2_score, mean_absolute_error)
)
print("Train MSE: ", mse_train_ridge)
print("Train R2: ", r2_train_ridge)
print("Train MAE: ", mae_train_ridge)

# Forecast metrics on the validation set (MSE, R2, MAE).
mse_val_ridge, r2_val_ridge, mae_val_ridge = (
    metric(y_validation, ridge_val_pred)
    for metric in (mean_squared_error, r2_score, mean_absolute_error)
)
print("Validation MSE: ", mse_val_ridge)
print("Validation R2: ", r2_val_ridge)
print("Validation MAE: ", mae_val_ridge)

# Daily (high-frequency) predictions from the same fitted model.
ridge_test_pred_d = ridge.predict(X_test_d)

Train MSE:  0.03130102948436744
Train R2:  0.9963325409776926
Train MAE:  0.13897595701680582
Validation MSE:  0.1541543986512958
Validation R2:  0.974073594418155
Validation MAE:  0.3069370291931472


In [47]:
# Rank features by the magnitude of their Ridge coefficient. The inputs are
# standardised, so coefficient sizes are comparable across features. Sorting the
# signed values directly would push strongly negative coefficients to the bottom,
# below features with no effect; the signed value is still stored in `imp_ridge`
# so the direction of each effect remains visible.
coef = ridge.coef_
feature_importance_ridge = (
    pd.DataFrame({'feat_ridge': X_train.columns, 'imp_ridge': coef})
    .sort_values('imp_ridge', ascending=False, key=lambda col: col.abs())
    .reset_index(drop=True)
)
feature_importance_ridge.head(15)

Unnamed: 0,feat_ridge,imp_ridge
0,lag_1,0.690384
1,flour_lp,0.462612
2,oil_cb,0.420036
3,sugar_cb,0.399558
4,lag_6,0.347586
5,redpepper_lp,0.2616
6,beef_lp,0.26107
7,lead,0.254605
8,exchange,0.231147
9,papa2_lp,0.215394


### **3.2. Lasso**
Without tuning:
Validation MSE:  0.796
Validation R2:  0.640
Validation MAE:  0.690

In [48]:
# Lasso with alpha=0.1 and max_iter=10000; the default configuration is kept
# below (commented out) for reference.
#lasso = Lasso(random_state=0)
lasso = Lasso(alpha=0.1, fit_intercept=True, max_iter=10000, random_state=0)
lasso.fit(X_train, y_train)

# Predictions of the fitted model on the train and validation sets.
lasso_train_pred = lasso.predict(X_train)
lasso_val_pred = lasso.predict(X_validation)

# Forecast metrics on the train set (MSE, R2, MAE).
mse_train_lasso, r2_train_lasso, mae_train_lasso = (
    metric(y_train, lasso_train_pred)
    for metric in (mean_squared_error, r2_score, mean_absolute_error)
)
print("Train MSE: ", mse_train_lasso)
print("Train R2: ", r2_train_lasso)
print("Train MAE: ", mae_train_lasso)

# Forecast metrics on the validation set (MSE, R2, MAE).
mse_val_lasso, r2_val_lasso, mae_val_lasso = (
    metric(y_validation, lasso_val_pred)
    for metric in (mean_squared_error, r2_score, mean_absolute_error)
)
print("Validation MSE: ", mse_val_lasso)
print("Validation R2: ", r2_val_lasso)
print("Validation MAE: ", mae_val_lasso)

# Daily (high-frequency) predictions from the same fitted model.
lasso_test_pred_d = lasso.predict(X_test_d)

Train MSE:  0.24778063879353743
Train R2:  0.970968196437428
Train MAE:  0.38697358210123917
Validation MSE:  0.4114674836516373
Validation R2:  0.9307974799407137
Validation MAE:  0.4848716371363052


In [49]:
# Rank features by the magnitude of their Lasso coefficient. The inputs are
# standardised, so coefficient sizes are comparable across features. Sorting the
# signed values directly would rank features with negative coefficients below the
# ones Lasso shrank to exactly zero; the signed value is still stored in
# `imp_lasso` so the direction of each effect remains visible.
coef = lasso.coef_
feature_importance_lasso = (
    pd.DataFrame({'feat_lasso': X_train.columns, 'imp_lasso': coef})
    .sort_values('imp_lasso', ascending=False, key=lambda col: col.abs())
    .reset_index(drop=True)
)
feature_importance_lasso.head(15)

Unnamed: 0,feat_lasso,imp_lasso
0,lag_1,2.383416
1,rice_lp,0.250087
2,sugar_lp,0.217782
3,chicken_sc,0.070634
4,tomato_cb,0.059381
5,silver,0.055337
6,ycorn_cb,0.018349
7,ycorn_sc,0.017621
8,rice2_sc,0.0
9,rice2_lp,0.0


### **3.3. ADA**
Without tuning:
Validation MSE:  0.324
Validation R2:  0.853
Validation MAE:  0.429

In [50]:
# AdaBoost over depth-7 regression trees with learning_rate=0.99; the default
# configuration is kept below (commented out) for reference.
#ada = AdaBoostRegressor(random_state=0)
ada = AdaBoostRegressor(
    learning_rate=0.99,
    estimator=DecisionTreeRegressor(max_depth=7),
    random_state=0,
)
ada.fit(X_train, y_train)

# Predictions of the fitted model on the train and validation sets.
ada_train_pred = ada.predict(X_train)
ada_val_pred = ada.predict(X_validation)

# Forecast metrics on the train set (MSE, R2, MAE).
mse_train_ada, r2_train_ada, mae_train_ada = (
    metric(y_train, ada_train_pred)
    for metric in (mean_squared_error, r2_score, mean_absolute_error)
)
print("Train MSE: ", mse_train_ada)
print("Train R2: ", r2_train_ada)
print("Train MAE: ", mae_train_ada)

# Forecast metrics on the validation set (MSE, R2, MAE).
mse_val_ada, r2_val_ada, mae_val_ada = (
    metric(y_validation, ada_val_pred)
    for metric in (mean_squared_error, r2_score, mean_absolute_error)
)
print("Validation MSE: ", mse_val_ada)
print("Validation R2: ", r2_val_ada)
print("Validation MAE: ", mae_val_ada)

# Daily (high-frequency) predictions from the same fitted model.
ada_test_pred_d = ada.predict(X_test_d)

Train MSE:  0.0027247928744719685
Train R2:  0.9996807432095359
Train MAE:  0.02496654517443218
Validation MSE:  0.5042651675991037
Validation R2:  0.9151903327420136
Validation MAE:  0.49415566016956397


In [51]:
# AdaBoost feature importances, largest first, with a fresh 0..n-1 index.
feature_importance_ada = (
    pd.DataFrame({'Feature': X_train.columns, 'Importance': ada.feature_importances_})
    .sort_values(by='Importance', ascending=False)
    .reset_index(drop=True)
    .rename(columns={'Feature': 'feat', 'Importance': 'imp_ada'})
)
feature_importance_ada.head(15)

Unnamed: 0,feat,imp_ada
0,lag_1,0.840376
1,sugar_cb,0.059634
2,sugar_lp,0.007386
3,lag_2,0.006682
4,onion2_cb,0.004817
5,ycorn_lp,0.004182
6,onion_sc,0.003994
7,veglard_sc,0.003993
8,pineapple_lp,0.003549
9,peas_lp,0.00354


### **3.4. GBR**
Without tuning:
Validation MSE:  0.277
Validation R2:  0.874
Validation MAE:  0.414

In [52]:
# Gradient boosting with learning_rate=0.15; the default configuration is kept
# below (commented out) for reference.
#gbr = GradientBoostingRegressor(random_state=0)
gbr = GradientBoostingRegressor(learning_rate=0.15, random_state=0)
gbr.fit(X_train, y_train)

# Predictions of the fitted model on the train and validation sets.
gbr_train_pred = gbr.predict(X_train)
gbr_val_pred = gbr.predict(X_validation)

# Forecast metrics on the train set (MSE, R2, MAE).
mse_train_gbr, r2_train_gbr, mae_train_gbr = (
    metric(y_train, gbr_train_pred)
    for metric in (mean_squared_error, r2_score, mean_absolute_error)
)
print("Train MSE: ", mse_train_gbr)
print("Train R2: ", r2_train_gbr)
print("Train MAE: ", mae_train_gbr)

# Forecast metrics on the validation set (MSE, R2, MAE).
mse_val_gbr, r2_val_gbr, mae_val_gbr = (
    metric(y_validation, gbr_val_pred)
    for metric in (mean_squared_error, r2_score, mean_absolute_error)
)
print("Validation MSE: ", mse_val_gbr)
print("Validation R2: ", r2_val_gbr)
print("Validation MAE: ", mae_val_gbr)

# Daily (high-frequency) predictions from the same fitted model.
gbr_test_pred_d = gbr.predict(X_test_d)

Train MSE:  0.0005007905589175801
Train R2:  0.9999413236917812
Train MAE:  0.018660149056807292
Validation MSE:  0.4492387670434519
Validation R2:  0.9244449293736803
Validation MAE:  0.49826261127678856


In [53]:
# Gradient-boosting feature importances, largest first, with a fresh 0..n-1 index.
feature_importance_gbr = (
    pd.DataFrame({'Feature': X_train.columns, 'Importance': gbr.feature_importances_})
    .sort_values(by='Importance', ascending=False)
    .reset_index(drop=True)
    .rename(columns={'Feature': 'feat_gbr', 'Importance': 'imp_gbr'})
)
feature_importance_gbr.head(15)

Unnamed: 0,feat_gbr,imp_gbr
0,lag_1,0.921831
1,sugar_lp,0.019133
2,onion_lp,0.006179
3,noodle_cb,0.004709
4,ycorn_cb,0.004694
5,tomato_cb,0.004279
6,lag_6,0.003098
7,pineapple_lp,0.002543
8,veglard_cb,0.002531
9,oil_cb,0.002364


### **3.5. RF**
Without tuning:
Validation MSE:  0.324
Validation R2:  0.853
Validation MAE:  0.434

In [54]:
# Random forest with default hyperparameters; an alternative configuration is
# kept below (commented out) for reference.
rf = RandomForestRegressor(random_state=0)
#rf = RandomForestRegressor(criterion="absolute_error", max_depth=5, n_estimators=130, random_state=0)
rf.fit(X_train, y_train)

# Predictions of the fitted model on the train and validation sets.
rf_train_pred = rf.predict(X_train)
rf_val_pred = rf.predict(X_validation)

# Forecast metrics on the train set (MSE, R2, MAE).
mse_train_rf, r2_train_rf, mae_train_rf = (
    metric(y_train, rf_train_pred)
    for metric in (mean_squared_error, r2_score, mean_absolute_error)
)
print("Train MSE: ", mse_train_rf)
print("Train R2: ", r2_train_rf)
print("Train MAE: ", mae_train_rf)

# Forecast metrics on the validation set (MSE, R2, MAE).
mse_val_rf, r2_val_rf, mae_val_rf = (
    metric(y_validation, rf_val_pred)
    for metric in (mean_squared_error, r2_score, mean_absolute_error)
)
print("Validation MSE: ", mse_val_rf)
print("Validation R2: ", r2_val_rf)
print("Validation MAE: ", mae_val_rf)

# Daily (high-frequency) predictions from the same fitted model.
rf_test_pred_d = rf.predict(X_test_d)

Train MSE:  0.06213851741592749
Train R2:  0.9927193938958571
Train MAE:  0.16951923685495868
Validation MSE:  0.8307523613764257
Validation R2:  0.8602801940939646
Validation MAE:  0.6279140662009839


In [55]:
# Random-forest feature importances, largest first, with a fresh 0..n-1 index.
feature_importance_rf = (
    pd.DataFrame({'Feature': X_train.columns, 'Importance': rf.feature_importances_})
    .sort_values(by='Importance', ascending=False)
    .reset_index(drop=True)
    .rename(columns={'Feature': 'feat', 'Importance': 'imp_rf'})
)
feature_importance_rf.head(30)

Unnamed: 0,feat,imp_rf
0,lag_1,0.906045
1,veglard_sc,0.010024
2,lag_2,0.008788
3,sugar_cb,0.003993
4,onion_lp,0.003001
5,oil2_sc,0.00255
6,oil_cb,0.00236
7,zinc,0.001982
8,chicken_cb,0.001818
9,ufv,0.001715


### **3.6. ET**
Without tuning:
Validation MSE:  0.322
Validation R2:  0.854
Validation MAE:  0.423

In [56]:
# Extra-trees with default hyperparameters; an alternative configuration is
# kept below (commented out) for reference.
et = ExtraTreesRegressor(random_state=0)
#et = ExtraTreesRegressor(max_depth=9, random_state=0)
et.fit(X_train, y_train)

# Predictions of the fitted model on the train and validation sets.
et_train_pred = et.predict(X_train)
et_val_pred = et.predict(X_validation)

# Forecast metrics on the train set (MSE, R2, MAE).
mse_train_et, r2_train_et, mae_train_et = (
    metric(y_train, et_train_pred)
    for metric in (mean_squared_error, r2_score, mean_absolute_error)
)
print("Train MSE: ", mse_train_et)
print("Train R2: ", r2_train_et)
print("Train MAE: ", mae_train_et)

# Forecast metrics on the validation set (MSE, R2, MAE).
mse_val_et, r2_val_et, mae_val_et = (
    metric(y_validation, et_val_pred)
    for metric in (mean_squared_error, r2_score, mean_absolute_error)
)
print("Validation MSE: ", mse_val_et)
print("Validation R2: ", r2_val_et)
print("Validation MAE: ", mae_val_et)

# Daily (high-frequency) predictions from the same fitted model.
et_test_pred_d = et.predict(X_test_d)

Train MSE:  4.8718603528800485e-29
Train R2:  1.0
Train MAE:  4.5251093587590074e-15
Validation MSE:  0.3201670952805192
Validation R2:  0.9461528049875457
Validation MAE:  0.40265536193298623


In [57]:
# Extra-trees feature importances, largest first, with a fresh 0..n-1 index.
feature_importance_et = (
    pd.DataFrame({'Feature': X_train.columns, 'Importance': et.feature_importances_})
    .sort_values(by='Importance', ascending=False)
    .reset_index(drop=True)
    .rename(columns={'Feature': 'feat', 'Importance': 'imp_et'})
)
feature_importance_et.head(30)

Unnamed: 0,feat,imp_et
0,lag_1,0.451271
1,lag_2,0.216003
2,lag_3,0.070658
3,sugar_cb,0.04091
4,silver,0.021121
5,sugar_lp,0.020545
6,chicken_sc,0.012003
7,sugar_sc,0.008532
8,lag_12,0.007654
9,tin,0.00598


## **4. Report**

In [58]:
# Validation rows: observed target next to each model's prediction.
val_forecast = pd.DataFrame(
    {
        'cpi': y_validation,
        'ridge': ridge_val_pred,
        'lasso': lasso_val_pred,
        'ada': ada_val_pred,
        'gbr': gbr_val_pred,
        'rf': rf_val_pred,
        'et': et_val_pred,
    },
    index=validation.index,
).assign(set='validation')

# Daily rows: predictions only; `cpi` starts out as NaN for every row.
day_forecast = pd.DataFrame(
    {
        'cpi': np.nan,
        'ridge': ridge_test_pred_d,
        'lasso': lasso_test_pred_d,
        'ada': ada_test_pred_d,
        'gbr': gbr_test_pred_d,
        'rf': rf_test_pred_d,
        'et': et_test_pred_d,
    },
    index=test_d.index,
).assign(set='day')

# Stack both tables and order the rows by index label.
all_forecast = pd.concat([val_forecast, day_forecast], axis=0).sort_index(ascending=True)
all_forecast.tail(40)

Unnamed: 0,cpi,ridge,lasso,ada,gbr,rf,et,set
2025-03-03,,13.751122,13.851688,13.228036,13.299884,13.024483,13.502979,day
2025-03-04,,13.77237,13.852276,13.228036,13.287605,13.024483,13.502979,day
2025-03-05,,13.79164,13.858629,13.228036,13.006561,13.108312,13.500898,day
2025-03-06,,13.872849,13.86032,13.228036,13.006561,13.019093,13.538881,day
2025-03-07,,13.899078,13.867319,13.228036,13.094034,13.015642,13.504206,day
2025-03-08,,13.502061,13.857873,13.228036,13.084173,13.00609,13.490158,day
2025-03-09,,13.500801,13.85781,13.228036,13.084173,13.00609,13.490158,day
2025-03-10,,13.148082,13.855412,13.228036,13.067903,12.921618,13.600461,day
2025-03-11,,13.040946,13.860586,13.228036,13.981431,12.982192,13.663116,day
2025-03-12,,13.301927,13.865616,13.228036,14.009668,13.175259,13.719309,day


In [59]:
# One row per model with its validation MSE, R2 and MAE, ordered from lowest to highest MSE.
metrics = pd.DataFrame.from_dict(
    {
        'ridge': [mse_val_ridge, r2_val_ridge, mae_val_ridge],
        'lasso': [mse_val_lasso, r2_val_lasso, mae_val_lasso],
        'ada': [mse_val_ada, r2_val_ada, mae_val_ada],
        'gbr': [mse_val_gbr, r2_val_gbr, mae_val_gbr],
        'rf': [mse_val_rf, r2_val_rf, mae_val_rf],
        'et': [mse_val_et, r2_val_et, mae_val_et],
    },
    orient='index',
    columns=['MSE', 'R2', 'MAE'],
).sort_values(['MSE'], ascending=True)
metrics

Unnamed: 0,MSE,R2,MAE
ridge,0.154154,0.974074,0.306937
et,0.320167,0.946153,0.402655
lasso,0.411467,0.930797,0.484872
gbr,0.449239,0.924445,0.498263
ada,0.504265,0.91519,0.494156
rf,0.830752,0.86028,0.627914


In [60]:
# Row-wise envelope of the six individual model forecasts.
model_preds = all_forecast[['ridge', 'lasso', 'ada', 'gbr', 'rf', 'et']]
all_forecast['min'] = model_preds.min(axis=1)
all_forecast['max'] = model_preds.max(axis=1)

# Inverse-MSE weights, one per model (order: ridge, lasso, ada, gbr, rf, et).
# The common 1/1000 factor cancels when the weights are normalised below.
inv1, inv2, inv3, inv4, inv5, inv6 = (
    (1 / mse) / 1000
    for mse in (mse_val_ridge, mse_val_lasso, mse_val_ada, mse_val_gbr, mse_val_rf, mse_val_et)
)

# Weighted average over all six models.
num_1 = (
    (all_forecast['ridge'] * inv1)
    + (all_forecast['lasso'] * inv2)
    + (all_forecast['ada'] * inv3)
    + (all_forecast['gbr'] * inv4)
    + (all_forecast['rf'] * inv5)
    + (all_forecast['et'] * inv6)
)
den_1 = inv1 + inv2 + inv3 + inv4 + inv5 + inv6
all_forecast['w_avg'] = num_1 / den_1

# Weighted average restricted to ridge, et and lasso.
num_2 = (
    (all_forecast['ridge'] * inv1)
    + (all_forecast['et'] * inv6)
    + (all_forecast['lasso'] * inv2)
)
den_2 = inv1 + inv6 + inv2
all_forecast['w_avg_best'] = num_2 / den_2

all_forecast

Unnamed: 0,cpi,ridge,lasso,ada,gbr,rf,et,set,min,max,w_avg,w_avg_best
2010-01-01,,-0.199693,0.465266,0.073078,0.287553,0.357041,0.330907,day,-0.199693,0.465266,0.119384,0.072162
2010-01-02,,-0.100558,0.488982,0.073078,0.304497,0.421485,0.282262,day,-0.100558,0.488982,0.157433,0.117740
2010-01-03,,-0.067680,0.489608,0.073078,0.304497,0.421485,0.288440,day,-0.067680,0.489608,0.170845,0.137182
2010-01-04,,-0.024177,0.495058,0.126990,0.506786,0.446593,0.336407,day,-0.024177,0.506786,0.230014,0.174163
2010-01-05,,-0.304013,0.494714,0.155257,0.490205,0.393022,0.312822,day,-0.304013,0.494714,0.119140,0.017212
...,...,...,...,...,...,...,...,...,...,...,...,...
2025-04-07,,14.528062,15.080599,14.632839,13.514357,12.898184,13.089919,day,12.898184,15.080599,14.117849,14.266532
2025-04-08,,15.207565,15.111413,14.632839,13.489677,12.859263,13.135451,day,12.859263,15.207565,14.377009,14.650649
2025-04-09,,15.208234,15.110842,14.632839,13.453171,12.858693,12.941625,day,12.858693,15.208234,14.337798,14.600616
2025-04-10,,15.112431,15.086127,14.632839,13.396325,12.811195,12.912505,day,12.811195,15.112431,14.283011,14.536459


In [61]:
# Overwrite `cpi` with the observed monthly series from `df`, aligned on the index labels
# of `all_forecast`. Rows whose label has no match in `df.index` get NaN.
all_forecast['cpi'] = df['ipc_all'].reindex(all_forecast.index)
all_forecast.tail(35)

Unnamed: 0,cpi,ridge,lasso,ada,gbr,rf,et,set,min,max,w_avg,w_avg_best
2025-03-08,,13.502061,13.857873,13.228036,13.084173,13.00609,13.490158,day,13.00609,13.857873,13.430839,13.570791
2025-03-09,,13.500801,13.85781,13.228036,13.084173,13.00609,13.490158,day,13.00609,13.85781,13.430361,13.5701
2025-03-10,,13.148082,13.855412,13.228036,13.067903,12.921618,13.600461,day,12.921618,13.855412,13.310769,13.408199
2025-03-11,,13.040946,13.860586,13.228036,13.981431,12.982192,13.663116,day,12.982192,13.981431,13.40357,13.367776
2025-03-12,,13.301927,13.865616,13.228036,14.009668,13.175259,13.719309,day,13.175259,14.009668,13.528243,13.523973
2025-03-13,,13.549508,13.942095,13.228036,14.009668,13.068039,13.764441,day,13.068039,14.009668,13.631593,13.684503
2025-03-14,,14.599824,14.514701,13.228036,14.124598,13.079911,14.007198,day,13.079911,14.599824,14.16062,14.428915
2025-03-15,,14.257023,14.475521,13.228036,14.157258,13.079911,14.112787,day,13.079911,14.475521,14.050815,14.26371
2025-03-16,,14.255637,14.475451,13.228036,14.157258,13.079911,14.112787,day,13.079911,14.475451,14.05029,14.262949
2025-03-17,,15.270475,14.517789,13.228036,14.033639,13.056671,14.152851,day,13.056671,15.270475,14.423175,14.828638


In [62]:
# Score the two ensemble forecasts on the validation rows only.
only_validation = all_forecast.query('set == "validation"')

# Metrics for the average over all six models (MSE, R2, MAE).
mse_val_w_avg, r2_val_w_avg, mae_val_w_avg = (
    metric(only_validation['cpi'], only_validation['w_avg'])
    for metric in (mean_squared_error, r2_score, mean_absolute_error)
)

# Metrics for the average over ridge, et and lasso (MSE, R2, MAE).
mse_val_w_avg_best, r2_val_w_avg_best, mae_val_w_avg_best = (
    metric(only_validation['cpi'], only_validation['w_avg_best'])
    for metric in (mean_squared_error, r2_score, mean_absolute_error)
)

metrics_b = pd.DataFrame.from_dict(
    {
        'w_avg': [mse_val_w_avg, r2_val_w_avg, mae_val_w_avg],
        'w_avg_best': [mse_val_w_avg_best, r2_val_w_avg_best, mae_val_w_avg_best],
    },
    orient='index',
    columns=['MSE', 'R2', 'MAE'],
)

# Individual models and ensembles in one table, ordered from lowest to highest MSE.
metrics_all = pd.concat([metrics, metrics_b], axis=0).sort_values(['MSE'], ascending=True)
metrics_all

Unnamed: 0,MSE,R2,MAE
ridge,0.154154,0.974074,0.306937
w_avg_best,0.16071,0.972971,0.308942
w_avg,0.221785,0.962699,0.349422
et,0.320167,0.946153,0.402655
lasso,0.411467,0.930797,0.484872
gbr,0.449239,0.924445,0.498263
ada,0.504265,0.91519,0.494156
rf,0.830752,0.86028,0.627914


In [63]:
# Keep only the daily rows. The copy is taken *after* filtering so that
# `daily_forecast` is an independent frame: copying before `.query()` leaves the
# filtered result as a derived slice, and adding a column to it afterwards can
# raise pandas' SettingWithCopyWarning.
daily_forecast = all_forecast.query('set == "day"').copy()

# The index labels are converted to timestamps so they can be grouped by calendar month.
daily_forecast.index = pd.to_datetime(daily_forecast.index)
daily_forecast.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 5580 entries, 2010-01-01 to 2025-04-11
Data columns (total 12 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   cpi         183 non-null    float64
 1   ridge       5580 non-null   float64
 2   lasso       5580 non-null   float64
 3   ada         5580 non-null   float64
 4   gbr         5580 non-null   float64
 5   rf          5580 non-null   float64
 6   et          5580 non-null   float64
 7   set         5580 non-null   object 
 8   min         5580 non-null   float64
 9   max         5580 non-null   float64
 10  w_avg       5580 non-null   float64
 11  w_avg_best  5580 non-null   float64
dtypes: float64(11), object(1)
memory usage: 566.7+ KB


In [66]:
# Within each calendar month, the forecast for a day is the running (expanding)
# mean of the Ridge daily predictions from the first day of that month up to that day.
month_key = daily_forecast.index.to_period('M')
daily_forecast['forecast'] = (
    daily_forecast
    .groupby(month_key)['ridge']
    .transform(lambda month_values: month_values.expanding().mean())
)

In [67]:
# Inspect the last 30 daily rows, including the `forecast` column.
daily_forecast.tail(30)

Unnamed: 0,cpi,ridge,lasso,ada,gbr,rf,et,set,min,max,w_avg,w_avg_best,forecast
2025-03-13,,13.549508,13.942095,13.228036,14.009668,13.068039,13.764441,day,13.068039,14.009668,13.631593,13.684503,13.583847
2025-03-14,,14.599824,14.514701,13.228036,14.124598,13.079911,14.007198,day,13.079911,14.599824,14.16062,14.428915,13.656417
2025-03-15,,14.257023,14.475521,13.228036,14.157258,13.079911,14.112787,day,13.079911,14.475521,14.050815,14.26371,13.696457
2025-03-16,,14.255637,14.475451,13.228036,14.157258,13.079911,14.112787,day,13.079911,14.475451,14.05029,14.262949,13.731406
2025-03-17,,15.270475,14.517789,13.228036,14.033639,13.056671,14.152851,day,13.056671,15.270475,14.423175,14.828638,13.82194
2025-03-18,,15.297379,14.52204,13.228036,14.070702,12.992993,14.166899,day,12.992993,15.297379,14.436615,14.847635,13.903908
2025-03-19,,15.73451,14.127228,14.632839,13.753601,12.905637,14.050687,day,12.905637,15.73451,14.636461,14.973306,14.000256
2025-03-20,,15.855059,14.211417,14.632839,14.09387,13.001749,14.048793,day,13.001749,15.855059,14.742676,15.054755,14.092996
2025-03-21,,15.778086,14.179519,14.632839,14.063566,13.023981,14.239871,day,13.023981,15.778086,14.741488,15.056412,14.173238
2025-03-22,,15.717071,14.144531,14.632839,14.133481,13.152268,14.297917,day,13.152268,15.717071,14.74209,15.031536,14.243413


#### Daily Forecast

In [70]:
fig2 = go.Figure()

# Shaded band between the lowest and the highest model forecast: the lower bound
# is traced forward in time and the upper bound backward, so the filled polygon
# closes on itself.
band_x = daily_forecast.index.tolist() + daily_forecast.index[::-1].tolist()
band_y = daily_forecast["min"].tolist() + daily_forecast["max"][::-1].tolist()
fig2.add_trace(
    go.Scatter(
        x=band_x,
        y=band_y,
        fill='toself',
        fillcolor='#ffdabe',
        line={'color': 'rgba(255,255,255,0.5)'},
        hoverinfo="skip",
        showlegend=True,
        name="Forecast Interval",
    )
)

# High-frequency forecast line (the `forecast` column of `daily_forecast`).
fig2.add_trace(
    go.Scatter(
        x=daily_forecast.index,
        y=daily_forecast["forecast"],
        mode='lines',
        name="Daily Forecast",
        line={'color': '#ff7d00', 'width': 1.0},
        #marker=dict(size=1.5)
    )
)

# Observed monthly y-o-y inflation, drawn as markers.
fig2.add_trace(
    go.Scatter(
        x=df.index,
        y=df["ipc_all"],
        mode='markers',
        name="Observed y-o-y Inflation",
        line={'color': '#073763', 'width': 0.8},
        marker={'size': 4},
    )
)

# Figure-wide styling: font, template, canvas size and a boxed legend.
# Title/axis options are left disabled, as before:
#   title='High-frequency Inflation Forecast and y-o-y Inflation'
#   xaxis_title='Date', yaxis_title='%', legend_title='Legend'
fig2.update_layout(
    font={
        'family': "sans-serif, sans-serif",
        'color': "black",
        'size': 12,
    },
    template='plotly_white',
    width=1000,
    height=600,
    legend={
        'x': 0.65,
        'y': 0.99,
        'bgcolor': 'rgba(255, 255, 255, 0.8)',
        'font': {'size': 15},
        'bordercolor': 'black',
        'borderwidth': 1,
    },
)

# Date ticks formatted as abbreviated month plus year.
fig2.update_xaxes(tickformat='%b %Y')

fig2.show()

# Export the figure as a PNG at 5x scale.
fig2.write_image('./IPC_forecast/F0_Daily_Forecast.png', format='png', scale=5)

In [None]:
#daily_forecast.to_csv('./IPC_forecast/daily_forecast.csv')

#### End