In [1]:
cd ../

/Users/linafaik/Documents/projects/time-series-forecasting-models


In [2]:
import matplotlib.pyplot as plt

In [3]:
import pandas as pd
import numpy as np
import os

from nixtla import NixtlaClient

from config import *
from src.data_processing import *
from src.metrics import *
from src.training import *
from src.viz import *

%load_ext autoreload
%autoreload 2


In [58]:
dir_output = "output/timegpt"
path_input="output/data_processed.gz"

H = 14 # forecast horizon
n_ts = 200

In [5]:
client = NixtlaClient(api_key=NIXTLA_API_KEY)

df = pd.read_csv(path_input)
df["date"] = pd.to_datetime(df["date"], format="%Y-%m-%d")

In [6]:
df.head()

Unnamed: 0,id,item_id,dept_id,cat_id,store_id,state_id,date,sales,weekday,wm_yr_wk,event_type_1,event_type_2,sell_price,event_sporting,event_cultural,event_national,event_religious
0,FOODS_1_001_CA_1_evaluation,FOODS_1_001,FOODS_1,FOODS,CA_1,CA,2011-01-29,3,Saturday,11101,,,2.0,0,0,0,0
1,FOODS_1_001_CA_1_evaluation,FOODS_1_001,FOODS_1,FOODS,CA_1,CA,2011-01-30,0,Sunday,11101,,,2.0,0,0,0,0
2,FOODS_1_001_CA_1_evaluation,FOODS_1_001,FOODS_1,FOODS,CA_1,CA,2011-01-31,0,Monday,11101,,,2.0,0,0,0,0
3,FOODS_1_001_CA_1_evaluation,FOODS_1_001,FOODS_1,FOODS,CA_1,CA,2011-02-01,1,Tuesday,11101,,,2.0,0,0,0,0
4,FOODS_1_001_CA_1_evaluation,FOODS_1_001,FOODS_1,FOODS,CA_1,CA,2011-02-02,4,Wednesday,11101,,,2.0,0,0,0,0


## Train / Test Split

In [59]:
count_df = df[df["sales"]>0].groupby("id").size()
count_df.describe()

count    30490.000000
mean       621.163365
std        433.631478
min         12.000000
25%        262.000000
50%        518.000000
75%        903.000000
max       1938.000000
dtype: float64

In [None]:
random_ts = np.random.choice(
    df[df['id'].isin(
        count_df[count_df>800].index.unique()
        )]["id"].unique(), 
    size=n_ts)
subset_df = df[df['id'].isin(random_ts)].copy()
train_df, test_df = split_train_test(subset_df, H)

print(f"{len(train_df)} rows for train")
print(f"{len(test_df)} rows for test")

385400 rows for train
2800 rows for test


## Model training

In [61]:
id_col = "id"
time_col = "date"
target_col = "sales"
finetune_steps = 10
finetune_loss = "rmse"
finetune_depth = 3
freq = "D"
model = "timegpt-1-long-horizon"

### Baseline

In [62]:
name_scenario = "baseline"

In [63]:
forecast_df = client.forecast(
    df=train_df[["date", "sales", "id"]],
    id_col=id_col,
    time_col=time_col,
    target_col=target_col,
    h=H,
    freq=freq,
    finetune_steps=finetune_steps,
    finetune_loss=finetune_loss,
    finetune_depth=finetune_depth, 
    model=model
)

forecast_df.head()

INFO:nixtla.nixtla_client:Validating inputs...
INFO:nixtla.nixtla_client:Preprocessing dataframes...
INFO:nixtla.nixtla_client:Calling Forecast Endpoint...


Unnamed: 0,id,date,TimeGPT
0,FOODS_1_005_WI_1_evaluation,2016-05-09,2.889947
1,FOODS_1_005_WI_1_evaluation,2016-05-10,2.919353
2,FOODS_1_005_WI_1_evaluation,2016-05-11,2.842311
3,FOODS_1_005_WI_1_evaluation,2016-05-12,2.815968
4,FOODS_1_005_WI_1_evaluation,2016-05-13,2.832054


In [64]:
test_with_forecasts_df = (
    test_df
    .merge(forecast_df, on=["id", "date"], how="left")
    .rename(columns={"TimeGPT": "y_pred"})
    )

os.makedirs(dir_output, exist_ok=True)
test_with_forecasts_df.to_csv(os.path.join(dir_output, f"forecasts_{name_scenario}.csv"), index=False)

test_with_forecasts_df = (
    test_with_forecasts_df[test_with_forecasts_df.sales>0]
    .reset_index(drop=True)
    )

scores_df = (
    pd.DataFrame({
        name_scenario: evaluate(test_with_forecasts_df["sales"], test_with_forecasts_df["y_pred"])})
    .T.reset_index()
    .rename(columns={"index":"trial"})
    )
scores_df

Unnamed: 0,trial,MAE,RMSE,MAPE,R2,count
0,baseline,2.012083,11.247075,0.619356,0.475378,2085.0


In [65]:
scores_per_ts_df = test_with_forecasts_df.groupby("id").apply(
    lambda group: pd.Series(evaluate(group["sales"], group["y_pred"]))
).reset_index()
scores_per_ts_df["trial"] = name_scenario
scores_per_ts_df = scores_per_ts_df[["trial"]+[c for c in scores_per_ts_df.columns if c!="trial"]]

scores_per_ts_df


R^2 score is not well-defined with less than two samples.


R^2 score is not well-defined with less than two samples.


R^2 score is not well-defined with less than two samples.





Unnamed: 0,trial,id,MAE,RMSE,MAPE,R2,count
0,baseline,FOODS_1_005_WI_1_evaluation,0.976174,1.373812,0.769113,-1.579713,13.0
1,baseline,FOODS_1_044_CA_3_evaluation,1.111483,1.892178,0.421775,-0.525950,10.0
2,baseline,FOODS_1_048_CA_3_evaluation,1.002412,1.662689,0.395490,-0.939804,7.0
3,baseline,FOODS_1_061_TX_3_evaluation,1.257001,2.998396,0.448217,-1.085841,8.0
4,baseline,FOODS_1_067_CA_3_evaluation,3.984511,21.601195,1.304875,0.010786,14.0
...,...,...,...,...,...,...,...
193,baseline,HOUSEHOLD_2_348_TX_3_evaluation,0.806048,1.801334,0.346434,0.080952,10.0
194,baseline,HOUSEHOLD_2_355_CA_2_evaluation,0.943139,1.820060,0.479909,-0.848949,8.0
195,baseline,HOUSEHOLD_2_437_TX_3_evaluation,0.309156,0.282188,0.177187,-0.505003,8.0
196,baseline,HOUSEHOLD_2_442_CA_1_evaluation,0.762487,0.824073,0.364702,-0.681782,10.0


### With date features

In [66]:
name_scenario = "with_date_feat"

In [67]:
forecast_df = client.forecast(
    df=train_df[["date", "sales", "id"]],
    id_col=id_col,
    time_col=time_col,
    target_col=target_col,
    h=H,
    freq=freq,
    finetune_steps=finetune_steps,
    finetune_loss=finetune_loss,
    finetune_depth=finetune_depth, 
    model=model,
    date_features=True,
    date_features_to_one_hot=False,
)

test_with_forecasts_df = (
    test_df
    .merge(forecast_df, on=["id", "date"], how="left")
    .rename(columns={"TimeGPT": "y_pred"})
    )

test_with_forecasts_df.to_csv(os.path.join(dir_output, f"forecasts_{name_scenario}.csv"), index=False)

test_with_forecasts_df = (
    test_with_forecasts_df[test_with_forecasts_df.sales>0]
    .reset_index(drop=True)
    )

scores_iter_df = (
    pd.DataFrame({
        name_scenario: evaluate(test_with_forecasts_df["sales"], test_with_forecasts_df["y_pred"])})
    .T.reset_index()
    .rename(columns={"index":"trial"})
    )

scores_df = pd.concat([scores_df, scores_iter_df],axis=0).reset_index(drop=True)

scores_per_ts_iter_df = test_with_forecasts_df.groupby("id").apply(
    lambda group: pd.Series(evaluate(group["sales"], group["y_pred"]))
).reset_index()
scores_per_ts_iter_df["trial"] = name_scenario
scores_per_ts_iter_df = scores_per_ts_iter_df[["trial"]+[c for c in scores_per_ts_iter_df.columns if c!="trial"]]
scores_per_ts_df = pd.concat([scores_per_ts_df, scores_per_ts_iter_df],axis=0).reset_index(drop=True)

INFO:nixtla.nixtla_client:Validating inputs...
INFO:nixtla.nixtla_client:Preprocessing dataframes...
INFO:nixtla.nixtla_client:Using future exogenous features: ['year', 'month', 'day', 'weekday']
INFO:nixtla.nixtla_client:Calling Forecast Endpoint...

R^2 score is not well-defined with less than two samples.


R^2 score is not well-defined with less than two samples.


R^2 score is not well-defined with less than two samples.





In [68]:
scores_df

Unnamed: 0,trial,MAE,RMSE,MAPE,R2,count
0,baseline,2.012083,11.247075,0.619356,0.475378,2085.0
1,with_date_feat,2.00199,11.272606,0.61056,0.474187,2085.0


### With exog features

In [69]:
name_scenario = "with_exog_feat"

In [70]:
columns_exog_features = ["sell_price", "event_cultural", "event_national", "event_religious", "event_sporting"]

forecast_df = client.forecast(
    df=train_df[["date", "sales", "id"]+columns_exog_features],
    X_df=test_df[["date", "id"]+columns_exog_features],
    id_col=id_col,
    time_col=time_col,
    target_col=target_col,
    h=H,
    freq=freq,
    finetune_steps=finetune_steps,
    finetune_loss=finetune_loss,
    finetune_depth=finetune_depth, 
    model=model,
    date_features=True,
    date_features_to_one_hot=False,
    clean_ex_first=True,
)

test_with_forecasts_df = (
    test_df
    .merge(forecast_df, on=["id", "date"], how="left")
    .rename(columns={"TimeGPT": "y_pred"})
    )

test_with_forecasts_df.to_csv(os.path.join(dir_output, f"forecasts_{name_scenario}.csv"), index=False)

test_with_forecasts_df = (
    test_with_forecasts_df[test_with_forecasts_df.sales>0]
    .reset_index(drop=True)
    )

scores_iter_df = (
    pd.DataFrame({
        name_scenario: evaluate(test_with_forecasts_df["sales"], test_with_forecasts_df["y_pred"])})
    .T.reset_index()
    .rename(columns={"index":"trial"})
    )

scores_df = pd.concat([scores_df, scores_iter_df],axis=0).reset_index(drop=True)

scores_per_ts_iter_df = test_with_forecasts_df.groupby("id").apply(
    lambda group: pd.Series(evaluate(group["sales"], group["y_pred"]))
).reset_index()
scores_per_ts_iter_df["trial"] = name_scenario
scores_per_ts_iter_df = scores_per_ts_iter_df[["trial"]+[c for c in scores_per_ts_iter_df.columns if c!="trial"]]
scores_per_ts_df = pd.concat([scores_per_ts_df, scores_per_ts_iter_df],axis=0).reset_index(drop=True)

INFO:nixtla.nixtla_client:Validating inputs...
INFO:nixtla.nixtla_client:Preprocessing dataframes...
INFO:nixtla.nixtla_client:Using future exogenous features: ['sell_price', 'event_cultural', 'event_national', 'event_religious', 'event_sporting', 'year', 'month', 'day', 'weekday']
INFO:nixtla.nixtla_client:Calling Forecast Endpoint...

R^2 score is not well-defined with less than two samples.


R^2 score is not well-defined with less than two samples.


R^2 score is not well-defined with less than two samples.





## Results analysis

In [71]:
scores_df

Unnamed: 0,trial,MAE,RMSE,MAPE,R2,count
0,baseline,2.012083,11.247075,0.619356,0.475378,2085.0
1,with_date_feat,2.00199,11.272606,0.61056,0.474187,2085.0
2,with_exog_feat,1.999467,11.131055,0.645213,0.480789,2085.0


In [72]:
list_metrics = ["RMSE", "MAE", "R2"]
plot_global_scores(scores_df=scores_df, list_metrics=list_metrics)

In [73]:
plot_scores_per_ts(scores_per_ts_df, list_metrics=list_metrics)

In [80]:
# Sort by RMSE (ascending for best, descending for worst)
sorted_scores = scores_per_ts_iter_df.sort_values(by="MAPE")

# Best 5 (lowest RMSE)
best_ids = sorted_scores['id'].head(5).tolist()

plot_forecasts_with_train(
    train_df, test_df, forecast_df,
    scores_per_ts_df = scores_per_ts_iter_df, 
    metric="MAPE",
    ids_to_plot=best_ids
    )


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.




The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.




The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.




The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.




The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.



In [81]:
worst_ids = sorted_scores['id'].tail(5).tolist()
plot_forecasts_with_train(
    train_df, test_df, forecast_df,
    scores_per_ts_df = scores_per_ts_iter_df, 
    metric="MAPE",
    ids_to_plot=worst_ids
    )


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.




The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.




The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.




The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.




The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.

