In [1]:
# Move the kernel's working directory one level up (presumably to the project root, so that
# `config` and `src` resolve on import -- confirm against the repository layout).
# NOTE: not idempotent -- re-running this cell moves up one more directory each time.
cd ../

/Users/linafaik/Documents/projects/time-series-forecasting-models


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [2]:
import pandas as pd
import numpy as np
import os

from nixtla import NixtlaClient

from config import *
from src.data_processing import *
from src.metrics import *
from src.training import *
from src.viz import *

%load_ext autoreload
%autoreload 2


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# When True, every scenario cell below ignores its cached CSV, calls the forecast API again
# and overwrites the file; set to False to reuse forecasts already saved under output/.
force = True

In [4]:
# Nixtla API client shared by all forecast calls in this notebook.
# NIXTLA_API_KEY is not defined here; presumably it is provided by `from config import *`.
client = NixtlaClient(api_key=NIXTLA_API_KEY)

## Data loading

In [5]:
# Read the processed dataset and convert the `date` strings (YYYY-MM-DD) to datetimes
df = pd.read_csv(path_data_processed).assign(
    date=lambda frame: pd.to_datetime(frame["date"], format="%Y-%m-%d")
)

df.head()

Unnamed: 0,date,store_id,state_id,sold_quantity,sold_amount,event_type_1,event_type_2,event_sporting,event_cultural,event_national,event_religious
0,2011-01-29,CA_1,CA,4337,10933.16,,,0,0,0,0
1,2011-01-29,CA_2,CA,3494,9101.52,,,0,0,0,0
2,2011-01-29,CA_3,CA,4739,11679.83,,,0,0,0,0
3,2011-01-29,CA_4,CA,1625,4561.59,,,0,0,0,0
4,2011-01-29,TX_1,TX,2556,6586.68,,,0,0,0,0


## Model training

### Baseline

In [6]:
# Scenario identifier; the next cell uses it to build the output folder and CSV file name.
name_scenario = "timegpt_baseline"

In [7]:
# Forecasts for this scenario are cached as a CSV under output/<name_scenario>/.
output_dir = os.path.join("output", name_scenario)
path = os.path.join(output_dir, f"forecasts_{name_scenario}.csv")

if force or not os.path.exists(path):

    # Split the history into a training set and a hold-out set
    # (presumably the last H periods of each series -- confirm in split_train_test).
    train_df, test_df = split_train_test(
        df=df,
        horizon=H,
        column_date=time_col,
        column_id=id_col,
    )

    print(f"{len(train_df)} rows for train")
    print(f"{len(test_df)} rows for test")

    # Baseline: fine-tuned "timegpt-1" on the target history only
    # (no date features, no exogenous variables).
    forecasts_df = client.forecast(
        df=train_df[[time_col, target_col, id_col]],
        id_col=id_col,
        time_col=time_col,
        target_col=target_col,
        h=H,
        freq=freq,
        finetune_steps=10,
        finetune_loss="rmse",
        finetune_depth=3,
        model="timegpt-1",
    )

    # Attach the predictions to the hold-out rows; the left join keeps every
    # test row even if no forecast was returned for it.
    forecasts_enr_df = test_df.merge(forecasts_df, on=[id_col, time_col], how="left")

    # Prepend the training history; those rows have no forecast column, so it is NaN there.
    forecasts_enr_df = pd.concat([train_df, forecasts_enr_df], axis=0).reset_index(drop=True)

    os.makedirs(output_dir, exist_ok=True)
    forecasts_enr_df.to_csv(path, index=False)

else:
    # CSV round-trips dates as strings: parse the time column so a cache hit
    # yields datetimes, like a fresh run does.
    forecasts_enr_df = pd.read_csv(path, parse_dates=[time_col])

forecasts_enr_df.tail()

INFO:nixtla.nixtla_client:Validating inputs...
INFO:nixtla.nixtla_client:Preprocessing dataframes...
INFO:nixtla.nixtla_client:Querying model metadata...


19270 rows for train
140 rows for test


INFO:nixtla.nixtla_client:Calling Forecast Endpoint...


Unnamed: 0,date,store_id,state_id,sold_quantity,sold_amount,event_type_1,event_type_2,event_sporting,event_cultural,event_national,event_religious,TimeGPT
19405,2016-05-18,WI_3,WI,3268,9163.29,,,0,0,0,0,9686.321
19406,2016-05-19,WI_3,WI,3398,9660.13,,,0,0,0,0,9896.385
19407,2016-05-20,WI_3,WI,4126,11982.37,,,0,0,0,0,11178.528
19408,2016-05-21,WI_3,WI,4519,12370.23,,,0,0,0,0,13649.355
19409,2016-05-22,WI_3,WI,4757,13432.85,,,0,0,0,0,13287.627


In [8]:
# Initialize containers:
# - `scores` stores the global evaluation metrics, keyed by forecast column
# - `scores_per_ts` collects one DataFrame of per-series metrics per forecast column
scores = {}
scores_per_ts = []

# Loop through the forecast columns to evaluate (here only 'TimeGPT')
for column in ['TimeGPT']:

    # Keep only rows that carry a forecast (rows without one hold NaN in this column)
    forecasts_filtered_df = forecasts_enr_df[forecasts_enr_df[column].notna()]

    # Global evaluation metrics over all forecasted rows
    scores[column] = evaluate(
        forecasts_filtered_df[target_col],    # Ground truth
        forecasts_filtered_df[column]         # Model forecast
    )

    # Metrics per time series (grouped by id_col). Selecting the two needed columns
    # before `apply` keeps the grouping column out of each group, which avoids the
    # pandas DeprecationWarning about `apply` operating on the grouping columns.
    scores_per_ts_model_df = (
        forecasts_filtered_df
        .groupby(id_col)[[target_col, column]]
        .apply(lambda group: pd.Series(
            evaluate(group[target_col], group[column])  # Metrics per time series
        ))
        .reset_index()
    )

    # Tag each row with the name of the forecast column it was computed from
    scores_per_ts_model_df["model"] = column

    # Put "model" first, then append to the results list
    scores_per_ts.append(
        scores_per_ts_model_df[["model"] + [c for c in scores_per_ts_model_df.columns if c != "model"]]
    )

# Convert the global scores dictionary into a DataFrame (one row per forecast column)
scores_df = (
    pd.DataFrame(scores).T
    .reset_index()
    .rename(columns={"index": "model"})
)

# Concatenate all per-series score DataFrames into a single table
scores_per_ts_df = pd.concat(scores_per_ts, axis=0).reset_index(drop=True)


  forecasts_filtered_df


In [9]:
# Display the global metrics table built in the previous cell
scores_df

Unnamed: 0,model,MAE,RMSE,MAPE,R2,count
0,TimeGPT,1222.124056,2541648.0,0.08183,0.851037,140.0


### With date features

In [10]:
# Scenario identifier; the next cell uses it to build the output folder and CSV file name.
name_scenario = "timegpt_with_date_feat"

In [11]:
# Forecasts for this scenario are cached as a CSV under output/<name_scenario>/.
output_dir = os.path.join("output", name_scenario)
path = os.path.join(output_dir, f"forecasts_{name_scenario}.csv")

if force or not os.path.exists(path):

    # Split the history into a training set and a hold-out set
    # (presumably the last H periods of each series -- confirm in split_train_test).
    train_df, test_df = split_train_test(
        df=df,
        horizon=H,
        column_date=time_col,
        column_id=id_col,
    )

    print(f"{len(train_df)} rows for train")
    print(f"{len(test_df)} rows for test")

    # Same settings as the baseline, plus date features requested from the client
    # (the client log lists year, month, day and weekday); one-hot encoding is disabled.
    forecasts_df = client.forecast(
        df=train_df[[time_col, target_col, id_col]],
        id_col=id_col,
        time_col=time_col,
        target_col=target_col,
        h=H,
        freq=freq,
        finetune_steps=10,
        finetune_loss="rmse",
        finetune_depth=3,
        model="timegpt-1",
        date_features=True,
        date_features_to_one_hot=False,
    )

    # Attach the predictions to the hold-out rows; the left join keeps every
    # test row even if no forecast was returned for it.
    forecasts_enr_df = test_df.merge(forecasts_df, on=[id_col, time_col], how="left")

    # Prepend the training history; those rows have no forecast column, so it is NaN there.
    forecasts_enr_df = pd.concat([train_df, forecasts_enr_df], axis=0).reset_index(drop=True)

    os.makedirs(output_dir, exist_ok=True)
    forecasts_enr_df.to_csv(path, index=False)

else:
    # CSV round-trips dates as strings: parse the time column so a cache hit
    # yields datetimes, like a fresh run does.
    forecasts_enr_df = pd.read_csv(path, parse_dates=[time_col])

forecasts_enr_df.tail()

INFO:nixtla.nixtla_client:Validating inputs...
INFO:nixtla.nixtla_client:Preprocessing dataframes...
INFO:nixtla.nixtla_client:Using future exogenous features: ['year', 'month', 'day', 'weekday']
INFO:nixtla.nixtla_client:Calling Forecast Endpoint...


19270 rows for train
140 rows for test


Unnamed: 0,date,store_id,state_id,sold_quantity,sold_amount,event_type_1,event_type_2,event_sporting,event_cultural,event_national,event_religious,TimeGPT
19405,2016-05-18,WI_3,WI,3268,9163.29,,,0,0,0,0,9450.85
19406,2016-05-19,WI_3,WI,3398,9660.13,,,0,0,0,0,9557.519
19407,2016-05-20,WI_3,WI,4126,11982.37,,,0,0,0,0,10900.216
19408,2016-05-21,WI_3,WI,4519,12370.23,,,0,0,0,0,13038.91
19409,2016-05-22,WI_3,WI,4757,13432.85,,,0,0,0,0,12892.754


In [12]:
# Initialize containers:
# - `scores` stores the global evaluation metrics, keyed by forecast column
# - `scores_per_ts` collects one DataFrame of per-series metrics per forecast column
scores = {}
scores_per_ts = []

# Loop through the forecast columns to evaluate (here only 'TimeGPT')
for column in ['TimeGPT']:

    # Keep only rows that carry a forecast (rows without one hold NaN in this column)
    forecasts_filtered_df = forecasts_enr_df[forecasts_enr_df[column].notna()]

    # Global evaluation metrics over all forecasted rows
    scores[column] = evaluate(
        forecasts_filtered_df[target_col],    # Ground truth
        forecasts_filtered_df[column]         # Model forecast
    )

    # Metrics per time series (grouped by id_col). Selecting the two needed columns
    # before `apply` keeps the grouping column out of each group, which avoids the
    # pandas DeprecationWarning about `apply` operating on the grouping columns.
    scores_per_ts_model_df = (
        forecasts_filtered_df
        .groupby(id_col)[[target_col, column]]
        .apply(lambda group: pd.Series(
            evaluate(group[target_col], group[column])  # Metrics per time series
        ))
        .reset_index()
    )

    # Tag each row with the name of the forecast column it was computed from
    scores_per_ts_model_df["model"] = column

    # Put "model" first, then append to the results list
    scores_per_ts.append(
        scores_per_ts_model_df[["model"] + [c for c in scores_per_ts_model_df.columns if c != "model"]]
    )

# Convert the global scores dictionary into a DataFrame (one row per forecast column)
scores_df = (
    pd.DataFrame(scores).T
    .reset_index()
    .rename(columns={"index": "model"})
)

# Concatenate all per-series score DataFrames into a single table
scores_per_ts_df = pd.concat(scores_per_ts, axis=0).reset_index(drop=True)


  forecasts_filtered_df


In [13]:
# Display the global metrics table built in the previous cell
scores_df

Unnamed: 0,model,MAE,RMSE,MAPE,R2,count
0,TimeGPT,1216.554253,2699880.0,0.08005,0.841763,140.0


### With TimeGPT long horizon

In [14]:
# Scenario identifier; the next cell uses it to build the output folder and CSV file name.
name_scenario = "timegpt_long_horizon"

In [15]:
# Forecasts for this scenario are cached as a CSV under output/<name_scenario>/.
output_dir = os.path.join("output", name_scenario)
path = os.path.join(output_dir, f"forecasts_{name_scenario}.csv")

if force or not os.path.exists(path):

    # Split the history into a training set and a hold-out set
    # (presumably the last H periods of each series -- confirm in split_train_test).
    train_df, test_df = split_train_test(
        df=df,
        horizon=H,
        column_date=time_col,
        column_id=id_col,
    )

    print(f"{len(train_df)} rows for train")
    print(f"{len(test_df)} rows for test")

    # Same settings as the date-features scenario, but with the
    # "timegpt-1-long-horizon" model instead of "timegpt-1".
    forecasts_df = client.forecast(
        df=train_df[[time_col, target_col, id_col]],
        id_col=id_col,
        time_col=time_col,
        target_col=target_col,
        h=H,
        freq=freq,
        finetune_steps=10,
        finetune_loss="rmse",
        finetune_depth=3,
        model="timegpt-1-long-horizon",
        date_features=True,
        date_features_to_one_hot=False,
    )

    # Attach the predictions to the hold-out rows; the left join keeps every
    # test row even if no forecast was returned for it.
    forecasts_enr_df = test_df.merge(forecasts_df, on=[id_col, time_col], how="left")

    # Prepend the training history; those rows have no forecast column, so it is NaN there.
    forecasts_enr_df = pd.concat([train_df, forecasts_enr_df], axis=0).reset_index(drop=True)

    os.makedirs(output_dir, exist_ok=True)
    forecasts_enr_df.to_csv(path, index=False)

else:
    # CSV round-trips dates as strings: parse the time column so a cache hit
    # yields datetimes, like a fresh run does.
    forecasts_enr_df = pd.read_csv(path, parse_dates=[time_col])

forecasts_enr_df.tail()

INFO:nixtla.nixtla_client:Validating inputs...
INFO:nixtla.nixtla_client:Preprocessing dataframes...
INFO:nixtla.nixtla_client:Querying model metadata...


19270 rows for train
140 rows for test


INFO:nixtla.nixtla_client:Using future exogenous features: ['year', 'month', 'day', 'weekday']
INFO:nixtla.nixtla_client:Calling Forecast Endpoint...


Unnamed: 0,date,store_id,state_id,sold_quantity,sold_amount,event_type_1,event_type_2,event_sporting,event_cultural,event_national,event_religious,TimeGPT
19405,2016-05-18,WI_3,WI,3268,9163.29,,,0,0,0,0,9297.76
19406,2016-05-19,WI_3,WI,3398,9660.13,,,0,0,0,0,9525.98
19407,2016-05-20,WI_3,WI,4126,11982.37,,,0,0,0,0,11045.677
19408,2016-05-21,WI_3,WI,4519,12370.23,,,0,0,0,0,13141.402
19409,2016-05-22,WI_3,WI,4757,13432.85,,,0,0,0,0,12514.386


In [16]:
# Initialize containers:
# - `scores` stores the global evaluation metrics, keyed by forecast column
# - `scores_per_ts` collects one DataFrame of per-series metrics per forecast column
scores = {}
scores_per_ts = []

# Loop through the forecast columns to evaluate (here only 'TimeGPT')
for column in ['TimeGPT']:

    # Keep only rows that carry a forecast (rows without one hold NaN in this column)
    forecasts_filtered_df = forecasts_enr_df[forecasts_enr_df[column].notna()]

    # Global evaluation metrics over all forecasted rows
    scores[column] = evaluate(
        forecasts_filtered_df[target_col],    # Ground truth
        forecasts_filtered_df[column]         # Model forecast
    )

    # Metrics per time series (grouped by id_col). Selecting the two needed columns
    # before `apply` keeps the grouping column out of each group, which avoids the
    # pandas DeprecationWarning about `apply` operating on the grouping columns.
    scores_per_ts_model_df = (
        forecasts_filtered_df
        .groupby(id_col)[[target_col, column]]
        .apply(lambda group: pd.Series(
            evaluate(group[target_col], group[column])  # Metrics per time series
        ))
        .reset_index()
    )

    # Tag each row with the name of the forecast column it was computed from
    scores_per_ts_model_df["model"] = column

    # Put "model" first, then append to the results list
    scores_per_ts.append(
        scores_per_ts_model_df[["model"] + [c for c in scores_per_ts_model_df.columns if c != "model"]]
    )

# Convert the global scores dictionary into a DataFrame (one row per forecast column)
scores_df = (
    pd.DataFrame(scores).T
    .reset_index()
    .rename(columns={"index": "model"})
)

# Concatenate all per-series score DataFrames into a single table
scores_per_ts_df = pd.concat(scores_per_ts, axis=0).reset_index(drop=True)
scores_df

  forecasts_filtered_df


Unnamed: 0,model,MAE,RMSE,MAPE,R2,count
0,TimeGPT,1178.283678,2525547.0,0.078074,0.851981,140.0


### With exogenous features

In [17]:
# Scenario identifier; the next cell uses it to build the output folder and CSV file name.
name_scenario = "timegpt_with_exog_feat"

In [18]:
# Forecasts for this scenario are cached as a CSV under output/<name_scenario>/.
output_dir = os.path.join("output", name_scenario)
path = os.path.join(output_dir, f"forecasts_{name_scenario}.csv")

if force or not os.path.exists(path):

    # Split the history into a training set and a hold-out set
    # (presumably the last H periods of each series -- confirm in split_train_test).
    train_df, test_df = split_train_test(
        df=df,
        horizon=H,
        column_date=time_col,
        column_id=id_col,
    )

    print(f"{len(train_df)} rows for train")
    print(f"{len(test_df)} rows for test")

    # Event indicator columns passed to the model as exogenous variables.
    columns_exog_features = ["event_cultural", "event_national", "event_religious", "event_sporting"]

    # Long-horizon model with date features plus the event indicators.
    # `X_df` holds only ids, timestamps and the exogenous values over the forecast
    # horizon: the target column is deliberately left out so that hold-out actuals
    # are never sent to the API as model inputs.
    forecasts_df = client.forecast(
        df=train_df[[time_col, target_col, id_col] + columns_exog_features],
        X_df=test_df[[time_col, id_col] + columns_exog_features],
        id_col=id_col,
        time_col=time_col,
        target_col=target_col,
        h=H,
        freq=freq,
        finetune_steps=10,
        finetune_loss="rmse",
        finetune_depth=3,
        model="timegpt-1-long-horizon",
        date_features=True,
        date_features_to_one_hot=False,
    )

    # Attach the predictions to the hold-out rows; the left join keeps every
    # test row even if no forecast was returned for it.
    forecasts_enr_df = test_df.merge(forecasts_df, on=[id_col, time_col], how="left")

    # Prepend the training history; those rows have no forecast column, so it is NaN there.
    forecasts_enr_df = pd.concat([train_df, forecasts_enr_df], axis=0).reset_index(drop=True)

    os.makedirs(output_dir, exist_ok=True)
    forecasts_enr_df.to_csv(path, index=False)

else:
    # CSV round-trips dates as strings: parse the time column so a cache hit
    # yields datetimes, like a fresh run does.
    forecasts_enr_df = pd.read_csv(path, parse_dates=[time_col])

forecasts_enr_df.tail()

INFO:nixtla.nixtla_client:Validating inputs...
INFO:nixtla.nixtla_client:Preprocessing dataframes...
INFO:nixtla.nixtla_client:Using future exogenous features: ['event_cultural', 'event_national', 'event_religious', 'event_sporting', 'year', 'month', 'day', 'weekday']
INFO:nixtla.nixtla_client:Calling Forecast Endpoint...


19270 rows for train
140 rows for test


Unnamed: 0,date,store_id,state_id,sold_quantity,sold_amount,event_type_1,event_type_2,event_sporting,event_cultural,event_national,event_religious,TimeGPT
19405,2016-05-18,WI_3,WI,3268,9163.29,,,0,0,0,0,9562.1
19406,2016-05-19,WI_3,WI,3398,9660.13,,,0,0,0,0,9743.397
19407,2016-05-20,WI_3,WI,4126,11982.37,,,0,0,0,0,11169.068
19408,2016-05-21,WI_3,WI,4519,12370.23,,,0,0,0,0,13329.268
19409,2016-05-22,WI_3,WI,4757,13432.85,,,0,0,0,0,12962.116


In [19]:
# Initialize containers:
# - `scores` stores the global evaluation metrics, keyed by forecast column
# - `scores_per_ts` collects one DataFrame of per-series metrics per forecast column
scores = {}
scores_per_ts = []

# Loop through the forecast columns to evaluate (here only 'TimeGPT')
for column in ['TimeGPT']:

    # Keep only rows that carry a forecast (rows without one hold NaN in this column)
    forecasts_filtered_df = forecasts_enr_df[forecasts_enr_df[column].notna()]

    # Global evaluation metrics over all forecasted rows
    scores[column] = evaluate(
        forecasts_filtered_df[target_col],    # Ground truth
        forecasts_filtered_df[column]         # Model forecast
    )

    # Metrics per time series (grouped by id_col). Selecting the two needed columns
    # before `apply` keeps the grouping column out of each group, which avoids the
    # pandas DeprecationWarning about `apply` operating on the grouping columns.
    scores_per_ts_model_df = (
        forecasts_filtered_df
        .groupby(id_col)[[target_col, column]]
        .apply(lambda group: pd.Series(
            evaluate(group[target_col], group[column])  # Metrics per time series
        ))
        .reset_index()
    )

    # Tag each row with the name of the forecast column it was computed from
    scores_per_ts_model_df["model"] = column

    # Put "model" first, then append to the results list
    scores_per_ts.append(
        scores_per_ts_model_df[["model"] + [c for c in scores_per_ts_model_df.columns if c != "model"]]
    )

# Convert the global scores dictionary into a DataFrame (one row per forecast column)
scores_df = (
    pd.DataFrame(scores).T
    .reset_index()
    .rename(columns={"index": "model"})
)

# Concatenate all per-series score DataFrames into a single table
scores_per_ts_df = pd.concat(scores_per_ts, axis=0).reset_index(drop=True)
scores_df

  forecasts_filtered_df


Unnamed: 0,model,MAE,RMSE,MAPE,R2,count
0,TimeGPT,925.959365,1572503.0,0.062314,0.907838,140.0


## Results analysis

In [28]:
# Scenarios to compare; each one is read back from output/<name>/forecasts_<name>.csv
scenario_names = ['timegpt_baseline', 'timegpt_with_date_feat', 'timegpt_long_horizon', 'timegpt_with_exog_feat']

# Descriptive columns shared by every scenario file: kept from the first scenario only
# and dropped from the others before merging.
columns_additional = [
    'state_id', 'sold_quantity', 'sold_amount',
    'event_type_1', 'event_type_2', 'event_sporting', 'event_cultural',
    'event_national', 'event_religious'
    ]
columns_key = [time_col, id_col]

forecasts_df = None
for name in scenario_names:
    path = os.path.join("output", name, f"forecasts_{name}.csv")

    # Short label used as the forecast column name, e.g. "timegpt_with_date_feat" -> "with date feat"
    label = name.replace("timegpt_", "").replace("_", " ")
    scenario_df = pd.read_csv(path).rename(columns={"TimeGPT": label})

    # The first scenario seeds the table with all of its columns
    if forecasts_df is None:
        forecasts_df = scenario_df
        continue

    # Later scenarios contribute only the key columns and their forecast column
    forecasts_df = forecasts_df.merge(
        scenario_df.drop(columns=columns_additional),
        on=columns_key,
    )

forecasts_df.tail()

Unnamed: 0,date,store_id,state_id,sold_quantity,sold_amount,event_type_1,event_type_2,event_sporting,event_cultural,event_national,event_religious,baseline,with date feat,long horizon,with exog feat
19405,2016-05-18,WI_3,WI,3268,9163.29,,,0,0,0,0,9686.321,9450.85,9297.76,9562.1
19406,2016-05-19,WI_3,WI,3398,9660.13,,,0,0,0,0,9896.385,9557.519,9525.98,9743.397
19407,2016-05-20,WI_3,WI,4126,11982.37,,,0,0,0,0,11178.528,10900.216,11045.677,11169.068
19408,2016-05-21,WI_3,WI,4519,12370.23,,,0,0,0,0,13649.355,13038.91,13141.402,13329.268
19409,2016-05-22,WI_3,WI,4757,13432.85,,,0,0,0,0,13287.627,12892.754,12514.386,12962.116


In [48]:
# Initialize two containers:
# - `scores`: global evaluation metrics, keyed by model (forecast column)
# - `scores_per_ts`: one DataFrame of per-series metrics per model
scores = {}
scores_per_ts = []

# Forecast column names, derived from the scenario names
# (e.g. "timegpt_with_date_feat" -> "with date feat")
formatted_names = [name.replace('timegpt_', '').replace('_', ' ') for name in scenario_names]

# Loop through all forecast model columns to evaluate
for column in formatted_names:

    # Keep only rows where forecasts from the current model are available (i.e., not NaN)
    forecasts_filtered_df = forecasts_df[forecasts_df[column].notna()]

    # Compute overall metrics between actual values and model forecasts
    scores[column] = evaluate(
        forecasts_filtered_df[target_col],     # Ground truth
        forecasts_filtered_df[column]          # Forecasted values from the current model
    )

    # Metrics for each individual time series (grouped by id_col). Selecting the two
    # needed columns before `apply` keeps the grouping column out of each group, which
    # avoids the pandas DeprecationWarning about `apply` operating on the grouping columns.
    scores_per_ts_model_df = (
        forecasts_filtered_df
        .groupby(id_col)[[target_col, column]]
        .apply(lambda group: pd.Series(
            evaluate(group[target_col], group[column])   # Evaluate per time series
        ))
        .reset_index()
    )

    # Add model name to identify which model each row of metrics corresponds to
    scores_per_ts_model_df["model"] = column

    # Put "model" first, then append to the list of all per-series scores
    scores_per_ts.append(
        scores_per_ts_model_df[["model"] + [c for c in scores_per_ts_model_df.columns if c != "model"]]
    )

# Convert the global metrics dictionary to a DataFrame with one row per model
scores_df = (
    pd.DataFrame(scores).T
    .reset_index()
    .rename(columns={"index": "model"})
)

# Concatenate the list of per-time-series score DataFrames into a single DataFrame
scores_per_ts_df = pd.concat(scores_per_ts, axis=0).reset_index(drop=True)












In [31]:
# Metrics to plot; this list is reused by the per-series plot in the next cell
list_metrics = ["MAPE"]
# Plot the selected global metrics from `scores_df` (one row per scenario)
plot_global_scores(scores_df=scores_df, list_metrics=list_metrics)

In [33]:
# Plot the selected metrics per time series; relies on `list_metrics` defined in the previous cell
plot_scores_per_ts(scores_per_ts_df, column_id=id_col, list_metrics=list_metrics)

In [49]:
# Pick one series to inspect. A seeded generator keeps the selection (and therefore the
# figure) identical across "Restart & Run All"; change the seed to look at another series.
rng = np.random.default_rng(42)
rnd_id = rng.choice(scores_per_ts_df[id_col].unique())

# Plot actuals and every scenario's forecast for the selected series.
# Each forecast column is mapped to itself, so column names are passed through unchanged.
plot_forecasts_uid(
    uid=rnd_id,
    forecasts_df=forecasts_df,
    column_id=id_col,
    column_date=time_col,
    column_target=target_col,
    scores_per_ts_df=scores_per_ts_df,
    metric="MAPE",
    as_percentage=True,
    map_columns_forecasts={c: c for c in formatted_names},
    train_tail=30,  # presumably the number of trailing training points shown -- confirm in src.viz
    )



The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns