In [1]:
import sys
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/My Drive/Projetos/ZAAI
sys.path.append('/content/drive/My Drive/Projetos/ZAAI/')
%pip install -r requirements.txt

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/My Drive/Projetos/ZAAI
Collecting git+https://github.com/amazon-science/chronos-forecasting.git (from -r requirements.txt (line 1))
  Cloning https://github.com/amazon-science/chronos-forecasting.git to /tmp/pip-req-build-d05r4qec
  Running command git clone --filter=blob:none --quiet https://github.com/amazon-science/chronos-forecasting.git /tmp/pip-req-build-d05r4qec
  Resolved https://github.com/amazon-science/chronos-forecasting.git to commit b4e8085c7fa162002574e8a86d50174a28e3e95c
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


# Load Libraries and set global variables

In [2]:
%load_ext autoreload
%autoreload 2
from darts.dataprocessing.transformers import StaticCovariatesTransformer, MissingValuesFiller
from darts.utils.timeseries_generation import datetime_attribute_timeseries
from darts.utils.likelihood_models import QuantileRegression
from darts.dataprocessing.transformers import Scaler
from darts.dataprocessing.pipeline import Pipeline
from chronos import ChronosPipeline
from darts.models import TiDEModel
from darts import TimeSeries
import pandas as pd
import numpy as np
import torch
import utils

TIME_COL = "Date"
TARGET = "Weekly_Sales"
RES_TARGET = "residuals"
STATIC_COV = ["Store", "Dept", "Type", "Size"]
DYNAMIC_COV_FILL_0 = ["IsHoliday", 'MarkDown1', 'MarkDown2', 'MarkDown3', 'MarkDown4', 'MarkDown5']
DYNAMIC_COV_FILL_INTERPOLATE = ['Temperature', 'Fuel_Price', 'CPI', 'Unemployment']
FREQ = "W-FRI"

SCALER = Scaler()
TRANSFORMER = StaticCovariatesTransformer()
PIPELINE = Pipeline([SCALER, TRANSFORMER])

FORECAST_HORIZON = 17 # number of weeks to forecast
TOP_STORES = 500 # number of top stores to forecast

CHRONOS_ARCHITECTURE = ("amazon/chronos-t5-tiny", "cpu")
# CHRONOS_ARCHITECTURE = ("amazon/chronos-t5-large","cuda")
# CHRONOS_ARCHITECTURE = ("amazon/chronos-t5-tiny","cuda")

# Load Datasets for TiDE and Chronos sales forecast

In [3]:
# load data and exogenous features
df = pd.read_csv('data/train.csv')
store_info = pd.read_csv('data/stores.csv')
exo_feat = pd.read_csv('data/features.csv').drop(columns='IsHoliday')

# join all data frames
df = pd.merge(df, store_info, on=['Store'], how='left')
df = pd.merge(df, exo_feat, on=['Store', TIME_COL], how='left')

# create unique id
df["unique_id"] = df['Store'].astype(str)+'-'+df['Dept'].astype(str)

print(f"Distinct number of time series: {len(df['unique_id'].unique())}")
df

Distinct number of time series: 3331


Unnamed: 0,Store,Dept,Date,Weekly_Sales,IsHoliday,Type,Size,Temperature,Fuel_Price,MarkDown1,MarkDown2,MarkDown3,MarkDown4,MarkDown5,CPI,Unemployment,unique_id
0,1,1,2010-02-05,24924.50,False,A,151315,42.31,2.572,,,,,,211.096358,8.106,1-1
1,1,1,2010-02-12,46039.49,True,A,151315,38.51,2.548,,,,,,211.242170,8.106,1-1
2,1,1,2010-02-19,41595.55,False,A,151315,39.93,2.514,,,,,,211.289143,8.106,1-1
3,1,1,2010-02-26,19403.54,False,A,151315,46.63,2.561,,,,,,211.319643,8.106,1-1
4,1,1,2010-03-05,21827.90,False,A,151315,46.50,2.625,,,,,,211.350143,8.106,1-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
421565,45,98,2012-09-28,508.37,False,B,118221,64.88,3.997,4556.61,20.64,1.50,1601.01,3288.25,192.013558,8.684,45-98
421566,45,98,2012-10-05,628.10,False,B,118221,64.89,3.985,5046.74,,18.82,2253.43,2340.01,192.170412,8.667,45-98
421567,45,98,2012-10-12,1061.02,False,B,118221,54.47,4.000,1956.28,,7.89,599.32,3990.54,192.327265,8.667,45-98
421568,45,98,2012-10-19,760.01,False,B,118221,56.47,3.969,2004.02,,3.18,437.73,1537.49,192.330854,8.667,45-98


## Pre-process dataset

In [4]:
df[TIME_COL] = pd.to_datetime(df[TIME_COL])
df[TARGET] = np.where(df[TARGET] < 0, 0, df[TARGET]) # remove negative values
df[['MarkDown1', 'MarkDown2', 'MarkDown3', 'MarkDown4','MarkDown5']] = df[['MarkDown1', 'MarkDown2', 'MarkDown3', 'MarkDown4','MarkDown5']].fillna(0) # fill missing values with nan
df["IsHoliday"] = df["IsHoliday"]*1 # convert boolean into binary
df["Size"] = np.where(df["Size"] < store_info["Size"].quantile(0.25), "small",
                np.where(df["Size"] > store_info["Size"].quantile(0.75), "large",
                "medium")) # make size a categorical variable

top_stores = df.groupby(['unique_id']).agg({TARGET: 'sum'}).reset_index().sort_values(by=TARGET, ascending=False).head(TOP_STORES)
df = df[df['unique_id'].isin(top_stores['unique_id'])]

print(f"Distinct number of time series: {len(df['unique_id'].unique())}")
df

Distinct number of time series: 500


Unnamed: 0,Store,Dept,Date,Weekly_Sales,IsHoliday,Type,Size,Temperature,Fuel_Price,MarkDown1,MarkDown2,MarkDown3,MarkDown4,MarkDown5,CPI,Unemployment,unique_id
143,1,2,2010-02-05,50605.27,0,A,medium,42.31,2.572,0.00,0.00,0.00,0.00,0.00,211.096358,8.106,1-2
144,1,2,2010-02-12,44682.74,1,A,medium,38.51,2.548,0.00,0.00,0.00,0.00,0.00,211.242170,8.106,1-2
145,1,2,2010-02-19,47928.89,0,A,medium,39.93,2.514,0.00,0.00,0.00,0.00,0.00,211.289143,8.106,1-2
146,1,2,2010-02-26,44292.87,0,A,medium,46.63,2.561,0.00,0.00,0.00,0.00,0.00,211.319643,8.106,1-2
147,1,2,2010-03-05,48397.98,0,A,medium,46.50,2.625,0.00,0.00,0.00,0.00,0.00,211.350143,8.106,1-2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
421285,45,95,2012-09-28,49380.11,0,B,medium,64.88,3.997,4556.61,20.64,1.50,1601.01,3288.25,192.013558,8.684,45-95
421286,45,95,2012-10-05,50241.01,0,B,medium,64.89,3.985,5046.74,0.00,18.82,2253.43,2340.01,192.170412,8.667,45-95
421287,45,95,2012-10-12,49334.77,0,B,medium,54.47,4.000,1956.28,0.00,7.89,599.32,3990.54,192.327265,8.667,45-95
421288,45,95,2012-10-19,48434.97,0,B,medium,56.47,3.969,2004.02,0.00,3.18,437.73,1537.49,192.330854,8.667,45-95


# Training with the whole dataset until the start of the window, and forecast for each window

In [5]:
window1_start=pd.to_datetime('2012-01-20')
window1=(window1_start,window1_start + pd.Timedelta(weeks=FORECAST_HORIZON))

window2_start = pd.to_datetime('2012-03-30')
window2=(window2_start,window2_start + pd.Timedelta(weeks=FORECAST_HORIZON))

window3_start = pd.to_datetime('2012-06-08')
window3=(window3_start,window3_start + pd.Timedelta(weeks=FORECAST_HORIZON))

window4_start = pd.to_datetime('2012-08-17')
window4=(window4_start,window4_start + pd.Timedelta(weeks=FORECAST_HORIZON))

windows = [window1, window2, window3, window4]
predictions = []

In [6]:
window = (window1[0], window1[1])

## Split Data

In [7]:
train = df[(df[TIME_COL] <= window[0])]
test = df[(df[TIME_COL] > window[0]) & (df[TIME_COL] <= window[1])]
df = df[(df[TIME_COL] <= window[1])]

train_darts = TimeSeries.from_group_dataframe(
    df=train,
    group_cols=STATIC_COV,
    time_col=TIME_COL,
    value_cols=TARGET,
    freq=FREQ,
    fill_missing_dates=True,
    fillna_value=0)

print(f"Weeks for training: {len(train[TIME_COL].unique())} from {min(train[TIME_COL]).date()} to {max(train[TIME_COL]).date()}")
print(f"Weeks for testing: {len(test[TIME_COL].unique())} from {min(test[TIME_COL]).date()} to {max(test[TIME_COL]).date()}")

Weeks for training: 103 from 2010-02-05 to 2012-01-20
Weeks for testing: 17 from 2012-01-27 to 2012-05-18


# TiDE

In [8]:
tide_forecast = utils.tide_prediction(window=window, dataframe=df, forecast_horizon=17)
tide_forecast

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=50` reached.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

Unnamed: 0,Date,forecast,forecast_lower,forecast_upper,unique_id
0,2012-01-27,43472.821310,43161.178324,45512.287601,1-2
1,2012-02-03,47751.782646,42656.472086,48250.348442,1-2
2,2012-02-10,46197.437261,45049.820138,48064.559809,1-2
3,2012-02-17,47263.975248,42930.588159,47468.421019,1-2
4,2012-02-24,48114.845397,46158.860595,50505.180483,1-2
...,...,...,...,...,...
12,2012-04-20,58339.401981,54714.068364,58950.440822,45-95
13,2012-04-27,56947.207025,54289.290816,58024.060940,45-95
14,2012-05-04,63442.912700,57801.738671,64868.079749,45-95
15,2012-05-11,60936.119495,58847.516991,61858.096428,45-95


In [9]:
ola

NameError: name 'ola' is not defined

In [None]:
# tide_forecast = pd.read_csv('data/tide_forecast.csv')

In [None]:
# create dynamic covariates for each serie in the training darts
dynamic_covariates = []
for serie in train_darts:
    # add the month and week as a covariate
    covariate = datetime_attribute_timeseries(
        serie,
        attribute="month",
        one_hot=True,
        cyclic=False,
        add_length=FORECAST_HORIZON,
    )
    covariate = covariate.stack(
        datetime_attribute_timeseries(
            serie,
            attribute="week",
            one_hot=True,
            cyclic=False,
            add_length=FORECAST_HORIZON,
        )
    )

    store = serie.static_covariates['Store'].item()
    dept = serie.static_covariates['Dept'].item()

    # create covariates to fill with 0
    covariate = covariate.stack(
                TimeSeries.from_dataframe(df[(df['Store'] == store) & (df['Dept'] == dept)], time_col=TIME_COL, value_cols=DYNAMIC_COV_FILL_0, freq=FREQ, fill_missing_dates=True, fillna_value=0)
            )

    # create covariates to fill with interpolation
    dyn_cov_interp = TimeSeries.from_dataframe(df[(df['Store'] == store) & (df['Dept'] == dept)], time_col=TIME_COL, value_cols=DYNAMIC_COV_FILL_INTERPOLATE, freq=FREQ, fill_missing_dates=True)
    covariate = covariate.stack(MissingValuesFiller().transform(dyn_cov_interp))

    dynamic_covariates.append(covariate)

In [None]:
# scale covariates
dynamic_covariates_transformed = SCALER.fit_transform(dynamic_covariates)

# scale data and transform static covariates
data_transformed = PIPELINE.fit_transform(train_darts)

TiDE_params = {
    "input_chunk_length": len(train[TIME_COL].unique()) - FORECAST_HORIZON, # number of weeks to lookback
    "output_chunk_length": FORECAST_HORIZON, # number of weeks to forecast
    "num_encoder_layers": 2,
    "num_decoder_layers": 2,
    "decoder_output_dim": 1,
    "hidden_size": 15,
    "temporal_width_past": 4,
    "temporal_width_future": 4,
    "temporal_decoder_hidden": 26,
    "dropout": 0.1,
    "batch_size": 16,
    "n_epochs": 50,
    "likelihood": QuantileRegression(quantiles=[0.25, 0.5, 0.75]),
    "random_state": 42,
    "use_static_covariates": True,
    "optimizer_kwargs": {"lr": 1e-3},
    "use_reversible_instance_norm": False,
}

model = TiDEModel(**TiDE_params)
model.fit(data_transformed, future_covariates=dynamic_covariates_transformed, verbose=False)
pred = PIPELINE.inverse_transform(model.predict(n=FORECAST_HORIZON, series=data_transformed, future_covariates=dynamic_covariates_transformed, num_samples=50))
tide_forecast = utils.transform_predictions_to_pandas(pred, TARGET, train_darts, [0.25, 0.5, 0.75])
tide_forecast.to_csv('data/tide_forecast.csv', index=False)
tide_forecast

# Chronos

In [None]:
# chronos_forecast = pd.read_csv('data/chronos_forecast.csv')

In [None]:
# Load the Chronos pipeline
pipeline = ChronosPipeline.from_pretrained(
    CHRONOS_ARCHITECTURE[0],
    device_map=CHRONOS_ARCHITECTURE[1],
    torch_dtype=torch.bfloat16)

forecast = []
for ts in train_darts:
    # Forecast
    lower, mid, upper = utils.chronos_forecast(pipeline, ts.pd_dataframe().reset_index(), FORECAST_HORIZON)
    forecast.append(utils.convert_forecast_to_pandas([lower, mid, upper], test[test['unique_id'] == str(int(list(ts.static_covariates_values())[0][0]))+'-'+str(int(list(ts.static_covariates_values())[0][1]))]))
# Convert list to data frames
chronos_forecast = pd.concat(forecast)
chronos_forecast.to_csv('data/chronos_forecast.csv', index=False)
chronos_forecast

# Now let's combine the forecasts from Chronos and residuals forecast from TiDE

In [None]:
# final_forecast = pd.read_csv('data/final_forecast.csv')

## TiDE residuals forecast

In [None]:
residuals = pd.read_csv('data/residuals.csv')
residuals[TIME_COL] = pd.to_datetime(residuals[TIME_COL])
residuals[['Store', 'Dept']] = residuals['unique_id'].str.split('-', expand=True).astype(int)

residuals_train = residuals[residuals[TIME_COL] <= window[0]]
residuals = residuals[(residuals[TIME_COL] <= window[1])]

residuals_darts = TimeSeries.from_group_dataframe(
    df=residuals_train,
    group_cols=STATIC_COV,
    time_col=TIME_COL,
    value_cols=RES_TARGET,
    freq=FREQ,
    fill_missing_dates=True,
    fillna_value=0)

print(f"Weeks for training: {len(residuals_train[TIME_COL].unique())} from {min(residuals_train[TIME_COL]).date()} to {max(residuals_train[TIME_COL]).date()}")

## Create covariates

In [None]:
# create dynamic covariates for each serie in the training darts
dynamic_covariates = []
for serie in residuals_darts:
    # add the month and week as a covariate
    covariate = datetime_attribute_timeseries(
        serie,
        attribute="month",
        one_hot=True,
        cyclic=False,
        add_length=FORECAST_HORIZON,
    )
    covariate = covariate.stack(
        datetime_attribute_timeseries(
            serie,
            attribute="week",
            one_hot=True,
            cyclic=False,
            add_length=FORECAST_HORIZON,
        )
    )

    store = serie.static_covariates['Store'].item()
    dept = serie.static_covariates['Dept'].item()

    # create covariates to fill with 0
    covariate = covariate.stack(
                TimeSeries.from_dataframe(residuals[(residuals['Store'] == store) & (residuals['Dept'] == dept)], time_col=TIME_COL, value_cols=DYNAMIC_COV_FILL_0, freq=FREQ, fill_missing_dates=True, fillna_value=0)
            )

    # create covariates to fill with interpolation
    dyn_cov_interp = TimeSeries.from_dataframe(residuals[(residuals['Store'] == store) & (residuals['Dept'] == dept)], time_col=TIME_COL, value_cols=DYNAMIC_COV_FILL_INTERPOLATE, freq=FREQ, fill_missing_dates=True)
    covariate = covariate.stack(MissingValuesFiller().transform(dyn_cov_interp))

    dynamic_covariates.append(covariate)

## Train and predict

In [None]:
# scale covariates
dynamic_covariates_transformed = SCALER.fit_transform(dynamic_covariates)

# scale data and transform static covariates
data_transformed = PIPELINE.fit_transform(residuals_darts)

TiDE_params = {
    "input_chunk_length": len(residuals_train[TIME_COL].unique()) - FORECAST_HORIZON, # number of weeks to lookback
    "output_chunk_length": FORECAST_HORIZON,
    "num_encoder_layers": 2,
    "num_decoder_layers": 2,
    "decoder_output_dim": 1,
    "hidden_size": 15,
    "temporal_width_past": 4,
    "temporal_width_future": 4,
    "temporal_decoder_hidden": 26,
    "dropout": 0.1,
    "batch_size": 16,
    "n_epochs": 15,
    "likelihood": QuantileRegression(quantiles=[0.25, 0.5, 0.75]),
    "random_state": 42,
    "use_static_covariates": True,
    "optimizer_kwargs": {"lr": 1e-3},
    "use_reversible_instance_norm": False,
}

model = TiDEModel(**TiDE_params)
model.fit(data_transformed, future_covariates=dynamic_covariates_transformed, verbose=False)
pred = PIPELINE.inverse_transform(model.predict(n=FORECAST_HORIZON, series=data_transformed, future_covariates=dynamic_covariates_transformed, num_samples=50))
residuals_forecast = utils.transform_predictions_to_pandas(pred, RES_TARGET, residuals_darts, [0.25, 0.5, 0.75], convert=False)
residuals_forecast

## Final forecast

In [None]:
# Concatenate the two dataframes
combined_df = pd.concat([chronos_forecast, residuals_forecast])

# Group by 'unique_id' and 'Date' and sum the forecast values
final_forecast = combined_df.groupby(['unique_id', 'Date']).agg({
    'forecast_lower': 'sum',
    'forecast': 'sum',
    'forecast_upper': 'sum'
}).reset_index()

final_forecast.to_csv('data/final_forecast.csv', index=False)
final_forecast

# Plot Actuals and Forecast

In [None]:
# get series ordered by volume in a descending way
series = test.groupby('unique_id')[TARGET].sum().reset_index().sort_values(by=TARGET, ascending=False)['unique_id'].tolist()

for ts in series[:1]:
    forecasts = [(final_forecast[final_forecast["unique_id"] == ts],"Chronos + TiDE"),
                 (chronos_forecast[chronos_forecast["unique_id"] == ts],"Chronos"),
                 (tide_forecast[tide_forecast["unique_id"] == ts],"TiDE")]

    utils.plot_multiple_forecasts(actuals_data=df[df["unique_id"]==ts],
                                  forecast_data_list=forecasts,
                                  title="Actuals vs Forecast",
                                  y_label="Weekly Sales",
                                  x_label="Date",
                                  forecast_horizon=FORECAST_HORIZON,
                                  interval=False)

## Evaluate forecast

In [None]:
tide_mape = utils.evaluation_metrics(tide_forecast,test)

print(f"TiDE Mean Absolute Percentage Error: {tide_mape}\n")

chronos_mape = utils.evaluation_metrics(chronos_forecast,test)

print(f"Chronos Mean Absolute Percentage Error: {chronos_mape}\n")

final_mape = utils.evaluation_metrics(final_forecast,test)

print(f"Chronos+TiDE Mean Absolute Percentage Error: {final_mape}\n")