In [1]:
import pandas as pd
from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor
import random
from metrics import MAPE_GroupFairnessScorer, evaluate_predictions

  from .autonotebook import tqdm as notebook_tqdm


Look into autogluon tutorial for explanations of this code  
The dtype of the columns determines the interpretation and preprocessing of the features  
Autogluon has its own dataframe type with static and dynamic features

In [2]:
df = pd.read_csv("../possible_datasets/M4/train.csv")
static_features_df = pd.read_csv("../possible_datasets/M4/metadata.csv")

WEEKEND_INDICES = [5, 6]
df = df.assign(
    weekend=pd.DatetimeIndex(df["timestamp"].astype('datetime64[ns]')).weekday.isin(WEEKEND_INDICES)
)
# convert it to category as bool is intepreted as a continuous variable in autogluon
df["weekend"] = df["weekend"].astype("category")

static_features_df.domain.value_counts()

domain
Micro       43
Finance     36
Industry    11
Other        8
Macro        2
Name: count, dtype: int64

In [4]:
drop_ids = static_features_df.loc[static_features_df.domain == "Macro", "item_id"]
static_features_df = static_features_df.loc[~static_features_df.item_id.isin(drop_ids)]
df = df.loc[~df.item_id.isin(drop_ids)]

sampling strategy SMOTE needs later at least >= 5 samples to produce meaningful results so drop the items with static feature Macro

In [5]:
train_data = TimeSeriesDataFrame.from_data_frame(
    df,
    id_column="item_id",
    timestamp_column="timestamp",
    static_features_df=static_features_df,
)

PREDICTION_LENGTH = 14
train_data, test_data = train_data.train_test_split(PREDICTION_LENGTH)      #split seems to be done stratified with respect to the static features

Sorting the dataframe index before generating the train/test split.


# Baseline model

In [6]:
predictor = TimeSeriesPredictor(
    prediction_length=14,
    target="target",    #specify that target is the target and weekend a known active covariate, other dynamic features are automatically detected as known covariates
    known_covariates_names=["weekend"],
    #eval_metric is default WQL
).fit(train_data, presets="fast_training")

Beginning AutoGluon training...
AutoGluon will save models to 'c:\Users\Luca\Studium\Master\Master-project\Autogluon_Timeseries\AutogluonModels\ag-20251212_164845'
AutoGluon Version:  1.4.0
Python Version:     3.11.14
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.26200
CPU Count:          4
GPU Count:          0
Memory Avail:       1.28 GB / 7.88 GB (16.2%)
Disk Space Avail:   74.15 GB / 475.69 GB (15.6%)
Setting presets to: fast_training

Fitting with arguments:
{'enable_ensemble': True,
 'eval_metric': WQL,
 'hyperparameters': 'very_light',
 'known_covariates_names': ['weekend'],
 'num_val_windows': 1,
 'prediction_length': 14,
 'quantile_levels': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
 'random_seed': 123,
 'refit_every_n_windows': 1,
 'refit_full': False,
 'skip_model_selection': False,
 'target': 'target',
 'verbosity': 2}

Inferred time series frequency: 'D'
Provided train_data has 242073 rows, 98 time series. Median time series length is 3

In [8]:
#proof that autogluon metrics have the same implementation as sklearn ones
predictor.evaluate(test_data, metrics=["MAE", "RMSE"], display = True)


from sklearn.metrics import mean_absolute_error, root_mean_squared_error

known_covariates = predictor.make_future_data_frame(train_data)
WEEKEND_INDICES = [5, 6]
known_covariates = known_covariates.assign(
    weekend=pd.DatetimeIndex(known_covariates.loc[:, "timestamp"].astype('datetime64[ns]')).weekday.isin(WEEKEND_INDICES)
)
known_covariates.loc[:, "weekend"] = known_covariates.loc[:, "weekend"].astype("category")

predictions = predictor.predict(train_data, known_covariates=known_covariates)

y_true = test_data.loc[~test_data.index.isin(train_data.index), "target"].values
y_pred = predictions["mean"].values
mae = mean_absolute_error(y_true, y_pred)
rmse = root_mean_squared_error(y_true, y_pred)
print("MAE:", mae)
print("RMSE:", rmse)

Model not specified in predict, will default to the model with the best validation score: WeightedEnsemble
Evaluations on test data:
{
    "MAE": -190.02122867471377,
    "RMSE": -434.02524546121737
}
Length: 1372
Categories (2, bool): [False, True]' has dtype incompatible with bool, please explicitly cast to a compatible dtype first.
  known_covariates.loc[:, "weekend"] = known_covariates.loc[:, "weekend"].astype("category")
Model not specified in predict, will default to the model with the best validation score: WeightedEnsemble


MAE: 190.02122867471377
RMSE: 434.02524546121737


In [9]:
known_covariates = predictor.make_future_data_frame(train_data)
WEEKEND_INDICES = [5, 6]
known_covariates = known_covariates.assign(
    weekend=pd.DatetimeIndex(known_covariates.loc[:, "timestamp"].astype('datetime64[ns]')).weekday.isin(WEEKEND_INDICES)
)
known_covariates.loc[:, "weekend"] = known_covariates.loc[:, "weekend"].astype("category")

predictions = predictor.predict(train_data, known_covariates=known_covariates)

df_baseline = evaluate_predictions(test_data, train_data, predictor, predictions)
pd.DataFrame(df_baseline["domain"])

Length: 1372
Categories (2, bool): [False, True]' has dtype incompatible with bool, please explicitly cast to a compatible dtype first.
  known_covariates.loc[:, "weekend"] = known_covariates.loc[:, "weekend"].astype("category")
Model not specified in predict, will default to the model with the best validation score: WeightedEnsemble
Model not specified in predict, will default to the model with the best validation score: WeightedEnsemble
Model not specified in predict, will default to the model with the best validation score: WeightedEnsemble
Model not specified in predict, will default to the model with the best validation score: WeightedEnsemble
Model not specified in predict, will default to the model with the best validation score: WeightedEnsemble


Own metric:  0.017550390322636885


Unnamed: 0,Industry,Finance,Micro,Other,std,cv,max_diff,mean_diff
RMSE,-129.635232,-589.437311,-363.932133,-95.698247,229.86318,-0.780055,493.739065,285.919016
MAE,-78.494096,-272.632741,-174.433554,-55.402985,99.342637,-0.683986,217.229756,124.604787
MAPE,-0.023865,-0.029169,-0.031825,-0.020037,0.005288,-0.201648,0.011788,0.006778


# Use Fairness Metric for evalauation

In [10]:
predictor = TimeSeriesPredictor(
    prediction_length=14,
    target="target",    #specify that target is the target and weekend a known active covariate, other dynamic features are automatically detected as known covariates
    known_covariates_names=["weekend"],
    eval_metric=MAPE_GroupFairnessScorer()# train_data.static_features
).fit(train_data, presets="fast_training")

Beginning AutoGluon training...
AutoGluon will save models to 'c:\Users\Luca\Studium\Master\Master-project\Autogluon_Timeseries\AutogluonModels\ag-20251212_165506'
AutoGluon Version:  1.4.0
Python Version:     3.11.14
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.26200
CPU Count:          4
GPU Count:          0
Memory Avail:       1.01 GB / 7.88 GB (12.8%)
Disk Space Avail:   74.13 GB / 475.69 GB (15.6%)
Setting presets to: fast_training

Fitting with arguments:
{'enable_ensemble': True,
 'eval_metric': MAPE_GroupFairnessScorer,
 'hyperparameters': 'very_light',
 'known_covariates_names': ['weekend'],
 'num_val_windows': 1,
 'prediction_length': 14,
 'quantile_levels': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
 'random_seed': 123,
 'refit_every_n_windows': 1,
 'refit_full': False,
 'skip_model_selection': False,
 'target': 'target',
 'verbosity': 2}

Inferred time series frequency: 'D'
Provided train_data has 242073 rows, 98 time series. Median ti

In [12]:
known_covariates = predictor.make_future_data_frame(train_data)
WEEKEND_INDICES = [5, 6]
known_covariates = known_covariates.assign(
    weekend=pd.DatetimeIndex(known_covariates.loc[:, "timestamp"].astype('datetime64[ns]')).weekday.isin(WEEKEND_INDICES)
)
known_covariates.loc[:, "weekend"] = known_covariates.loc[:, "weekend"].astype("category")

predictions = predictor.predict(train_data, known_covariates=known_covariates)


df_regularization = evaluate_predictions(test_data, train_data, predictor, predictions)
df_regularization["domain"]

Length: 1372
Categories (2, bool): [False, True]' has dtype incompatible with bool, please explicitly cast to a compatible dtype first.
  known_covariates.loc[:, "weekend"] = known_covariates.loc[:, "weekend"].astype("category")
Model not specified in predict, will default to the model with the best validation score: ETS
Model not specified in predict, will default to the model with the best validation score: ETS
Model not specified in predict, will default to the model with the best validation score: ETS
Model not specified in predict, will default to the model with the best validation score: ETS
Model not specified in predict, will default to the model with the best validation score: ETS


Own metric:  0.015749622553199533


Unnamed: 0,Industry,Finance,Micro,Other,std,cv,max_diff,mean_diff
RMSE,-121.778391,-583.782426,-355.112093,-227.545166,198.860794,-0.617476,462.004035,252.263172
MAE,-73.513268,-267.781804,-168.981017,-100.387109,86.633453,-0.567471,194.268535,108.566586
MAPE,-0.022205,-0.028316,-0.030521,-0.029472,0.003726,-0.134859,0.008316,0.004351


In [13]:
s_base = df_baseline["domain"].copy()
s_reg = df_regularization["domain"].copy()

s_base.index = s_base.index.astype(str) + "_baseline"
s_reg.index = s_reg.index.astype(str) + "_regularization"

pd.concat([s_base, s_reg], axis=0).sort_index()

Unnamed: 0,Industry,Finance,Micro,Other,std,cv,max_diff,mean_diff
MAE_baseline,-78.494096,-272.632741,-174.433554,-55.402985,99.342637,-0.683986,217.229756,124.604787
MAE_regularization,-73.513268,-267.781804,-168.981017,-100.387109,86.633453,-0.567471,194.268535,108.566586
MAPE_baseline,-0.023865,-0.029169,-0.031825,-0.020037,0.005288,-0.201648,0.011788,0.006778
MAPE_regularization,-0.022205,-0.028316,-0.030521,-0.029472,0.003726,-0.134859,0.008316,0.004351
RMSE_baseline,-129.635232,-589.437311,-363.932133,-95.698247,229.86318,-0.780055,493.739065,285.919016
RMSE_regularization,-121.778391,-583.782426,-355.112093,-227.545166,198.860794,-0.617476,462.004035,252.263172


# Use sampling strategy/synthetic data generation 

## Simple resampling

In [14]:
df = pd.read_csv("../possible_datasets/M4/train.csv")
static_features_df = pd.read_csv("../possible_datasets/M4/metadata.csv")

drop_ids = static_features_df.loc[static_features_df.domain == "Macro", "item_id"]
static_features_df = static_features_df.loc[~static_features_df.item_id.isin(drop_ids)]
df = df.loc[~df.item_id.isin(drop_ids)]

WEEKEND_INDICES = [5, 6]
df["weekend"] = pd.DatetimeIndex(df["timestamp"].astype('datetime64[ns]').values).weekday.isin(WEEKEND_INDICES)

static_features_df.domain.value_counts()

domain
Micro       43
Finance     36
Industry    11
Other        8
Name: count, dtype: int64

In [15]:
random.seed(42)
ids_industry = random.choices(list(static_features_df[static_features_df.domain == "Industry"].item_id), k=19)
ids_other = random.choices(list(static_features_df[static_features_df.domain == "Other"].item_id), k=22)

Bring Other and Industry up to 30 datapoints

In [16]:
m4_oversampled = pd.DataFrame()
for e, id in enumerate(ids_industry + ids_other):
    m4_oversampled_id = df.loc[df.item_id == id].copy()
    m4_oversample_static = static_features_df.loc[static_features_df.item_id == id].copy()
    m4_oversampled_id.loc[:, "item_id"] += str(e)
    m4_oversample_static.loc[:, "item_id"] += str(e)
    static_features_df = pd.concat([static_features_df, m4_oversample_static], axis=0)
    df = pd.concat([df, m4_oversampled_id], axis=0)
static_features_df.domain.value_counts()

domain
Micro       43
Finance     36
Industry    30
Other       30
Name: count, dtype: int64

In [17]:
random.seed(42)
ids_industry = random.choices(list(test_data.static_features[test_data.static_features.domain == "Industry"].index), k=19)
ids_other = random.choices(list(test_data.static_features[test_data.static_features.domain == "Other"].index), k=22)

In [18]:
train_data_oversampling = TimeSeriesDataFrame.from_data_frame(
    df,
    id_column="item_id",
    timestamp_column="timestamp",
    static_features_df=static_features_df,
)

PREDICTION_LENGTH = 14
train_data_oversampling, test_data_oversampling = train_data_oversampling.train_test_split(PREDICTION_LENGTH) 

Sorting the dataframe index before generating the train/test split.


In [19]:
predictor = TimeSeriesPredictor(
    prediction_length=14,
    target="target",    #specify that target is the target and weekend a known active covariate, other dynamic features are automatically detected as known covariates
    known_covariates_names=["weekend"],
    #eval_metric is default WQL<
).fit(train_data_oversampling, presets="fast_training")

known_covariates = predictor.make_future_data_frame(train_data_oversampling)
WEEKEND_INDICES = [5, 6]
known_covariates = known_covariates.assign(
    weekend=pd.DatetimeIndex(known_covariates.loc[:, "timestamp"].astype('datetime64[ns]')).weekday.isin(WEEKEND_INDICES)
)
known_covariates.loc[:, "weekend"] = known_covariates.loc[:, "weekend"].astype("category")

predictions = predictor.predict(train_data_oversampling, known_covariates=known_covariates)

df_oversampling = evaluate_predictions(test_data_oversampling, train_data_oversampling, predictor, predictions)

Beginning AutoGluon training...
AutoGluon will save models to 'c:\Users\Luca\Studium\Master\Master-project\Autogluon_Timeseries\AutogluonModels\ag-20251212_170530'
AutoGluon Version:  1.4.0
Python Version:     3.11.14
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.26200
CPU Count:          4
GPU Count:          0
Memory Avail:       1.34 GB / 7.88 GB (17.0%)
Disk Space Avail:   74.11 GB / 475.69 GB (15.6%)
Setting presets to: fast_training

Fitting with arguments:
{'enable_ensemble': True,
 'eval_metric': WQL,
 'hyperparameters': 'very_light',
 'known_covariates_names': ['weekend'],
 'num_val_windows': 1,
 'prediction_length': 14,
 'quantile_levels': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
 'random_seed': 123,
 'refit_every_n_windows': 1,
 'refit_full': False,
 'skip_model_selection': False,
 'target': 'target',
 'verbosity': 2}

Inferred time series frequency: 'D'
Provided train_data has 254183 rows, 139 time series. Median time series length is 

Own metric:  0.014358817089837508


## Smote Oversampling

In [20]:
df_smote = pd.read_csv("synth_m4_smote.csv")
df_smote

Unnamed: 0,item_id,domain,time_index,target
0,0,Micro,0,5802.700000
1,0,Micro,1,5769.900000
2,0,Micro,2,5774.300000
3,0,Micro,3,5741.600000
4,0,Micro,4,5706.900000
...,...,...,...,...
17195,171,Other,95,1206.044854
17196,171,Other,96,1219.917460
17197,171,Other,97,1192.204928
17198,171,Other,98,1194.386444


In [21]:
train_data_smote = TimeSeriesDataFrame.from_data_frame(
    df_smote.drop("domain", axis=1),
    id_column="item_id",
    timestamp_column="time_index",
    static_features_df=df_smote[["item_id", "domain"]].drop_duplicates(),
)

PREDICTION_LENGTH = 14
train_data_smote, test_data_smote = train_data_smote.train_test_split(PREDICTION_LENGTH) 

In [22]:
predictor = TimeSeriesPredictor(
    prediction_length=14,
    target="target",    
    #known_covariates_names=["weekend"],
    #eval_metric is default WQL<
).fit(train_data_smote, presets="fast_training")


known_covariates = predictor.make_future_data_frame(train_data_smote)
WEEKEND_INDICES = [5, 6]
known_covariates = known_covariates.assign(
    weekend=pd.DatetimeIndex(known_covariates.loc[:, "timestamp"].astype('datetime64[ns]')).weekday.isin(WEEKEND_INDICES)
)
known_covariates.loc[:, "weekend"] = known_covariates.loc[:, "weekend"].astype("category")

predictions = predictor.predict(train_data_smote, known_covariates=known_covariates)

df_oversampling2 = evaluate_predictions(test_data_smote, train_data_smote, predictor, predictions)

Beginning AutoGluon training...


AutoGluon will save models to 'c:\Users\Luca\Studium\Master\Master-project\Autogluon_Timeseries\AutogluonModels\ag-20251212_170859'
AutoGluon Version:  1.4.0
Python Version:     3.11.14
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.26200
CPU Count:          4
GPU Count:          0
Memory Avail:       1.68 GB / 7.88 GB (21.3%)
Disk Space Avail:   74.10 GB / 475.69 GB (15.6%)
Setting presets to: fast_training

Fitting with arguments:
{'enable_ensemble': True,
 'eval_metric': WQL,
 'hyperparameters': 'very_light',
 'known_covariates_names': [],
 'num_val_windows': 1,
 'prediction_length': 14,
 'quantile_levels': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
 'random_seed': 123,
 'refit_every_n_windows': 1,
 'refit_full': False,
 'skip_model_selection': False,
 'target': 'target',
 'verbosity': 2}

Inferred time series frequency: 'ns'
Provided train_data has 14792 rows, 172 time series. Median time series length is 86 (min=86, max=86). 

Provided data cont

Own metric:  0.011891193209029361


## Evaluation

In [23]:
s_base = df_baseline["domain"].copy()
s_reg = df_regularization["domain"].copy()
s_over = df_oversampling["domain"].copy()
s_over2 = df_oversampling2["domain"].copy()

s_base.index = s_base.index.astype(str) + "_baseline"
s_reg.index = s_reg.index.astype(str) + "_regularization"
s_over.index = s_over.index.astype(str) + "_simple_oversampling"
s_over2.index = s_over2.index.astype(str) + "_smote_oversampling"

pd.concat([s_base, s_reg, s_over, s_over2], axis=0).sort_index()

Unnamed: 0,Industry,Finance,Micro,Other,std,cv,max_diff,mean_diff
MAE_baseline,-78.494096,-272.632741,-174.433554,-55.402985,99.342637,-0.683986,217.229756,124.604787
MAE_regularization,-73.513268,-267.781804,-168.981017,-100.387109,86.633453,-0.567471,194.268535,108.566586
MAE_simple_oversampling,-60.848085,-272.787878,-176.162979,-77.655132,98.156097,-0.668349,211.939793,122.387871
MAE_smote_oversampling,-55.047173,-125.279623,-116.968886,-45.196189,41.329653,-0.482694,80.083434,50.362003
MAPE_baseline,-0.023865,-0.029169,-0.031825,-0.020037,0.005288,-0.201648,0.011788,0.006778
MAPE_regularization,-0.022205,-0.028316,-0.030521,-0.029472,0.003726,-0.134859,0.008316,0.004351
MAPE_simple_oversampling,-0.022646,-0.029389,-0.031635,-0.024756,0.004129,-0.152314,0.008989,0.005267
MAPE_smote_oversampling,-0.022131,-0.034552,-0.039479,-0.023332,0.008503,-0.284632,0.017348,0.010544
RMSE_baseline,-129.635232,-589.437311,-363.932133,-95.698247,229.86318,-0.780055,493.739065,285.919016
RMSE_regularization,-121.778391,-583.782426,-355.112093,-227.545166,198.860794,-0.617476,462.004035,252.263172


The smote model is the worst model, resampling the second best and regularization the best