In [1]:
import random
import pandas as pd
import numpy as np
import tensorflow as tf
from scipy import stats
from truesight.preprocessing import Preprocessor
from truesight.core import TrueSight
from truesight.metrics import Evaluator, smape, mape, mse, rmse, mae
from sklearn.metrics import r2_score
from truesight.utils import generate_syntetic_data

# One shared seed for every random number generator this notebook relies on:
# TensorFlow, NumPy's legacy global generator, and the stdlib `random` module.
seed = 42
tf.random.set_seed(seed)
np.random.seed(seed)
random.seed(seed)

In [2]:
# Dimensions of the synthetic data and of the forecasting task.
season_length = 12
forecast_horizon = 12
num_time_steps = 60

# NOTE(review): the third positional argument (500) is a magic number whose
# meaning is not visible from this notebook -- confirm it against
# generate_syntetic_data and consider giving it a name.
df = generate_syntetic_data(num_time_steps, season_length, 500)

In [None]:
from statsforecast.models import SeasonalNaive, AutoETS, AutoARIMA
from sklearn.linear_model import LinearRegression
from xgboost.sklearn import XGBRegressor
from sklearn.tree import DecisionTreeRegressor
from truesight.utils import ModelWrapper

# Candidate models, each wrapped in ModelWrapper with the same horizon.
# The first three also receive season_length; the two scikit-learn
# regressors are given an explicit alias.
models = [
    ModelWrapper(
        LinearRegression,
        horizon=forecast_horizon,
        season_length=season_length,
        alias="LinearRegression",
    ),
    ModelWrapper(
        AutoETS,
        horizon=forecast_horizon,
        season_length=season_length,
    ),
    ModelWrapper(
        AutoARIMA,
        horizon=forecast_horizon,
        season_length=season_length,
    ),
    ModelWrapper(
        DecisionTreeRegressor,
        horizon=forecast_horizon,
        alias="DecisionTreeRegressor",
    ),
]

# Turn the synthetic frame into training / validation pieces.
# Note that `models` is rebound here to the last item returned by
# make_dataset, so later cells see that returned object rather than the
# list built above.
preprocessor = Preprocessor(df)
(
    X_train, Y_train, ids_train,
    X_val, Y_val, ids_val,
    models,
) = preprocessor.make_dataset(
    models=models,
    fallback_model=ModelWrapper(
        SeasonalNaive,
        horizon=forecast_horizon,
        season_length=season_length,
    ),
    forecast_horizon=forecast_horizon,
    season_length=season_length,
    date_freq="MS",
    verbose=True,
)

import pickle

# Persist the prepared splits and models to "synthetic.pkl".
# The file is opened in a context manager so the handle is always flushed and
# closed, even if pickling raises part-way; passing a bare open(...) straight
# to pickle.dump left the handle open for the garbage collector to clean up.
with open("synthetic.pkl", "wb") as dataset_file:
    pickle.dump(
        (X_train, Y_train, ids_train, X_val, Y_val, ids_val, models),
        dataset_file,
    )

In [4]:
# Keras callbacks passed to the training runs; both monitor validation loss.
callbacks = [
    # Stop once val_loss has not improved for 20 epochs and restore the
    # weights from the best epoch.
    tf.keras.callbacks.EarlyStopping(
        monitor="val_loss",
        patience=20,
        restore_best_weights=True,
    ),
    # Halve the learning rate after 10 epochs without val_loss improvement.
    tf.keras.callbacks.ReduceLROnPlateau(
        monitor="val_loss",
        patience=10,
        factor=0.5,
        verbose=False,
    ),
]

In [6]:
# "wcov" run: train TrueSight on copies of the complete model list and of the
# complete train / validation inputs.
X_val_wcov = X_val.copy()
X_train_wcov = X_train.copy()
models_wcov = models.copy()

# A fresh optimizer is created for this run.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
ts_wcov = TrueSight(
    models_wcov,
    forecast_horizon,
    filter_size=256,
    context_size=512,
    hidden_size=1024,
    dropout_rate=0.2,
)
ts_wcov.compile(loss='mse', optimizer=optimizer)

# NOTE(review): Keras documents validation_data as a tuple; how TrueSight.fit
# treats a list is not visible from this notebook -- confirm.
ts_wcov.fit(
    x=X_train_wcov,
    y=Y_train,
    validation_data=[X_val_wcov, Y_val],
    epochs=1000,
    batch_size=16,
    callbacks=callbacks,
    verbose=True,
)

# Plot the training history, then write the trained weights to disk.
ts_wcov.plot_training_history()
ts_wcov.save_weights('ts_wcov.h5')

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000

KeyboardInterrupt: 

In [None]:
# "nocov" run: the [-1:] slices keep only the final entry of the model list
# and of the train / validation inputs.
# NOTE(review): presumably that final entry is the input without covariates --
# confirm against Preprocessor.make_dataset.
X_val_nocov = X_val[-1:]
X_train_nocov = X_train[-1:]
models_nocov = models[-1:]

# A fresh optimizer is created for this run.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
ts_nocov = TrueSight(
    models_nocov,
    forecast_horizon,
    filter_size=256,
    context_size=512,
    hidden_size=1024,
    dropout_rate=0.2,
)
ts_nocov.compile(loss='mse', optimizer=optimizer)

# NOTE(review): Keras documents validation_data as a tuple; how TrueSight.fit
# treats a list is not visible from this notebook -- confirm.
ts_nocov.fit(
    x=X_train_nocov,
    y=Y_train,
    validation_data=[X_val_nocov, Y_val],
    epochs=1000,
    batch_size=16,
    callbacks=callbacks,
    verbose=True,
)

# Plot the training history, then write the trained weights to disk.
ts_nocov.plot_training_history()
ts_nocov.save_weights('nocov.h5')

In [None]:
# Predict the validation inputs with both trained models, using identical
# prediction settings. The nocov model is predicted first, as before.
yhat_nocov = ts_nocov.predict(
    X_val_nocov,
    n_repeats=100,
    n_quantiles=15,
    verbose=False,
)
yhat_wcov = ts_wcov.predict(
    X_val_wcov,
    n_repeats=100,
    n_quantiles=15,
    verbose=False,
)

In [None]:
# Score the ts_wcov predictions on the validation data; the final expression
# is left bare so the notebook displays the mean of each metric.
evaluator_wcov = Evaluator(X_val, Y_val, yhat_wcov, ids_val)
evaluator_wcov.evaluate_prediction(
    evaluators=[smape, mape, mse, rmse, mae],
    return_mean=True,
)

smape       0.508953
mape        0.620911
mse      1009.338173
rmse       23.243978
mae        17.380230
dtype: float64

In [None]:
# Score the ts_nocov predictions on the validation data; the final expression
# is left bare so the notebook displays the mean of each metric.
evaluator_nocov = Evaluator(X_val, Y_val, yhat_nocov, ids_val)
evaluator_nocov.evaluate_prediction(
    evaluators=[smape, mape, mse, rmse, mae],
    return_mean=True,
)

smape       0.605017
mape        0.631182
mse      1006.972819
rmse       23.098795
mae        17.470237
dtype: float64

In [None]:
# Paired t-test on smape: group_a scores the ts_wcov predictions and group_b
# the ts_nocov predictions, each averaged over axis 0 before scoring.
group_a, group_b = (
    smape(Y_val, yhat.mean(axis=0)) for yhat in (yhat_wcov, yhat_nocov)
)

t_statistic, p_value = stats.ttest_rel(a=group_a, b=group_b)

print("T-Statistic:", t_statistic)
print("P-Value:", p_value)

T-Statistic: -4.194696937529229
P-Value: 5.960989508184106e-05


In [None]:
# Paired t-test on mape: group_a scores the ts_wcov predictions and group_b
# the ts_nocov predictions, each averaged over axis 0 before scoring.
group_a, group_b = (
    mape(Y_val, yhat.mean(axis=0)) for yhat in (yhat_wcov, yhat_nocov)
)

t_statistic, p_value = stats.ttest_rel(a=group_a, b=group_b)

print("T-Statistic:", t_statistic)
print("P-Value:", p_value)

T-Statistic: -0.18566375554778192
P-Value: 0.8530883238131164


In [None]:
# Paired t-test on mse: group_a scores the ts_wcov predictions and group_b
# the ts_nocov predictions, each averaged over axis 0 before scoring.
group_a, group_b = (
    mse(Y_val, yhat.mean(axis=0)) for yhat in (yhat_wcov, yhat_nocov)
)

t_statistic, p_value = stats.ttest_rel(a=group_a, b=group_b)

print("T-Statistic:", t_statistic)
print("P-Value:", p_value)

T-Statistic: 0.13369916392845604
P-Value: 0.8939118753295798


In [None]:
# Paired t-test on rmse: group_a scores the ts_wcov predictions and group_b
# the ts_nocov predictions, each averaged over axis 0 before scoring.
group_a, group_b = (
    rmse(Y_val, yhat.mean(axis=0)) for yhat in (yhat_wcov, yhat_nocov)
)

t_statistic, p_value = stats.ttest_rel(a=group_a, b=group_b)

print("T-Statistic:", t_statistic)
print("P-Value:", p_value)

T-Statistic: 0.4354636686643309
P-Value: 0.6641738480054535


In [None]:
# Paired t-test on mae: group_a scores the ts_wcov predictions and group_b
# the ts_nocov predictions, each averaged over axis 0 before scoring.
group_a, group_b = (
    mae(Y_val, yhat.mean(axis=0)) for yhat in (yhat_wcov, yhat_nocov)
)

t_statistic, p_value = stats.ttest_rel(a=group_a, b=group_b)

print("T-Statistic:", t_statistic)
print("P-Value:", p_value)

T-Statistic: -0.26743694335893436
P-Value: 0.7896891120722491
