In [1]:
!pip install autogluon
import pandas as pd
import numpy as np
from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor
from sklearn.metrics import mean_squared_error, mean_absolute_error
import warnings
import time
import os

warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=UserWarning)

def qlike_loss(y_true, y_pred, epsilon=1e-8):
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    mask = (y_true > epsilon) & (y_pred > epsilon)
    y_true = y_true[mask]
    y_pred = y_pred[mask]
    
    if len(y_true) == 0:
        return np.nan

    y_pred = y_pred + epsilon
    ratio = y_true / y_pred
    loss = ratio - np.log(ratio) - 1
    return np.mean(loss)


df_train = pd.read_csv("/kaggle/input/yfinance/df_before_shock.csv")
df_test = pd.read_csv("/kaggle/input/yfinance/df_after_shock.csv")

df_train['Date'] = pd.to_datetime(df_train['Date'])
df_test['Date'] = pd.to_datetime(df_test['Date'])

target_variable = "Realized_Volatility_Daily"
price_variable = "Close"


df_train = df_train.dropna(subset=[target_variable])


df_train['item_id'] = 'Stock'
df_test['item_id'] = 'Stock'

train_data = TimeSeriesDataFrame.from_data_frame(
    df_train,
    id_column="item_id",
    timestamp_column="Date"
)

test_data = TimeSeriesDataFrame.from_data_frame(
    df_test,
    id_column="item_id",
    timestamp_column="Date"
)


prediction_length = len(df_test)
save_path = "AutogluonModels_TimeSeries_GPU_Extended_Volatility_Target_FullHorizon"
os.makedirs(save_path, exist_ok=True)


predictor = TimeSeriesPredictor(
    prediction_length=prediction_length,
    path=save_path,
    target=target_variable,
    eval_metric='RMSE',
    freq="B"
)


start_time = time.time()
predictor.fit(
    train_data,
    presets="best_quality",
    time_limit=3600,
)
end_time = time.time()


leaderboard = predictor.leaderboard(test_data, silent=True)
best_model_name = predictor.model_best
predictions = predictor.predict(train_data)


predicted_volatility_df = predictions.reset_index()
predicted_volatility_df = predicted_volatility_df.rename(columns={'mean': f'Predicted_{target_variable}'})


actual_cols = ['Date', 'item_id', target_variable]
if price_variable in df_test.columns:
    actual_cols.append(price_variable)

actual_data = df_test[actual_cols].copy()
actual_data = actual_data.rename(columns={'Date': 'timestamp'})
actual_data_clean = actual_data.dropna(subset=[target_variable])


merge_cols = ['timestamp']
if 'item_id' in actual_data_clean.columns and 'item_id' in predicted_volatility_df.columns:
    merge_cols.append('item_id')

comparison_df = pd.merge(actual_data_clean, predicted_volatility_df, on=merge_cols, how='inner')
comparison_df = comparison_df.dropna(subset=[f'Predicted_{target_variable}'])


volatility_rmse = np.sqrt(mean_squared_error(comparison_df[target_variable], comparison_df[f'Predicted_{target_variable}']))
volatility_mae = mean_absolute_error(comparison_df[target_variable], comparison_df[f'Predicted_{target_variable}'])
volatility_qlike = qlike_loss(comparison_df[target_variable], comparison_df[f'Predicted_{target_variable}'])


price_target_variable = "Adj Close"
price_save_path = "AutogluonModels_TimeSeries_GPU_Price_Target"
os.makedirs(price_save_path, exist_ok=True)

price_predictor = TimeSeriesPredictor(
    prediction_length=prediction_length,
    path=price_save_path,
    target=price_target_variable,
    eval_metric='RMSE',
    freq="B"
)


start_time_price = time.time()
price_predictor.fit(
    train_data,
    presets="best_quality",
    time_limit=3600,
)
end_time_price = time.time()


price_predictions = price_predictor.predict(train_data)
price_predictions_df = price_predictions.reset_index()
price_predictions_df = price_predictions_df.rename(columns={'mean': f'Predicted_{price_target_variable}'})


actual_price_data = df_test[['Date', 'item_id', price_target_variable]].copy()
actual_price_data = actual_price_data.rename(columns={'Date': 'timestamp'})
actual_price_data_clean = actual_price_data.dropna(subset=[price_target_variable])
print(f"twierdzenie_asteriksa_obeliksa(debugger):{actual_price_data_clean.head()}")

price_comparison_df = pd.merge(
    actual_price_data_clean, 
    price_predictions_df[['timestamp', 'item_id', f'Predicted_{price_target_variable}']], 
    on=['timestamp', 'item_id'], 
    how='inner'
)
price_comparison_df = price_comparison_df.rename(columns={'mean': f'Predicted_{price_target_variable}'})


price_rmse = np.sqrt(mean_squared_error(
    price_comparison_df[price_target_variable], 
    price_comparison_df[f'Predicted_{price_target_variable}']
))
price_mae = mean_absolute_error(
    price_comparison_df[price_target_variable], 
    price_comparison_df[f'Predicted_{price_target_variable}']
)

print(f"Price RMSE: {price_rmse}")
print(f"Price MAE: {price_mae}")
print(f"Volatility RMSE:{volatility_rmse}")
print(f"Volatility MAE: {volatility_mae}")
print(f"Volatility QLIKE: {volatility_qlike}")

Collecting autogluon
  Downloading autogluon-1.2-py3-none-any.whl.metadata (11 kB)
Collecting autogluon.core==1.2 (from autogluon.core[all]==1.2->autogluon)
  Downloading autogluon.core-1.2-py3-none-any.whl.metadata (12 kB)
Collecting autogluon.features==1.2 (from autogluon)
  Downloading autogluon.features-1.2-py3-none-any.whl.metadata (11 kB)
Collecting autogluon.tabular==1.2 (from autogluon.tabular[all]==1.2->autogluon)
  Downloading autogluon.tabular-1.2-py3-none-any.whl.metadata (14 kB)
Collecting autogluon.multimodal==1.2 (from autogluon)
  Downloading autogluon.multimodal-1.2-py3-none-any.whl.metadata (12 kB)
Collecting autogluon.timeseries==1.2 (from autogluon.timeseries[all]==1.2->autogluon)
  Downloading autogluon.timeseries-1.2-py3-none-any.whl.metadata (12 kB)
Collecting scikit-learn<1.5.3,>=1.4.0 (from autogluon.core==1.2->autogluon.core[all]==1.2->autogluon)
  Downloading scikit_learn-1.5.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (

Beginning AutoGluon training... Time limit = 3600s
AutoGluon will save models to '/kaggle/working/AutogluonModels_TimeSeries_GPU_Extended_Volatility_Target_FullHorizon'
AutoGluon Version:  1.2
Python Version:     3.10.12
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP PREEMPT_DYNAMIC Sun Nov 10 10:07:59 UTC 2024
CPU Count:          4
GPU Count:          2
Memory Avail:       30.18 GB / 31.35 GB (96.3%)
Disk Space Avail:   19.50 GB / 19.52 GB (99.9%)
Setting presets to: best_quality

Fitting with arguments:
{'enable_ensemble': True,
 'eval_metric': RMSE,
 'freq': 'B',
 'hyperparameters': 'default',
 'known_covariates_names': [],
 'num_val_windows': 2,
 'prediction_length': 796,
 'quantile_levels': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
 'random_seed': 123,
 'refit_every_n_windows': 1,
 'refit_full': False,
 'skip_model_selection': False,
 'target': 'Realized_Volatility_Daily',
 'time_limit': 3600,
 'verbosity': 2}

train_data with frequency 'None'

config.json:   0%|          | 0.00/1.12k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/821M [00:00<?, ?B/s]

	-17.0792      = Validation score (-RMSE)
	27.27   s     = Training runtime
	6.78    s     = Validation (prediction) runtime
Training timeseries model ChronosFineTuned[bolt_small]. Training for up to 583.5s of the 3500.9s of remaining time.
	Skipping covariate_regressor since the dataset contains no covariates or static features.


config.json:   0%|          | 0.00/1.12k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/191M [00:00<?, ?B/s]

	Saving fine-tuned model to /kaggle/working/AutogluonModels_TimeSeries_GPU_Extended_Volatility_Target_FullHorizon/models/ChronosFineTuned[bolt_small]/W0/fine-tuned-ckpt
	-17.9823      = Validation score (-RMSE)
	138.39  s     = Training runtime
	0.26    s     = Validation (prediction) runtime
Training timeseries model TemporalFusionTransformer. Training for up to 690.6s of the 3362.2s of remaining time.
  return op(a, b)
	-27.7219      = Validation score (-RMSE)
	237.44  s     = Training runtime
	0.07    s     = Validation (prediction) runtime
Training timeseries model DeepAR. Training for up to 841.6s of the 3124.7s of remaining time.
  return op(a, b)
	-20.3302      = Validation score (-RMSE)
	39.26   s     = Training runtime
	3.68    s     = Validation (prediction) runtime
Training timeseries model PatchTST. Training for up to 1240.9s of the 3081.7s of remaining time.
  return op(a, b)
	-20.2608      = Validation score (-RMSE)
	21.42   s     = Training runtime
	0.07    s     = Valid

twierdzenie_asteriksa_obeliksa(debugger):   timestamp item_id   Adj Close
0 2022-02-24   Stock   95.575851
1 2022-02-25   Stock  102.262817
2 2022-02-28   Stock  127.625526
3 2022-03-01   Stock  149.596985
4 2022-03-02   Stock  145.011627
Price RMSE: 328.9238648901407
Price MAE: 262.4876016007161
Volatility RMSE:17.33507011210425
Volatility MAE: 11.267449762686509
Volatility QLIKE: 0.08458883039548344
