Random code, ignore this

In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_squared_error

from fxvol.backtest import run_backtest
from fxvol.data_utils import load_csv, save_csv
from fxvol.fin_comp import qlike_loss, realized_vol
from fxvol.models import (
    ewma_forecast,
    garch11_forecast,
    har_forecast,
    naive_forecast,
    rolling_mean_forecast,
)

In [2]:
# Data: load the processed log returns and derive the EUR realized-volatility series.

# Used as the realized_vol window here and as the `horizon` argument of run_backtest
# in later cells.
HORIZON = 5
# dropna() with pandas defaults removes every row that has a NaN in any column,
# before the EUR column is selected.
log_ret = load_csv("processed", "log_returns").dropna()
eur_ret = log_ret["EUR"]
# realized_vol comes from fxvol.fin_comp; NaN entries of its result are dropped
# (presumably the warm-up of the rolling window — TODO confirm).
real_vol = realized_vol(eur_ret, window=HORIZON).dropna()

In [29]:
# real_vol on dates where the EUR return is negative, 0.0 elsewhere; dates present in
# only one of the two series multiply to NaN and are dropped.
# NOTE(review): the recorded outputs for `exog` (first date, one-row lag versus
# real_vol, 5 NaNs reported) do not match this expression — presumably produced by an
# earlier version of this cell; re-run to confirm.
exog = eur_ret.lt(0).mul(real_vol).dropna()

In [31]:
exog.iloc[-1]

0.0

In [25]:
real_vol

Date
2010-01-08    0.005153
2010-01-11    0.005954
2010-01-12    0.005630
2010-01-13    0.005667
2010-01-14    0.004393
                ...   
2025-12-23    0.002690
2025-12-24    0.002836
2025-12-26    0.002869
2025-12-29    0.002807
2025-12-30    0.002569
Name: EUR, Length: 4160, dtype: float64

In [24]:
exog

Date
2010-01-11    0.000000
2010-01-12    0.000000
2010-01-13    0.005630
2010-01-14    0.000000
2010-01-15    0.004393
                ...   
2025-12-23    0.001188
2025-12-24    0.000000
2025-12-26    0.000000
2025-12-29    0.002869
2025-12-30    0.002807
Name: EUR, Length: 4159, dtype: float64

In [5]:
eur_ret.isna().sum()

0

In [4]:
real_vol.isna().sum()

0

In [6]:
exog.isna().sum()

5

In [None]:
from arch import arch_model

In [None]:
# The model is fitted on returns multiplied by 100; later cells multiply real_vol by
# 100 or divide forecasts by 100 to compare on the same scale.
scaled_ret = eur_ret * 100
# GARCH(1, 1) volatility process with no asymmetric term (o=0) and normal innovations.
am = arch_model(
    scaled_ret,
    vol="GARCH",
    p=1,
    o=0,
    q=1,
    dist="normal",
)

In [None]:
res = am.fit(update_freq=5, disp='off')

In [None]:
# Per-step volatility forecasts (square root of the forecast variance), still on the
# x100 scale of scaled_ret. Uses HORIZON instead of a hard-coded 5 so the forecast
# length follows the constant defined in the data cell.
res.forecast(horizon=HORIZON).variance ** 0.5

In [None]:
100*real_vol.iloc[-1]

In [None]:
# Last-step forecast from the final row of the `.mean` table, divided by 100 to undo
# the scaling applied to scaled_ret.
# The original referenced `horizon` (lower case), which is not defined anywhere in this
# notebook; the constant is HORIZON.
# NOTE(review): `.mean` is the forecast of the conditional mean of returns, not of
# volatility — if a value comparable to real_vol was intended, `.variance ** 0.5` is
# probably what was meant; confirm.
res.forecast(horizon=HORIZON).mean.iloc[-1, -1] / 100

In [None]:
# Candidate forecasters as (forecast function, label, extra keyword arguments) triples.
# Commented-out entries are skipped (presumably to restrict a run to a subset of models).
models = [
    # (naive_forecast, "naive", {}),
    # (rolling_mean_forecast, "rolling5", {"window": 5}),
    # (rolling_mean_forecast, "rolling20", {"window": 20}),
    # (rolling_mean_forecast, "rolling50", {"window": 50}),
    # (rolling_mean_forecast, "rolling100", {"window": 100}),
    # (ewma_forecast, "ewma092", {}),
    # (ewma_forecast, "ewma030", {"alpha": 0.3}),
    # (har_forecast, "har1-5-22", {"lags": [1, 5, 22]}),
    # (har_forecast, "har1-5-22-66", {"lags": [1, 5, 22, 66]}),
    (garch11_forecast, "garch11", {}),
]

# Run backtest
#
# HORIZON is taken from the data cell and deliberately not re-assigned here, so the
# realized-vol window and the backtest horizon cannot silently drift apart.

SCORE_COLUMNS = ["RMSE", "MAE", "QLIKE"]

# label -> [RMSE, MAE, QLIKE]; collected first and turned into a frame once, instead
# of filling a pre-allocated object-dtype DataFrame row by row.
score_rows = {}

for forecast_fn, name, params in models:
    # `results` is left bound to the last model's backtest output; a later cell displays it.
    results = run_backtest(
        log_ret=eur_ret, forecast_fn=forecast_fn, horizon=HORIZON, stride=100, **params
    )
    y_true = results["y_true"]
    y_pred = results["y_pred"]
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mae = mean_absolute_error(y_true, y_pred)
    qlike = qlike_loss(y_true, y_pred)
    score_rows[name] = [rmse, mae, qlike]

# One row per model label, in the order of `models`.
scores = pd.DataFrame.from_dict(
    score_rows, orient="index", columns=SCORE_COLUMNS
).astype(float)

save_csv(scores.round(5), "results", "baselines")

In [None]:
results

In [None]:
from arch.univariate import HARX

In [None]:
# HAR specification on the realized-vol series with lags 1, 5 and 22 (the same lags as
# the commented-out "har1-5-22" entry of the backtest model table). Not fitted here.
har = HARX(real_vol, lags=[1, 5, 22])

In [None]:
res = har.fit()

In [None]:
res.forecast(horizon=5).mean

In [None]:
har

In [None]:
har.fit()

In [None]:
# EWMA backtest expressed through the function-based run_backtest API used by the
# backtest loop. The original passed `EWMA(0.3)`, but `EWMA` is neither imported nor
# defined in this notebook, so the cell raised NameError.
# NOTE(review): assumes 0.3 was the smoothing parameter, i.e. the same configuration as
# the commented-out "ewma030" entry ({"alpha": 0.3}) — confirm.
df_res = run_backtest(
    log_ret=eur_ret, forecast_fn=ewma_forecast, horizon=HORIZON, alpha=0.3
)
y_true = df_res["y_true"]
y_pred = df_res["y_pred"]

In [None]:
from fxvol.data_utils import save_csv

# Persist the backtest frame rounded to 3 decimals.
# NOTE(review): this writes to the same ("results", "baselines") target that the
# backtest-loop cell uses for the `scores` table — presumably one overwrites the other;
# confirm which table is meant to live there.
save_csv(df_res.round(3), "results", "baselines")

In [None]:
df_res.round(3)

In [None]:
df_res.iloc[-200:].plot(y=["y_true", "y_pred"])

In [None]:
# Default figure size for the plots in the following cells.
# NOTE(review): `plt` is not imported in any visible cell (expected:
# `import matplotlib.pyplot as plt`); unless it is imported elsewhere, this line raises
# NameError on a fresh kernel.
plt.rcParams["figure.figsize"] = [15, 8]

In [None]:
# use darts plotting style
from darts import set_option

set_option("plotting.use_darts_style", True)

In [None]:
import warnings

warnings.filterwarnings("ignore", category=SyntaxWarning)

In [None]:
from darts import TimeSeries

In [None]:
from fxvol.data_utils import load_csv

# Load the processed historic-vol table and drop rows with missing values in a single
# expression, the same way log_returns is loaded in the data cell. Avoids mutating the
# loaded frame in place, so re-running the cell always starts from the file contents.
historic_vol = load_csv("processed", "historic_vol").dropna()

In [None]:
historic_vol.index = pd.to_datetime(historic_vol.index)

In [None]:
series = TimeSeries.from_dataframe(historic_vol[["EUR"]], freq="D")

In [None]:
from statsmodels.tsa.stattools import adfuller

adf_result = adfuller(historic_vol[["EUR"]], maxlag=100, regression="ctt")

In [None]:
# p-value of the augmented Dickey-Fuller test (null hypothesis: the series has a unit root)
adf_result[1]
# Small p-value -> reject the unit root, i.e. the series is stationary around the
# deterministic terms included by regression="ctt" (constant, linear and quadratic trend).

In [None]:
eur = historic_vol[["EUR"]].diff().dropna()

In [None]:
adf_result = adfuller(eur, maxlag=100, regression="ctt")
adf_result[1]

In [None]:
from darts.utils.missing_values import fill_missing_values

series = fill_missing_values(series)

In [None]:
series.plot()

In [None]:
train, val = series.split_before(pd.Timestamp("20200101"))
train.plot(label="training")
val.plot(label="validation")

In [None]:
from darts.metrics import mape

In [None]:
from darts.models import AutoARIMA, ExponentialSmoothing, Theta


def eval_model(model):
    """Fit ``model`` on the notebook-level ``train`` series and print its MAPE on ``val``.

    The model is fitted in place, asked for ``len(val)`` forecast steps, and the
    score from ``mape(val, forecast)`` is printed with two decimals. Nothing is
    returned.
    """
    model.fit(train)
    n_steps = len(val)
    predicted = model.predict(n_steps)
    score = mape(val, predicted)
    print(f"model {model} obtains MAPE: {score:.2f}%")


# Evaluate each baseline with its default settings, one after the other; every model
# is instantiated right before it is evaluated.
for model_cls in (ExponentialSmoothing, AutoARIMA, Theta):
    eval_model(model_cls())

In [None]:
from darts.models import AutoARIMA, ExponentialSmoothing, Theta

In [None]:
model = AutoARIMA()
model.fit(train)
pred = model.predict(len(val))

In [None]:
train.plot(label="training")
val.plot(label="validation")
pred.plot(label="prediction")

In [None]:
hist_for = model.historical_forecasts(
    series=series,
    start=0.8,
    val_length=21,
    forecast_horizon=21,
    stride=21,
    verbose=True,
)

In [None]:
_, val80 = series.split_after(0.80)

In [None]:
val80.plot()
hist_for.plot()

In [None]:
print(mape(val80, hist_for))

In [None]:
from darts.models.forecasting.baselines import NaiveMovingAverage, NaiveDrift

In [None]:
model = NaiveMovingAverage(input_chunk_length=1)
# model = NaiveDrift()
model.fit(train)
pred = model.predict(len(val))

In [None]:
hist_for = model.historical_forecasts(
    series=series, start=0.8, forecast_horizon=21, stride=21, verbose=True
)

In [None]:
val80.plot()
hist_for.plot()

In [None]:
print(mape(val80, hist_for))

In [None]:
from darts.models import ARIMA

In [None]:
from statsmodels.tools.sm_exceptions import ConvergenceWarning

warnings.simplefilter("ignore", ConvergenceWarning)

In [None]:
# Grid-search the ARIMA order p over {1, 5, 20} with d=0 and q=1 held fixed, evaluated
# on `series` with 21-step forecasts.
# The result is indexed as gs[0] in the next cell to obtain the selected model.
gs = ARIMA.gridsearch(
    {"p": [1, 5, 20], "d": [0], "q": [1]},
    series=series,
    forecast_horizon=21,
    stride=210,
    verbose=True,
    show_warnings=False,
    start=0.5,
)

In [None]:
model = gs[0]

In [None]:
hist_for = model.historical_forecasts(
    series=series,
    start=0.8,
    val_length=21,
    forecast_horizon=21,
    stride=210,
    verbose=True,
)

In [None]:
print(mape(val80, hist_for))

In [None]:
val80.plot()
hist_for.plot()