In [1]:
from stochastic_volatility_models.src.models.time_series.heterogeneous_autoregressive_model import HAR
from stochastic_volatility_models.src.core.underlying import Underlying
from stochastic_volatility_models.visualisations.forecasts import plot_forecast
from stochastic_volatility_models.src.core.forecasts import forecast_performance
import numpy as np
import pandas as pd

[32m2024-09-03 10:59:38.892[0m | [1mINFO    [0m | [36mstochastic_volatility_models.config[0m:[36minitialise[0m:[36m30[0m - [1mLoaded module `stochastic_volatility_models` from project directory path `/Users/mayurankv/Documents/Mayuran/Programming/Projects/Academic/Imperial College London/MSc Statistics/Dissertation/Project/modules`[0m


In [4]:
# Forecast origin. Alternative origins are kept as comments so they can be toggled.
# date = "2013-07-11"
# date = "2015-09-01"
date = "2017-11-30"
time = np.datetime64(date)
underlying = Underlying("SPX")
har = HAR()

# Fit the HAR model at `time`.
fitting_period = 3  # NOTE(review): unit is not visible here (presumably years) — confirm against HAR.fit
har.fit(
	time=time,
	underlying=underlying,
	fitting_period=fitting_period,
)

# Produce the volatility forecast together with prediction intervals at each confidence level.
forecast_period = 1 / 2
forecast_confidences = [0.9, 0.95, 0.99]
har.forecast(forecast_period=forecast_period, forecast_confidences=forecast_confidences)

# Score the forecast: `performance` is a dict of error metrics and
# `prediction_interval_accuracies` maps each confidence level to a coverage value
# (see the printed output of this cell).
performance, prediction_interval_accuracies = forecast_performance(underlying, har.volatility_forecast)

# Amount of history drawn before the forecast. The alternative value is kept as a
# comment instead of a dead assignment that was immediately overwritten.
# historical_period = 3
historical_period = 0.1

plot_forecast(
	underlying=underlying,
	volatility_forecast=har.volatility_forecast,
	historical_period=historical_period,
).show(
	config={
		"toImageButtonOptions": {
			"format": "svg",  # one of png, svg, jpeg, webp
			"filename": "har intervals",
			"height": 400,
			"width": 400,
			"scale": 1,  # Multiply title/legend/axis/canvas sizes by this factor
		}
	}
)

print(performance)
print(prediction_interval_accuracies)
# Metrics scaled by 1000 and rounded to 3 d.p., joined with " & "
# (presumably to paste into a LaTeX table row).
print(" & ".join(str(round(a * 1000, 3)) for a in performance.values()))

{'MAE': 0.0042188005515612255, 'RMSE': 0.0060918097540191875}
{0.9: 0.5952380952380952, 0.95: 0.7063492063492064, 0.99: 0.8571428571428571}
4.219 & 6.092


In [28]:
# Plain-text summary of the regression held by the fitted HAR model
# (printed rather than displayed so the output stays text, as recorded below).
regression_summary = har.model.summary()
print(regression_summary)

                            OLS Regression Results                            
Dep. Variable:                   RV_t   R-squared:                       0.590
Model:                            OLS   Adj. R-squared:                  0.588
Method:                 Least Squares   F-statistic:                     360.7
Date:                Sat, 31 Aug 2024   Prob (F-statistic):          4.36e-145
Time:                        06:36:15   Log-Likelihood:                -825.97
No. Observations:                 756   AIC:                             1660.
Df Residuals:                     752   BIC:                             1678.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -1.0034      0.317     -3.164      0.0

In [17]:
# Four values separated by " & ". The row printed below compares entry 0 with
# entry 2 and entry 1 with entry 3.
vals = "2.930 & 3.204 & 3.271 & 3.521"
arr = vals.split(" & ")

# (reference, other) pairs; indexing is explicit so a short input still fails loudly.
value_pairs = [
	(np.float64(arr[0]), np.float64(arr[2])),
	(np.float64(arr[1]), np.float64(arr[3])),
]

# Percentage difference relative to the reference value: (reference - other) / reference * 100,
# rounded to 2 d.p. A negative number means `other` is larger than `reference`.
relative_changes = [round((reference - other) / reference * 100, 2) for reference, other in value_pairs]

# Built outside of an f-string so the cell does not rely on nested same-type quotes
# (only valid from Python 3.12) or on an assignment expression hidden in a format field.
relative_change_row = " & ".join(f"{change}" for change in relative_changes)
print(relative_change_row)

-11.64 & -9.89


In [22]:
import plotly.express as px

# Load the realised-measures file and take the square root of the `rk_parzen` column for
# one symbol (presumably a realised-variance estimate, hence the square root — confirm).
ticker = "SPX"
realised_measures = pd.read_csv("../modules/stochastic_volatility_models/stochastic_volatility_models/data/realised_volatility/oxfordman.csv", index_col=0)
ticker_realised_volatility = pd.Series(np.sqrt(realised_measures.loc[(realised_measures.Symbol == f".{ticker.upper()}"), "rk_parzen"]))

# Convert the timestamp index to plain "YYYY-MM-DD" strings: parse as UTC, shift by two
# hours, then keep the date part. Parsing goes through pandas because letting NumPy parse
# timezone-aware strings is deprecated and is announced to become an error.
observation_times = pd.to_datetime(ticker_realised_volatility.index, utc=True)
ticker_realised_volatility.index = (observation_times + pd.Timedelta(hours=2)).strftime("%Y-%m-%d")

# Keep observations from 2010 onwards; the dates are parsed once and reused for the mask.
observation_dates = pd.to_datetime(ticker_realised_volatility.index)
ticker_realised_volatility = ticker_realised_volatility.loc[observation_dates >= pd.Timestamp("2010-01-01")]
# Alternative window: 2010-07-15 <= date <= 2017-02-17
# ticker_realised_volatility = ticker_realised_volatility.loc[(pd.to_datetime(ticker_realised_volatility.index) <= np.datetime64("2017-02-17")) & (pd.to_datetime(ticker_realised_volatility.index) >= np.datetime64("2010-07-15"))]

# Options for the figure's "download image" button.
config = {
	"toImageButtonOptions": {
		"format": "svg",  # one of png, svg, jpeg, webp
		"filename": "custom_image",
		"height": 300,
		"width": 1000,
		"scale": 1,  # Multiply title/legend/axis/canvas sizes by this factor
	}
}

realised_volatility_figure = px.line(ticker_realised_volatility).update_layout(
	margin=dict(l=0, r=0, t=0, b=0),
	template="plotly_white",
	xaxis_title="Time",
	yaxis_title="Realised Volatility",
	showlegend=False,
)
realised_volatility_figure.show(config=config)


parsing timezone aware datetimes is deprecated; this will raise an error in the future



In [6]:
from statsmodels.tsa.stattools import adfuller

# Load the realised-measures file and take the natural log of the `rk_parzen` column for
# one symbol. The variable name is kept unchanged because the following cell reads it.
ticker = "SPX"
realised_measures = pd.read_csv("../modules/stochastic_volatility_models/stochastic_volatility_models/data/realised_volatility/oxfordman.csv", index_col=0)
ticker_realised_volatility = pd.Series(np.log(realised_measures.loc[(realised_measures.Symbol == f".{ticker.upper()}"), "rk_parzen"]))

# Convert the timestamp index to plain "YYYY-MM-DD" strings: parse as UTC, shift by two
# hours, then keep the date part. Parsing goes through pandas because letting NumPy parse
# timezone-aware strings is deprecated and is announced to become an error.
observation_times = pd.to_datetime(ticker_realised_volatility.index, utc=True)
ticker_realised_volatility.index = (observation_times + pd.Timedelta(hours=2)).strftime("%Y-%m-%d")

# Augmented Dickey-Fuller test on each (start, end) window, bounds inclusive.
# `None` as the end means "no upper bound". Results are printed in this order.
observation_dates = pd.to_datetime(ticker_realised_volatility.index)
adf_windows = [
	("2010-07-15", "2013-07-11"),
	("2012-08-29", "2015-09-01"),
	("2014-12-02", "2017-11-30"),
	("2010-07-15", "2017-11-30"),
	("2010-01-01", None),
]
for window_start, window_end in adf_windows:
	in_window = observation_dates >= pd.Timestamp(window_start)
	if window_end is not None:
		in_window = in_window & (observation_dates <= pd.Timestamp(window_end))
	print(adfuller(ticker_realised_volatility.loc[in_window].to_numpy()))
# print(adfuller(np.log(np.square(ticker_realised_volatility.loc[pd.to_datetime(ticker_realised_volatility.index) >= np.datetime64("2010-01-01")].to_numpy()))))


parsing timezone aware datetimes is deprecated; this will raise an error in the future



(-4.612286851164431, 0.0001225989436164265, 5, 747, {'1%': -3.439134355513998, '5%': -2.865416893922985, '10%': -2.56883447171999}, 1732.8546964168786)
(-6.3115482547836415, 3.225390682347815e-08, 7, 748, {'1%': -3.43912257105195, '5%': -2.8654117005229844, '10%': -2.568831705010152}, 1726.8925837280897)
(-2.1572559483002745, 0.2221230322960543, 17, 738, {'1%': -3.4392418578608983, '5%': -2.86546426916434, '10%': -2.568859710342903}, 1597.0845007456724)
(-3.98389826338533, 0.001497211626760136, 17, 1842, {'1%': -3.4339050685840564, '5%': -2.8631103639464617, '10%': -2.56760600704281}, 4235.41665708564)
(-4.834747783676038, 4.6645615558518984e-05, 15, 2120, {'1%': -3.4334383186784434, '5%': -2.86290429532013, '10%': -2.5674962853773584}, 4886.068496592123)


In [14]:
# NOTE(review): this cell reads `ticker_realised_volatility` from whatever an earlier cell
# left in the kernel. The commented-out lines below suggest it was last run on the
# square-root series rather than the log series — confirm which transform is intended
# before relying on Restart & Run All.
# realised_measures = pd.read_csv("../modules/stochastic_volatility_models/stochastic_volatility_models/data/realised_volatility/oxfordman.csv", index_col=0)
# ticker_realised_volatility = pd.Series(np.sqrt(realised_measures.loc[(realised_measures.Symbol == f".{"SPX".upper()}"), "rk_parzen"]))

# ADF test on the observations dated 2010-07-15 to 2015-09-01 (both bounds inclusive).
series_dates = pd.to_datetime(ticker_realised_volatility.index)
on_or_after_start = series_dates >= np.datetime64("2010-07-15")
on_or_before_end = series_dates <= np.datetime64("2015-09-01")
windowed_values = ticker_realised_volatility.loc[on_or_before_end & on_or_after_start].to_numpy()
print(adfuller(windowed_values))

(-4.986614555855447, 2.3587468683649575e-05, 8, 1284, {'1%': -3.4354531319230373, '5%': -2.8637935995282224, '10%': -2.5679698346531965}, -10963.554847491407)
