In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

import statsmodels.api as sm

In [2]:
# Load both series, skipping the first two lines of each CSV, and discard the
# columns the analysis never reads. Each frame is left with two columns
# (the date plus one value column, relabelled after the merge).
unused_columns = ["Ticker", "Open", "High", "Low"]

SPX = pd.read_csv('data/SPX.csv', skiprows=2).drop(columns=unused_columns)
VIX = pd.read_csv('data/VIX.csv', skiprows=2).drop(columns=unused_columns)

In [3]:
# (rows, columns) of each input frame, one per line: SPX first, then VIX.
print(SPX.shape, VIX.shape, sep="\n")

(9159, 2)
(9211, 2)


In [4]:
# Join the two series on their shared "Date" column (merge defaults to an inner
# join, so dates present in only one file are dropped), then relabel the three
# resulting columns positionally: date, SPX value, VIX value.
df = pd.merge(SPX, VIX, on="Date").set_axis(["Date", "SPX", "VIX"], axis=1)

In [5]:
# One-row percentage change of the SPX column; the first row has no
# predecessor and is therefore NaN.
df["SPX_pct"] = df["SPX"].pct_change()

# No percentage-change column (`VIX_pct`) is derived for the VIX on purpose:
# the quantity of interest is the absolute level of the VIX, which is already
# an annualized volatility figure, rather than its day-to-day changes.

In [6]:
# Show the merged frame (Date, SPX, VIX, SPX_pct) to eyeball the join and the returns.
df

Unnamed: 0,Date,SPX,VIX,SPX_pct
0,01/02/1986,209.59,18.07,
1,01/03/1986,210.88,17.96,0.006155
2,01/06/1986,210.65,17.05,-0.001091
3,01/07/1986,213.80,17.39,0.014954
4,01/08/1986,207.97,19.97,-0.027268
...,...,...,...,...
9149,04/28/2022,4287.50,29.99,0.024747
9150,04/29/2022,4131.93,33.40,-0.036285
9151,05/02/2022,4155.38,32.34,0.005675
9152,05/03/2022,4175.48,29.25,0.004837


In [7]:
# Rolling standard deviation of SPX_pct over several look-back windows (in
# rows), scaled by sqrt(255) to express it as an annualized volatility.
# NOTE(review): 255 is the periods-per-year factor used here; 252 is the more
# common trading-day count -- confirm which one is intended.
annualization = np.sqrt(255)
for window in (5, 30, 60, 90):
    df[f"SPX_{window}d_vol"] = df["SPX_pct"].rolling(window).std() * annualization

# Previous row's VIX value (shifted down by one row; the first row is NaN).
# NOTE(review): this was originally labelled the "target variable", but the
# regression cells in this notebook pass VIX_lag1 as `exog` (the regressor)
# and the rolling volatilities as `endog` -- confirm which role is intended.
df["VIX_lag1"] = df["VIX"].shift(1)

In [8]:
# Show the frame again with the rolling-volatility columns and VIX_lag1 added;
# the leading rows are NaN until each rolling window has filled.
df

Unnamed: 0,Date,SPX,VIX,SPX_pct,SPX_5d_vol,SPX_30d_vol,SPX_60d_vol,SPX_90d_vol,VIX_lag1
0,01/02/1986,209.59,18.07,,,,,,
1,01/03/1986,210.88,17.96,0.006155,,,,,18.07
2,01/06/1986,210.65,17.05,-0.001091,,,,,17.96
3,01/07/1986,213.80,17.39,0.014954,,,,,17.05
4,01/08/1986,207.97,19.97,-0.027268,,,,,17.39
...,...,...,...,...,...,...,...,...,...
9149,04/28/2022,4287.50,29.99,0.024747,0.366065,0.207317,0.227166,0.211156,31.60
9150,04/29/2022,4131.93,33.40,-0.036285,0.403561,0.228318,0.237515,0.218773,29.99
9151,05/02/2022,4155.38,32.34,0.005675,0.403518,0.225803,0.233115,0.216732,33.40
9152,05/03/2022,4175.48,29.25,0.004837,0.356066,0.226703,0.233078,0.216117,32.34


In [9]:
# Drop the warm-up rows at the start of the sample: the longest rolling window
# is 90 rows, so the first 100 rows cover every NaN produced by the rolling
# statistics and by the one-row shift/percentage change.
#
# The cut is made by index *label* (`.loc`) rather than by position (`.iloc`).
# On the first run both select exactly the same rows, because the merged frame
# carries a default 0..n-1 index. Unlike `.iloc[100:]`, however, re-running
# this cell does not silently discard another 100 rows each time, since the
# remaining labels still start at 100.
# `.copy()` detaches the result from the original frame so that later column
# assignments do not trigger SettingWithCopyWarning.
WARMUP_ROWS = 100
df = df.loc[WARMUP_ROWS:, :].copy()
df

Unnamed: 0,Date,SPX,VIX,SPX_pct,SPX_5d_vol,SPX_30d_vol,SPX_60d_vol,SPX_90d_vol,VIX_lag1
100,05/27/1986,244.75,16.92,0.014087,0.139841,0.144246,0.147082,0.137361,16.74
101,05/28/1986,246.63,17.75,0.007681,0.138049,0.145517,0.146709,0.137414,16.92
102,05/29/1986,247.98,18.76,0.005474,0.101658,0.136191,0.146354,0.137131,17.75
103,05/30/1986,247.35,18.89,-0.002541,0.095217,0.136312,0.146566,0.136210,18.76
104,06/02/1986,245.04,18.28,-0.009339,0.145895,0.139086,0.148252,0.135732,18.89
...,...,...,...,...,...,...,...,...,...
9149,04/28/2022,4287.50,29.99,0.024747,0.366065,0.207317,0.227166,0.211156,31.60
9150,04/29/2022,4131.93,33.40,-0.036285,0.403561,0.228318,0.237515,0.218773,29.99
9151,05/02/2022,4155.38,32.34,0.005675,0.403518,0.225803,0.233115,0.216732,33.40
9152,05/03/2022,4175.48,29.25,0.004837,0.356066,0.226703,0.233078,0.216117,32.34


In [10]:
# OLS fit of the 5-row annualized SPX volatility on the previous row's VIX.
# Only VIX_lag1 is passed as a regressor, so no intercept is estimated and
# statsmodels reports the *uncentered* R-squared.
# NOTE(review): confirm the missing intercept is intentional; sm.add_constant
# would be needed to include one.
ols_5d = sm.OLS(df["SPX_5d_vol"], df["VIX_lag1"]).fit()
print(ols_5d.summary())

                                 OLS Regression Results                                
Dep. Variable:             SPX_5d_vol   R-squared (uncentered):                   0.833
Model:                            OLS   Adj. R-squared (uncentered):              0.832
Method:                 Least Squares   F-statistic:                          4.500e+04
Date:                Sat, 07 May 2022   Prob (F-statistic):                        0.00
Time:                        09:58:37   Log-Likelihood:                          10309.
No. Observations:                9054   AIC:                                 -2.062e+04
Df Residuals:                    9053   BIC:                                 -2.061e+04
Df Model:                           1                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

In [11]:
# OLS fit of the 30-row annualized SPX volatility on the previous row's VIX.
# Only VIX_lag1 is passed as a regressor, so no intercept is estimated and
# statsmodels reports the *uncentered* R-squared.
# NOTE(review): confirm the missing intercept is intentional; sm.add_constant
# would be needed to include one.
ols_30d = sm.OLS(df["SPX_30d_vol"], df["VIX_lag1"]).fit()
print(ols_30d.summary())

                                 OLS Regression Results                                
Dep. Variable:            SPX_30d_vol   R-squared (uncentered):                   0.919
Model:                            OLS   Adj. R-squared (uncentered):              0.919
Method:                 Least Squares   F-statistic:                          1.025e+05
Date:                Sat, 07 May 2022   Prob (F-statistic):                        0.00
Time:                        09:58:58   Log-Likelihood:                          13725.
No. Observations:                9054   AIC:                                 -2.745e+04
Df Residuals:                    9053   BIC:                                 -2.744e+04
Df Model:                           1                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

In [12]:
# OLS fit of the 60-row annualized SPX volatility on the previous row's VIX.
# Only VIX_lag1 is passed as a regressor, so no intercept is estimated and
# statsmodels reports the *uncentered* R-squared.
# NOTE(review): confirm the missing intercept is intentional; sm.add_constant
# would be needed to include one.
ols_60d = sm.OLS(df["SPX_60d_vol"], df["VIX_lag1"]).fit()
print(ols_60d.summary())

                                 OLS Regression Results                                
Dep. Variable:            SPX_60d_vol   R-squared (uncentered):                   0.919
Model:                            OLS   Adj. R-squared (uncentered):              0.919
Method:                 Least Squares   F-statistic:                          1.030e+05
Date:                Sat, 07 May 2022   Prob (F-statistic):                        0.00
Time:                        09:59:08   Log-Likelihood:                          13771.
No. Observations:                9054   AIC:                                 -2.754e+04
Df Residuals:                    9053   BIC:                                 -2.753e+04
Df Model:                           1                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

In [13]:
# OLS fit of the 90-row annualized SPX volatility on the previous row's VIX.
# Only VIX_lag1 is passed as a regressor, so no intercept is estimated and
# statsmodels reports the *uncentered* R-squared.
# NOTE(review): confirm the missing intercept is intentional; sm.add_constant
# would be needed to include one.
ols_90d = sm.OLS(df["SPX_90d_vol"], df["VIX_lag1"]).fit()
print(ols_90d.summary())

                                 OLS Regression Results                                
Dep. Variable:            SPX_90d_vol   R-squared (uncentered):                   0.916
Model:                            OLS   Adj. R-squared (uncentered):              0.916
Method:                 Least Squares   F-statistic:                          9.837e+04
Date:                Sat, 07 May 2022   Prob (F-statistic):                        0.00
Time:                        09:59:17   Log-Likelihood:                          13594.
No. Observations:                9054   AIC:                                 -2.719e+04
Df Residuals:                    9053   BIC:                                 -2.718e+04
Df Model:                           1                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------