In [1]:
# import Required Libraries
import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from IPython.display import display

In [2]:
# define start and end dates based on the case
start_date = "2020-01-01"  
end_date = "2024-09-01"

# get SPY data
spy_data = yf.Ticker("SPY")
history = spy_data.history(start=start_date, end=end_date, auto_adjust=False)
spy_close = history['Close']
spy_dividends = history['Dividends']
print(history.columns)

# get VIX data
vix_data = yf.Ticker("^VIX")
history = vix_data.history(start=start_date, end=end_date, auto_adjust=False)
vix_close = history['Close']

print(vix_close)


Index(['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'Dividends',
       'Stock Splits', 'Capital Gains'],
      dtype='object')
Date
2020-01-02 00:00:00-06:00    12.470000
2020-01-03 00:00:00-06:00    14.020000
2020-01-06 00:00:00-06:00    13.850000
2020-01-07 00:00:00-06:00    13.790000
2020-01-08 00:00:00-06:00    13.450000
                               ...    
2024-08-26 00:00:00-05:00    16.150000
2024-08-27 00:00:00-05:00    15.430000
2024-08-28 00:00:00-05:00    17.110001
2024-08-29 00:00:00-05:00    15.650000
2024-08-30 00:00:00-05:00    15.000000
Name: Close, Length: 1174, dtype: float64


In [8]:
# put in a dataframe
df = pd.DataFrame({
    'spy': spy_close,
    'div': spy_dividends
})
# display result
df


Unnamed: 0_level_0,spy,div
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-01-02 00:00:00-05:00,324.869995,0.0
2020-01-03 00:00:00-05:00,322.410004,0.0
2020-01-06 00:00:00-05:00,323.640015,0.0
2020-01-07 00:00:00-05:00,322.730011,0.0
2020-01-08 00:00:00-05:00,324.450012,0.0
...,...,...
2024-08-26 00:00:00-04:00,560.789978,0.0
2024-08-27 00:00:00-04:00,561.559998,0.0
2024-08-28 00:00:00-04:00,558.299988,0.0
2024-08-29 00:00:00-04:00,558.349976,0.0


In [9]:
# compute SPY Daily Returns
df["spy-1"] = df["spy"].shift(1)
df["Rt"] = (df["spy"] + df["div"])/ df["spy-1"]-1
df

Unnamed: 0_level_0,spy,div,spy-1,Rt
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-01-02 00:00:00-05:00,324.869995,0.0,,
2020-01-03 00:00:00-05:00,322.410004,0.0,324.869995,-0.007572
2020-01-06 00:00:00-05:00,323.640015,0.0,322.410004,0.003815
2020-01-07 00:00:00-05:00,322.730011,0.0,323.640015,-0.002812
2020-01-08 00:00:00-05:00,324.450012,0.0,322.730011,0.005330
...,...,...,...,...
2024-08-26 00:00:00-04:00,560.789978,0.0,562.130005,-0.002384
2024-08-27 00:00:00-04:00,561.559998,0.0,560.789978,0.001373
2024-08-28 00:00:00-04:00,558.299988,0.0,561.559998,-0.005805
2024-08-29 00:00:00-04:00,558.349976,0.0,558.299988,0.000090


In [5]:
# pick the first month data from the df


In [6]:






# calculate sample standard deviation by month for SPY
monthly_data = df.groupby(pd.Grouper(freq='ME'))['spy'].agg(lambda x: np.std(x, ddof=1)).to_frame(name='S')
monthly_data["S+1"] = monthly_data["S"].shift(-1)

# add VIX data to the DataFrame
monthly_vix = vix_close.resample('ME').last()  # Get VIX values at the end of each month
monthly_vix_aligned = monthly_vix.reindex(monthly_data.index, method='nearest')
monthly_data['VIX'] = monthly_vix_aligned

# display the resulting DataFrame
print(monthly_data)


                                   S        S+1        VIX
Date                                                      
2020-01-31 00:00:00-05:00   3.233973  13.365311  18.840000
2020-02-29 00:00:00-05:00  13.365311  26.782776  40.110001
2020-03-31 00:00:00-04:00  26.782776  13.246293  53.540001
2020-04-30 00:00:00-04:00  13.246293   7.232010  34.150002
2020-05-31 00:00:00-04:00   7.232010   6.227147  27.510000
2020-06-30 00:00:00-04:00   6.227147   4.785479  30.430000
2020-07-31 00:00:00-04:00   4.785479   6.250681  24.459999
2020-08-31 00:00:00-04:00   6.250681   8.624022  26.410000
2020-09-30 00:00:00-04:00   8.624022   7.182037  26.370001
2020-10-31 00:00:00-04:00   7.182037   8.938323  38.020000
2020-11-30 00:00:00-05:00   8.938323   2.433508  20.570000
2020-12-31 00:00:00-05:00   2.433508   4.820420  22.750000
2021-01-31 00:00:00-05:00   4.820420   4.807276  33.090000
2021-02-28 00:00:00-05:00   4.807276   5.571921  27.950001
2021-03-31 00:00:00-04:00   5.571921   4.988774  19.4000

In [7]:
# correlation 1 (r1): VIX and SPY Next Month's Std Dev
r1 = monthly_data['VIX'].corr(monthly_data['S+1'])
N1 = len(monthly_data)
print(f"Correlation (r1) between VIX and SPY next month's Std Dev: {r1}")
print("Number of Observations for r1 - ", N2)

# correlation 2 (r2): SPY Current and Next Month's Std Dev
r2 = monthly_data["S"].corr(monthly_data["S+1"])
N2 = len(monthly_data) - 1
print(f"Correlation (r2) between SPY current and next month's Std Dev: {r2}")
print("Number of Observations for r2 - ", N2)

Correlation (r1) between VIX and SPY next month's Std Dev: 0.4081432130091504


NameError: name 'N2' is not defined

In [None]:
# Step 8: Fisher z-Transformation to Compare Correlations
z1 = 0.5 * np.log((1 + r1) / (1 - r1))
z2 = 0.5 * np.log((1 + r2) / (1 - r2))

In [None]:
# Compute Z statistic
z_stat = (z1 - z2) / np.sqrt((1 / (N1 - 3)) + (1 / (N2 - 3)))
critical_95 = 1.960  # 95% confidence critical value

print(f"Z-Statistic for difference in correlations: {z_stat}")
if abs(z_stat) > critical_95:
    print("The difference between r1 and r2 is statistically significant.")
else:
    print("The difference between r1 and r2 is not statistically significant.")

In [None]:
# Scatter Plot: SPY Current vs Next Month Std Dev
x_values = monthly_data["S"].to_numpy()
y_values = monthly_data["S+1"].to_numpy()

plt.figure(figsize=(4, 4))
plt.plot(x_values, y_values, 'k.')
plt.xlabel('Std Dev One Month Ahead')
plt.ylabel('Std Dev')
plt.grid(True)
plt.show()

In [None]:
# Scatter Plot: VIX vs SPY Std Dev Next Month
x_values = monthly_data['VIX'].to_numpy()
y_values = monthly_data['S+1'].to_numpy()

plt.figure(figsize=(4, 4))
plt.plot(x_values, y_values, 'k.')
plt.xlabel('VIX (Current Month)')
plt.ylabel('SPY Std Dev (Next Month)')
plt.title('VIX vs SPY Volatility (Next Month)')
plt.grid(True)
plt.show()
