# Quantitative Trading Strategies Final Project Draft
### Sean Lin (STUDENT ID) | Thomas McDonnell (STUDENT ID) | Ben Panovich (12365148) | Madison Rusch (12365298)

## Data Collection

In [234]:
import pandas as pd
import datetime as dt
from enum import Enum
import numpy as np
import matplotlib.pyplot as plt 
import functools
import quandl
import statsmodels.regression.linear_model as linreg
import requests

@functools.lru_cache()
def fetch_quandl_table(table, begin_date = None, end_date = None):
    qdata = quandl.get_table(table,
                      date = { 'gte': begin_date, 'lte': end_date },
                      # qopts = {"columns":["date", "adj_close"]},
                      # ticker = ticker,
                      api_key='bXgDCzJUfS5ZxasswLcN',
                      paginate=True)
    return qdata

@functools.lru_cache()
def fetch_quandl(asset, begin_date = None, end_date = None, api_key = 'bXgDCzJUfS5ZxasswLcN'):
    qdata = quandl.get(asset,
                      start_date = begin_date,
                      end_date = end_date,
                      api_key= api_key,
                      paginate=True)
    return qdata

@functools.lru_cache()
def fetch_options_price(option, date = '2014-01-01', end_date = '2022-12-31'):
    data_url = f'https://api.polygon.io/v1/open-close/{option}/{date}?adjusted=true&apiKey=cIrLrp5MiBJNGpjRm4hv7hoSbNVirkxx'
    # data_url = f'https://api.polygon.io/v3/snapshot/options/{ticker}?apiKey=cIrLrp5MiBJNGpjRm4hv7hoSbNVirkxx'
    # data_url = data_url + f'&expiration_data.gte={begin_date}'
    # data_url = data_url + f'&expiration_data.lte={end_date}'
    response = requests.get(data_url)
    if response.status_code == 200:
        return response.json()
    else:
        return response.status_code
    

In [235]:
# Get VIX close (this will be Y in our regression)

VIX_data = pd.read_csv('../Final Project Data/VIX_History.csv')
VIX_data['DATE'] = pd.to_datetime(VIX_data['DATE'])
VIX_data = VIX_data[VIX_data['DATE'] > '2013-12-31'].set_index('DATE').drop(columns=['OPEN', 'HIGH', 'LOW']).rename(columns={'CLOSE': 'VIX'})
VIX_data = VIX_data[VIX_data.index < '2023-01-01']
display(VIX_data)

Unnamed: 0_level_0,VIX
DATE,Unnamed: 1_level_1
2014-01-02,14.23
2014-01-03,13.76
2014-01-06,13.55
2014-01-07,12.92
2014-01-08,12.87
...,...
2022-12-23,20.87
2022-12-27,21.65
2022-12-28,22.14
2022-12-29,21.44


In [236]:
# Grab (and filter) event data from FxStreet

event_data = pd.read_csv('../Final Project Data/Economic Events.csv')
event_data['DATE'] = pd.to_datetime(event_data['Start']).dt.date
event_data = event_data.set_index('DATE')

display(event_data.head())

Unnamed: 0_level_0,Id,Start,Name,Impact,Currency
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2014-01-03,5eb8c082-f21f-48cf-97ee-6deeef147743,01/03/2014 19:30:00,Former Fed's Chair Bernanke speech,HIGH,USD
2014-01-06,772b7026-8faf-4daa-a1d6-09f73e8b8bc8,01/06/2014 15:00:00,ISM Services PMI,HIGH,USD
2014-01-06,9baa22c0-ec0f-408a-83d6-f15402804ea5,01/06/2014 22:30:00,Fed Chairman Nomination Vote,HIGH,USD
2014-01-08,21996eee-50f9-4449-9e63-ca90b527e1d3,01/08/2014 19:00:00,FOMC Minutes,HIGH,USD
2014-01-10,0c37fc76-b9d5-46e2-9030-32f6d5e4dc51,01/10/2014 13:30:00,Nonfarm Payrolls,HIGH,USD


In [237]:
# Set event column to True if there's an event, False otherwise

idx = pd.date_range('2014-01-01', '2022-12-31')
VIX_data = VIX_data.reindex(idx, fill_value=None)
VIX_data['Event'] = np.where(VIX_data.index.isin(event_data.index.values), True, False)
VIX_data['Days until Event'] = np.where(VIX_data['Event'] == True, 0, VIX_data.groupby((VIX_data['Event'] == True).cumsum()).cumcount(ascending=False)+1)


# TODO: do we want weekends? remove this if so, and change fill_value above to .ffill()
VIX_data = VIX_data.dropna(subset=['VIX'])
display(VIX_data.head(15))

Unnamed: 0,VIX,Event,Days until Event
2014-01-02,14.23,False,1
2014-01-03,13.76,True,0
2014-01-06,13.55,True,0
2014-01-07,12.92,False,1
2014-01-08,12.87,True,0
2014-01-09,12.89,False,1
2014-01-10,12.14,True,0
2014-01-13,13.28,False,3
2014-01-14,12.28,False,2
2014-01-15,12.28,False,1


In [238]:
# Grab the VIX data and shift it for regression purposes

VIX_data['VIX Shifted'] = VIX_data['VIX'].shift(1)
display(VIX_data)

Unnamed: 0,VIX,Event,Days until Event,VIX Shifted
2014-01-02,14.23,False,1,
2014-01-03,13.76,True,0,14.23
2014-01-06,13.55,True,0,13.76
2014-01-07,12.92,False,1,13.55
2014-01-08,12.87,True,0,12.92
...,...,...,...,...
2022-12-23,20.87,True,0,21.97
2022-12-27,21.65,False,5,20.87
2022-12-28,22.14,False,4,21.65
2022-12-29,21.44,False,3,22.14


In [239]:
# Consumer Sentiment (University of Michigan Consumer Survey, Index of Consumer Sentiment)
# https://data.nasdaq.com/data/UMICH/SOC1-university-of-michigan-consumer-surveyindex-of-consumer-sentiment
# NOTE: this is monthly data, so I forward filled
consumer_sentiment_data = fetch_quandl('UMICH/SOC1', begin_date='2013-12-01', end_date='2022-12-31')
VIX_data['Consumer Sentiment Index'] = consumer_sentiment_data
VIX_data['Consumer Sentiment Index'] = VIX_data['Consumer Sentiment Index'].fillna(method='ffill')
# Fill in the first month with Dec 2013 Index value
VIX_data['Consumer Sentiment Index'] = VIX_data['Consumer Sentiment Index'].fillna(value=consumer_sentiment_data.iloc[0].Index)
display(VIX_data)

Unnamed: 0,VIX,Event,Days until Event,VIX Shifted,Consumer Sentiment Index
2014-01-02,14.23,False,1,,82.5
2014-01-03,13.76,True,0,14.23,82.5
2014-01-06,13.55,True,0,13.76,82.5
2014-01-07,12.92,False,1,13.55,82.5
2014-01-08,12.87,True,0,12.92,82.5
...,...,...,...,...,...
2022-12-23,20.87,True,0,21.97,56.8
2022-12-27,21.65,False,5,20.87,56.8
2022-12-28,22.14,False,4,21.65,56.8
2022-12-29,21.44,False,3,22.14,56.8


In [248]:
# Fetch Options chain for each expiry day
options = ['O:SPY221215P00400000', 'O:SPY221215P00350000', 'O:SPY221216P00400000', 'O:SPY221216P00350000']
dates = pd.date_range('2022-12-01', '2022-12-15')
dates = [date.strftime('%Y-%m-%d') for date in dates]
options_price_data = pd.DataFrame(index=pd.date_range('2014-01-01', '2022-12-31'), columns=options)
for option in options:
    for date in dates:
        result = fetch_options_price(option, date=date)
        if not isinstance(result, int):
            options_price_data.loc[date, option] = result['close']
display(options_price_data[options_price_data.index > '2022-12-01'])

Unnamed: 0,O:SPY221215P00400000,O:SPY221215P00350000,O:SPY221216P00400000,O:SPY221216P00350000
2022-12-02,3.48,0.1,4.37,0.12
2022-12-03,,,,
2022-12-04,,,,
2022-12-05,6.26,0.12,7.4,0.15
2022-12-06,9.48,0.2,10.86,0.22
2022-12-07,10.05,0.14,11.42,0.2
2022-12-08,7.98,0.1,9.12,0.14
2022-12-09,9.57,0.09,10.85,0.13
2022-12-10,,,,
2022-12-11,,,,


In [242]:
# Get historical 10-day volatilities
'''
SPY: Tracks S&P 500
IVW: Tracks growth stocks in S&P 500
RSP: Equally weights S&P 500
IVV: iShares S&P 500
'''
ETFS = ['VOL/SPY', 'VOL/IVW', 'VOL/RSP', 'VOL/IVV']
for ticker in ETFS:
    historical_vol = fetch_quandl('VOL/SPY', begin_date='2014-01-01', end_date='2022-12-31', api_key='iJQ34VBCfaVGdxiuTpSv')
    ticker = ticker[4:]
    VIX_data[f'{ticker} Vol'] = historical_vol['Hv10']
display(VIX_data)

Unnamed: 0,VIX,Event,Days until Event,VIX Shifted,Consumer Sentiment Index,SPY Vol,IVW Vol,RSP Vol,IVV Vol
2014-01-02,14.23,False,1,,82.5,0.0834,0.0834,0.0834,0.0834
2014-01-03,13.76,True,0,14.23,82.5,0.0780,0.0780,0.0780,0.0780
2014-01-06,13.55,True,0,13.76,82.5,0.0787,0.0787,0.0787,0.0787
2014-01-07,12.92,False,1,13.55,82.5,0.0820,0.0820,0.0820,0.0820
2014-01-08,12.87,True,0,12.92,82.5,0.0821,0.0821,0.0821,0.0821
...,...,...,...,...,...,...,...,...,...
2022-12-23,20.87,True,0,21.97,56.8,0.2211,0.2211,0.2211,0.2211
2022-12-27,21.65,False,5,20.87,56.8,0.1781,0.1781,0.1781,0.1781
2022-12-28,22.14,False,4,21.65,56.8,0.1668,0.1668,0.1668,0.1668
2022-12-29,21.44,False,3,22.14,56.8,0.1987,0.1987,0.1987,0.1987
