In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import statsmodels.stats.api as sms
from statsmodels.stats.diagnostic import normal_ad
from statsmodels.stats.outliers_influence import variance_inflation_factor
from stargazer.stargazer import Stargazer
from IPython.core.display import HTML
from IPython.core.display import Latex
from statsmodels.iolib.summary2 import summary_col

In [None]:
# Load the Chicago weather export and reduce it to one averaged row per DATE.
# Original author's note: 4 non-null columns remain (wind, precipitation, snowfall, temp).
# NOTE(review): the path is an absolute, machine-specific location; the notebook
# will not run elsewhere without editing it.
dfCHI = (
    pd.read_csv("/Users/rohitk/Documents/Econ422Data/ChicagoWeather.csv")
    .drop(columns=['WESD','STATION','NAME'])  # not referenced by any later cell
    .dropna()                                 # keep only rows with every remaining field present
    .groupby(['DATE'])
    .mean()                                   # average the rows that share a DATE
)
# The DATE group keys become the index; parse them into timestamps.
dfCHI.index = pd.to_datetime(dfCHI.index)

In [None]:
# Load the NYC weather export and reduce it to one averaged row per DATE.
# Original author's note: 4 non-null columns remain (wind, precipitation, snowfall, temp).
# NOTE(review): the path is an absolute, machine-specific location; the notebook
# will not run elsewhere without editing it.
dfNYC = (
    pd.read_csv("/Users/rohitk/Documents/Econ422Data/NYCWeather.csv")
    .drop(columns=['PSUN','WESD','STATION','NAME'])  # not referenced by any later cell
    .dropna()                                        # keep only rows with every remaining field present
    .groupby(['DATE'])
    .mean()                                          # average the rows that share a DATE
)
# The DATE group keys become the index; parse them into timestamps.
dfNYC.index = pd.to_datetime(dfNYC.index)

In [None]:
# Load SPY historical volumes and keep only the volume column, indexed by the
# parsed 'Date' values.
# Note the leading space in ' Volume': that is the column label exactly as it is
# read from the CSV, and later cells select it by that exact name.
dfSPY = pd.read_csv("/Users/rohitk/Documents/Econ422Data/SPYvolumes5Y.csv")
dfSPY = dfSPY.set_index(pd.to_datetime(dfSPY['Date']))[[' Volume']]

In [None]:
# Pair each city's daily weather with SPY volume. Both sides are matched on
# their index, and the inner join keeps only dates present in both frames.
SPYandNYC = dfNYC.merge(dfSPY, how='inner', left_index=True, right_index=True)
SPYandCHI = dfCHI.merge(dfSPY, how='inner', left_index=True, right_index=True)

In [None]:
#SPY and NYC analysis
# Regress SPY volume on NYC weather with four nested specifications, report
# multicollinearity (VIF) and heteroskedasticity (White) diagnostics for the
# full model, and render all four fits side by side.

# Response variable (the column label carries a leading space).
y = SPYandNYC[[' Volume']]

# Nested regressor sets, each with an intercept column added.
x = sm.add_constant(SPYandNYC[['AWND', 'PRCP','SNOW','TAVG']])   # full model
x1 = sm.add_constant(SPYandNYC[['AWND','PRCP','TAVG']])          # omit snowfall
x2 = sm.add_constant(SPYandNYC[['PRCP','TAVG']])                 # omit snowfall and wind
x3 = sm.add_constant(SPYandNYC[['TAVG']])                        # temperature only

# VIF must be computed on the design matrix that includes the intercept.
# Without it the auxiliary regressions are forced through the origin and the
# factors are inflated for any regressor with a non-zero mean (e.g. TAVG).
# The intercept's own VIF is not meaningful, so it is left out of the table.
vif_columns = [col for col in x.columns if col != 'const']
vif = pd.DataFrame({
    "variables": vif_columns,
    "VIF": [variance_inflation_factor(x.values, x.columns.get_loc(col)) for col in vif_columns],
})
display(vif)

model = sm.OLS(y,x).fit()
model1 = sm.OLS(y,x1).fit()
model2 = sm.OLS(y,x2).fit()
model3 = sm.OLS(y,x3).fit()

# White test on the full model. Per the statsmodels documentation the returned
# tuple is (LM statistic, LM p-value, F statistic, F p-value).
test = sms.het_white(model.resid,model.model.exog)
print("White test: " + str(test))

stargazer = Stargazer([model, model1,model2,model3])
stargazer.title('SPY Volume and NYC Weather')
stargazer.custom_columns(['Full model','Omit Snowfall','Omit Snowfall and Wind','Only Temperature'],[1,1,1,1])

# Last expression of the cell: renders the comparison table as HTML.
HTML(stargazer.render_html())
#Latex(stargazer.render_latex())

In [None]:
#SPY and CHI analysis
# Regress SPY volume on Chicago weather with four nested specifications, report
# multicollinearity (VIF) and heteroskedasticity (White) diagnostics for the
# full model, and render all four fits side by side.
# NOTE(review): this cell rebinds x, x1..x3, y, model, model1..model3, vif, test
# and stargazer, which the NYC analysis cell also assigns; later cells that
# read `model` therefore see whichever of the two cells ran last.

# Response variable (the column label carries a leading space).
y = SPYandCHI[[' Volume']]

# Nested regressor sets, each with an intercept column added.
x = sm.add_constant(SPYandCHI[['AWND', 'PRCP','SNOW','TAVG']])   # full model
x1 = sm.add_constant(SPYandCHI[['AWND','SNOW','TAVG']])          # omit precipitation
x2 = sm.add_constant(SPYandCHI[['SNOW','TAVG']])                 # omit precipitation and wind
x3 = sm.add_constant(SPYandCHI[['TAVG']])                        # temperature only

# VIF must be computed on the design matrix that includes the intercept.
# Without it the auxiliary regressions are forced through the origin and the
# factors are inflated for any regressor with a non-zero mean (e.g. TAVG).
# The intercept's own VIF is not meaningful, so it is left out of the table.
vif_columns = [col for col in x.columns if col != 'const']
vif = pd.DataFrame({
    "variables": vif_columns,
    "VIF": [variance_inflation_factor(x.values, x.columns.get_loc(col)) for col in vif_columns],
})
display(vif)

model = sm.OLS(y,x).fit()
model1 = sm.OLS(y,x1).fit()
model2 = sm.OLS(y,x2).fit()
model3 = sm.OLS(y,x3).fit()

# White test on the full model. Per the statsmodels documentation the returned
# tuple is (LM statistic, LM p-value, F statistic, F p-value).
test = sms.het_white(model.resid,model.model.exog)
print("White test: " + str(test))

stargazer = Stargazer([model, model1,model2,model3])
stargazer.title('SPY Volume and CHI Weather')
stargazer.custom_columns(['Full model','Omit Precipitation','Omit Precipitation and Wind','Only Temperature'],[1,1,1,1])

# Last expression of the cell: renders the comparison table as HTML.
HTML(stargazer.render_html())

In [None]:
# Regression diagnostic panel for the PRCP regressor, drawn on a 12x8 figure.
# NOTE(review): `model` is assigned by both the NYC and the CHI analysis cells;
# this plot uses whichever fit is currently bound to that name.
fig = sm.graphics.plot_regress_exog(model, 'PRCP', fig=plt.figure(figsize=(12,8)))
#plt.savefig('PRCPRegressionDiag.png')

In [None]:
# Pearson correlations between the NYC weather columns and SPY volume.
# `correlation` is the labelled pandas version; the heatmap is drawn from the
# NumPy matrix, with the frame's column names supplied as tick labels.
correlation = SPYandNYC.corr(method ='pearson')
correlation_map = np.corrcoef(SPYandNYC.to_numpy(), rowvar=False)  # columns are the variables
heatmap = sns.heatmap(
    correlation_map,
    cbar=True,
    annot=True,
    square=True,
    fmt='.2f',
    yticklabels=SPYandNYC.columns,
    xticklabels=SPYandNYC.columns,
)
plt.title('NYC weather and SPY Volume Correlation Matrix')
#plt.savefig('NYCandSPYCorr.png')

In [None]:
# Pearson correlations between the Chicago weather columns and SPY volume.
# `correlation` is the labelled pandas version; the heatmap is drawn from the
# NumPy matrix, with the frame's column names supplied as tick labels.
correlation = SPYandCHI.corr(method ='pearson')
correlation_map = np.corrcoef(SPYandCHI.to_numpy(), rowvar=False)  # columns are the variables
heatmap = sns.heatmap(
    correlation_map,
    cbar=True,
    annot=True,
    square=True,
    fmt='.2f',
    yticklabels=SPYandCHI.columns,
    xticklabels=SPYandCHI.columns,
)
plt.title('Chicago weather and SPY Volume Correlation Matrix')
#plt.savefig('CHIandSPYCorr.png')

In [None]:
# Line plot of the SPY volume frame against its date index.
spy_ax = plt.figure(figsize=(12, 8)).gca()
spy_ax.set_title("SPY daily trading volumes")
spy_ax.plot(dfSPY)
#plt.savefig('SPYDailyVolumes.png')

In [None]:
# Line plot of the NYC SNOW column against its date index.
nyc_snow_ax = plt.figure(figsize=(12, 8)).gca()
nyc_snow_ax.set_title("New York City Daily Snowfall")
nyc_snow_ax.plot(dfNYC['SNOW'])
#plt.savefig('NYCSnow.png')

In [None]:
# Line plot of the Chicago SNOW column against its date index.
chi_snow_ax = plt.figure(figsize=(12, 8)).gca()
chi_snow_ax.set_title("Chicago Daily Snowfall")
chi_snow_ax.plot(dfCHI['SNOW'])
#plt.savefig('CHISnow.png')

In [None]:
# Anderson-Darling normality check and distribution plot for the residuals of
# the fit currently bound to `model`.
# NOTE(review): `model` is assigned by both the NYC and the CHI analysis cells;
# the "Chicago" title is only accurate when the CHI cell ran last.

# normal_ad returns (statistic, p-value). Unpack it so that `pval` really is the
# p-value instead of the whole tuple being printed under the "pval" label.
ad_stat, pval = normal_ad(model.resid)
print("AD-test statistic: " + str(ad_stat))
print("AD-test pval: " + str(pval))

plt.subplots(figsize=(12, 6))
plt.title('Distribution of Residuals Chicago')
# NOTE(review): seaborn has deprecated distplot (since 0.11) in favour of
# histplot/displot; kept here so the cell still runs on the installed version.
sns.distplot([model.resid])
#plt.savefig('CHIresid.png')