In [1]:
# The libraries for this experiment were imported into the Python environment
import os

import numpy as np
import scipy as sp
import pandas as pd

import statsmodels.api as sm

import holoviews as hv
import hvplot.pandas

import datetime

import datashader as ds
from holoviews.operation.datashader import datashade, shade, dynspread, rasterize

In [2]:
# Load the Bokeh plotting backend for HoloViews; the plots below are rendered with it.
hv.extension('bokeh')
# Left disabled: HoloViews' automatic archiving of displayed output.
#hv.archive.auto()

In [3]:
# Read the news listing from the working directory.
news_volume_path = os.path.join(".",'news_volume.csv')
news_volume = pd.read_csv(news_volume_path)
# Relabel the two columns; assumes the file has exactly two columns,
# the second holding each article's date - TODO confirm against the CSV.
news_volume.columns = ['index','date']

In [4]:
# Give every row a weight of 1 so that summing per date yields a row count per date.
news_volume['Articles'] = 1

In [5]:
# Collapse to one row per date holding the number of rows recorded for that date.
# Only the 'Articles' column is aggregated; the result is indexed by 'date'.
news_volume = news_volume.groupby('date')[['Articles']].sum()

In [6]:
# Rolling total over a window of 90 consecutive rows (the first 89 rows become NaN).
# NOTE(review): the window counts rows, not calendar days, and the index is still
# the raw 'date' value from the CSV at this point - confirm this is the intended window.
news_volume = news_volume.rolling(90).sum()

In [7]:
# Move 'date' out of the index and back into an ordinary column so it can be plotted.
news_volume = news_volume.reset_index()

In [8]:
# Parse the 'date' column into pandas datetimes so the x-axis is a real time axis.
news_volume['date'] = pd.to_datetime(news_volume['date'])

In [159]:
%%opts Scatter [width=800 height=400]
%%output filename="./media/News Volume" fig="png"
# Scatter of the rolling article total against date. The cell magics above must
# stay on the first lines of the cell; they set the plot size and the export target.
news_volume.hvplot.scatter(x='date', y='Articles')

In [10]:
# Load the date vector: the second column of the dates file, parsed to datetimes.
dates_path = os.path.join(".", '12_dates.csv')
date_strings = pd.read_csv(dates_path).iloc[:, 1]
dates = pd.to_datetime(date_strings)

In [11]:
# Read the association-risk table from the working directory.
association_path = os.path.join(".",'13092018_association_risk.csv')
association = pd.read_csv(association_path)

In [12]:
# Drop the first row and the first column.
# NOTE(review): presumably the first column is a saved row index and the first row
# has no matching entry in `dates` - confirm against the CSV.
association = association.iloc[1:,1:]

In [13]:
# Label each remaining row with its date; requires len(dates) == number of rows.
association.index = dates

In [14]:
%%opts Histogram [width=800 height=400] 

def association_plot(time):
    return (1/association.loc[dates.loc[time],:]).hvplot.hist()

hv.DynamicMap(association_plot, kdims=['time']).redim.range(time=(0,3914))

In [15]:
# Turn the date index back into column 0; rows are relabelled 0..N-1.
# Every later cell that uses association.iloc[..., 1:] relies on this layout.
association = association.reset_index()

In [16]:
# Long-format (date, value) table of every portfolio's association.
# '2003-05-16' is the name of the date column produced by reset_index()
# (presumably inherited from the header of the dates file - TODO confirm).
association_scatter = (
    association
    .melt(id_vars=['2003-05-16'])
    .loc[:, ['2003-05-16', 'value']]
)
association_scatter.columns = ['Time', 'Association']

# Plot the inverse of the stored association value, on a datetime axis.
association_scatter['Association'] = 1 / association_scatter['Association']
association_scatter['Time'] = pd.to_datetime(association_scatter['Time'])

In [17]:
%%opts RGB [width=800 height=400]
%%output filename="./media/Association Over Time" fig="png"
# datashade() rasterises the scatter into an image, so every (date, portfolio)
# point can be drawn without sending each one to the browser.
datashade(association_scatter.hvplot.scatter(x='Time', y='Association'))

In [157]:
# Load the portfolio volatility table and discard its first column
# (presumably a saved row index - TODO confirm against the CSV).
var_path = os.path.join(".", '13092018_portfolio_var.csv')
var = pd.read_csv(var_path).iloc[:, 1:]

In [19]:
%%opts Histogram [width=800 height=400]

def var_plot(time):
    return var.iloc[time,:].hvplot.hist()

hv.DynamicMap(var_plot, kdims=['time']).redim.range(time=(0,3915))

In [20]:
# Long-format table of every portfolio's volatility, skipping the first row of `var`.
var_scatter = (
    var.iloc[1:, :]
    .reset_index()
    .melt(id_vars=['index'])
    .loc[:, ['index', 'value']]
)
var_scatter.columns = ['Time', 'Std']

# melt() stacks the portfolio columns one after another, so repeating the date
# vector once per portfolio column pairs each value with its date.
# Requires len(dates) == var.shape[0] - 1.
var_scatter['Time'] = pd.Series(np.tile(dates, var.shape[1])).values

In [21]:
%%opts RGB [width=800 height=400]
%%output filename="./media/Volatility Over Time time" fig="png"

# Datashaded scatter of every (date, portfolio) volatility value.
datashade(var_scatter.hvplot.scatter(x='Time', y='Std'))

# Rolling ANCOVA without constant

In [22]:
# One OLS fit through the origin per row (date): that row's portfolio values
# from `var` regressed on the inverse of the same row's association values.
w_p_values = []
w_coefficient = []

for row in range(association.shape[0]):
    # Column 0 of `association` is the date column, so the regressors start at column 1.
    inverse_association = (1 / association.iloc[row, 1:]).tolist()
    volatility = var.iloc[row, :].tolist()
    fit = sm.OLS(volatility, inverse_association).fit()
    # A single regressor, so each appended list holds exactly one number.
    w_coefficient.append(fit.params.tolist())
    w_p_values.append(fit.pvalues.tolist())

In [23]:
%%opts Curve [width=1000 height=500] (line_width=0.5)
%%output filename="./media/P-Values without constant over time" fig="png"
# Keep the single p-value column and skip the first fit, so the remaining rows
# can be labelled with dates.iloc[1:].
w_p_values_frame = pd.DataFrame(w_p_values).iloc[1:,0]
# Label the rows with their dates so the curve is drawn against time.
w_p_values_frame.index = dates.iloc[1:]
w_p_values_frame.hvplot(label='ANCOVA p-value').options(line_alpha=0.2)

In [24]:
%%opts Curve [width=1000 height=500] (line_alpha=0.5 line_width=0.5)
%%output filename="./media/Coefficients without constant over time" fig="png"
w_coefficient_frame = pd.DataFrame(w_coefficient).iloc[1:,0]
w_coefficient_frame.index = dates.iloc[1:]
w_coefficient_frame.hvplot(label='ANCOVA coefficient') * \
hv.VLine(pd.to_datetime('08-08-2008')) * \
hv.Text(pd.to_datetime('06-06-2008'),0.01, "Financial Crisis", fontsize=10, rotation=90).options( color='#0066CC') *\
hv.VLine(pd.to_datetime('01-01-2016')) * \
hv.Text(pd.to_datetime('10-10-2015'),0.01, "Zuma-Gate", fontsize=10, rotation=90).options( color='orange')

# Rolling ANCOVA with constant

In [205]:
# One OLS fit with an intercept per row (date): that row's portfolio values
# from `var` regressed on the inverse of the same row's association values.
p_values = []
coefficient = []

for row in range(association.shape[0]):
    # Column 0 of `association` is the date column, so the regressors start at column 1.
    inverse_association = association.iloc[row, 1:] ** -1

    # add_constant() puts the intercept column first, so params and pvalues
    # come back ordered [constant, coefficient].
    design = sm.add_constant(inverse_association.values.tolist())
    fit = sm.OLS(var.iloc[row, :].tolist(), design).fit()
    coefficient.append(fit.params.tolist())
    p_values.append(fit.pvalues.tolist())

  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


In [206]:
%%opts Curve [width=1000 height=500 tools=['hover']] (line_width=0.5 line_alpha=0.5)
%%output filename="./media/P-Values with constant over time" fig="png"
p_values_frame = pd.DataFrame(p_values).iloc[1:,:]
p_values_frame.index = dates.iloc[1:]
p_values_frame.iloc[:,0].hvplot(label='ANCOVA constant p-value')   * \
p_values_frame.iloc[:,1].hvplot(label='ANCOVA coefficient p-value')   * \
p_values_frame.iloc[:,1].rolling(90).mean().hvplot(label='ANCOVA coefficient p-value rolling 90-day mean') *\
hv.HLine(0.05) * \
hv.Text(dates.quantile(0.5),0.1, "0.05 level of significance", fontsize=10).options( color='#0066CC')

In [207]:
%%opts Curve [width=1000 height=500] (line_alpha=0.5 line_width=0.5)
%%output filename="./media/Coefficients with constant over time" fig="png"
coefficient_frame = pd.DataFrame(coefficient).iloc[1:,:]
coefficient_frame.index = dates.iloc[1:]
coefficient_frame.iloc[:,0].hvplot(label='ANCOVA constant') * \
coefficient_frame.iloc[:,1].hvplot(label='ANCOVA coefficient') * \
hv.VLine(pd.to_datetime('08-08-2008')) * \
hv.Text(pd.to_datetime('06-06-2008'),6e-6, "Financial Crisis", fontsize=10, rotation=90).options( color='#0066CC') *\
hv.VLine(pd.to_datetime('01-01-2016')) * \
hv.Text(pd.to_datetime('10-10-2015'),6e-6, "Zuma-Gate", fontsize=10, rotation=90).options( color='orange')

# ANCOVA of portfolio over time with constant

In [246]:
# One OLS fit with an intercept per portfolio: the portfolio's column of `var`
# regressed, across all dates, on the inverse of its association column.
t_p_values = []
t_coefficient = []

for j in range(association.shape[1] - 1):
    # Column 0 of `association` is the date column, so portfolio j is column j + 1.
    # concat() joins the two series on their row labels; rows missing either
    # value are then discarded. The axis is passed by keyword because pandas 2.0
    # no longer accepts it positionally in dropna().
    # NOTE(review): `association` is labelled 0..N-1 while var.iloc[1:] keeps labels
    # 1.., so rows are paired by label rather than by position - confirm intended.
    val = pd.concat([1/association.iloc[:, 1+j], var.iloc[1:, j]], axis=1).dropna(axis=0)

    # add_constant() puts the intercept column first, so params and pvalues
    # come back ordered [constant, coefficient].
    X = sm.add_constant(val.iloc[:, 0].tolist())
    model = sm.OLS(val.iloc[:, 1].tolist(), X)
    results = model.fit()
    t_coefficient.append(results.params.tolist())
    t_p_values.append(results.pvalues.tolist())

In [212]:
%%opts Histogram [width=1000 height=500] (alpha=0.5)
%%output filename="./media/Coefficeint P-value with contant accross portfolio" fig="png"
t_values_frame = pd.DataFrame(p_values).iloc[:,:]
hv.Histogram(np.histogram(t_values_frame.iloc[:,1].dropna(), bins=100), label='ANCOVA coefficient p-value')

In [213]:
%%opts Histogram [width=1000 height=500] (alpha=0.5)
%%output filename="./media/Constant P-value with contant accross portfolio" fig="png"
t_values_frame = pd.DataFrame(p_values).iloc[:,:]
hv.Histogram(np.histogram(t_values_frame.iloc[:,0].dropna(), bins=100), label='ANCOVA constant p-value')

In [214]:
%%opts Histogram [width=1000 height=500] (alpha=0.5)
%%output filename="./media/Coefficeint value with contant accross portfolio" fig="png"

t_coefficient_frame = pd.DataFrame(t_coefficient).iloc[:,:]
hv.Histogram(np.histogram(t_coefficient_frame.iloc[:,0].dropna(), bins=100), label='ANCOVA constant')

In [215]:
%%opts Histogram [width=1000 height=500] (alpha=0.5)
%%output filename="./media/Coefficeint Value with contant accross portfolio" fig="png"

# Distribution, across portfolios, of column 1 of `t_coefficient_frame`, which
# must already have been built from the with-constant fits by an earlier cell.
hv.Histogram(np.histogram(t_coefficient_frame.iloc[:,1].dropna(), bins=100), label='ANCOVA coefficient')

# ANCOVA over time without constant

In [34]:
# One OLS fit through the origin per portfolio: the portfolio's column of `var`
# regressed, across all dates, on the inverse of its association column.
# This overwrites the with-constant results held in the same two lists.
t_p_values = []
t_coefficient = []

for j in range(association.shape[1] - 1):
    # Column 0 of `association` is the date column, so portfolio j is column j + 1.
    # concat() joins the two series on their row labels; rows missing either
    # value are then discarded. The axis is passed by keyword because pandas 2.0
    # no longer accepts it positionally in dropna().
    # NOTE(review): `association` is labelled 0..N-1 while var.iloc[1:] keeps labels
    # 1.., so rows are paired by label rather than by position - confirm intended.
    val = pd.concat([1/association.iloc[:, 1+j], var.iloc[1:, j]], axis=1).dropna(axis=0)

    # No intercept column is added, so each fit has a single parameter.
    X = val.iloc[:, 0].tolist()
    model = sm.OLS(val.iloc[:, 1].tolist(), X)
    results = model.fit()
    t_coefficient.append(results.params.tolist())
    t_p_values.append(results.pvalues.tolist())

In [35]:
%%opts Histogram [width=1000 height=500] (alpha=0.5)
%%output filename="./media/Coefficeint P-value without contant accross portfolios" fig="png"
t_values_frame = pd.DataFrame(p_values).iloc[:,:]
hv.Histogram(np.histogram(t_values_frame.iloc[:,0].dropna(), bins=100), label='ANCOVA coefficient p-value')

In [36]:
%%opts Histogram [width=1000 height=500] (alpha=0.5)
%%output filename="./media/Coefficeint Value without contant accross portfolio" fig="png"

t_coefficient_frame = pd.DataFrame(t_coefficient).iloc[:,:]
hv.Histogram(np.histogram(t_coefficient_frame.iloc[:,0].dropna(), bins=100), label='ANCOVA constant')

# Single Portfolio Example

In [261]:
# Worked example for the portfolio at position 500: pair its inverse association
# (column 501 of `association`, since column 0 is the date) with its column of
# `var`, joined on row labels, and discard rows missing either value. The axis is
# passed by keyword because pandas 2.0 no longer accepts it positionally in dropna().
val = pd.concat([1/association.iloc[:, 1+500], var.iloc[1:, 500]], axis=1).dropna(axis=0)
val.columns = ['association', 'volatility']

# OLS of volatility on inverse association with an intercept; add_constant()
# puts the intercept column first.
X = sm.add_constant(val['association'].tolist())
model = sm.OLS(val['volatility'].tolist(), X)
results = model.fit()
results.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.013
Model:,OLS,Adj. R-squared:,0.013
Method:,Least Squares,F-statistic:,51.67
Date:,"Sat, 15 Sep 2018",Prob (F-statistic):,7.83e-13
Time:,23:51:49,Log-Likelihood:,16188.0
No. Observations:,3865,AIC:,-32370.0
Df Residuals:,3863,BIC:,-32360.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.0082,0.000,80.098,0.000,0.008,0.008
x1,0.0083,0.001,7.188,0.000,0.006,0.011

0,1,2,3
Omnibus:,1429.476,Durbin-Watson:,0.019
Prob(Omnibus):,0.0,Jarque-Bera (JB):,5209.937
Skew:,1.848,Prob(JB):,0.0
Kurtosis:,7.323,Cond. No.,19.6


In [264]:
%%opts Scatter [width=1000 height=500]
%%output filename="./media/Scatter Plot of 500th Portofolio" fig="png"

# Scatter of the paired observations in `val` (built for the single-portfolio
# regression): inverse association on x, volatility on y.
val.hvplot.scatter(x='association',y='volatility', label='500th Portfolio', size=3)