Implemented the research paper and tried to replicate its results 

## Importing libraries and cleaning the data

In [16]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
import seaborn as sns
import scipy
import statsmodels.api as sm
import statsmodels.formula.api as smf
sns.set(rc={'figure.figsize':(20,15),"figure.dpi":400, 'savefig.dpi':400})

In [17]:
main_df = pd.read_csv('./quantile_raw.csv')
df = main_df

In [18]:
df['dop'] = 0
df['dcl'] = 0
dfx = df.set_index('t')

In [19]:
#Creating dummy variables for monitoring opening and closing hours 
dfx['hourmin'] = dfx.index
dfx['hourmin'] = dfx['hourmin'].apply(lambda x: int(x[-5:-3])*60 + int(x[-2:]))
dop = (dfx['hourmin'] <= 599)
dfx.loc[(dop,"dop")] = 1
dcl = (dfx['hourmin']>=830)
dfx.loc[(dcl,"dcl")] = 1

In [20]:
dfx.reset_index(inplace = True)

In [74]:
dfx.fillna(method='ffill', inplace=True)
#print(dfx.isna().sum())

In [22]:
dfx['lsv']=np.log(dfx['k200_wn']+1e-12)  #1e-12 added for 0s in k200_wn 
dfx['lfv']=np.log(dfx['f_wn'])
dfx['r']= 100*dfx['k200'].pct_change()
dfx['div']=dfx['iv2'].diff()
dfx['div_atm']=dfx['iv2_atm'].diff()
dfx['div_otm']=dfx['iv2_otm'].diff()

# KEY POINTS


1) 2 parallel dataframes have been created (with and without outliers). The results published in the paper couldn't be replicated without removing the outliers from the data. Also, the paper didn't explicitly mention any outlier removal method, hence, I had to guess the outlier removal method through brute force. 

2) All the regressions have been performed on both the dataframes to show the effect of outliers on the regressions.
  This effect is particularly evident when we regress using the trading volume shocks as the independent variables


3) Outliers should not be removed as they could have been the data points where significant profits/losses could have been made and by dismissing them as such, we miss out on critical information.

In [38]:
df2 = dfx[['r','lsv','lfv','div']]
df2.dropna(inplace = True)

# The suffix 'wo' in the variable name stands for without outliers, df2wo is the data which is within 3 standard deviations 
# from the mean for r, lsv, lfv and div whereas df2 is the actual data without removing any outliers, the number 3 was arrived 
# at through brute force.

df2wo = df2     
df2wo = df2wo[np.abs(scipy.stats.zscore(df2wo) <= 3).all(axis = 1)]

In [24]:
df2wo

Unnamed: 0,r,lsv,lfv,div
1,-0.095115,23.002880,25.179739,-5.346735
2,-0.103860,22.971918,24.977438,-0.554828
3,0.112632,22.997093,25.414614,-0.829135
4,0.043271,23.266617,25.510758,0.677726
5,-0.242215,23.177030,25.338386,0.328027
...,...,...,...,...
821783,0.042368,23.218085,24.550378,0.084025
821784,0.023100,23.312958,24.685934,0.693092
821785,0.015396,23.096407,23.175673,-0.808555
821786,-0.019243,23.365862,24.184248,-0.314433


In [25]:
#Stats without outliers
stats = df2wo.describe(percentiles=[0.01,0.05,0.25,0.5,0.75,0.95,0.99],)
stats.loc['skewness'] = scipy.stats.skew(df2wo).tolist()
stats.loc['kurtosis'] = scipy.stats.kurtosis(df2wo).tolist()
stats

#Removing the outliers has a significant impact on kurtosis and skewness.

Unnamed: 0,r,lsv,lfv,div
count,809452.0,809452.0,809452.0,809452.0
mean,-0.001053,22.82654,24.433386,-0.066018
std,0.050507,0.589547,1.144096,1.463458
min,-2.121282,-27.631021,17.854607,-34.178617
1%,-0.152473,21.421981,20.750541,-4.677325
5%,-0.073787,21.883198,22.286,-2.373255
25%,-0.024807,22.432461,23.865801,-0.658826
50%,0.0,22.819652,24.580068,-0.01622
75%,0.024188,23.224618,25.191792,0.615502
95%,0.072391,23.79425,26.00138,2.171861


## Setting up the regression dataframe

In [39]:
df2_reg1 = dfx[['t','hourmin','div','r','lsv','lfv','dop','dcl']]
df2wo_reg1 = dfx[['t','hourmin','div','r','lsv','lfv','dop','dcl']]
#df2_reg1 = df2_reg1[np.abs(scipy.stats.zscore(df2_reg1[["r","lsv","lfv"]]) <= 3).all(axis = 1)]

In [40]:
df2wo_reg1 = df2wo_reg1.loc[df2wo.index]

In [41]:
df2wo_reg1

Unnamed: 0,t,hourmin,div,r,lsv,lfv,dop,dcl
1,2005-01-03 10:02,602,-5.346735,-0.095115,23.002880,25.179739,0,0
2,2005-01-03 10:03,603,-0.554828,-0.103860,22.971918,24.977438,0,0
3,2005-01-03 10:04,604,-0.829135,0.112632,22.997093,25.414614,0,0
4,2005-01-03 10:05,605,0.677726,0.043271,23.266617,25.510758,0,0
5,2005-01-03 10:06,606,0.328027,-0.242215,23.177030,25.338386,0,0
...,...,...,...,...,...,...,...,...
821783,2014-06-30 14:45,885,0.084025,0.042368,23.218085,24.550378,0,1
821784,2014-06-30 14:46,886,0.693092,0.023100,23.312958,24.685934,0,1
821785,2014-06-30 14:47,887,-0.808555,0.015396,23.096407,23.175673,0,1
821786,2014-06-30 14:48,888,-0.314433,-0.019243,23.365862,24.184248,0,1


In [43]:
df2_reg1.dropna(inplace = True)
df2wo_reg1.dropna(inplace = True)

In [44]:
#Stats with outliers
stats = df2_reg1[["div","r","lsv","lfv"]].describe(percentiles=[0.01,0.05,0.25,0.5,0.75,0.95,0.99])
stats.loc['skewness'] = scipy.stats.skew(df2_reg1[["div","r","lsv","lfv"]]).tolist()
stats.loc['kurtosis'] = scipy.stats.kurtosis(df2_reg1[["div","r","lsv","lfv"]]).tolist()
stats    

#Significant change in kurtosis of r

Unnamed: 0,div,r,lsv,lfv
count,821787.0,821787.0,821787.0,821787.0
mean,-1.8e-05,0.000127,22.837487,24.442989
std,1.63429,0.075209,0.604476,1.152198
min,-34.178617,-6.522082,-27.631021,17.854607
1%,-4.68509,-0.159251,21.424195,20.747746
5%,-2.372553,-0.074755,21.885664,22.287283
25%,-0.651657,-0.024817,22.436054,23.869834
50%,-0.006161,0.0,22.825284,24.586621
75%,0.643954,0.024694,23.233998,25.202861
95%,2.383677,0.075747,23.819702,26.031534


In [45]:
df2wo_reg1.reset_index(inplace = True, drop = True)

In [47]:
#Calculation of time trend terms
df2_reg1.rename(columns = {'t': 'time'},inplace = True)
df2_reg1['tbyT'] = df2_reg1.index/len(df2_reg1.index)
df2_reg1['t2byT2'] = df2_reg1['tbyT']**2

df2wo_reg1.rename(columns = {'t': 'time'},inplace = True)
df2wo_reg1['tbyT'] = (df2wo_reg1.index + 1)/len(df2wo_reg1.index)
df2wo_reg1['t2byT2'] = df2wo_reg1['tbyT']**2

In [50]:
#Calculation of lagged values of r, div, lsv, lfv and fs (with outliers)
df2_reg1["div1"] = df2_reg1["div"].shift(1)
df2_reg1["div2"] = df2_reg1["div"].shift(2)
df2_reg1["div3"] = df2_reg1["div"].shift(3)

df2_reg1["r1"] = df2_reg1["r"].shift(1)
df2_reg1["r2"] = df2_reg1["r"].shift(2)
df2_reg1["r3"] = df2_reg1["r"].shift(3)

df2_reg1["r1sq"] = df2_reg1["r1"]**2
df2_reg1["r2sq"] = df2_reg1["r2"]**2
df2_reg1["r3sq"] = df2_reg1["r3"]**2

df2_reg1["lsv1"] = df2_reg1["lsv"].shift(1)
df2_reg1["lsv2"] = df2_reg1["lsv"].shift(2)
df2_reg1["lsv3"] = df2_reg1["lsv"].shift(3)

df2_reg1["lfv1"] = df2_reg1["lfv"].shift(1)
df2_reg1["lfv2"] = df2_reg1["lfv"].shift(2)
df2_reg1["lfv3"] = df2_reg1["lfv"].shift(3)

df2_reg1["fs1"] = df2_reg1["lfv1"] - df2_reg1["lsv1"]
df2_reg1["fs2"] = df2_reg1["lfv2"] - df2_reg1["lsv2"]
df2_reg1["fs3"] = df2_reg1["lfv3"] - df2_reg1["lsv3"]

df2_reg1.dropna(inplace = True)

In [51]:
#Calculation of r,div, lsv, lfv and fs (without outliers)
df2wo_reg1["div1"] = df2wo_reg1["div"].shift(1)
df2wo_reg1["div2"] = df2wo_reg1["div"].shift(2)
df2wo_reg1["div3"] = df2wo_reg1["div"].shift(3)

df2wo_reg1["r1"] = df2wo_reg1["r"].shift(1)
df2wo_reg1["r2"] = df2wo_reg1["r"].shift(2)
df2wo_reg1["r3"] = df2wo_reg1["r"].shift(3)

df2wo_reg1["r1sq"] = df2wo_reg1["r1"]**2
df2wo_reg1["r2sq"] = df2wo_reg1["r2"]**2
df2wo_reg1["r3sq"] = df2wo_reg1["r3"]**2

df2wo_reg1["lsv1"] = df2wo_reg1["lsv"].shift(1)
df2wo_reg1["lsv2"] = df2wo_reg1["lsv"].shift(2)
df2wo_reg1["lsv3"] = df2wo_reg1["lsv"].shift(3)

df2wo_reg1["lfv1"] = df2wo_reg1["lfv"].shift(1)
df2wo_reg1["lfv2"] = df2wo_reg1["lfv"].shift(2)
df2wo_reg1["lfv3"] = df2wo_reg1["lfv"].shift(3)

df2wo_reg1["fs1"] = df2wo_reg1["lfv1"] - df2wo_reg1["lsv1"]
df2wo_reg1["fs2"] = df2wo_reg1["lfv2"] - df2wo_reg1["lsv2"]
df2wo_reg1["fs3"] = df2wo_reg1["lfv3"] - df2wo_reg1["lsv3"]

df2wo_reg1.dropna(inplace = True)

## Quantile regressions for equations [5], [6] and [7]

In [53]:
q_val = [0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9,0.95, 0.99]
import statsmodels.formula.api as smf

Due to differences in outlier removal methods applied, the quantile regression coefficients at the extreme quantiles (0.01, 0.05, 0.95, 0.99) might differ a bit.

In [54]:
#EQN 5 (with outliers)
res1 = []

for q in q_val:
    mod = smf.quantreg("r ~ tbyT + t2byT2 + dop + dcl + r1 + r2 + r3 + r1sq + r2sq + r3sq + lsv1 + lsv2 + lsv3", df2_reg1)
    res = mod.fit(q = q)
    print("q Value =", q)
    print(res.summary())
    res1.append(res)

In [58]:
#EQN 5 (without outliers)
reswo1 = []

for q in q_val:
    mod = smf.quantreg("r ~ tbyT + t2byT2 + dop + dcl + r1 + r2 + r3 + r1sq + r2sq + r3sq + lsv1 + lsv2 + lsv3", df2wo_reg1)
    reswo = mod.fit(q = q)
    print("q Value =", q)
    print(reswo.summary())
    reswo1.append(reswo)

In [56]:
#EQN 6 (with outliers)
res2 = []

for q in q_val:
    mod = smf.quantreg("r ~ tbyT + t2byT2 + dop + dcl + r1 + r2 + r3 + r1sq + r2sq + r3sq + lfv1 + lfv2 + lfv3", df2_reg1)
    res = mod.fit(q = q)
    print("q Value =", q)
    print(res.summary())
    res2.append(res)

In [226]:
#EQN 6 (without outliers)
reswo2 = []

for q in q_val:
    mod = smf.quantreg("r ~ tbyT + t2byT2 + dop + dcl + r1 + r2 + r3 + r1sq + r2sq + r3sq + lfv1 + lfv2 + lfv3", df2wo_reg1)
    reswo = mod.fit(q = q)
    print("q Value =", q)
    print(reswo.summary())
    reswo2.append(reswo)

q Value = 0.01
                         QuantReg Regression Results                          
Dep. Variable:                      r   Pseudo R-squared:               0.1944
Model:                       QuantReg   Bandwidth:                    0.002860
Method:                 Least Squares   Sparsity:                        3.247
Date:                Wed, 06 Oct 2021   No. Observations:               809449
Time:                        05:51:56   Df Residuals:                   809435
                                        Df Model:                           13
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.1270      0.012     11.038      0.000       0.104       0.150
tbyT          -0.0933      0.006    -16.031      0.000      -0.105      -0.082
t2byT2         0.1303      0.006     23.252      0.000       0.119       0.141
dop           -0.0454      0.001    -

q Value = 0.5
                         QuantReg Regression Results                          
Dep. Variable:                      r   Pseudo R-squared:             0.001814
Model:                       QuantReg   Bandwidth:                    0.001917
Method:                 Least Squares   Sparsity:                      0.08219
Date:                Wed, 06 Oct 2021   No. Observations:               809449
Time:                        05:53:26   Df Residuals:                   809435
                                        Df Model:                           13
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0022      0.001      1.661      0.097      -0.000       0.005
tbyT          -0.0005      0.001     -0.683      0.494      -0.002       0.001
t2byT2         0.0003      0.001      0.404      0.686      -0.001       0.002
dop            0.0002      0.000      

q Value = 0.99
                         QuantReg Regression Results                          
Dep. Variable:                      r   Pseudo R-squared:               0.1114
Model:                       QuantReg   Bandwidth:                    0.002634
Method:                 Least Squares   Sparsity:                        2.855
Date:                Wed, 06 Oct 2021   No. Observations:               809449
Time:                        05:54:59   Df Residuals:                   809435
                                        Df Model:                           13
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -0.0924      0.010     -8.958      0.000      -0.113      -0.072
tbyT           0.1010      0.005     19.213      0.000       0.091       0.111
t2byT2        -0.1396      0.005    -27.615      0.000      -0.150      -0.130
dop            0.0263      0.001     

In [57]:
#EQN 7 (with outliers)
res3 = []

for q in q_val:
    mod = smf.quantreg("r ~ tbyT + t2byT2 + dop + dcl + r1 + r2 + r3 + r1sq + r2sq + r3sq + fs1 + fs2 + fs3", df2_reg1)
    res = mod.fit(q = q)
    print("q Value =", q)
    print(res.summary())
    res3.append(res)

In [71]:
#EQN 7(without outliers)
reswo3 = []

for q in q_val:
    mod = smf.quantreg("r ~ tbyT + t2byT2 + dop + dcl + r1 + r2 + r3 + r1sq + r2sq + r3sq + fs1 + fs2 + fs3", df2wo_reg1)
    reswo = mod.fit(q = q)
    print("q Value =", q)
    print(reswo.summary())
    reswo3.append(reswo)

q Value = 0.01
                         QuantReg Regression Results                          
Dep. Variable:                      r   Pseudo R-squared:               0.1918
Model:                       QuantReg   Bandwidth:                    0.002860
Method:                 Least Squares   Sparsity:                        3.351
Date:                Fri, 08 Oct 2021   No. Observations:               809446
Time:                        04:12:37   Df Residuals:                   809432
                                        Df Model:                           13
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -0.0770      0.001    -59.813      0.000      -0.079      -0.074
tbyT          -0.1339      0.005    -24.385      0.000      -0.145      -0.123
t2byT2         0.1663      0.005     30.602      0.000       0.156       0.177
dop           -0.0517      0.001    -

q Value = 0.5
                         QuantReg Regression Results                          
Dep. Variable:                      r   Pseudo R-squared:             0.001814
Model:                       QuantReg   Bandwidth:                    0.001917
Method:                 Least Squares   Sparsity:                      0.08214
Date:                Fri, 08 Oct 2021   No. Observations:               809446
Time:                        04:14:00   Df Residuals:                   809432
                                        Df Model:                           13
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0002      0.000      1.327      0.185   -9.92e-05       0.001
tbyT          -0.0010      0.001     -1.490      0.136      -0.002       0.000
t2byT2         0.0007      0.001      1.112      0.266      -0.001       0.002
dop            0.0002      0.000      

q Value = 0.99
                         QuantReg Regression Results                          
Dep. Variable:                      r   Pseudo R-squared:               0.1080
Model:                       QuantReg   Bandwidth:                    0.002629
Method:                 Least Squares   Sparsity:                        2.763
Date:                Fri, 08 Oct 2021   No. Observations:               809446
Time:                        04:15:46   Df Residuals:                   809432
                                        Df Model:                           13
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0862      0.001     81.108      0.000       0.084       0.088
tbyT           0.1337      0.005     29.299      0.000       0.125       0.143
t2byT2        -0.1693      0.004    -37.769      0.000      -0.178      -0.161
dop            0.0314      0.001     

## Autoregressions for equations [8],[9]

In [59]:
from statsmodels.tsa.ar_model import AutoReg

In [62]:
# EQN 8&9 (with outliers)
mod = AutoReg(df2_reg1['lsv'], 3, old_names=False)
res = mod.fit()
df2_reg1['lsv_hat'] = res.params[0] + res.params[1]*df2_reg1['lsv1'] + res.params[2]*df2_reg1['lsv2'] + res.params[3]*df2_reg1['lsv3']
df2_reg1['lsvs'] = df2_reg1['lsv'] - df2_reg1['lsv_hat']

mod = AutoReg(df2_reg1['lfv'], 3, old_names=False)
res = mod.fit()
df2_reg1['lfv_hat'] = res.params[0] + res.params[1]*df2_reg1['lfv1'] + res.params[2]*df2_reg1['lfv2'] + res.params[3]*df2_reg1['lfv3']
df2_reg1['lfvs'] = df2_reg1['lfv'] - df2_reg1['lfv_hat']

df2_reg1['fss'] = df2_reg1['lfvs'] - df2_reg1['lsvs']

In [65]:
# EQN 8&9 (without outliers)
mod = AutoReg(df2wo_reg1['lsv'], 3, old_names=False)
res = mod.fit()
df2wo_reg1['lsv_hat'] = res.params[0] + res.params[1]*df2wo_reg1['lsv1'] + res.params[2]*df2wo_reg1['lsv2'] + res.params[3]*df2wo_reg1['lsv3']
df2wo_reg1['lsvs'] = df2wo_reg1['lsv'] - df2wo_reg1['lsv_hat']

mod = AutoReg(df2wo_reg1['lfv'], 3, old_names=False)
res = mod.fit()
df2wo_reg1['lfv_hat'] = res.params[0] + res.params[1]*df2wo_reg1['lfv1'] + res.params[2]*df2wo_reg1['lfv2'] + res.params[3]*df2wo_reg1['lfv3']
df2wo_reg1['lfvs'] = df2wo_reg1['lfv'] - df2wo_reg1['lfv_hat']

df2wo_reg1['fss'] = df2wo_reg1['lfvs'] - df2wo_reg1['lsvs']

In [68]:
#Calculation of lagged values of lsvs, lfvs and fss (with outliers)
df2_reg1["lsvs1"] = df2_reg1["lsvs"].shift(1)
df2_reg1["lsvs2"] = df2_reg1["lsvs"].shift(2)
df2_reg1["lsvs3"] = df2_reg1["lsvs"].shift(3)

df2_reg1["lfvs1"] = df2_reg1["lfvs"].shift(1)
df2_reg1["lfvs2"] = df2_reg1["lfvs"].shift(2)
df2_reg1["lfvs3"] = df2_reg1["lfvs"].shift(3)

df2_reg1["fss1"] = df2_reg1['fss'].shift(1)
df2_reg1["fss2"] = df2_reg1['fss'].shift(2)
df2_reg1["fss3"] = df2_reg1['fss'].shift(3)

df2_reg1.dropna(inplace = True)

In [67]:
#Calculation of lagged vlaues of lsvs, lfvs and fss (without outliers)
df2wo_reg1["lsvs1"] = df2wo_reg1["lsvs"].shift(1)
df2wo_reg1["lsvs2"] = df2wo_reg1["lsvs"].shift(2)
df2wo_reg1["lsvs3"] = df2wo_reg1["lsvs"].shift(3)

df2wo_reg1["lfvs1"] = df2wo_reg1["lfvs"].shift(1)
df2wo_reg1["lfvs2"] = df2wo_reg1["lfvs"].shift(2)
df2wo_reg1["lfvs3"] = df2wo_reg1["lfvs"].shift(3)

df2wo_reg1["fss1"] = df2wo_reg1['fss'].shift(1)
df2wo_reg1["fss2"] = df2wo_reg1['fss'].shift(2)
df2wo_reg1["fss3"] = df2wo_reg1['fss'].shift(3)

df2wo_reg1.dropna(inplace = True)

In [75]:
df2wo_reg1.head(5)

Unnamed: 0,time,hourmin,div,r,lsv,lfv,dop,dcl,tbyT,t2byT2,...,fss,lsvs1,lsvs2,lsvs3,lfvs1,lfvs2,lfvs3,fss1,fss2,fss3
6,2005-01-03 10:08,608,-0.054032,0.129859,23.027346,24.771089,0,0,9e-06,7.478489e-11,...,-0.418002,-0.197837,0.056284,0.286648,0.336502,0.210673,0.459398,0.534339,0.154389,0.17275
7,2005-01-03 10:09,609,-1.197316,0.069168,22.839656,23.765562,0,0,1e-05,9.767823e-11,...,-0.9494,-0.007835,-0.197837,0.056284,-0.425837,0.336502,0.210673,-0.418002,0.534339,0.154389
8,2005-01-03 10:10,610,0.29951,-0.095041,22.924941,24.326326,0,0,1.1e-05,1.23624e-10,...,-0.107569,-0.185786,-0.007835,-0.197837,-1.135186,-0.425837,0.336502,-0.9494,-0.418002,0.534339
9,2005-01-03 10:11,611,1.456779,-0.242152,22.955738,25.131353,0,0,1.2e-05,1.526222e-10,...,0.761562,0.034844,-0.185786,-0.007835,-0.072725,-1.135186,-0.425837,-0.107569,-0.9494,-0.418002
10,2005-01-03 10:12,612,-0.264052,0.034677,22.925101,24.647907,0,0,1.4e-05,1.846729e-10,...,0.053444,0.031898,0.034844,-0.185786,0.793459,-0.072725,-1.135186,0.761562,-0.107569,-0.9494


## Quantile regressions for equations [10], [11] and [12]

In [251]:
#EQN 10 (with outliers)
res4 = []

for q in q_val:
    mod = smf.quantreg("r ~ tbyT + t2byT2 + dop + dcl + r1 + r2 + r3 + r1sq + r2sq + r3sq + lsvs1 + lsvs2 + lsvs3", df2_reg1)
    res = mod.fit(q = q)
    print("q Value =", q)
    print(res.summary())
    res4.append(res)

q Value = 0.01
                         QuantReg Regression Results                          
Dep. Variable:                      r   Pseudo R-squared:               0.1364
Model:                       QuantReg   Bandwidth:                    0.003196
Method:                 Least Squares   Sparsity:                        3.799
Date:                Wed, 06 Oct 2021   No. Observations:               821781
Time:                        07:00:38   Df Residuals:                   821767
                                        Df Model:                           13
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -0.0935      0.001    -66.619      0.000      -0.096      -0.091
tbyT          -0.2123      0.007    -32.435      0.000      -0.225      -0.199
t2byT2         0.2509      0.006     38.683      0.000       0.238       0.264
dop           -0.0895      0.001    -

q Value = 0.5
                         QuantReg Regression Results                          
Dep. Variable:                      r   Pseudo R-squared:            0.0006441
Model:                       QuantReg   Bandwidth:                    0.001925
Method:                 Least Squares   Sparsity:                      0.06817
Date:                Wed, 06 Oct 2021   No. Observations:               821781
Time:                        07:01:52   Df Residuals:                   821767
                                        Df Model:                           13
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -0.0001      0.000     -1.231      0.218      -0.000    8.57e-05
tbyT          -0.0003      0.001     -0.539      0.590      -0.001       0.001
t2byT2         0.0002      0.001      0.398      0.691      -0.001       0.001
dop            0.0002      0.000      

q Value = 0.99
                         QuantReg Regression Results                          
Dep. Variable:                      r   Pseudo R-squared:               0.1343
Model:                       QuantReg   Bandwidth:                    0.003463
Method:                 Least Squares   Sparsity:                        3.833
Date:                Wed, 06 Oct 2021   No. Observations:               821781
Time:                        07:03:29   Df Residuals:                   821767
                                        Df Model:                           13
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0962      0.001     68.692      0.000       0.093       0.099
tbyT           0.2153      0.007     32.667      0.000       0.202       0.228
t2byT2        -0.2563      0.007    -39.202      0.000      -0.269      -0.243
dop            0.1285      0.001    1

In [253]:
#EQN 10 (without outliers)
reswo4 = []

for q in q_val:
    mod = smf.quantreg("r ~ tbyT + t2byT2 + dop + dcl + r1 + r2 + r3 + r1sq + r2sq + r3sq + lsvs1 + lsvs2 + lsvs3", df2wo_reg1)
    reswo = mod.fit(q = q)
    print("q Value =", q)
    print(reswo.summary())
    reswo4.append(reswo)

q Value = 0.01
                         QuantReg Regression Results                          
Dep. Variable:                      r   Pseudo R-squared:               0.1887
Model:                       QuantReg   Bandwidth:                    0.002860
Method:                 Least Squares   Sparsity:                        3.400
Date:                Wed, 06 Oct 2021   No. Observations:               809446
Time:                        07:10:49   Df Residuals:                   809432
                                        Df Model:                           13
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -0.0848      0.001    -66.643      0.000      -0.087      -0.082
tbyT          -0.1613      0.006    -27.604      0.000      -0.173      -0.150
t2byT2         0.1956      0.006     33.970      0.000       0.184       0.207
dop           -0.0530      0.001    -

q Value = 0.5
                         QuantReg Regression Results                          
Dep. Variable:                      r   Pseudo R-squared:             0.001821
Model:                       QuantReg   Bandwidth:                    0.001917
Method:                 Least Squares   Sparsity:                      0.08217
Date:                Wed, 06 Oct 2021   No. Observations:               809446
Time:                        07:12:55   Df Residuals:                   809432
                                        Df Model:                           13
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0001      0.000      0.735      0.463      -0.000       0.000
tbyT          -0.0009      0.001     -1.408      0.159      -0.002       0.000
t2byT2         0.0007      0.001      1.049      0.294      -0.001       0.002
dop            0.0002      0.000      

q Value = 0.99
                         QuantReg Regression Results                          
Dep. Variable:                      r   Pseudo R-squared:               0.1045
Model:                       QuantReg   Bandwidth:                    0.002621
Method:                 Least Squares   Sparsity:                        2.960
Date:                Wed, 06 Oct 2021   No. Observations:               809446
Time:                        07:15:05   Df Residuals:                   809432
                                        Df Model:                           13
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0932      0.001     87.869      0.000       0.091       0.095
tbyT           0.1475      0.005     30.031      0.000       0.138       0.157
t2byT2        -0.1837      0.005    -38.086      0.000      -0.193      -0.174
dop            0.0330      0.001     

In [None]:
#EQN 11 (with outliers)
res5 = []

for q in q_val:
    mod = smf.quantreg("r ~ tbyT + t2byT2 + dop + dcl + r1 + r2 + r3 + r1sq + r2sq + r3sq + lfvs1 + lfvs2 + lfvs3", df2_reg1)
    res = mod.fit(q = q)
    print("q Value =", q)
    print(res.summary())
    res5.append(res)

In [254]:
#EQN 11 (without outliers)
reswo5 = []

for q in q_val:
    mod = smf.quantreg("r ~ tbyT + t2byT2 + dop + dcl + r1 + r2 + r3 + r1sq + r2sq + r3sq + lfvs1 + lfvs2 + lfvs3", df2wo_reg1)
    reswo = mod.fit(q = q)
    print("q Value =", q)
    print(reswo.summary())
    reswo5.append(reswo)

q Value = 0.01
                         QuantReg Regression Results                          
Dep. Variable:                      r   Pseudo R-squared:               0.1916
Model:                       QuantReg   Bandwidth:                    0.002860
Method:                 Least Squares   Sparsity:                        3.471
Date:                Wed, 06 Oct 2021   No. Observations:               809446
Time:                        07:18:33   Df Residuals:                   809432
                                        Df Model:                           13
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -0.0889      0.001    -71.578      0.000      -0.091      -0.086
tbyT          -0.1393      0.006    -24.249      0.000      -0.151      -0.128
t2byT2         0.1730      0.006     30.465      0.000       0.162       0.184
dop           -0.0504      0.001    -

q Value = 0.5
                         QuantReg Regression Results                          
Dep. Variable:                      r   Pseudo R-squared:             0.001827
Model:                       QuantReg   Bandwidth:                    0.001917
Method:                 Least Squares   Sparsity:                      0.08243
Date:                Wed, 06 Oct 2021   No. Observations:               809446
Time:                        07:21:19   Df Residuals:                   809432
                                        Df Model:                           13
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept  -5.076e-05      0.000     -0.347      0.729      -0.000       0.000
tbyT          -0.0004      0.001     -0.584      0.560      -0.002       0.001
t2byT2         0.0002      0.001      0.296      0.767      -0.001       0.001
dop            0.0003      0.000      

q Value = 0.99
                         QuantReg Regression Results                          
Dep. Variable:                      r   Pseudo R-squared:               0.1084
Model:                       QuantReg   Bandwidth:                    0.002630
Method:                 Least Squares   Sparsity:                        2.837
Date:                Wed, 06 Oct 2021   No. Observations:               809446
Time:                        07:23:39   Df Residuals:                   809432
                                        Df Model:                           13
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0954      0.001     94.137      0.000       0.093       0.097
tbyT           0.1378      0.005     29.250      0.000       0.129       0.147
t2byT2        -0.1733      0.005    -37.428      0.000      -0.182      -0.164
dop            0.0304      0.001     

In [None]:
#EQN 12 (with outliers)
res6 = []

for q in q_val:
    mod = smf.quantreg("r ~ tbyT + t2byT2 + dop + dcl + r1 + r2 + r3 + r1sq + r2sq + r3sq + fss1 + fss2 + fss3", df2_reg1)
    res = mod.fit(q = q)
    print("q Value =", q)
    print(res.summary())
    res6.append(res)

In [255]:
#EQN 12 (without outliers)
reswo6 = []

for q in q_val:
    mod = smf.quantreg("r ~ tbyT + t2byT2 + dop + dcl + r1 + r2 + r3 + r1sq + r2sq + r3sq + fss1 + fss2 + fss3", df2wo_reg1)
    reswo = mod.fit(q = q)
    print("q Value =", q)
    print(reswo.summary())
    reswo6.append(reswo)

q Value = 0.01
                         QuantReg Regression Results                          
Dep. Variable:                      r   Pseudo R-squared:               0.1912
Model:                       QuantReg   Bandwidth:                    0.002860
Method:                 Least Squares   Sparsity:                        3.276
Date:                Wed, 06 Oct 2021   No. Observations:               809446
Time:                        07:25:32   Df Residuals:                   809432
                                        Df Model:                           13
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -0.0884      0.001    -75.685      0.000      -0.091      -0.086
tbyT          -0.1417      0.005    -26.226      0.000      -0.152      -0.131
t2byT2         0.1753      0.005     32.817      0.000       0.165       0.186
dop           -0.0504      0.001    -

q Value = 0.5
                         QuantReg Regression Results                          
Dep. Variable:                      r   Pseudo R-squared:             0.001825
Model:                       QuantReg   Bandwidth:                    0.001917
Method:                 Least Squares   Sparsity:                      0.08235
Date:                Wed, 06 Oct 2021   No. Observations:               809446
Time:                        07:27:57   Df Residuals:                   809432
                                        Df Model:                           13
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept  -7.846e-07      0.000     -0.005      0.996      -0.000       0.000
tbyT          -0.0005      0.001     -0.739      0.460      -0.002       0.001
t2byT2         0.0002      0.001      0.396      0.692      -0.001       0.001
dop            0.0002      0.000      

q Value = 0.99
                         QuantReg Regression Results                          
Dep. Variable:                      r   Pseudo R-squared:               0.1076
Model:                       QuantReg   Bandwidth:                    0.002627
Method:                 Least Squares   Sparsity:                        2.862
Date:                Wed, 06 Oct 2021   No. Observations:               809446
Time:                        07:30:22   Df Residuals:                   809432
                                        Df Model:                           13
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0951      0.001     94.386      0.000       0.093       0.097
tbyT           0.1383      0.005     29.488      0.000       0.129       0.147
t2byT2        -0.1735      0.005    -37.624      0.000      -0.183      -0.164
dop            0.0311      0.001     

## Quantile regressions for equations [13], [14] and [15]

The quantile for which the results of equations [13], [14] and [15] have been given, isn't explicitly mentioned in the paper. Hence, I have regressed for all common quantiles(0.01, 0.05, 0.1 ......,0.95, 0.99)  

In [72]:
#EQN 13.1 (with outliers) - ```div - lsv```
res7 = []

for q in q_val:
    mod = smf.quantreg("div ~ tbyT + t2byT2 + dop + dcl + div1 + div2 + div3 + lsv1 + lsv2 + lsv3", df2_reg1)
    res = mod.fit(q = q)
    print("q Value =", q)
    print(res.summary())
    res7.append(res)

q Value = 0.01
                         QuantReg Regression Results                          
Dep. Variable:                    div   Pseudo R-squared:               0.3352
Model:                       QuantReg   Bandwidth:                     0.07113
Method:                 Least Squares   Sparsity:                        89.11
Date:                Fri, 08 Oct 2021   No. Observations:               821781
Time:                        04:34:06   Df Residuals:                   821770
                                        Df Model:                           10
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -0.4278      0.709     -0.603      0.546      -1.818       0.962
tbyT          -6.8665      0.191    -35.994      0.000      -7.240      -6.493
t2byT2         8.0878      0.185     43.701      0.000       7.725       8.451
dop            0.2752      0.032     

q Value = 0.75
                         QuantReg Regression Results                          
Dep. Variable:                    div   Pseudo R-squared:               0.1121
Model:                       QuantReg   Bandwidth:                     0.03926
Method:                 Least Squares   Sparsity:                        3.300
Date:                Fri, 08 Oct 2021   No. Observations:               821781
Time:                        04:36:03   Df Residuals:                   821770
                                        Df Model:                           10
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0124      0.082      0.150      0.880      -0.149       0.174
tbyT           0.4720      0.026     18.172      0.000       0.421       0.523
t2byT2        -0.7833      0.025    -31.646      0.000      -0.832      -0.735
dop            0.0049      0.005     

In [None]:
#EQN 13.2 (without outliers) - ```div - lsv```
reswo7 = []

for q in q_val:
    mod = smf.quantreg("div ~ tbyT + t2byT2 + dop + dcl + div1 + div2 + div3 + lsv1 + lsv2 + lsv3", df2wo_reg1)
    reswo = mod.fit(q = q)
    print("q Value =", q)
    print(reswo.summary())
    reswo7.append(reswo)

In [None]:
#EQN 13.3 (with outliers) - ```div - lsvs```
res8 = []

for q in q_val:
    mod = smf.quantreg("div ~ tbyT + t2byT2 + dop + dcl + div1 + div2 + div3 + lsvs1 + lsvs2 + lsvs3", df2_reg1)
    res = mod.fit(q = q)
    print("q Value =", q)
    print(res.summary())
    res8.append(res)

In [None]:
#EQN 13.4 (without outliers) - ```div - lsvs```
reswo8 = []

for q in q_val:
    mod = smf.quantreg("div ~ tbyT + t2byT2 + dop + dcl + div1 + div2 + div3 + lsvs1 + lsvs2 + lsvs3", df2wo_reg1)
    reswo = mod.fit(q = q)
    print("q Value =", q)
    print(reswo.summary())
    reswo8.append(reswo)

In [None]:
#EQN 14.1 (with outliers) - ```div - lfv```
res9 = []

for q in q_val:
    mod = smf.quantreg("div ~ tbyT + t2byT2 + dop + dcl + div1 + div2 + div3 + lfv1 + lfv2 + lfv3", df2_reg1)
    res = mod.fit(q = q)
    print("q Value =", q)
    print(res.summary())
    res9.append(res)

In [73]:
#EQN 14.2 (without outliers) - ```div - lfv```
reswo9 = []

for q in q_val:
    mod = smf.quantreg("div ~ tbyT + t2byT2 + dop + dcl + div1 + div2 + div3 + lfv1 + lfv2 + lfv3", df2wo_reg1)
    reswo = mod.fit(q = q)
    print("q Value =", q)
    print(reswo.summary())
    reswo9.append(reswo)

q Value = 0.01
                         QuantReg Regression Results                          
Dep. Variable:                    div   Pseudo R-squared:              0.07129
Model:                       QuantReg   Bandwidth:                     0.08287
Method:                 Least Squares   Sparsity:                        136.8
Date:                Fri, 08 Oct 2021   No. Observations:               809446
Time:                        04:39:21   Df Residuals:                   809435
                                        Df Model:                           10
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept    -13.4317      0.439    -30.622      0.000     -14.291     -12.572
tbyT         -12.0523      0.257    -46.879      0.000     -12.556     -11.548
t2byT2        13.3531      0.255     52.263      0.000      12.852      13.854
dop           -0.1746      0.045     

q Value = 0.75
                         QuantReg Regression Results                          
Dep. Variable:                    div   Pseudo R-squared:              0.09069
Model:                       QuantReg   Bandwidth:                     0.03896
Method:                 Least Squares   Sparsity:                        3.132
Date:                Fri, 08 Oct 2021   No. Observations:               809446
Time:                        04:41:16   Df Residuals:                   809435
                                        Df Model:                           10
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.8922      0.044     20.370      0.000       0.806       0.978
tbyT           0.4479      0.024     18.692      0.000       0.401       0.495
t2byT2        -0.7056      0.023    -30.794      0.000      -0.751      -0.661
dop           -0.0059      0.004     

In [None]:
#EQN 14.3 (with outliers) - ```div - lfvs```
res10 = []

for q in q_val:
    mod = smf.quantreg("div ~ tbyT + t2byT2 + dop + dcl + div1 + div2 + div3 + lfvs1 + lfvs2 + lfvs3", df2_reg1)
    res = mod.fit(q = q)
    print("q Value =", q)
    print(res.summary())
    res10.append(res)

In [None]:
#EQN 14.4 (without outliers) - ```div - lfvs```
reswo10 = []

for q in q_val:
    mod = smf.quantreg("div ~ tbyT + t2byT2 + dop + dcl + div1 + div2 + div3 + lfvs1 + lfvs2 + lfvs3", df2wo_reg1)
    reswo = mod.fit(q = q)
    print("q Value =", q)
    print(reswo.summary())
    reswo10.append(reswo)

In [None]:
#EQN 15.1 (with outliers) - ```div - fs```
res11 = []

for q in q_val:
    mod = smf.quantreg("div ~ tbyT + t2byT2 + dop + dcl + div1 + div2 + div3 + fs1 + fs2 + fs3", df2_reg1)
    res = mod.fit(q = q)
    print("q Value =", q)
    print(res.summary())
    res11.append(res)

In [None]:
#EQN 15.2 (without outliers) - ```div - fs```
reswo11 = []

for q in q_val:
    mod = smf.quantreg("div ~ tbyT + t2byT2 + dop + dcl + div1 + div2 + div3 + fs1 + fs2 + fs3", df2wo_reg1)
    reswo = mod.fit(q = q)
    print("q Value =", q)
    print(reswo.summary())
    reswo11.append(reswo)

In [None]:
#EQN 15.3 (with outliers) - ```div - fss```
res12 = []

for q in q_val:
    mod = smf.quantreg("div ~ tbyT + t2byT2 + dop + dcl + div1 + div2 + div3 + fss1 + fss2 + fss3", df2_reg1)
    res = mod.fit(q = q)
    print("q Value =", q)
    print(res.summary())
    res12.append(res)

In [None]:
#EQN 15.4 (without outliers) - ```div - fss```
reswo12 = []

for q in q_val:
    mod = smf.quantreg("div ~ tbyT + t2byT2 + dop + dcl + div1 + div2 + div3 + fss1 + fss2 + fss3", df2wo_reg1)
    reswo = mod.fit(q = q)
    print("q Value =", q)
    print(reswo.summary())
    reswo12.append(reswo)

Div_atm, div_otm can also be used as dependent variables in the quantile regressions for equations [13], [14] and [15] instead of div. 