In [1]:
import numpy as np
import pandas as pd
from linearmodels.iv import IVGMM
from linearmodels.iv import IV2SLS
from statsmodels.api import add_constant

In [2]:
np.seterr(divide = 'ignore')

df = pd.read_csv('data/output/stocks/herding_stockMarket_2002-2022.csv')
df = df.dropna()

# add constant
df = add_constant(df, has_constant='add')

df.head()

Unnamed: 0,const,Datadate,MktRf,Smb,Hml,Rmw,Cma,Rf,Mom,Rm,AbsoluteRm,SquaredRm,Csad,LeftTail,RightTail
1,1.0,2002-01-03,0.0099,0.0077,-0.004,-0.003,-0.008,7e-05,-0.0043,0.00997,0.00997,9.9e-05,0.012651,0,0
2,1.0,2002-01-04,0.007,0.002,0.0039,-0.0008,-0.0023,7e-05,-0.0029,0.00707,0.00707,5e-05,0.009823,0,0
3,1.0,2002-01-07,-0.007,-0.0024,0.0085,-0.0007,0.0016,7e-05,0.0036,-0.00693,0.00693,4.8e-05,0.001171,0,0
4,1.0,2002-01-08,-0.0023,0.0121,0.0029,-0.0021,-0.0001,7e-05,0.0035,-0.00223,0.00223,5e-06,0.005999,0,0
5,1.0,2002-01-09,-0.0045,0.0008,-0.0011,0.0022,0.0007,7e-05,0.0005,-0.00443,0.00443,2e-05,0.000605,0,0


In [3]:
# period splitting
df = df[(df['Datadate'] >= '2002-01-01') & (df['Datadate'] < '2023-01-01')]

## Model 1

In [4]:
res_ols_v1 = IV2SLS(
    df.Csad, df[
        ['Rm', 'AbsoluteRm', 'SquaredRm', 'const']
    ], None, None
).fit(cov_type='unadjusted')

print(res_ols_v1)

                            OLS Estimation Summary                            
Dep. Variable:                   Csad   R-squared:                      0.6853
Estimator:                        OLS   Adj. R-squared:                 0.6851
No. Observations:                5139   F-statistic:                 1.119e+04
Date:                Mon, Aug 21 2023   P-value (F-stat)                0.0000
Time:                        19:00:57   Distribution:                  chi2(3)
Cov. Estimator:            unadjusted                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Rm             0.0125     0.0062     2.0193     0.0435      0.0004      0.0247
AbsoluteRm     0.8059     0.0150     53.586     0.00

## Model 2

In [5]:
res_ols_v2 = IV2SLS(
    df.Csad, df[
        ['MktRf', 'Smb', 'Hml', 'Rmw', 'Cma', 'Rf', 'Mom', 'const']
    ], None, None
).fit(cov_type='unadjusted')

print(res_ols_v2)

                            OLS Estimation Summary                            
Dep. Variable:                   Csad   R-squared:                      0.0331
Estimator:                        OLS   Adj. R-squared:                 0.0318
No. Observations:                5139   F-statistic:                    175.89
Date:                Mon, Aug 21 2023   P-value (F-stat)                0.0000
Time:                        19:00:57   Distribution:                  chi2(7)
Cov. Estimator:            unadjusted                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
MktRf         -0.0484     0.0121    -4.0025     0.0001     -0.0721     -0.0247
Smb            0.0582     0.0229     2.5362     0.01

## Model 3

In [6]:
df['Csad_Fund'] = df['Csad'] - res_ols_v2.resids

res_ols_v3 = IV2SLS(
    df.Csad_Fund, df[
        ['Rm', 'AbsoluteRm', 'SquaredRm', 'const']
    ], None, None
).fit(cov_type='unadjusted')

print(res_ols_v3)

                            OLS Estimation Summary                            
Dep. Variable:              Csad_Fund   R-squared:                      0.0337
Estimator:                        OLS   Adj. R-squared:                 0.0332
No. Observations:                5139   F-statistic:                    179.38
Date:                Mon, Aug 21 2023   P-value (F-stat)                0.0000
Time:                        19:00:57   Distribution:                  chi2(3)
Cov. Estimator:            unadjusted                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Rm            -0.0227     0.0020    -11.503     0.0000     -0.0266     -0.0189
AbsoluteRm     0.0251     0.0048     5.2346     0.00

## Model 4

In [7]:
df['CSAD_nonFund'] = res_ols_v2.resids

res_ols_v4 = IV2SLS(
    df.Csad_Fund, df[
        ['Rm', 'AbsoluteRm', 'SquaredRm', 'const']
    ], None, None
).fit(cov_type='unadjusted')

print(res_ols_v4)

                            OLS Estimation Summary                            
Dep. Variable:              Csad_Fund   R-squared:                      0.0337
Estimator:                        OLS   Adj. R-squared:                 0.0332
No. Observations:                5139   F-statistic:                    179.38
Date:                Mon, Aug 21 2023   P-value (F-stat)                0.0000
Time:                        19:00:57   Distribution:                  chi2(3)
Cov. Estimator:            unadjusted                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Rm            -0.0227     0.0020    -11.503     0.0000     -0.0266     -0.0189
AbsoluteRm     0.0251     0.0048     5.2346     0.00