In [1]:
import numpy as np
import pandas as pd
from linearmodels.iv import IVGMM
from linearmodels.iv import IV2SLS
from statsmodels.api import add_constant

In [2]:
# Tell NumPy to stay silent on divide-by-zero floating-point events.
np.seterr(divide='ignore')

# Read the herding dataset and discard every row that has a missing value.
df = pd.read_csv(
    'data/output/stocks/herding_stockMarket_2002-2022.csv'
).dropna()

# Expose the row labels that survived dropna() as an ordinary column named
# 'Index' so it can be passed to the regressions as a regressor.
df = df.assign(Index=df.index)

# Add the intercept column ('const'); has_constant='add' adds it even if the
# data already contains a constant column.
df = add_constant(df, has_constant='add')

df.head()

Unnamed: 0,const,Datadate,MktRf,Smb,Hml,Rmw,Cma,Rf,Mom,Rm,AbsoluteRm,SquaredRm,Csad,LeftTail,RightTail,Index
1,1.0,2002-01-03,0.0099,0.0077,-0.004,-0.003,-0.008,7e-05,-0.0043,0.00997,0.00997,9.9e-05,0.012651,0,0,1
2,1.0,2002-01-04,0.007,0.002,0.0039,-0.0008,-0.0023,7e-05,-0.0029,0.00707,0.00707,5e-05,0.009823,0,0,2
3,1.0,2002-01-07,-0.007,-0.0024,0.0085,-0.0007,0.0016,7e-05,0.0036,-0.00693,0.00693,4.8e-05,0.001171,0,0,3
4,1.0,2002-01-08,-0.0023,0.0121,0.0029,-0.0021,-0.0001,7e-05,0.0035,-0.00223,0.00223,5e-06,0.005999,0,0,4
5,1.0,2002-01-09,-0.0045,0.0008,-0.0011,0.0022,0.0007,7e-05,0.0005,-0.00443,0.00443,2e-05,0.000605,0,0,5


In [3]:
# Period splitting: keep rows whose Datadate is on or after 2002-01-01 and
# strictly before 2023-01-01. The bounds are compared as written (strings).
period_mask = (df['Datadate'] >= '2002-01-01') & (df['Datadate'] < '2023-01-01')
df = df[period_mask]

## Model 1

In [4]:
# Model 1: Csad regressed on Rm, AbsoluteRm, SquaredRm, the 'Index' column
# and the intercept.
exog_v1 = ['Rm', 'AbsoluteRm', 'SquaredRm', 'Index', 'const']

# No endogenous regressors and no instruments are supplied, so the printed
# summary reports the estimator as OLS.
res_ols_v1 = IV2SLS(
    dependent=df['Csad'],
    exog=df[exog_v1],
    endog=None,
    instruments=None,
).fit(cov_type='unadjusted')

print(res_ols_v1)

                            OLS Estimation Summary                            
Dep. Variable:                   Csad   R-squared:                      0.6927
Estimator:                        OLS   Adj. R-squared:                 0.6924
No. Observations:                5139   F-statistic:                 1.158e+04
Date:                Thu, Aug 24 2023   P-value (F-stat)                0.0000
Time:                        20:10:18   Distribution:                  chi2(4)
Cov. Estimator:            unadjusted                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Rm             0.0118     0.0061     1.9237     0.0544     -0.0002      0.0238
AbsoluteRm     0.8149     0.0149     54.752     0.00

## Model 2

In [5]:
# Model 2, first variant: Csad regressed on MktRf, Smb, Hml, Rmw, Cma
# (presumably the five Fama-French factors -- confirm against the data
# source), plus the 'Index' column and the intercept.
exog_v21 = ['MktRf', 'Smb', 'Hml', 'Rmw', 'Cma', 'Index', 'const']

# endog/instruments are None, so this is a plain OLS fit.
res_ols_v21 = IV2SLS(
    dependent=df['Csad'],
    exog=df[exog_v21],
    endog=None,
    instruments=None,
).fit(cov_type='unadjusted')

print(res_ols_v21)

                            OLS Estimation Summary                            
Dep. Variable:                   Csad   R-squared:                      0.0073
Estimator:                        OLS   Adj. R-squared:                 0.0062
No. Observations:                5139   F-statistic:                    37.916
Date:                Thu, Aug 24 2023   P-value (F-stat)                0.0000
Time:                        20:10:18   Distribution:                  chi2(6)
Cov. Estimator:            unadjusted                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
MktRf         -0.0314     0.0121    -2.5919     0.0095     -0.0552     -0.0077
Smb            0.0572     0.0233     2.4613     0.01

In [6]:
# Model 2, second variant: Csad regressed on MktRf, Smb, Hml and Mom
# (Mom replaces the Rmw/Cma pair used for res_ols_v21), plus the 'Index'
# column and the intercept.
exog_v22 = ['MktRf', 'Smb', 'Hml', 'Mom', 'Index', 'const']

# endog/instruments are None, so this is a plain OLS fit.
res_ols_v22 = IV2SLS(
    dependent=df['Csad'],
    exog=df[exog_v22],
    endog=None,
    instruments=None,
).fit(cov_type='unadjusted')

print(res_ols_v22)

                            OLS Estimation Summary                            
Dep. Variable:                   Csad   R-squared:                      0.0220
Estimator:                        OLS   Adj. R-squared:                 0.0210
No. Observations:                5139   F-statistic:                    115.54
Date:                Thu, Aug 24 2023   P-value (F-stat)                0.0000
Time:                        20:10:18   Distribution:                  chi2(5)
Cov. Estimator:            unadjusted                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
MktRf         -0.0555     0.0115    -4.8250     0.0000     -0.0781     -0.0330
Smb            0.0621     0.0226     2.7476     0.00

## Model 3

In [7]:
# Model 3, based on res_ols_v21: Csad_Fund is Csad minus the residuals of that
# regression, i.e. the part of Csad that the res_ols_v21 fit accounts for.
df['Csad_Fund'] = df['Csad'] - res_ols_v21.resids

# Keep this fit under its own name (suffix mirrors res_ols_v21). The name
# res_ols_v3 is reassigned by the res_ols_v22-based fit later in the notebook,
# which would otherwise make this result unrecoverable.
res_ols_v31 = IV2SLS(
    df.Csad_Fund, df[
        ['Rm', 'AbsoluteRm', 'SquaredRm', 'Index', 'const']
    ], None, None
).fit(cov_type='unadjusted')

# Backward-compatible alias: code that reads res_ols_v3 right after this cell
# still sees the same result object as before.
res_ols_v3 = res_ols_v31

print(res_ols_v31)

                            OLS Estimation Summary                            
Dep. Variable:              Csad_Fund   R-squared:                      0.5895
Estimator:                        OLS   Adj. R-squared:                 0.5891
No. Observations:                5139   F-statistic:                    7378.5
Date:                Thu, Aug 24 2023   P-value (F-stat)                0.0000
Time:                        20:10:18   Distribution:                  chi2(4)
Cov. Estimator:            unadjusted                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Rm            -0.0236     0.0006    -38.953     0.0000     -0.0248     -0.0224
AbsoluteRm     0.0048     0.0015     3.2567     0.00

In [8]:
# Model 3, based on res_ols_v22: rebuild Csad_Fund as Csad minus the residuals
# of that regression. This replaces any existing Csad_Fund column.
csad_fund_v22 = df['Csad'] - res_ols_v22.resids
df['Csad_Fund'] = csad_fund_v22

exog_v3 = ['Rm', 'AbsoluteRm', 'SquaredRm', 'Index', 'const']

# OLS fit (no endogenous regressors, no instruments); rebinds res_ols_v3.
res_ols_v3 = IV2SLS(
    dependent=df['Csad_Fund'],
    exog=df[exog_v3],
    endog=None,
    instruments=None,
).fit(cov_type='unadjusted')

print(res_ols_v3)

                            OLS Estimation Summary                            
Dep. Variable:              Csad_Fund   R-squared:                      0.1974
Estimator:                        OLS   Adj. R-squared:                 0.1968
No. Observations:                5139   F-statistic:                    1264.0
Date:                Thu, Aug 24 2023   P-value (F-stat)                0.0000
Time:                        20:10:18   Distribution:                  chi2(4)
Cov. Estimator:            unadjusted                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Rm            -0.0233     0.0015    -15.887     0.0000     -0.0262     -0.0204
AbsoluteRm     0.0118     0.0036     3.3138     0.00

## Model 4

In [9]:
# Model 4, based on res_ols_v21: CSAD_nonFund is the residual series of that
# regression, i.e. the part of Csad the res_ols_v21 fit leaves unexplained.
df['CSAD_nonFund'] = res_ols_v21.resids

# Keep this fit under its own name (suffix mirrors res_ols_v21). The name
# res_ols_v4 is reassigned by the res_ols_v22-based fit later in the notebook,
# which would otherwise make this result unrecoverable.
res_ols_v41 = IV2SLS(
    df.CSAD_nonFund, df[
        ['Rm', 'AbsoluteRm', 'SquaredRm', 'Index', 'const']
    ], None, None
).fit(cov_type='unadjusted')

# Backward-compatible alias: code that reads res_ols_v4 right after this cell
# still sees the same result object as before.
res_ols_v4 = res_ols_v41

print(res_ols_v41)

                            OLS Estimation Summary                            
Dep. Variable:           CSAD_nonFund   R-squared:                      0.6965
Estimator:                        OLS   Adj. R-squared:                 0.6963
No. Observations:                5139   F-statistic:                 1.179e+04
Date:                Thu, Aug 24 2023   P-value (F-stat)                0.0000
Time:                        20:10:18   Distribution:                  chi2(4)
Cov. Estimator:            unadjusted                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Rm             0.0354     0.0061     5.8343     0.0000      0.0235      0.0473
AbsoluteRm     0.8101     0.0147     54.973     0.00

In [10]:
# Model 4, based on res_ols_v22: CSAD_nonFund becomes the residual series of
# that regression. This replaces any existing CSAD_nonFund column.
nonfund_v22 = res_ols_v22.resids
df['CSAD_nonFund'] = nonfund_v22

exog_v4 = ['Rm', 'AbsoluteRm', 'SquaredRm', 'Index', 'const']

# OLS fit (no endogenous regressors, no instruments); rebinds res_ols_v4.
res_ols_v4 = IV2SLS(
    dependent=df['CSAD_nonFund'],
    exog=df[exog_v4],
    endog=None,
    instruments=None,
).fit(cov_type='unadjusted')

print(res_ols_v4)

                            OLS Estimation Summary                            
Dep. Variable:           CSAD_nonFund   R-squared:                      0.6955
Estimator:                        OLS   Adj. R-squared:                 0.6952
No. Observations:                5139   F-statistic:                 1.174e+04
Date:                Thu, Aug 24 2023   P-value (F-stat)                0.0000
Time:                        20:10:18   Distribution:                  chi2(4)
Cov. Estimator:            unadjusted                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Rm             0.0351     0.0060     5.8211     0.0000      0.0233      0.0469
AbsoluteRm     0.8031     0.0147     54.810     0.00