In [1]:
import numpy as np
import pandas as pd
from linearmodels.iv import IVGMM
from linearmodels.iv import IV2SLS
from statsmodels.api import add_constant

In [2]:
# Suppress numpy's divide-by-zero floating-point warnings for this session.
np.seterr(divide='ignore')

# Build the working frame in one chain:
#   1. read the herding data set from disk,
#   2. drop every row that has a missing value,
#   3. copy the surviving row labels into an 'Index' column
#      (it is passed as a regressor to the models further down),
#   4. add the intercept column 'const' (has_constant='add' adds it even if
#      the frame already contains a constant column).
df = (
    pd.read_csv('data/output/stocks/herding_stockMarket_2002-2022.csv')
    .dropna()
    .assign(Index=lambda frame: frame.index)
    .pipe(add_constant, has_constant='add')
)

df.head()

Unnamed: 0,const,Datadate,MktRf,Smb,Hml,Rmw,Cma,Rf,Mom,Rm,AbsoluteRm,SquaredRm,Csad,LeftTail,RightTail,Index
1,1.0,2002-01-03,0.0099,0.0077,-0.004,-0.003,-0.008,7e-05,-0.0043,0.00997,0.00997,9.9e-05,0.012959,0,0,1
2,1.0,2002-01-04,0.007,0.002,0.0039,-0.0008,-0.0023,7e-05,-0.0029,0.00707,0.00707,5e-05,0.010224,0,0,2
3,1.0,2002-01-07,-0.007,-0.0024,0.0085,-0.0007,0.0016,7e-05,0.0036,-0.00693,0.00693,4.8e-05,0.001126,0,0,3
4,1.0,2002-01-08,-0.0023,0.0121,0.0029,-0.0021,-0.0001,7e-05,0.0035,-0.00223,0.00223,5e-06,0.00618,0,0,4
5,1.0,2002-01-09,-0.0045,0.0008,-0.0011,0.0022,0.0007,7e-05,0.0005,-0.00443,0.00443,2e-05,0.000681,0,0,5


In [3]:
# period splitting
# Keep rows with 2002-01-01 <= Datadate < 2023-01-01 (upper bound exclusive).
# The CSV is read without date parsing, so this is a string comparison; it
# orders correctly as long as Datadate stays in ISO 'YYYY-MM-DD' form, which is
# what df.head() shows.
in_period = (df['Datadate'] >= '2002-01-01') & (df['Datadate'] < '2023-01-01')

# .copy() makes the filtered frame independent of the unfiltered one. Later
# cells add columns to df; without the copy, narrowing the window so that rows
# are actually dropped makes those assignments raise SettingWithCopyWarning.
df = df[in_period].copy()

## Model 1

In [4]:
# Model 1: regress Csad on Rm, AbsoluteRm, SquaredRm, the 'Index' column and
# the constant. No endogenous regressors or instruments are supplied, so the
# printed summary reports the estimator as OLS.
exog_cols_v1 = ['Rm', 'AbsoluteRm', 'SquaredRm', 'Index', 'const']

model_v1 = IV2SLS(
    dependent=df['Csad'],
    exog=df[exog_cols_v1],
    endog=None,
    instruments=None,
)
res_ols_v1 = model_v1.fit(cov_type='unadjusted')

print(res_ols_v1)

                            OLS Estimation Summary                            
Dep. Variable:                   Csad   R-squared:                      0.6811
Estimator:                        OLS   Adj. R-squared:                 0.6808
No. Observations:                5035   F-statistic:                 1.075e+04
Date:                Wed, Aug 30 2023   P-value (F-stat)                0.0000
Time:                        12:44:13   Distribution:                  chi2(4)
Cov. Estimator:            unadjusted                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Rm             0.0121     0.0063     1.9021     0.0572     -0.0004      0.0245
AbsoluteRm     0.8268     0.0154     53.723     0.00

## Model 2

In [5]:
# Model 2, first variant: regress Csad on the MktRf, Smb, Hml, Rmw and Cma
# columns plus 'Index' and the constant. No endogenous regressors or
# instruments are supplied, so the printed summary reports the estimator as OLS.
exog_cols_v21 = ['MktRf', 'Smb', 'Hml', 'Rmw', 'Cma', 'Index', 'const']

model_v21 = IV2SLS(
    dependent=df['Csad'],
    exog=df[exog_cols_v21],
    endog=None,
    instruments=None,
)
res_ols_v21 = model_v21.fit(cov_type='unadjusted')

print(res_ols_v21)

                            OLS Estimation Summary                            
Dep. Variable:                   Csad   R-squared:                      0.0061
Estimator:                        OLS   Adj. R-squared:                 0.0049
No. Observations:                5035   F-statistic:                    30.665
Date:                Wed, Aug 30 2023   P-value (F-stat)                0.0000
Time:                        12:44:14   Distribution:                  chi2(6)
Cov. Estimator:            unadjusted                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
MktRf         -0.0285     0.0123    -2.3106     0.0209     -0.0526     -0.0043
Smb            0.0521     0.0235     2.2178     0.02

In [6]:
# Model 2, second variant: regress Csad on the MktRf, Smb, Hml and Mom columns
# plus 'Index' and the constant. No endogenous regressors or instruments are
# supplied, so the printed summary reports the estimator as OLS.
exog_cols_v22 = ['MktRf', 'Smb', 'Hml', 'Mom', 'Index', 'const']

model_v22 = IV2SLS(
    dependent=df['Csad'],
    exog=df[exog_cols_v22],
    endog=None,
    instruments=None,
)
res_ols_v22 = model_v22.fit(cov_type='unadjusted')

print(res_ols_v22)

                            OLS Estimation Summary                            
Dep. Variable:                   Csad   R-squared:                      0.0194
Estimator:                        OLS   Adj. R-squared:                 0.0185
No. Observations:                5035   F-statistic:                    99.790
Date:                Wed, Aug 30 2023   P-value (F-stat)                0.0000
Time:                        12:44:15   Distribution:                  chi2(5)
Cov. Estimator:            unadjusted                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
MktRf         -0.0493     0.0117    -4.2099     0.0000     -0.0723     -0.0264
Smb            0.0639     0.0229     2.7942     0.00

## Model 3

In [7]:
# Model 3, variant based on res_ols_v21 (the MktRf/Smb/Hml/Rmw/Cma regression).
#
# Observed Csad minus that regression's residuals, i.e. the part of Csad the
# factor regression accounts for.
#
# This variant gets its own column and result names. The next cell assigns
# df['Csad_Fund'] and res_ols_v3 for the res_ols_v22 variant; sharing those
# names here meant this variant was silently overwritten after Run All and
# could not be inspected or compared afterwards.
df['Csad_Fund_v21'] = df['Csad'] - res_ols_v21.resids

# Same regressors as Model 1, with the fitted component as dependent variable.
res_ols_v31 = IV2SLS(
    df['Csad_Fund_v21'], df[
        ['Rm', 'AbsoluteRm', 'SquaredRm', 'Index', 'const']
    ], None, None
).fit(cov_type='unadjusted')

print(res_ols_v31)

                            OLS Estimation Summary                            
Dep. Variable:              Csad_Fund   R-squared:                      0.4251
Estimator:                        OLS   Adj. R-squared:                 0.4247
No. Observations:                5035   F-statistic:                    3723.8
Date:                Wed, Aug 30 2023   P-value (F-stat)                0.0000
Time:                        12:44:17   Distribution:                  chi2(4)
Cov. Estimator:            unadjusted                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Rm            -0.0199     0.0007    -30.014     0.0000     -0.0212     -0.0186
AbsoluteRm     0.0040     0.0016     2.4750     0.01

In [8]:
# Model 3, variant based on res_ols_v22 (the MktRf/Smb/Hml/Mom regression).
# Csad with that regression's residuals subtracted out is stored as 'Csad_Fund'.
fund_component = df['Csad'].sub(res_ols_v22.resids)
df['Csad_Fund'] = fund_component

# Regress the stored component on the Model 1 regressors.
exog_cols_v3 = ['Rm', 'AbsoluteRm', 'SquaredRm', 'Index', 'const']
model_v3 = IV2SLS(
    dependent=df['Csad_Fund'],
    exog=df[exog_cols_v3],
    endog=None,
    instruments=None,
)
res_ols_v3 = model_v3.fit(cov_type='unadjusted')

print(res_ols_v3)

                            OLS Estimation Summary                            
Dep. Variable:              Csad_Fund   R-squared:                      0.1335
Estimator:                        OLS   Adj. R-squared:                 0.1328
No. Observations:                5035   F-statistic:                    775.83
Date:                Wed, Aug 30 2023   P-value (F-stat)                0.0000
Time:                        12:44:18   Distribution:                  chi2(4)
Cov. Estimator:            unadjusted                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Rm            -0.0196     0.0015    -13.480     0.0000     -0.0225     -0.0168
AbsoluteRm     0.0109     0.0035     3.0933     0.00

## Model 4

In [9]:
# Model 4, variant based on res_ols_v21 (the MktRf/Smb/Hml/Rmw/Cma regression).
#
# The residuals of that regression, i.e. the part of Csad it leaves
# unexplained.
#
# This variant gets its own column and result names. The next cell assigns
# df['CSAD_nonFund'] and res_ols_v4 for the res_ols_v22 variant; sharing those
# names here meant this variant was silently overwritten after Run All and
# could not be inspected or compared afterwards.
df['CSAD_nonFund_v21'] = res_ols_v21.resids

# Same regressors as Model 1, with the residual component as dependent variable.
res_ols_v41 = IV2SLS(
    df['CSAD_nonFund_v21'], df[
        ['Rm', 'AbsoluteRm', 'SquaredRm', 'Index', 'const']
    ], None, None
).fit(cov_type='unadjusted')

print(res_ols_v41)

                            OLS Estimation Summary                            
Dep. Variable:           CSAD_nonFund   R-squared:                      0.6856
Estimator:                        OLS   Adj. R-squared:                 0.6853
No. Observations:                5035   F-statistic:                 1.098e+04
Date:                Wed, Aug 30 2023   P-value (F-stat)                0.0000
Time:                        12:44:20   Distribution:                  chi2(4)
Cov. Estimator:            unadjusted                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Rm             0.0319     0.0063     5.0885     0.0000      0.0196      0.0442
AbsoluteRm     0.8228     0.0152     54.007     0.00

In [10]:
# Model 4, variant based on res_ols_v22 (the MktRf/Smb/Hml/Mom regression).
# That regression's residuals are stored as 'CSAD_nonFund'.
nonfund_component = res_ols_v22.resids
df['CSAD_nonFund'] = nonfund_component

# Regress the stored residual component on the Model 1 regressors.
exog_cols_v4 = ['Rm', 'AbsoluteRm', 'SquaredRm', 'Index', 'const']
model_v4 = IV2SLS(
    dependent=df['CSAD_nonFund'],
    exog=df[exog_cols_v4],
    endog=None,
    instruments=None,
)
res_ols_v4 = model_v4.fit(cov_type='unadjusted')

print(res_ols_v4)

                            OLS Estimation Summary                            
Dep. Variable:           CSAD_nonFund   R-squared:                      0.6842
Estimator:                        OLS   Adj. R-squared:                 0.6840
No. Observations:                5035   F-statistic:                 1.091e+04
Date:                Wed, Aug 30 2023   P-value (F-stat)                0.0000
Time:                        12:44:21   Distribution:                  chi2(4)
Cov. Estimator:            unadjusted                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Rm             0.0317     0.0062     5.0741     0.0000      0.0194      0.0439
AbsoluteRm     0.8159     0.0152     53.800     0.00