In [11]:
import numpy as np
import pandas as pd
from linearmodels.iv import IVGMM
from linearmodels.iv import IV2SLS
from statsmodels.api import add_constant

In [12]:
# Tell NumPy to stay silent on floating-point division by zero.
np.seterr(divide='ignore')

# Build the working frame in one pass:
#   1. read the herding data set from disk,
#   2. discard every row that contains a missing value,
#   3. store the surviving row labels in an `Index` column (used as a
#      regressor by the models below),
#   4. add an intercept column; has_constant='add' asks statsmodels to add it
#      even if the data already holds a constant column.
df = (
    pd.read_csv('data/output/stocks/herding_stockMarket_2002-2022.csv')
    .dropna()
    .assign(Index=lambda frame: frame.index)
    .pipe(add_constant, has_constant='add')
)

# Preview the first rows as the cell output.
df.head()

Unnamed: 0,const,Datadate,MktRf,Smb,Hml,Rmw,Cma,Rf,Mom,Rm,AbsoluteRm,SquaredRm,Csad,LeftTail,RightTail,Index
1,1.0,2002-01-03,0.0099,0.0077,-0.004,-0.003,-0.008,7e-05,-0.0043,0.00997,0.00997,9.9e-05,0.012959,0,0,1
2,1.0,2002-01-04,0.007,0.002,0.0039,-0.0008,-0.0023,7e-05,-0.0029,0.00707,0.00707,5e-05,0.010224,0,0,2
3,1.0,2002-01-07,-0.007,-0.0024,0.0085,-0.0007,0.0016,7e-05,0.0036,-0.00693,0.00693,4.8e-05,0.001126,0,0,3
4,1.0,2002-01-08,-0.0023,0.0121,0.0029,-0.0021,-0.0001,7e-05,0.0035,-0.00223,0.00223,5e-06,0.00618,0,0,4
5,1.0,2002-01-09,-0.0045,0.0008,-0.0011,0.0022,0.0007,7e-05,0.0005,-0.00443,0.00443,2e-05,0.000681,0,0,5


In [13]:
# period splitting
# Keep observations dated within [2002-01-01, 2023-01-01). `Datadate` is read
# from the CSV without date parsing, so this is a string comparison; it orders
# correctly only because the dates are ISO formatted (YYYY-MM-DD).
in_sample = (df['Datadate'] >= '2002-01-01') & (df['Datadate'] < '2023-01-01')

# Take an explicit copy: later cells add columns to `df`, and assigning into a
# boolean-filtered selection can raise SettingWithCopyWarning once the filter
# actually removes rows.
df = df.loc[in_sample].copy()

## Model 1

In [14]:
# Model 1: regress Csad on the Rm, AbsoluteRm and SquaredRm columns, together
# with the row-index column and the intercept. No endogenous regressors or
# instruments are passed, so the fit is reported as OLS; 'unadjusted' selects
# the covariance estimator without any robust correction.
exog_v1 = df[['Rm', 'AbsoluteRm', 'SquaredRm', 'Index', 'const']]
res_ols_v1 = IV2SLS(
    dependent=df['Csad'],
    exog=exog_v1,
    endog=None,
    instruments=None,
).fit(cov_type='unadjusted')

print(res_ols_v1)

                            OLS Estimation Summary                            
Dep. Variable:                   Csad   R-squared:                      0.6815
Estimator:                        OLS   Adj. R-squared:                 0.6812
No. Observations:                5139   F-statistic:                   1.1e+04
Date:                Tue, Aug 29 2023   P-value (F-stat)                0.0000
Time:                        15:14:51   Distribution:                  chi2(4)
Cov. Estimator:            unadjusted                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Rm             0.0145     0.0062     2.3268     0.0200      0.0023      0.0268
AbsoluteRm     0.8265     0.0152     54.484     0.00

## Model 2

In [15]:
# Model 2 (first variant): regress Csad on the MktRf, Smb, Hml, Rmw and Cma
# columns (presumably the five Fama-French factors -- confirm against the data
# source), plus the row-index column and the intercept. With no endogenous
# regressors or instruments this is an OLS fit.
exog_v21 = df[['MktRf', 'Smb', 'Hml', 'Rmw', 'Cma', 'Index', 'const']]
res_ols_v21 = IV2SLS(
    dependent=df['Csad'],
    exog=exog_v21,
    endog=None,
    instruments=None,
).fit(cov_type='unadjusted')

print(res_ols_v21)

                            OLS Estimation Summary                            
Dep. Variable:                   Csad   R-squared:                      0.0079
Estimator:                        OLS   Adj. R-squared:                 0.0067
No. Observations:                5139   F-statistic:                    40.736
Date:                Tue, Aug 29 2023   P-value (F-stat)                0.0000
Time:                        15:14:51   Distribution:                  chi2(6)
Cov. Estimator:            unadjusted                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
MktRf         -0.0275     0.0121    -2.2690     0.0233     -0.0513     -0.0037
Smb            0.0564     0.0233     2.4220     0.01

In [16]:
# Model 2 (second variant): same dependent variable, but the regressors are
# MktRf, Smb, Hml and Mom (presumably a momentum factor -- confirm), plus the
# row-index column and the intercept. Again an OLS fit, since no endogenous
# regressors or instruments are supplied.
exog_v22 = df[['MktRf', 'Smb', 'Hml', 'Mom', 'Index', 'const']]
res_ols_v22 = IV2SLS(
    dependent=df['Csad'],
    exog=exog_v22,
    endog=None,
    instruments=None,
).fit(cov_type='unadjusted')

print(res_ols_v22)

                            OLS Estimation Summary                            
Dep. Variable:                   Csad   R-squared:                      0.0219
Estimator:                        OLS   Adj. R-squared:                 0.0210
No. Observations:                5139   F-statistic:                    115.11
Date:                Tue, Aug 29 2023   P-value (F-stat)                0.0000
Time:                        15:14:51   Distribution:                  chi2(5)
Cov. Estimator:            unadjusted                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
MktRf         -0.0518     0.0115    -4.4921     0.0000     -0.0743     -0.0292
Smb            0.0612     0.0226     2.7060     0.00

## Model 3

In [17]:
# Model 3 (based on res_ols_v21): Csad minus that regression's residuals,
# i.e. the part of Csad fitted by the res_ols_v21 regressors, becomes the
# dependent variable of the Model 1 specification.
# NOTE: the following cell reuses both the `Csad_Fund` column and the
# `res_ols_v3` name, so their contents depend on which cell ran last.
df['Csad_Fund'] = df['Csad'].sub(res_ols_v21.resids)

exog_v3 = df[['Rm', 'AbsoluteRm', 'SquaredRm', 'Index', 'const']]
res_ols_v3 = IV2SLS(
    dependent=df['Csad_Fund'],
    exog=exog_v3,
    endog=None,
    instruments=None,
).fit(cov_type='unadjusted')

print(res_ols_v3)

                            OLS Estimation Summary                            
Dep. Variable:              Csad_Fund   R-squared:                      0.6235
Estimator:                        OLS   Adj. R-squared:                 0.6232
No. Observations:                5139   F-statistic:                    8509.8
Date:                Tue, Aug 29 2023   P-value (F-stat)                0.0000
Time:                        15:14:51   Distribution:                  chi2(4)
Cov. Estimator:            unadjusted                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Rm            -0.0204     0.0006    -33.948     0.0000     -0.0216     -0.0193
AbsoluteRm     0.0046     0.0015     3.1506     0.00

In [18]:
# Model 3 (based on res_ols_v22): Csad minus that regression's residuals,
# i.e. the part of Csad fitted by the res_ols_v22 regressors, becomes the
# dependent variable of the Model 1 specification.
# NOTE: this overwrites the `Csad_Fund` column and the `res_ols_v3` result
# produced by the preceding cell.
df['Csad_Fund'] = df['Csad'].sub(res_ols_v22.resids)

exog_v3 = df[['Rm', 'AbsoluteRm', 'SquaredRm', 'Index', 'const']]
res_ols_v3 = IV2SLS(
    dependent=df['Csad_Fund'],
    exog=exog_v3,
    endog=None,
    instruments=None,
).fit(cov_type='unadjusted')

print(res_ols_v3)

                            OLS Estimation Summary                            
Dep. Variable:              Csad_Fund   R-squared:                      0.2249
Estimator:                        OLS   Adj. R-squared:                 0.2243
No. Observations:                5139   F-statistic:                    1491.1
Date:                Tue, Aug 29 2023   P-value (F-stat)                0.0000
Time:                        15:14:51   Distribution:                  chi2(4)
Cov. Estimator:            unadjusted                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Rm            -0.0202     0.0014    -13.987     0.0000     -0.0230     -0.0173
AbsoluteRm     0.0116     0.0035     3.3126     0.00

## Model 4

In [19]:
# Model 4 (based on res_ols_v21): the residuals of that regression, i.e. the
# part of Csad its regressors leave unexplained, become the dependent variable
# of the Model 1 specification.
# NOTE: the following cell reuses both the `CSAD_nonFund` column and the
# `res_ols_v4` name, so their contents depend on which cell ran last.
residuals_v21 = res_ols_v21.resids
df['CSAD_nonFund'] = residuals_v21

exog_v4 = df[['Rm', 'AbsoluteRm', 'SquaredRm', 'Index', 'const']]
res_ols_v4 = IV2SLS(
    dependent=df['CSAD_nonFund'],
    exog=exog_v4,
    endog=None,
    instruments=None,
).fit(cov_type='unadjusted')

print(res_ols_v4)

                            OLS Estimation Summary                            
Dep. Variable:           CSAD_nonFund   R-squared:                      0.6847
Estimator:                        OLS   Adj. R-squared:                 0.6844
No. Observations:                5139   F-statistic:                 1.116e+04
Date:                Tue, Aug 29 2023   P-value (F-stat)                0.0000
Time:                        15:14:51   Distribution:                  chi2(4)
Cov. Estimator:            unadjusted                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Rm             0.0350     0.0062     5.6506     0.0000      0.0228      0.0471
AbsoluteRm     0.8219     0.0150     54.669     0.00

In [20]:
# Model 4 (based on res_ols_v22): the residuals of that regression, i.e. the
# part of Csad its regressors leave unexplained, become the dependent variable
# of the Model 1 specification.
# NOTE: this overwrites the `CSAD_nonFund` column and the `res_ols_v4` result
# produced by the preceding cell.
residuals_v22 = res_ols_v22.resids
df['CSAD_nonFund'] = residuals_v22

exog_v4 = df[['Rm', 'AbsoluteRm', 'SquaredRm', 'Index', 'const']]
res_ols_v4 = IV2SLS(
    dependent=df['CSAD_nonFund'],
    exog=exog_v4,
    endog=None,
    instruments=None,
).fit(cov_type='unadjusted')

print(res_ols_v4)

                            OLS Estimation Summary                            
Dep. Variable:           CSAD_nonFund   R-squared:                      0.6835
Estimator:                        OLS   Adj. R-squared:                 0.6833
No. Observations:                5139   F-statistic:                  1.11e+04
Date:                Tue, Aug 29 2023   P-value (F-stat)                0.0000
Time:                        15:14:51   Distribution:                  chi2(4)
Cov. Estimator:            unadjusted                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Rm             0.0347     0.0062     5.6362     0.0000      0.0226      0.0468
AbsoluteRm     0.8149     0.0150     54.489     0.00