In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt

In [2]:
np.seterr(divide = 'ignore')

df = pd.read_csv('data/output/stocks/herding_stockMarket_2002-2022.csv')
df = df.dropna()
df.head()

Unnamed: 0,Datadate,MktRf,Smb,Hml,Rmw,Cma,Rf,Mom,Rm,AbsoluteRm,SquaredRm,Csad,LeftTail,RightTail
1,2002-01-03,0.0099,0.0077,-0.004,-0.003,-0.008,7e-05,-0.0043,0.00997,0.00997,9.9e-05,0.012651,0,0
2,2002-01-04,0.007,0.002,0.0039,-0.0008,-0.0023,7e-05,-0.0029,0.00707,0.00707,5e-05,0.009823,0,0
3,2002-01-07,-0.007,-0.0024,0.0085,-0.0007,0.0016,7e-05,0.0036,-0.00693,0.00693,4.8e-05,0.001171,0,0
4,2002-01-08,-0.0023,0.0121,0.0029,-0.0021,-0.0001,7e-05,0.0035,-0.00223,0.00223,5e-06,0.005999,0,0
5,2002-01-09,-0.0045,0.0008,-0.0011,0.0022,0.0007,7e-05,0.0005,-0.00443,0.00443,2e-05,0.000605,0,0


In [3]:
# period splitting
df = df[(df['Datadate'] >= '2002-01-01') & (df['Datadate'] < '2023-01-01')]

## Model 1

In [4]:
x1 = df[
    ['Rm', 'AbsoluteRm', 'SquaredRm']
]
y1 = df['Csad']

x1 = sm.add_constant(x1)

model1 = sm.OLS(y1, x1).fit()

print(model1.summary())

                            OLS Regression Results                            
Dep. Variable:                   Csad   R-squared:                       0.685
Model:                            OLS   Adj. R-squared:                  0.685
Method:                 Least Squares   F-statistic:                     3727.
Date:                Fri, 18 Aug 2023   Prob (F-statistic):               0.00
Time:                        07:05:50   Log-Likelihood:                 19464.
No. Observations:                5139   AIC:                        -3.892e+04
Df Residuals:                    5135   BIC:                        -3.889e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0021      0.000     18.006      0.0

## Model 2

In [5]:
x2 = df[
    ['MktRf', 'Smb', 'Hml', 'Rmw', 'Cma', 'Rf', 'Mom']
]
y2 = df['Csad']

x2 = sm.add_constant(x2)

model2 = sm.OLS(y2, x2).fit()

print(model2.summary())

                            OLS Regression Results                            
Dep. Variable:                   Csad   R-squared:                       0.033
Model:                            OLS   Adj. R-squared:                  0.032
Method:                 Least Squares   F-statistic:                     25.09
Date:                Fri, 18 Aug 2023   Prob (F-statistic):           6.57e-34
Time:                        07:05:50   Log-Likelihood:                 16580.
No. Observations:                5139   AIC:                        -3.314e+04
Df Residuals:                    5131   BIC:                        -3.309e+04
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0098      0.000     57.254      0.0

## Model 3

In [6]:
df['Csad_Fund'] = df['Csad'] - np.sqrt(model2.scale)

In [7]:
x3 = df[
    ['Rm', 'AbsoluteRm', 'SquaredRm']
]
y3 = df['Csad_Fund']

x3 = sm.add_constant(x3)

model3 = sm.OLS(y3, x3).fit()

print(model3.summary())

                            OLS Regression Results                            
Dep. Variable:              Csad_Fund   R-squared:                       0.685
Model:                            OLS   Adj. R-squared:                  0.685
Method:                 Least Squares   F-statistic:                     3727.
Date:                Fri, 18 Aug 2023   Prob (F-statistic):               0.00
Time:                        07:05:50   Log-Likelihood:                 19464.
No. Observations:                5139   AIC:                        -3.892e+04
Df Residuals:                    5135   BIC:                        -3.889e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0075      0.000    -62.533      0.0

## Model 4

In [8]:
df['CSAD_nonFund'] = np.sqrt(model2.scale)

In [9]:
x4 = df[
    ['Rm', 'AbsoluteRm', 'SquaredRm']
]
y4 = df['CSAD_nonFund']

x4 = sm.add_constant(x3)

model4 = sm.OLS(y4, x4).fit()

print(model4.summary())

                            OLS Regression Results                            
Dep. Variable:           CSAD_nonFund   R-squared:                        -inf
Model:                            OLS   Adj. R-squared:                   -inf
Method:                 Least Squares   F-statistic:                    -1712.
Date:                Fri, 18 Aug 2023   Prob (F-statistic):               1.00
Time:                        07:05:50   Log-Likelihood:             1.9718e+05
No. Observations:                5139   AIC:                        -3.943e+05
Df Residuals:                    5135   BIC:                        -3.943e+05
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0096   1.14e-19   8.41e+16      0.0