## Please replicate Table 1 of Petersen (2009) RFS paper. This paper is here. Please focus on 4 cases:
(0%, 0%) (0%, 50%) (50%, 0%) (50%, 50%).

In [228]:
import numpy as np
import pandas as pd
import statsmodels as statsmodels
import statsmodels.api as sm
import statsmodels.formula.api as sm1
import warnings
warnings.filterwarnings("ignore")
from linearmodels import PanelOLS, FamaMacBeth

In [2]:
df=pd.read_table(
    "http://www.kellogg.northwestern.edu/faculty/petersen/htm/papers/se/test_data.txt",
    names=["firmid", "year", "x", "y"],
    delim_whitespace=True,
)
df=df[['firmid', 'year']]

In [178]:
def EstimatingStandardErrors(df, numiter, numfirm, numyyear, beta, std_x, std_r, firm_x, firm_r):

    estimated_ols_beta_list=[]
    estimated_ols_se_list=[]
    estimated_ols_tvalue_list=[]

    estimated_cluster_ols_beta_list=[]
    estimated_cluster_ols_se_list=[]
    estimated_cluster_ols_tvalue_list=[]

    for i in range(0, numiter , 1):

        list_x=[]
        list_r=[]

        for firm in range(0, numfirm, 1):

            error_x=np.sqrt(1-firm_x)*np.random.normal(0, std_x, numyyear)
            firm_temp=np.sqrt(firm_x)*np.random.normal(0, std_x, 1)
            firm_temp=[firm_temp[0] for i in range(0,numyyear,1)]
            x=error_x+firm_temp
            list_x.append(x)

            error_r=np.sqrt(1-firm_r)*np.random.normal(0, std_r, numyyear)
            firm_temp=np.sqrt(firm_r)*np.random.normal(0, std_r, 1)
            firm_temp=[firm_temp[0] for i in range(0,numyyear,1)]
            r=error_r+firm_temp
            list_r.append(r)

        df['x']=np.array(list_x).flatten()
        df['r']=np.array(list_r).flatten()
        df['y']=df['x']*beta+df['r']

        # running OLS regression
        Y = df['y']
        X = df['x']

        model = sm.OLS(endog=Y, exog=X)

        results = model.fit()
        estimated_ols_beta_list.append(results.params['x'])
        estimated_ols_se_list.append(results.bse['x'])
        estimated_ols_tvalue_list.append(results.tvalues['x']) # t>2.58

        results=\
        model.fit(
        cov_type="cluster",
        cov_kwds={"groups": df["firmid"]},
        use_t=True)
        estimated_cluster_ols_beta_list.append(results.params['x'])
        estimated_cluster_ols_se_list.append(results.bse['x'])
        estimated_cluster_ols_tvalue_list.append(results.tvalues['x'])
    
    print("Average beta_OLS: {}".format(np.mean(estimated_ols_beta_list)))
    print("Std of beta_OLS: {}".format(np.std(estimated_ols_beta_list)))
    print("Average SE_OLS: {}".format(np.mean(estimated_ols_se_list)))
    print("Average SE_Cluster: {}".format(np.mean(estimated_cluster_ols_se_list)))

## Q1

In [176]:
numiter=100 # 100 simulation of a panel data set
numfirm=500 # 500 firms
numyyear=10 # 10 years per firm
beta=1 # The true slope coefficient is 1, 
std_x=1 # the standard deviation of the independent variable is 1 
std_r=2 # the standard deviation of the residual term is 2.
# firm_x=0.5 # The fraction of the independent variable’s variance which is due to a firm specific component
# firm_r=0.5 # The fraction of the residual variance which is due to a firm specific component

In [180]:
# Case 1. (0%, 0%)
firm_x=0.0
firm_r=0.0

EstimatingStandardErrors(df, numiter, numfirm, numyyear, beta, std_x, std_r, firm_x, firm_r)

# When there is no firm effect in the independent variable
# (i.e. the independent variable is independent across observations), the standard errors
# estimated by OLS are also unbiased

Average beta_OLS: 1.00735650233282
Std of beta_OLS: 0.025818918705525017
Average SE_OLS: 0.028262151528697547
Average SE_Cluster: 0.02821080297441014


In [181]:
# Case 2. (0%, 50%)
firm_x=0.0
firm_r=0.5

EstimatingStandardErrors(df, numiter, numfirm, numyyear, beta, std_x, std_r, firm_x, firm_r)

Average beta_OLS: 0.995925117245851
Std of beta_OLS: 0.029422593110733208
Average SE_OLS: 0.028441814125799783
Average SE_Cluster: 0.028532941936971975


In [182]:
# Case 3. (50%, 0%)
firm_x=0.5
firm_r=0.0

EstimatingStandardErrors(df, numiter, numfirm, numyyear, beta, std_x, std_r, firm_x, firm_r)

Average beta_OLS: 0.9994778823285421
Std of beta_OLS: 0.02962801953227441
Average SE_OLS: 0.02838821796820115
Average SE_Cluster: 0.028322929028867606


In [183]:
# Case 4. (50%, 50%)
firm_x=0.5
firm_r=0.5

EstimatingStandardErrors(df, numiter, numfirm, numyyear, beta, std_x, std_r, firm_x, firm_r)

# when fifty percent of the variability in both the residual 
# and the independent variable is due to the firm effect (ρX = ρε = 0.50), 
# the OLS estimated standard error is one half of the true standard error
# The standard errors estimated by OLS do not rise 
# as the firm effect increases across either the columns (i.e. in the independent
# variable) or across the rows (i.e. in the residual). The true standard error does rise.

Average beta_OLS: 1.005437105277036
Std of beta_OLS: 0.054873392127638705
Average SE_OLS: 0.028177109532289347
Average SE_Cluster: 0.050612523455717054


## Q2: report for Fama-MacBeth regression

In [214]:
def EstimatingStandardErrorsFamaMacBeth(df, numiter, numfirm, numyyear, beta, std_x, std_r, firm_x, firm_r):

    estimated_ols_beta_list=[]
    estimated_ols_se_list=[]
    estimated_ols_tvalue_list=[]

    estimated_cluster_ols_beta_list=[]
    estimated_cluster_ols_se_list=[]
    estimated_cluster_ols_tvalue_list=[]
    
    estimated_FamaMacBeth_beta_list=[]
    estimated_FamaMacBeth_se_list=[]
    estimated_FamaMacBeth_tvalue_list=[]

    for i in range(0, numiter , 1):

        list_x=[]
        list_r=[]

        for firm in range(0, numfirm, 1):

            error_x=np.sqrt(1-firm_x)*np.random.normal(0, std_x, numyyear)
            firm_temp=np.sqrt(firm_x)*np.random.normal(0, std_x, 1)
            firm_temp=[firm_temp[0] for i in range(0,numyyear,1)]
            x=error_x+firm_temp
            list_x.append(x)

            error_r=np.sqrt(1-firm_r)*np.random.normal(0, std_r, numyyear)
            firm_temp=np.sqrt(firm_r)*np.random.normal(0, std_r, 1)
            firm_temp=[firm_temp[0] for i in range(0,numyyear,1)]
            r=error_r+firm_temp
            list_r.append(r)

        df['x']=np.array(list_x).flatten()
        df['r']=np.array(list_r).flatten()
        df['y']=df['x']*beta+df['r']
            
        # running OLS regression
        Y = df['y']
        X = df['x']
        X = sm.add_constant(X)

        model = sm.OLS(endog=Y, exog=X)

        results = model.fit()
        estimated_ols_beta_list.append(results.params['x'])
        estimated_ols_se_list.append(results.bse['x'])
        estimated_ols_tvalue_list.append(results.tvalues['x']) # t>2.58

        results=\
        model.fit(
        cov_type="cluster",
        cov_kwds={"groups": df["firmid"]},
        use_t=True)
        estimated_cluster_ols_beta_list.append(results.params['x'])
        estimated_cluster_ols_se_list.append(results.bse['x'])
        estimated_cluster_ols_tvalue_list.append(results.tvalues['x'])
        
        df2 = df.set_index(["firmid", "year"])
        results=\
        FamaMacBeth.from_formula("y ~ x", data=df2).fit()
        estimated_FamaMacBeth_beta_list.append(results.params['x'])
        estimated_FamaMacBeth_se_list.append(results.std_errors['x'])
        estimated_FamaMacBeth_tvalue_list.append(results.tstats['x'])
    
    print("Average beta_OLS: {}".format(np.mean(estimated_ols_beta_list)))
    print("Std of beta_OLS: {}".format(np.std(estimated_ols_beta_list)))
    print("Average SE_OLS: {}".format(np.mean(estimated_ols_se_list)))
    print("Average SE_Cluster: {}".format(np.mean(estimated_cluster_ols_se_list)))
    print("Average SE_FamaMacBeth: {}".format(np.mean(estimated_FamaMacBeth_se_list)))

In [255]:
numiter=100 # 100 simulation of a panel data set
numfirm=500 # 500 firms
numyyear=10 # 10 years per firm
beta=1 # The true slope coefficient is 1, 
std_x=1 # the standard deviation of the independent variable is 1 
std_r=2 # the standard deviation of the residual term is 2.

In [216]:
# Case 1. (0%, 0%)
firm_x=0.0
firm_r=0.0

EstimatingStandardErrorsFamaMacBeth(df, numiter, numfirm, numyyear, beta, std_x, std_r, firm_x, firm_r)

Average beta_OLS: 1.0014262186641516
Std of beta_OLS: 0.02894015391451759
Average SE_OLS: 0.02820254506797718
Average SE_Cluster: 0.02811831873269093
Average SE_FamaMacBeth: 0.02671389546439515


In [217]:
# Case 2. (0%, 50%)
firm_x=0.0
firm_r=0.5

EstimatingStandardErrorsFamaMacBeth(df, numiter, numfirm, numyyear, beta, std_x, std_r, firm_x, firm_r)

Average beta_OLS: 0.9916169756418125
Std of beta_OLS: 0.027089074792616435
Average SE_OLS: 0.02832865351804748
Average SE_Cluster: 0.02811585435062963
Average SE_FamaMacBeth: 0.027123222952507565


In [218]:
# Case 3. (50%, 0%)
firm_x=0.5
firm_r=0.0

EstimatingStandardErrorsFamaMacBeth(df, numiter, numfirm, numyyear, beta, std_x, std_r, firm_x, firm_r)

Average beta_OLS: 1.00272299114204
Std of beta_OLS: 0.025779356393309843
Average SE_OLS: 0.02831276026929911
Average SE_Cluster: 0.028490926553193532
Average SE_FamaMacBeth: 0.027345922174558543


In [219]:
# Case 4. (50%, 50%)
firm_x=0.5
firm_r=0.5

EstimatingStandardErrorsFamaMacBeth(df, numiter, numfirm, numyyear, beta, std_x, std_r, firm_x, firm_r)

Average beta_OLS: 1.0077070148014455
Std of beta_OLS: 0.04505934090687732
Average SE_OLS: 0.028300816444651777
Average SE_Cluster: 0.05054520093751738
Average SE_FamaMacBeth: 0.02424507803748266


## Q3: estimate the model after including firm fixed effects.

In [245]:
def EstimatingStandardErrorsFirmFixedEffect(df, numiter, numfirm, numyyear, beta, std_x, std_r, firm_x, firm_r):

    estimated_ols_beta_list=[]
    estimated_ols_se_list=[]
    estimated_ols_tvalue_list=[]

    estimated_cluster_ols_beta_list=[]
    estimated_cluster_ols_se_list=[]
    estimated_cluster_ols_tvalue_list=[]

    for i in range(0, numiter , 1):

        list_x=[]
        list_r=[]

        for firm in range(0, numfirm, 1):

            error_x=np.sqrt(1-firm_x)*np.random.normal(0, std_x, numyyear)
            firm_temp=np.sqrt(firm_x)*np.random.normal(0, std_x, 1)
            firm_temp=[firm_temp[0] for i in range(0,numyyear,1)]
            x=error_x+firm_temp
            list_x.append(x)

            error_r=np.sqrt(1-firm_r)*np.random.normal(0, std_r, numyyear)
            firm_temp=np.sqrt(firm_r)*np.random.normal(0, std_r, 1)
            firm_temp=[firm_temp[0] for i in range(0,numyyear,1)]
            r=error_r+firm_temp
            list_r.append(r)

        df['x']=np.array(list_x).flatten()
        df['r']=np.array(list_r).flatten()
        df['y']=df['x']*beta+df['r']
            
        # running OLS regression with firm fixed effect
        
        model=sm1.ols(formula="y ~ x + C(firmid)", data=df)    
        results = model.fit(use_t=True)
            
        estimated_ols_beta_list.append(results.params['x'])
        estimated_ols_se_list.append(results.bse['x'])
        estimated_ols_tvalue_list.append(results.tvalues['x'])
        
        # running OLS regression
        Y = df['y']
        X = df['x']
        X = sm.add_constant(X)

        model = sm.OLS(endog=Y, exog=X)
        
        results=\
        model.fit(
            cov_type="cluster",
            cov_kwds={"groups": df["firmid"]},
        use_t=True)
        estimated_cluster_ols_beta_list.append(results.params['x'])
        estimated_cluster_ols_se_list.append(results.bse['x'])
        estimated_cluster_ols_tvalue_list.append(results.tvalues['x'])

    print("Average beta_OLS: {}".format(np.mean(estimated_ols_beta_list)))
    print("Std of beta_OLS: {}".format(np.std(estimated_ols_beta_list)))
    print("Average SE_OLS: {}".format(np.mean(estimated_ols_se_list)))
    print("Average SE_Cluster: {}".format(np.mean(estimated_cluster_ols_se_list)))

In [249]:
numiter=100 # 100 simulation of a panel data set
numfirm=500 # 500 firms
numyyear=10 # 10 years per firm
beta=1 # The true slope coefficient is 1, 
std_x=1 # the standard deviation of the independent variable is 1 
std_r=2 # the standard deviation of the residual term is 2.

In [252]:
# Case 1. (0%, 0%)
firm_x=0.0
firm_r=0.0

EstimatingStandardErrorsFirmFixedEffect(df, numiter, numfirm, numyyear, beta, std_x, std_r, firm_x, firm_r)

Average beta_OLS: 0.997177458842596
Std of beta_OLS: 0.029482593250637314
Average SE_OLS: 0.02985686885397376
Average SE_Cluster: 0.028210990009711934


In [253]:
# Case 2. (0%, 50%)
firm_x=0.0
firm_r=0.5

EstimatingStandardErrorsFirmFixedEffect(df, numiter, numfirm, numyyear, beta, std_x, std_r, firm_x, firm_r)

Average beta_OLS: 0.9988872480641366
Std of beta_OLS: 0.018793846759020627
Average SE_OLS: 0.021107666376306025
Average SE_Cluster: 0.028262414012445652


In [254]:
# Case 3. (50%, 0%)
firm_x=0.5
firm_r=0.0

EstimatingStandardErrorsFirmFixedEffect(df, numiter, numfirm, numyyear, beta, std_x, std_r, firm_x, firm_r)

Average beta_OLS: 1.0083655503106115
Std of beta_OLS: 0.04087383111573506
Average SE_OLS: 0.04215152789681704
Average SE_Cluster: 0.028234030109810338


In [256]:
# Case 4. (50%, 50%)
firm_x=0.5
firm_r=0.5

EstimatingStandardErrorsFirmFixedEffect(df, numiter, numfirm, numyyear, beta, std_x, std_r, firm_x, firm_r)

Average beta_OLS: 1.0040148225440795
Std of beta_OLS: 0.030159813628826415
Average SE_OLS: 0.029804085370860454
Average SE_Cluster: 0.050588812435432985
