# Fixed Effects OR Random Effects Estimation

### Intro and objectives


### In this lab you will learn:
1. examples of random effects estimation
2. how to fit fixed and random effects models in Python
3. how to determine which one to use



In [None]:
!pip install wooldridge
!pip install linearmodels
import wooldridge as woo
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import linearmodels as plm
import numpy as np
import scipy.stats as stats

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


# Example. Has the Return to Education Changed over Time?

#### The data in WAGEPAN are from Vella and Verbeek (1998). Each of the 545 men in the sample worked in every year from 1980 through 1987. Some variables in the data set change over time: experience, marital status, and union status are the three important ones. Other variables do not change: race and education are the key examples.

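#### If we use fixed effects, we cannot include race, education, or experience in the equation. Race and education do not change over time, so they are eliminated in the transformed (time-demeaned) model; and because every man worked in every year, his experience rises by exactly one each year, which makes linear experience perfectly collinear with the year dummies once the individual effects are removed. Random effects models, however, do allow us to include all three.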



#### We assume a model of the following form:

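$\log(wage_{it})=\beta_0+\beta_1 educ_i+\beta_2 black_i+\beta_3 hisp_i+\beta_4 exper_{it}+\beta_5 exper_{it}^2+\beta_6 married_{it}+\beta_7 union_{it}+\delta_1 d81_t+\dots+\delta_7 d87_t+a_i+u_{it}$

Here 1980 is the base year, $a_i$ is the unobserved individual effect, and $u_{it}$ is the idiosyncratic error. (The code below fits an equivalent parameterization with no intercept and one dummy for each of the eight years.)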






In [None]:
# Load the WAGEPAN panel and index it by (person id, year) in one step.
# drop=False keeps 'nr' and 'year' as ordinary columns too, so that 'year'
# can still be referenced inside model formulas (e.g. C(year)).
wagepan = woo.dataWoo('wagepan').set_index(['nr', 'year'], drop=False)

In [None]:
# Show the first five person-year rows to confirm the (nr, year) index.
wagepan.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,nr,year,agric,black,bus,construc,ent,exper,fin,hisp,...,union,lwage,d81,d82,d83,d84,d85,d86,d87,expersq
nr,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
13,1980,13,1980,0,0,1,0,0,1,0,0,...,0,1.19754,0,0,0,0,0,0,0,1
13,1981,13,1981,0,0,0,0,0,2,0,0,...,1,1.85306,1,0,0,0,0,0,0,4
13,1982,13,1982,0,0,1,0,0,3,0,0,...,0,1.344462,0,1,0,0,0,0,0,9
13,1983,13,1983,0,0,1,0,0,4,0,0,...,0,1.433213,0,0,1,0,0,0,0,16
13,1984,13,1984,0,0,0,0,0,5,0,0,...,0,1.568125,0,0,0,1,0,0,0,25


In [None]:
# Print the index range, column names, non-null counts and dtypes of the panel.
wagepan.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 4360 entries, (13, 1980) to (12548, 1987)
Data columns (total 44 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   nr        4360 non-null   int64  
 1   year      4360 non-null   int64  
 2   agric     4360 non-null   int64  
 3   black     4360 non-null   int64  
 4   bus       4360 non-null   int64  
 5   construc  4360 non-null   int64  
 6   ent       4360 non-null   int64  
 7   exper     4360 non-null   int64  
 8   fin       4360 non-null   int64  
 9   hisp      4360 non-null   int64  
 10  poorhlth  4360 non-null   int64  
 11  hours     4360 non-null   int64  
 12  manuf     4360 non-null   int64  
 13  married   4360 non-null   int64  
 14  min       4360 non-null   int64  
 15  nrthcen   4360 non-null   int64  
 16  nrtheast  4360 non-null   int64  
 17  occ1      4360 non-null   int64  
 18  occ2      4360 non-null   int64  
 19  occ3      4360 non-null   int64  
 20  occ4      43

In [None]:
# Preview the identifiers, the outcome (lwage) and a selection of regressors
# for the first 20 person-year rows.
preview_cols = [
    'nr', 'year', 'black', 'bus', 'construc', 'exper', 'hisp',
    'union', 'lwage', 'd81', 'd82', 'd83', 'expersq', 'married',
]
wagepan.loc[:, preview_cols].head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,nr,year,black,bus,construc,exper,hisp,union,lwage,d81,d82,d83,expersq,married
nr,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
13,1980,13,1980,0,1,0,1,0,0,1.19754,0,0,0,1,0
13,1981,13,1981,0,0,0,2,0,1,1.85306,1,0,0,4,0
13,1982,13,1982,0,1,0,3,0,0,1.344462,0,1,0,9,0
13,1983,13,1983,0,1,0,4,0,0,1.433213,0,0,1,16,0
13,1984,13,1984,0,0,0,5,0,0,1.568125,0,0,0,25,0
13,1985,13,1985,0,1,0,6,0,0,1.699891,0,0,0,36,0
13,1986,13,1986,0,1,0,7,0,0,-0.720263,0,0,0,49,0
13,1987,13,1987,0,1,0,8,0,0,1.669188,0,0,0,64,0
17,1980,17,1980,0,0,0,4,0,0,1.675962,0,0,0,16,0
17,1981,17,1981,0,0,0,5,0,0,1.518398,1,0,0,25,0


In [None]:
# estimation of FE and RE:
reg_fe = plm.PanelOLS.from_formula(formula='lwage ~ I(exper**2) + married +'
                                           'union + C(year) + EntityEffects',
                                   data=wagepan)
results_fe = reg_fe.fit()



In [None]:
# Keep the FE coefficient vector and its covariance matrix for the Hausman test.
b_fe, b_fe_cov = results_fe.params, results_fe.cov

In [None]:
# print regression table:
table = pd.DataFrame({'b': round(results_fe.params, 4),
                      'se': round(results_fe.std_errors, 4),
                      't': round(results_fe.tstats, 4),
                      'pval': round(results_fe.pvalues, 4)})
print(f'table: \n{table}\n')

table: 
                      b      se        t    pval
C(year)[T.1980]  1.4260  0.0183  77.7484  0.0000
C(year)[T.1981]  1.5772  0.0216  72.9656  0.0000
C(year)[T.1982]  1.6790  0.0265  63.2583  0.0000
C(year)[T.1983]  1.7805  0.0333  53.4392  0.0000
C(year)[T.1984]  1.9161  0.0417  45.9816  0.0000
C(year)[T.1985]  2.0435  0.0515  39.6460  0.0000
C(year)[T.1986]  2.1915  0.0630  34.7714  0.0000
C(year)[T.1987]  2.3510  0.0762  30.8669  0.0000
I(exper**2)     -0.0052  0.0007  -7.3612  0.0000
married          0.0467  0.0183   2.5494  0.0108
union            0.0800  0.0193   4.1430  0.0000



In [None]:
# Random effects estimation.
# Unlike the FE specification, this one can also carry the time-constant
# regressors (educ, black, hisp) and linear experience.
re_formula = ('lwage ~ educ + black + hisp + exper + I(exper**2)'
              '+ married + union + C(year)')
reg_re = plm.RandomEffects.from_formula(formula=re_formula, data=wagepan)
results_re = reg_re.fit()


In [None]:
# Keep the RE coefficient vector and its covariance matrix for the Hausman test.
b_re, b_re_cov = results_re.params, results_re.cov

In [None]:
# print regression table:
table = pd.DataFrame({'b': round(results_re.params, 4),
                      'se': round(results_re.std_errors, 4),
                      't': round(results_re.tstats, 4),
                      'pval': round(results_re.pvalues, 4)})
print(f'table: \n{table}\n')

table: 
                      b      se       t    pval
C(year)[T.1980]  0.0234  0.1514  0.1546  0.8771
C(year)[T.1981]  0.0638  0.1601  0.3988  0.6901
C(year)[T.1982]  0.0543  0.1690  0.3211  0.7481
C(year)[T.1983]  0.0436  0.1780  0.2450  0.8065
C(year)[T.1984]  0.0664  0.1871  0.3551  0.7225
C(year)[T.1985]  0.0811  0.1961  0.4136  0.6792
C(year)[T.1986]  0.1152  0.2052  0.5617  0.5744
C(year)[T.1987]  0.1583  0.2143  0.7386  0.4602
I(exper**2)     -0.0047  0.0007 -6.8623  0.0000
black           -0.1394  0.0480 -2.9054  0.0037
educ             0.0919  0.0107  8.5744  0.0000
exper            0.1058  0.0154  6.8706  0.0000
hisp             0.0217  0.0428  0.5078  0.6116
married          0.0638  0.0168  3.8035  0.0001
union            0.1059  0.0179  5.9289  0.0000



In [None]:
# Hausman test of FE vs. RE
# (I) find overlapping coefficients:
# The result is kept as a list (in FE parameter order) rather than a set:
# it is used below as an indexer, and pandas 2.x raises TypeError when a set
# is passed to Series[...] or DataFrame.loc[...]. A list also gives the
# coefficient vector and covariance matrix one well-defined ordering.
re_coef_names = set(results_re.params.index)
common_coef = [name for name in results_fe.params.index
               if name in re_coef_names]

In [None]:
# (II) calculate differences between FE and RE:
# Index with a sorted list: pandas rejects set indexers, and using the same
# fixed ordering everywhere keeps the entries of b_diff aligned with the
# rows/columns of b_cov_diff.
coef_order = sorted(common_coef)
b_diff = np.array(results_fe.params[coef_order] -
                  results_re.params[coef_order])
# Degrees of freedom of the test = number of coefficients being compared.
df = len(b_diff)
# Difference of the two covariance matrices, restricted to shared coefficients.
b_cov_diff = np.array(b_fe_cov.loc[coef_order, coef_order] -
                      b_re_cov.loc[coef_order, coef_order])

### We perform the Hausman Test to formally test for difference in the models

In [None]:
# (III) calculate test statistic:
# Quadratic form b_diff' V^{-1} b_diff with V = b_cov_diff. Solving the
# linear system V x = b_diff avoids forming the explicit inverse.
# NOTE(review): abs() is kept from the original; it hides a negative value,
# which would indicate that b_cov_diff is not positive definite in this
# sample -- worth checking the sign before trusting the result.
stat = abs(b_diff @ np.linalg.solve(b_cov_diff, b_diff))
# The survival function equals 1 - cdf but does not lose precision to
# cancellation when the p-value is very small.
pval = stats.chi2.sf(stat, df)


In [None]:
# Report the Hausman statistic and its p-value, each followed by a blank line.
print(f'stat: {stat}\n', f'pval: {pval}\n', sep='\n')

stat: 43.42707117638388

pval: 9.1506138486519e-06



## How do we interpret the results?

#### Based on the two  fitted models, we conclude:

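#### 1. A visual inspection of the slope coefficients the two models share (`I(exper**2)`, `married`, `union`) reveals the same signs and broadly similar magnitudes, although the random effects estimates for `married` and `union` are somewhat larger.
#### 2. The Hausman test rejects the null hypothesis that the Fixed Effects and Random Effects estimates do not differ systematically (stat ≈ 43.43, p-value ≈ 9.2e-06). It therefore does matter which one we use: the evidence is against the random effects assumption that the individual effects are uncorrelated with the regressors, so the fixed effects estimates are preferred. The random effects model does provide more information, as it includes time-constant factors such as education and race, but its estimates should be interpreted with caution here.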