# OLS and Fixed Effects

In [18]:
from linearmodels.datasets import wage_panel

# Column roles used in the models of this notebook:
#   nr      -- individual id
#   lwage   -- log of wage, the outcome (y)
#   married -- the treatment (T)
data = wage_panel.load()

# Preview the first ten rows of the panel.
data.head(n=10)

Unnamed: 0,nr,year,black,exper,hisp,hours,married,educ,union,lwage,expersq,occupation
0,13,1980,0,1,0,2672,0,14,0,1.19754,1,9
1,13,1981,0,2,0,2320,0,14,1,1.85306,4,9
2,13,1982,0,3,0,2940,0,14,0,1.344462,9,9
3,13,1983,0,4,0,2960,0,14,0,1.433213,16,9
4,13,1984,0,5,0,3071,0,14,0,1.568125,25,5
5,13,1985,0,6,0,2864,0,14,0,1.699891,36,2
6,13,1986,0,7,0,2994,0,14,0,-0.720263,49,2
7,13,1987,0,8,0,2640,0,14,0,1.669188,64,2
8,17,1980,0,4,0,2484,0,13,0,1.675962,16,2
9,17,1981,0,5,0,2804,0,13,0,1.518398,25,2


## 1. OLS with Controls: Ceteris Paribus -- controlling for what we can observe

In [13]:
import statsmodels.formula.api as smf

# Plain OLS of log wage on the treatment (married) plus the observed
# controls expersq, union and hours; no fixed effects are included here.
ols_model = smf.ols(
    formula='lwage ~ expersq + union + married + hours',
    data=data,
).fit()

# Display only the coefficient table (index 1 of the summary tables).
ols_model.summary().tables[1]

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,1.5327,0.032,47.842,0.000,1.470,1.595
expersq,0.0012,0.000,6.208,0.000,0.001,0.002
union,0.1679,0.018,9.239,0.000,0.132,0.204
married,0.1966,0.016,11.948,0.000,0.164,0.229
hours,-3.33e-05,1.42e-05,-2.352,0.019,-6.11e-05,-5.54e-06


## 2. Using Fixed Effects -- controlling for what we cannot observe

### 2-1. Entity Effects (confounders that are constant over time within each entity)

In [14]:
from linearmodels.panel import PanelOLS
# Same regressors as the OLS model, plus one fixed effect per individual.
# The frame is indexed by (nr, year) so that nr is the entity level and
# year is the time level.
mod = PanelOLS.from_formula(
    formula="lwage ~ expersq + union + married + hours + EntityEffects",
    data=data.set_index(["nr", "year"]),
)

# Standard errors are clustered by entity.
result = mod.fit(cov_type='clustered', cluster_entity=True)

# Display only the parameter table (index 1 of the summary tables).
result.summary.tables[1]

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
expersq,0.0040,0.0002,16.552,0.0000,0.0035,0.0044
union,0.0784,0.0236,3.3225,0.0009,0.0322,0.1247
married,0.1147,0.0220,5.2213,0.0000,0.0716,0.1577
hours,-8.46e-05,2.22e-05,-3.8105,0.0001,-0.0001,-4.107e-05


### 2-2. Time Effects (confounders that are shared by all entities within a period)

In [15]:
from linearmodels.panel import PanelOLS

# Same regressors as the OLS model, plus one fixed effect per year.
# The frame is indexed by (nr, year) so that nr is the entity level and
# year is the time level.
mod = PanelOLS.from_formula(
    formula="lwage ~ expersq + union + married + hours + TimeEffects",
    data=data.set_index(["nr", "year"]),
)

# Standard errors are clustered by time period.
# NOTE(review): the data preview shows years 1980-1987 for a single worker,
# so the number of time clusters may be small -- confirm that clustering on
# year is appropriate here.
result = mod.fit(cov_type='clustered', cluster_time=True)

# Display only the parameter table (index 1 of the summary tables).
result.summary.tables[1]

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
expersq,-0.0021,0.0002,-10.845,0.0000,-0.0025,-0.0017
union,0.1721,0.0210,8.2153,0.0000,0.1311,0.2132
married,0.1634,0.0070,23.221,0.0000,0.1496,0.1772
hours,-6.535e-05,3.629e-05,-1.8010,0.0718,-0.0001,5.789e-06


### 2-3. Use Both Entity and Time Effects

In [16]:
from linearmodels.panel import PanelOLS

# Same regressors as the OLS model, with both individual and year fixed
# effects. The frame is indexed by (nr, year) so that nr is the entity level
# and year is the time level.
mod = PanelOLS.from_formula(
    formula="lwage ~ expersq + union + married + hours + EntityEffects + TimeEffects",
    data=data.set_index(["nr", "year"]),
)

# Standard errors are clustered along both dimensions (entity and time).
result = mod.fit(
    cov_type='clustered',
    cluster_entity=True,
    cluster_time=True,
)

# Display only the parameter table (index 1 of the summary tables).
result.summary.tables[1]

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
expersq,-0.0062,0.0008,-8.1479,0.0000,-0.0077,-0.0047
union,0.0727,0.0228,3.1858,0.0015,0.0279,0.1174
married,0.0476,0.0177,2.6906,0.0072,0.0129,0.0823
hours,-0.0001,3.546e-05,-3.8258,0.0001,-0.0002,-6.614e-05


## Example discussion
<!-- Loads the utteranc.es client script asynchronously to embed a comment
     widget, configured with the GitHub repo named in `repo`.
     NOTE(review): issue-term="pathname" presumably keys each discussion
     thread on the page path - confirm against the utterances documentation. -->
<script src="https://utteranc.es/client.js"
        repo="myeongseok-gwon/teachbook-test"
        issue-term="pathname"
        theme="github-light"
        crossorigin="anonymous"
        async>
</script>