In [5]:
# Uncomment and run the lines below if these packages are not installed yet.
# (%pip installs into the environment of the running kernel.)

# %pip install linearmodels
# %pip install pystout

These examples are taken from Kevin Sheppard's user guide for the linearmodels package (https://bashtage.github.io/linearmodels/index.html).  

In [6]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import pandas as pd
import pystout as pystout
from linearmodels.panel import PooledOLS
from linearmodels.panel import PanelOLS
from linearmodels.datasets import wage_panel

# Load the wage panel data set bundled with linearmodels.
data = wage_panel.load()

# Capture year as a categorical *before* it is moved into the index, so that
# it can be re-attached below as an ordinary column.
year = pd.Categorical(data["year"])

# Entity (nr) becomes the outer index level and time (year) the inner one;
# the categorical year is then added back as a regular column.
data = data.set_index(keys=["nr", "year"])
data = data.assign(year=year)

# print(wage_panel.DESCR)
# data.head()


F. Vella and M. Verbeek (1998), "Whose Wages Do Unions Raise? A Dynamic Model
of Unionism and Wage Rate Determination for Young Men," Journal of Applied
Econometrics 13, 163-183.

nr                       person identifier
year                     1980 to 1987
black                    =1 if black
exper                    labor market experience
hisp                     =1 if Hispanic
hours                    annual hours worked
married                  =1 if married
educ                     years of schooling
union                    =1 if in union
lwage                    log(wage)
expersq                  exper^2
occupation               Occupation code



Unnamed: 0_level_0,Unnamed: 1_level_0,black,exper,hisp,hours,married,educ,union,lwage,expersq,occupation,year
nr,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
13,1980,0,1,0,2672,0,14,0,1.19754,1,9,1980
13,1981,0,2,0,2320,0,14,1,1.85306,4,9,1981
13,1982,0,3,0,2940,0,14,0,1.344462,9,9,1982
13,1983,0,4,0,2960,0,14,0,1.433213,16,9,1983
13,1984,0,5,0,3071,0,14,0,1.568125,25,5,1984


### Pooled OLS with White standard errors

In [10]:
# Pooled OLS of log wages on the listed regressors plus the categorical
# `year` column.
pooled_formula = """
    lwage ~ black
    + hisp
    + exper
    + expersq
    + married
    + educ
    + union
    + year
    """
model1 = PooledOLS.from_formula(pooled_formula, data=data)

# cov_type="robust" requests heteroskedasticity-robust (White) standard errors.
result1 = model1.fit(cov_type="robust")

# print(result1)

### Fixed effects and clustered standard errors

Here it is important that the data be in a multi-indexed data frame, with the entity (firm, person, ...) as the outer index level and time as the inner index level.

In [16]:
from linearmodels.panel import PanelOLS

# Model (2): entity fixed effects (the EntityEffects term), with standard
# errors clustered by entity.
entity_fe_formula = """
    lwage ~ expersq
    + married
    + union
    + EntityEffects
    """
model2 = PanelOLS.from_formula(entity_fe_formula, data=data)
result2 = model2.fit(
    cov_type="clustered",
    cluster_entity=True,
)

# Model (3): time fixed effects (the TimeEffects term), with standard errors
# clustered by time period.
time_fe_formula = """
    lwage ~ black
    + hisp
    + exper
    + expersq
    + married
    + educ
    + union
    + TimeEffects
    """
model3 = PanelOLS.from_formula(time_fe_formula, data=data)
result3 = model3.fit(
    cov_type="clustered",
    cluster_time=True,
)

# Model (4): both entity and time fixed effects, with standard errors
# clustered along both dimensions (two-way clustering).
two_way_fe_formula = """
    lwage ~ expersq
    + married
    + union
    + EntityEffects
    + TimeEffects
    """
model4 = PanelOLS.from_formula(two_way_fe_formula, data=data)
result4 = model4.fit(
    cov_type="clustered",
    cluster_entity=True,
    cluster_time=True,
)

In [17]:
# Write a LaTeX table ("table.tex") comparing the four fitted models.
#
# The notebook imports the package with `import pystout as pystout`, which
# binds the name `pystout` to the *module*. Calling the module directly raises
# "TypeError: 'module' object is not callable", so the table-building function
# is reached explicitly as `pystout.pystout`.
pystout.pystout(
    models=[result1, result2, result3, result4],
    file="table.tex",
    # Regressors to show as rows, in display order. The year dummies of
    # model (1) are not listed here; they are described in the notes instead.
    exogvars=[
        "union",
        "married",
        "expersq",
        "exper",
        "black",
        "hisp",
        "educ",
    ],
    # Significance markers keyed by p-value threshold.
    stars={0.1: "*", 0.05: "**", 0.01: "***"},
    # Footnotes describing each column's specification and the star legend.
    addnotes=[
        "(1): time dummy variables, White standard errors",
        "(2): entity fixed effects, standard errors clustered by entity",
        "(3): time fixed effects, standard errors clustered by year",
        "(4): entity and time fixed effects, two-way clustered standard errors",
        "$^*p<0.1$, $^{**}p<0.05$, $^{***}p<0.01$",
    ],
    # Model-level statistic to report and the label to print for it.
    modstat={"nobs": "Obs"},
    title="Log Wages",
    label="tab:wage",
)

In [15]:
# Print the plain-text estimation summary of the pooled OLS fit (result1).
# NOTE(review): the saved output reports F(7,4353), i.e. 7 regressors, which
# does not match a formula that also includes `year` -- presumably stale from
# an earlier run; restart the kernel and run all cells to confirm.
print(result1)

                          PooledOLS Estimation Summary                          
Dep. Variable:                  lwage   R-squared:                        0.9232
Estimator:                  PooledOLS   R-squared (Between):              0.9577
No. Observations:                4360   R-squared (Within):               0.1674
Date:                Fri, Aug 26 2022   R-squared (Overall):              0.9232
Time:                        08:46:54   Log-likelihood                   -2989.4
Cov. Estimator:                Robust                                           
                                        F-statistic:                      7473.4
Entities:                         545   P-value                           0.0000
Avg Obs:                       8.0000   Distribution:                  F(7,4353)
Min Obs:                       8.0000                                           
Max Obs:                       8.0000   F-statistic (robust):             8006.9
                            