# Using maketables with linearmodels

This notebook demonstrates how to create publication-ready regression tables from linearmodels panel data models using maketables.

## Setup

First, install the required packages if you haven't already:

```bash
pip install maketables linearmodels
```

In [1]:
import numpy as np
import pandas as pd
from linearmodels import PanelOLS, RandomEffects, PooledOLS
import maketables as mt


## Load Panel Data

We'll use wage panel data from linearmodels examples.

In [2]:
# Load wage panel data
from linearmodels.datasets import wage_panel

data = wage_panel.load()

# Set panel structure
data = data.set_index(['nr', 'year'])


## Example 1: Basic Panel Regression Comparison

Compare Pooled OLS, Fixed Effects, and Random Effects models.

In [3]:
# Pooled OLS (no fixed effects)
pooled = PooledOLS.from_formula("lwage ~ expersq + union + married", data)
pooled_res = pooled.fit(cov_type='clustered', cluster_entity=True)

# Fixed Effects (Entity)
fe = PanelOLS.from_formula("lwage ~ expersq + union + married  + EntityEffects", data)
fe_res = fe.fit(cov_type='clustered', cluster_entity=True)

# Random Effects
re = RandomEffects.from_formula("lwage ~ expersq + union + married  + EntityEffects", data)
re_res = re.fit(cov_type='clustered', cluster_entity=True)


In [4]:
# Create comparison table
table1 = mt.ETable(
    [pooled_res, fe_res, re_res],
    model_heads=['Pooled OLS', 'Fixed Effects', 'Random Effects'],
    caption='Wage Regression: Comparison of Panel Estimators',
    model_stats=['N', 'r2', 'r2_within'],
    notes='Clustered standard errors by entity in parentheses.'
)

table1

Wage Regression: Comparison of Panel Estimators,Wage Regression: Comparison of Panel Estimators,Wage Regression: Comparison of Panel Estimators,Wage Regression: Comparison of Panel Estimators
Unnamed: 0_level_1,lwage,lwage,lwage
Unnamed: 0_level_2,Pooled OLS,Fixed Effects,Random Effects
Unnamed: 0_level_3,(1),(2),(3)
coef,coef,coef,coef
expersq,0.013*** (0.001),0.004*** (0.000),0.005*** (0.000)
union,0.757*** (0.045),0.083*** (0.024),0.200*** (0.024)
married,0.737*** (0.047),0.107*** (0.022),0.204*** (0.022)
fe,fe,fe,fe
nr,-,x,-
stats,stats,stats,stats
Observations,4360,4360,4360
R2,0.687,0.137,0.212
Within R2,-1.498,0.137,0.1




## Example 2: Time and Entity Fixed Effects

Estimate models with different fixed effects specifications.

In [5]:
# Model 1: No fixed effects
mod1 = PanelOLS.from_formula('lwage ~ expersq + union + married', data)
res1 = mod1.fit(cov_type='clustered', cluster_entity=True)

# Model 2: Entity fixed effects
mod2 = PanelOLS.from_formula('lwage ~ expersq + union + married + EntityEffects', data)
res2 = mod2.fit(cov_type='clustered', cluster_entity=True)

# Model 3: Time fixed effects
mod3 = PanelOLS.from_formula('lwage ~ expersq + union + married + TimeEffects', data)
res3 = mod3.fit(cov_type='clustered', cluster_entity=True)

# Model 4: Both entity and time fixed effects
mod4 = PanelOLS.from_formula('lwage ~ expersq + union + married + EntityEffects + TimeEffects', data)
res4 = mod4.fit(cov_type='clustered', cluster_entity=True)

# Create table with variable labels
labels = {
    'expersq': 'Experience²',
    'union': 'Union Member',
    'married': 'Married',
    'lwage': 'Log(Wage)',
    'nr': 'Individual ID',
    'year': 'Year',
}

mt.ETable(
    [res1, res2, res3, res4],
    model_heads=['Pooled', 'Entity FE', 'Time FE', 'Two-way FE'],
    caption='Fixed Effects Specifications',
    labels=labels,
    model_stats=['N', 'r2', 'r2_within', 'r2_between'],
    show_fe=True,
    notes='Clustered standard errors by entity in parentheses.'
)

Fixed Effects Specifications,Fixed Effects Specifications,Fixed Effects Specifications,Fixed Effects Specifications,Fixed Effects Specifications
Unnamed: 0_level_1,Log(Wage),Log(Wage),Log(Wage),Log(Wage)
Unnamed: 0_level_2,Pooled,Entity FE,Time FE,Two-way FE
Unnamed: 0_level_3,(1),(2),(3),(4)
coef,coef,coef,coef,coef
Experience²,0.013*** (0.001),0.004*** (0.000),-0.002*** (0.000),-0.005*** (0.001)
Union Member,0.757*** (0.045),0.083*** (0.024),0.177*** (0.029),0.080*** (0.024)
Married,0.737*** (0.047),0.107*** (0.022),0.152*** (0.027),0.047** (0.022)
fe,fe,fe,fe,fe
Individual ID,-,x,-,x
Year,-,-,x,x
stats,stats,stats,stats,stats
Observations,4360,4360,4360,4360
R2,0.687,0.137,0.053,0.022




## Example 3: Different Standard Error Specifications

In [6]:
# Same model, different standard errors
formula = 'lwage ~ expersq + union + married + EntityEffects'
mod = PanelOLS.from_formula(formula, data)

# Homoskedastic
res_hom = mod.fit(cov_type='unadjusted')

# Heteroskedastic-robust
res_robust = mod.fit(cov_type='robust')

# Clustered by entity
res_cluster = mod.fit(cov_type='clustered', cluster_entity=True)

print("Models with different SEs estimated!")

Models with different SEs estimated!


In [7]:
table3 = mt.ETable(
    [res_hom, res_robust, res_cluster],
    model_heads=['Homoskedastic', 'Robust', 'Clustered'],
    caption='Comparison of Standard Error Specifications',
    labels=labels,
    model_stats=['N', 'r2_within', 'se_type'],
    notes='Entity fixed effects included in all specifications.'
)

table3

Comparison of Standard Error Specifications,Comparison of Standard Error Specifications,Comparison of Standard Error Specifications,Comparison of Standard Error Specifications
Unnamed: 0_level_1,Log(Wage),Log(Wage),Log(Wage)
Unnamed: 0_level_2,Homoskedastic,Robust,Clustered
Unnamed: 0_level_3,(1),(2),(3)
coef,coef,coef,coef
Experience²,0.004*** (0.000),0.004*** (0.000),0.004*** (0.000)
Union Member,0.083*** (0.020),0.083*** (0.020),0.083*** (0.024)
Married,0.107*** (0.018),0.107*** (0.018),0.107*** (0.022)
fe,fe,fe,fe
Individual ID,x,x,x
stats,stats,stats,stats
Observations,4360,4360,4360
Within R2,0.137,0.137,0.137
S.E. type,-,-,-




In [8]:
## Example 4: High-Dimensional Fixed Effects with AbsorbingLS

from linearmodels import AbsorbingLS

# Create sample data with high-dimensional fixed effects
np.random.seed(42)

# Simulate firm-worker matched data
n_firms = 500
n_workers = 1000
n_periods = 8

# Generate matched panel
obs_per_match = 3
n_obs = n_firms * obs_per_match

firm_ids = np.repeat(np.arange(n_firms), obs_per_match)
worker_ids = np.random.choice(n_workers, size=n_obs)
year_ids = np.random.choice(n_periods, size=n_obs) + 2010

# Create data with firm and worker effects
firm_effects = np.random.randn(n_firms)
worker_effects = np.random.randn(n_workers)

df_hdfe = pd.DataFrame({
    'firm_id': firm_ids,
    'worker_id': worker_ids,
    'year': year_ids,
    'lwage': (firm_effects[firm_ids] + 
              worker_effects[worker_ids] + 
              np.random.randn(n_obs) * 0.5),
    'experience': np.random.randint(0, 30, n_obs),
    'training': np.random.binomial(1, 0.3, n_obs),
    'female': np.random.binomial(1, 0.45, n_obs)
})

print(f"Dataset: {len(df_hdfe)} observations")
print(f"Firms: {df_hdfe['firm_id'].nunique()}")
print(f"Workers: {df_hdfe['worker_id'].nunique()}")
print(f"Years: {df_hdfe['year'].nunique()}")

# Prepare data for AbsorbingLS
dependent = df_hdfe[['lwage']]
exog = df_hdfe[['experience', 'training', 'female']]

# Model 1: No fixed effects (baseline)
mod_ols = AbsorbingLS(dependent=dependent, exog=exog, absorb=None)
res_ols = mod_ols.fit(cov_type='robust')

# Model 2: Absorb firm fixed effects
mod_firm = AbsorbingLS(dependent=dependent, exog=exog, absorb=df_hdfe[['firm_id']])
res_firm = mod_firm.fit(cov_type='clustered', clusters=df_hdfe[['firm_id']])

# Model 3: Absorb both firm and year fixed effects
mod_firm_year = AbsorbingLS(dependent=dependent, exog=exog, absorb=df_hdfe[['firm_id', 'year']])
res_firm_year = mod_firm_year.fit(cov_type='clustered', clusters=df_hdfe[['firm_id']])

# Model 4: Absorb firm, year, and firm×year interaction
df_hdfe['firm_year'] = df_hdfe['firm_id'].astype(str) + '_' + df_hdfe['year'].astype(str)
mod_interaction = AbsorbingLS(dependent=dependent, exog=exog, absorb=df_hdfe[['firm_id', 'year', 'firm_year']])
res_interaction = mod_interaction.fit(cov_type='clustered', clusters=df_hdfe[['firm_id']])

print(f"\nOLS: {len(res_ols.params)} coefficients")
print(f"Firm FE absorbed: {len(res_firm.params)} coefficients shown ({df_hdfe['firm_id'].nunique()} absorbed)")
print(f"Firm+Year FE absorbed: {len(res_firm_year.params)} coefficients shown")
print(f"Firm×Year FE absorbed: {len(res_interaction.params)} coefficients shown")

# Create comparison table
hdfe_labels = {
    'experience': 'Experience (years)',
    'training': 'Training Program',
    'female': 'Female',
    'lwage': 'Log(Wage)'
}

mt.ETable(
    [res_ols, res_firm, res_firm_year, res_interaction],
    model_heads=['OLS', 'Firm FE', 'Firm+Year FE', 'Firm×Year FE'],
    caption='High-Dimensional Fixed Effects with AbsorbingLS',
    labels=hdfe_labels,
    model_stats=['N', 'r2', 'adj_r2'],
    notes='All models include experience, training, and gender. Models 2-4 progressively absorb high-dimensional fixed effects. Clustered standard errors by firm in models 2-4.'
)

Dataset: 1500 observations
Firms: 500
Workers: 755
Years: 8

OLS: 3 coefficients
Firm FE absorbed: 3 coefficients shown (500 absorbed)
Firm+Year FE absorbed: 3 coefficients shown
Firm×Year FE absorbed: 3 coefficients shown


High-Dimensional Fixed Effects with AbsorbingLS,High-Dimensional Fixed Effects with AbsorbingLS,High-Dimensional Fixed Effects with AbsorbingLS,High-Dimensional Fixed Effects with AbsorbingLS,High-Dimensional Fixed Effects with AbsorbingLS
Unnamed: 0_level_1,Log(Wage),Log(Wage),Log(Wage),Log(Wage)
Unnamed: 0_level_2,OLS,Firm FE,Firm+Year FE,Firm×Year FE
Unnamed: 0_level_3,(1),(2),(3),(4)
coef,coef,coef,coef,coef
Experience (years),-0.003 (0.003),-0.002 (0.004),0.001 (0.004),0.001 (0.004)
Training Program,-0.010 (0.078),-0.007 (0.078),0.017 (0.081),0.017 (0.081)
Female,0.002 (0.073),0.006 (0.076),0.030 (0.078),0.030 (0.078)
fe,fe,fe,fe,fe
firm_id,-,x,x,x
firm_year,-,-,-,x
year,-,-,x,x
stats,stats,stats,stats,stats
Observations,1500,1500,1500,1500




In [9]:
## Example 5: Instrumental Variables Regression

from linearmodels.iv import IV2SLS
from linearmodels.datasets import meps

# Load MEPS data (cross-sectional, not panel)
meps_data = meps.load()
print(f"MEPS dataset: {len(meps_data)} observations")

# For IV2SLS, we can use formulas directly without PooledOLS
# The data is cross-sectional, not panel data

# 2SLS with single instrument (SSI ratio)
iv1 = IV2SLS.from_formula('ldrugexp ~ 1 + totchr + age + female + blhisp + linc + [hi_empunion ~ ssiratio]', data=meps_data)
iv1_res = iv1.fit(cov_type='robust')

# 2SLS with multiple instruments (SSI ratio, multlc, firmsz)
iv2 = IV2SLS.from_formula('ldrugexp ~ 1 + totchr + age + female + blhisp + linc + [hi_empunion ~ ssiratio + multlc + firmsz]', data=meps_data)
iv2_res = iv2.fit(cov_type='robust')

# For comparison, estimate "OLS" using IV2SLS without instruments (same as OLS)
ols_iv = IV2SLS.from_formula('ldrugexp ~ 1 + hi_empunion + totchr + age + female + blhisp + linc', data=meps_data)
ols_res = ols_iv.fit(cov_type='robust')


# Create comparison table
iv_labels = {
    'hi_empunion': 'Health Insurance (Employer/Union)',
    'linc': 'Log(Income)',
    'age': 'Age',
    'female': 'Female',
    'blhisp': 'Black/Hispanic',
    'totchr': 'Chronic Conditions',
    'ldrugexp': 'Log(Drug Expenditure)'
}

mt.ETable(
    [ols_res, iv1_res, iv2_res],
    model_heads=['OLS', 'IV (SSI)', 'IV (SSI+multlc+firmsz)'],
    caption='Instrumental Variables: Health Insurance and Drug Expenditure',
    labels=iv_labels,
    model_stats=['N', 'r2'],
)

MEPS dataset: 10391 observations


Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(
Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(
Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(


Instrumental Variables: Health Insurance and Drug Expenditure,Instrumental Variables: Health Insurance and Drug Expenditure,Instrumental Variables: Health Insurance and Drug Expenditure,Instrumental Variables: Health Insurance and Drug Expenditure
Unnamed: 0_level_1,Log(Drug Expenditure),Log(Drug Expenditure),Log(Drug Expenditure)
Unnamed: 0_level_2,OLS,IV (SSI),IV (SSI+multlc+firmsz)
Unnamed: 0_level_3,(1),(2),(3)
coef,coef,coef,coef
Health Insurance (Employer/Union),0.074*** (0.026),-0.898*** (0.221),-1.033*** (0.206)
Chronic Conditions,0.440*** (0.009),0.450*** (0.010),0.452*** (0.010)
Age,-0.004* (0.002),-0.013*** (0.003),-0.015*** (0.003)
Female,0.058** (0.025),-0.020 (0.033),-0.031 (0.032)
Black/Hispanic,-0.151*** (0.034),-0.217*** (0.039),-0.227*** (0.040)
Log(Income),0.010 (0.014),0.087*** (0.023),0.098*** (0.022)
Intercept,5.861*** (0.157),6.787*** (0.269),6.916*** (0.260)
stats,stats,stats,stats
Observations,10089,10089,10089




In [10]:

# Use models from Example 2 (Fixed Effects specifications)
# Display 95% confidence intervals instead of standard errors
mt.ETable(
    [res1, res2, res3, res4],
    model_heads=['Pooled', 'Entity FE', 'Time FE', 'Two-way FE'],
    caption='Fixed Effects with 95% Confidence Intervals',
    labels=labels,
    coef_fmt="b \n [ci95l, ci95u]",
    digits=3,
    model_stats=['N', 'r2_within'],
    notes='95% confidence intervals in brackets. Clustered standard errors by entity.'
)

Fixed Effects with 95% Confidence Intervals,Fixed Effects with 95% Confidence Intervals,Fixed Effects with 95% Confidence Intervals,Fixed Effects with 95% Confidence Intervals,Fixed Effects with 95% Confidence Intervals
Unnamed: 0_level_1,Log(Wage),Log(Wage),Log(Wage),Log(Wage)
Unnamed: 0_level_2,Pooled,Entity FE,Time FE,Two-way FE
Unnamed: 0_level_3,(1),(2),(3),(4)
coef,coef,coef,coef,coef
Experience²,"0.013*** [0.012, 0.015]","0.004*** [0.003, 0.004]","-0.002*** [-0.003, -0.001]","-0.005*** [-0.007, -0.003]"
Union Member,"0.757*** [0.668, 0.845]","0.083*** [0.036, 0.129]","0.177*** [0.12, 0.234]","0.080*** [0.032, 0.128]"
Married,"0.737*** [0.645, 0.829]","0.107*** [0.065, 0.15]","0.152*** [0.099, 0.206]","0.047** [0.003, 0.091]"
fe,fe,fe,fe,fe
Individual ID,-,x,-,x
Year,-,-,x,x
stats,stats,stats,stats,stats
Observations,4360,4360,4360,4360
Within R2,-1.498,0.137,-0.104,-0.481


