# Minimal Oaxaca Demo

This notebook is a minimal example of fitting an Oaxaca-Blinder decomposition on sample data and displaying the two-fold and three-fold results.

In [11]:
%load_ext autoreload
%autoreload 2
import pandas as pd

from oaxaca import Oaxaca

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [12]:
# Read the sample dataset; the relative path is resolved against the
# notebook's current working directory.
df = pd.read_csv(filepath_or_buffer="sample_data.csv")

# Preview the first five rows to check the columns used by the model below.
df.head(n=5)

Unnamed: 0,age,female,foreign_born,LTHS,high_school,some_college,college,advanced_degree,education_level,ln_real_wage
0,52,0,1,0,1,0,0,0,high_school,2.140066
1,46,1,1,0,1,0,0,0,high_school,
2,31,1,1,0,1,0,0,0,high_school,2.499795
3,35,0,1,0,1,0,0,0,high_school,2.70805
4,19,0,0,0,1,0,0,0,high_school,2.079442


In [13]:
# Create the decomposition object first, then estimate it on the sample data.
oaxaca_model = Oaxaca()

# Formula notes:
#   * the outcome is exp(ln_real_wage), i.e. the logged wage column is
#     exponentiated inside the formula;
#   * "-1" drops the intercept under the usual formula convention, so every
#     education level appears as its own term in the output tables;
#   * C(education_level) marks the education column as categorical.
# `group_variable` names the column that splits the sample into two groups.
results = oaxaca_model.fit(
    formula="exp(ln_real_wage) ~ -1 + age + female + C(education_level)",
    data=df,
    group_variable="foreign_born",
)

# Confirm the fit finished and show which group labels were detected.
print("Model fitted successfully!")
print(f"Groups: {results.groups_}")

Model fitted successfully!
Groups: [0, 1]


In [14]:
# Two-fold decomposition with weight 1.0 on group 0 and 0.0 on group 1; the
# dictionary keys match the group labels printed after fitting.
twofold_decomposition = results.two_fold(
    weights={0: 1.0, 1: 0.0},
)

In [15]:
# Leave the result as the cell's last expression so Jupyter renders its rich
# (HTML) summary table instead of a plain-text print.
twofold_decomposition

Variable,Explained,Expl %,Unexplained,Unexpl %,Total,Tot %
age,-1.7491,-58.0%,7.5585,250.6%,5.8094,192.6%
female,-0.5231,-17.3%,-1.1653,-38.6%,-1.6883,-56.0%
C(education_level),2.4545,81.4%,-3.5599,-118.1%,-1.1055,-36.7%
C(education_level)[LTHS],-1.5272,-50.6%,-1.5892,-52.7%,-3.1163,-103.3%
C(education_level)[advanced_degree],0.8993,29.8%,-0.4043,-13.4%,0.495,16.4%
C(education_level)[college],0.8965,29.7%,0.2337,7.8%,1.1303,37.5%
C(education_level)[high_school],-0.5832,-19.3%,-1.2954,-43.0%,-1.8786,-62.3%
C(education_level)[some_college],2.769,91.8%,-0.5048,-16.7%,2.2642,75.1%
Total,0.1822,6.0%,2.8333,94.0%,3.0156,100.0%


In [16]:
# Per-term breakdown of the two-fold result: one row per variable/category
# with explained, unexplained and total contributions, each as a raw value
# and as a percentage, plus the variable type (continuous/categorical).
twofold_decomposition.detailed_contributions()

Unnamed: 0_level_0,Unnamed: 1_level_0,explained_detailed,explained_detailed_pct,unexplained_detailed,unexplained_detailed_pct,total,total_pct,variable_type
variable_group,category,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
age,age,-1.749147,-58.003785,7.558531,250.649793,5.809383,192.646008,continuous
female,female,-0.523082,-17.346017,-1.165265,-38.641548,-1.688347,-55.987565,continuous
C(education_level),C(education_level)[LTHS],-1.527188,-50.643366,-1.589151,-52.698137,-3.11634,-103.341503,categorical
C(education_level),C(education_level)[advanced_degree],0.899294,29.821657,-0.404325,-13.407897,0.494969,16.41376,categorical
C(education_level),C(education_level)[college],0.896542,29.730385,0.233714,7.750237,1.130256,37.480622,categorical
C(education_level),C(education_level)[high_school],-0.583211,-19.339953,-1.295377,-42.956219,-1.878587,-62.296171,categorical
C(education_level),C(education_level)[some_college],2.76904,91.824645,-0.504801,-16.739796,2.264239,75.084849,categorical


In [17]:
# Run the three-fold decomposition on the same fitted results, using the
# method's default arguments.
threefold_decomposition = results.three_fold()

In [18]:
# Display the three-fold summary table via the object's rich repr.
# NOTE(review): the saved output for this cell carries four value/percent
# column pairs per row, but its header labels only three of them
# (Explained / Unexplained / Total) - confirm the column labels rendered for
# the three-fold view.
threefold_decomposition

Variable,Explained,Expl %,Unexplained,Unexpl %,Total,Tot %,Unnamed: 7,Unnamed: 8
age,-0.5168,-17.1%,7.5585,250.6%,-1.2324,-40.9%,5.8094,192.6%
female,-0.2727,-9.0%,-1.1653,-38.6%,-0.2504,-8.3%,-1.6883,-56.0%
C(education_level),2.406,79.8%,-3.5599,-118.1%,0.0485,1.6%,-1.1055,-36.7%
C(education_level)[LTHS],-2.631,-87.2%,-1.5892,-52.7%,1.1038,36.6%,-3.1163,-103.3%
C(education_level)[advanced_degree],1.3849,45.9%,-0.4043,-13.4%,-0.4856,-16.1%,0.495,16.4%
C(education_level)[college],0.7619,25.3%,0.2337,7.8%,0.1347,4.5%,1.1303,37.5%
C(education_level)[high_school],-0.8249,-27.4%,-1.2954,-43.0%,0.2417,8.0%,-1.8786,-62.3%
C(education_level)[some_college],3.7151,123.2%,-0.5048,-16.7%,-0.9461,-31.4%,2.2642,75.1%
Total,1.6165,53.6%,2.8333,94.0%,-1.4343,-47.6%,3.0156,100.0%
