# Inflation vs. Wages (Overall)
2025-09-14

## 1. Import Functions and Set Variables

In [1]:
from _notebook_setup import *
import plotly.express as px
import plotly.graph_objects as go

# Run-mode switches used by the cells below.
hit_api = False    # True: fetch fresh data through the bls client; False: load the cached CSVs
save_dfs = False   # True: write freshly fetched frames to disk (only checked when hit_api is True)
save_figs = True   # True: export each Plotly figure with save_plotly_figure()

# Year range requested from the API: 1984 through the current calendar year (inclusive).
current_year = datetime.now().year
# Start in 1984 rather than 1979. Author's rationale: CPI data is normalized to 8/1983
# due to updated expenditure weights, so the window begins with the first full year after that.
years = list(range(1984, current_year + 1))

✅ Notebook setup complete!
✅ Available APIs: bls
✅ Available libraries: pd, np, plt, sns, datetime
✅ Helper functions: save_data(), save_figure(), save_plotly_figure(), load_data()
📁 Data directory: /Users/annebode/Documents/selfevidence.github.io/data
📁 Output directory: /Users/annebode/Documents/selfevidence.github.io/data/output
📊 Ready for analysis!


## 2. Import Data

### 2.a CPI (Unadjusted)

In [2]:
# Series-ID components (codes taken from https://www.bls.gov/help/hlpforma.htm#OCWC).
# A full series ID is the concatenation <base prefix><region code><item code>.
base_series_ids = {"CUUR": "CPI (Unadjusted)"}

regions = {"0000": "National"}

items = {
    "SA0": "All items",
    "SA0L1E": "Core CPI (excludes food & energy)",
    "SAF": "Food and beverages",
    "SAH": "Housing",
    "SAM": "Medical care",
}

# Every prefix x region x item combination, in dictionary insertion order.
series_ids = [
    f"{prefix}{region_code}{item_code}"
    for prefix in base_series_ids
    for region_code in regions
    for item_code in items
]

if not hit_api:
    # Offline path: read the cached file and convert its 'date' column to datetimes.
    df_cpi = load_data(filename='01_inflation_data.csv')
    df_cpi['date'] = pd.to_datetime(df_cpi['date'])
else:
    # Online path: download the raw series, then pass them through the CPI cleaning helper.
    df_cpi = bls.get_data(series_ids=series_ids, years=years)
    df_cpi = bls.clean_data_cpi_unadjusted(df_cpi, base_series_ids, regions, items)
    if save_dfs:
        save_data(df=df_cpi, filename='01_inflation_data.csv')

df_cpi.head()

📂 Loaded: /Users/annebode/Documents/selfevidence.github.io/data/output/processed_data/01_inflation_data.csv (2500 rows)


Unnamed: 0,series_id,year,period,value,data_type,region,item,date
0,CUUR0000SA0,2003,M12,184.3,CPI (Unadjusted),National,All items,2003-12-01
1,CUUR0000SA0,2003,M11,184.5,CPI (Unadjusted),National,All items,2003-11-01
2,CUUR0000SA0,2003,M10,185.0,CPI (Unadjusted),National,All items,2003-10-01
3,CUUR0000SA0,2003,M09,185.2,CPI (Unadjusted),National,All items,2003-09-01
4,CUUR0000SA0,2003,M08,184.6,CPI (Unadjusted),National,All items,2003-08-01


### 2.b CPS Weekly Nominal Earnings

In [3]:
# CPS usual-weekly-earnings series, keyed by BLS series ID
# (IDs looked up at https://data.bls.gov/PDQWeb/le).
# Each entry records the series description, the earnings percentile it represents, and the race group.
series_id_dict = {
    'LEU0252911200': dict(  # first decile
        description='(unadj)- Usual weekly earnings (first decile), Employed full time, Wage and salary workers',
        percentile=10,
        race='All',
    ),
    'LEU0252911300': dict(  # first quartile
        description='(unadj)- Usual weekly earnings (first quartile), Employed full time, Wage and salary workers',
        percentile=25,
        race='All',
    ),
    'LEU0252881500': dict(  # second quartile (median)
        description='(unadj)- Usual weekly earnings (second quartile), Employed full time, Wage and salary workers',
        percentile=50,
        race='All',
    ),
    'LEU0252911400': dict(  # third quartile
        description='(unadj)- Usual weekly earnings (third quartile), Employed full time, Wage and salary workers',
        percentile=75,
        race='All',
    ),
    'LEU0252911500': dict(  # ninth decile
        description='(unadj)- Usual weekly earnings (ninth decile), Employed full time, Wage and salary workers',
        percentile=90,
        race='All',
    ),
}

if not hit_api:
    # Offline path: read the cached file and convert its 'date' column to datetimes.
    df_wages = load_data(filename='01_wage_data.csv')
    df_wages['date'] = pd.to_datetime(df_wages['date'])
else:
    # Online path: download the raw series, then pass them through the earnings cleaning helper.
    df_wages = bls.get_data(series_ids=list(series_id_dict), years=years)
    df_wages = bls.clean_data_weekly_nominal_earnings(df_wages, series_id_dict)
    if save_dfs:
        save_data(df=df_wages, filename='01_wage_data.csv')

df_wages.head()

📂 Loaded: /Users/annebode/Documents/selfevidence.github.io/data/output/processed_data/01_wage_data.csv (574 rows)


Unnamed: 0,series_id,year,period,value,data_type,description,percentile,race,date
0,LEU0252911200,2003,Q04,303.0,CPS Weekly Nominal Earnings,"(unadj)- Usual weekly earnings (first decile),...",10,All,2003-10-01
1,LEU0252911200,2003,Q03,300.0,CPS Weekly Nominal Earnings,"(unadj)- Usual weekly earnings (first decile),...",10,All,2003-07-01
2,LEU0252911200,2003,Q02,301.0,CPS Weekly Nominal Earnings,"(unadj)- Usual weekly earnings (first decile),...",10,All,2003-04-01
3,LEU0252911200,2003,Q01,300.0,CPS Weekly Nominal Earnings,"(unadj)- Usual weekly earnings (first decile),...",10,All,2003-01-01
4,LEU0252911200,2002,Q04,298.0,CPS Weekly Nominal Earnings,"(unadj)- Usual weekly earnings (first decile),...",10,All,2002-10-01


## 3. Analyze Data

### 3.a Median Wage vs. Inflation (1984 - Present Day)

In [4]:
# Work on an independent copy of the 50th-percentile (median) rows so df_wages stays untouched.
df_1_wage = df_wages.loc[df_wages['percentile'] == 50].copy(deep=True)

# Index the wage series: the earliest observation becomes 100.
first_date = df_1_wage['date'].min()
normalization_wage = df_1_wage.loc[df_1_wage['date'] == first_date, 'value'].item()
df_1_wage['wage_adj'] = df_1_wage['value'].div(normalization_wage).mul(100)

# Annualized growth (CAGR) from each row's quarter up to the most recent quarter.
last_date = df_1_wage['date'].max()
comparison_wage = df_1_wage.loc[df_1_wage['date'] == last_date, 'value'].item()
# Number of whole quarters between each row and the latest observation.
df_1_wage['quarterly_diff'] = (
    last_date.to_period('Q') - df_1_wage['date'].dt.to_period('Q')
).apply(lambda offset: offset.n)
# Exponent 4 / quarters converts the total growth ratio into a per-year rate.
df_1_wage['cagr_wage'] = (
    (comparison_wage / df_1_wage['value']).pow(4 / df_1_wage['quarterly_diff']).sub(1)
)

# Chronological order plus an ISO-formatted string date for plotting.
df_1_wage = df_1_wage.sort_values(by='date', ignore_index=True)
df_1_wage['date_str'] = df_1_wage['date'].dt.strftime('%Y-%m-%d')

df_1_wage.head()


Unnamed: 0,series_id,year,period,value,data_type,description,percentile,race,date,wage_adj,quarterly_diff,cagr_wage,date_str
0,LEU0252881500,1984,Q01,323.0,CPS Weekly Nominal Earnings,(unadj)- Usual weekly earnings (second quartil...,50,All,1984-01-01,100.0,165,0.032244,1984-01-01
1,LEU0252881500,1984,Q02,323.0,CPS Weekly Nominal Earnings,(unadj)- Usual weekly earnings (second quartil...,50,All,1984-04-01,100.0,164,0.032444,1984-04-01
2,LEU0252881500,1984,Q03,322.0,CPS Weekly Nominal Earnings,(unadj)- Usual weekly earnings (second quartil...,50,All,1984-07-01,99.690402,163,0.032725,1984-07-01
3,LEU0252881500,1984,Q04,335.0,CPS Weekly Nominal Earnings,(unadj)- Usual weekly earnings (second quartil...,50,All,1984-10-01,103.71517,162,0.031921,1984-10-01
4,LEU0252881500,1985,Q01,336.0,CPS Weekly Nominal Earnings,(unadj)- Usual weekly earnings (second quartil...,50,All,1985-01-01,104.024768,161,0.032046,1985-01-01


In [5]:
# Baseline: each series' CPI value on the earliest date present in df_cpi.
# The rename is chained (no inplace=True) so it never mutates a filtered slice.
first_date = df_cpi['date'].min()
df_normalization_inflation = (
    df_cpi.loc[df_cpi['date'] == first_date, ['series_id', 'value']]
    .rename(columns={'value': 'baseline_cpi'})
)

# Index every series so its baseline observation equals 100.
# validate='many_to_one' raises if a series has more than one baseline row,
# instead of silently duplicating rows in the merged frame.
# NOTE(review): a series with no observation on first_date gets a NaN baseline_cpi.
df_1_inflation = pd.merge(
    df_cpi,
    df_normalization_inflation,
    on='series_id',
    how='left',
    validate='many_to_one',
)
df_1_inflation['inflation_adj'] = df_1_inflation['value'] / df_1_inflation['baseline_cpi'] * 100

# Comparison point: each series' CPI value on the most recent date.
last_date = df_cpi['date'].max()
df_comparison_inflation = (
    df_cpi.loc[df_cpi['date'] == last_date, ['series_id', 'value']]
    .rename(columns={'value': 'comparison_cpi'})
)

# Annualized growth (CAGR) from each row's month up to the most recent month.
df_1_inflation = pd.merge(
    df_1_inflation,
    df_comparison_inflation,
    on='series_id',
    how='left',
    validate='many_to_one',
)
# Number of whole months between each row and the latest observation.
df_1_inflation['monthly_diff'] = (
    last_date.to_period('M') - df_1_inflation['date'].dt.to_period('M')
).apply(lambda offset: offset.n)
# Exponent 12 / months converts the total growth ratio into a per-year rate.
df_1_inflation['cagr_inflation'] = (
    (df_1_inflation['comparison_cpi'] / df_1_inflation['value'])
    ** (12 / df_1_inflation['monthly_diff'])
    - 1
)

# Chronological order plus an ISO-formatted string date for plotting.
df_1_inflation = df_1_inflation.sort_values(by='date').reset_index(drop=True)
df_1_inflation['date_str'] = df_1_inflation['date'].dt.strftime('%Y-%m-%d')

df_1_inflation.head()

Unnamed: 0,series_id,year,period,value,data_type,region,item,date,baseline_cpi,inflation_adj,comparison_cpi,monthly_diff,cagr_inflation,date_str
0,CUUR0000SAH,1984,M01,101.4,CPI (Unadjusted),National,Housing,1984-01-01,101.4,100.0,349.277,499,0.030189,1984-01-01
1,CUUR0000SAM,1984,M01,104.0,CPI (Unadjusted),National,Medical care,1984-01-01,104.0,100.0,583.875,499,0.042363,1984-01-01
2,CUUR0000SA0,1984,M01,101.9,CPI (Unadjusted),National,All items,1984-01-01,101.9,100.0,323.976,499,0.028206,1984-01-01
3,CUUR0000SAF,1984,M01,102.0,CPI (Unadjusted),National,Food and beverages,1984-01-01,102.0,100.0,338.366,499,0.029257,1984-01-01
4,CUUR0000SA0L1E,1984,M01,102.3,CPI (Unadjusted),National,Core CPI (excludes food & energy),1984-01-01,102.3,100.0,329.97,499,0.028563,1984-01-01


In [6]:
# One line per CPI category, plotting the index rebased to 100 at the first date.
fig = px.line(df_1_inflation, x='date_str', y='inflation_adj', color='item')

# NOTE(review): the subtitle and y-axis label say "Inflation-Adjusted", but the plotted
# column is CPI divided by its first-date value times 100 (a rebased index) -- confirm wording.
fig.update_layout(
    # Canvas: fixed height, width left to the container.
    template='plotly_white',
    width=None,
    height=700,
    autosize=True,
    margin={'l': 60, 'r': 40, 't': 100, 'b': 80},

    # Centered two-line title (headline plus grey subtitle).
    title=dict(
        text=(
            '📈 <b>The Rising Cost of Living</b><br>'
            '<span style="font-size:16px; color:#6b7280">'
            'Consumer Price Index by Category (Inflation-Adjusted)</span>'
        ),
        x=0.5,
        font=dict(size=20),
    ),

    # The x axis carries no title text.
    xaxis={
        'title': dict(text='', font=dict(size=16, color='#2E4057')),
    },

    yaxis={
        'title': dict(text='Inflation-Adjusted CPI', font=dict(size=16, color='#2E4057')),
    },

    # Horizontal legend centered at the top of the plot area, with a custom title.
    legend={
        'title': dict(text='Category', font=dict(size=14, color='#2E4057')),
        'orientation': 'h',
        'yanchor': 'top',
        'y': 1.0,  # Position below title
        'xanchor': 'center',
        'x': 0.5,
        'font': dict(size=12),
    },
)

# Thicker lines and a compact hover card; the empty <extra> tag removes the trace box.
fig.update_traces(
    line={'width': 3},
    hovertemplate=(
        '<b>%{fullData.name}</b><br>'
        'Date: %{x}<br>'
        'CPI: %{y:.0f}<br>'
        '<extra></extra>'
    ),
)

# Export through the project helper when figure saving is switched on.
if save_figs:
    save_plotly_figure(fig=fig, filename='01_cpi_chart_1984', for_blog=True)

go.FigureWidget(fig)

📝 Blog version saved: /Users/annebode/Documents/selfevidence.github.io/docs/assets/charts/01_cpi_chart_1984.html
📊 Plotly figure saved: html: /Users/annebode/Documents/selfevidence.github.io/data/output/figures/01_cpi_chart_1984.html
✨ To embed in Jekyll post, use:
<iframe src="{{ site.baseurl }}/assets/charts/01_cpi_chart_1984.html" width="100%" height="700" frameborder="0"></iframe>


FigureWidget({
    'data': [{'hovertemplate': '<b>%{fullData.name}</b><br>Date: %{x}<br>CPI: %{y:.0f}<br><extra></extra>',
              'legendgroup': 'Housing',
              'line': {'color': '#636efa', 'dash': 'solid', 'width': 3},
              'marker': {'symbol': 'circle'},
              'mode': 'lines',
              'name': 'Housing',
              'showlegend': True,
              'type': 'scattergl',
              'uid': 'bf2e7103-073e-4d79-9626-f22e559fa01f',
              'x': array(['1984-01-01', '1984-02-01', '1984-03-01', ..., '2025-06-01',
                          '2025-07-01', '2025-08-01'], shape=(500,), dtype=object),
              'xaxis': 'x',
              'y': {'bdata': ('AAAAAAAAWUC9GD3ljh9ZQNZVInQuLF' ... 'GzbHVAepc8GHB2dUClqHYvRod1QA=='),
                    'dtype': 'f8'},
              'yaxis': 'y'},
             {'hovertemplate': '<b>%{fullData.name}</b><br>Date: %{x}<br>CPI: %{y:.0f}<br><extra></extra>',
              'legendgroup': 'Medical care',
     

In [7]:
# Join inflation CAGR (All items and Housing only) with median-wage CAGR on matching dates.
# The inner join keeps only dates present in both frames.
df_plot = pd.merge(
    df_1_inflation[df_1_inflation['item'].isin(['All items', 'Housing'])][['date', 'date_str', 'item', 'cagr_inflation']],
    df_1_wage[['date', 'date_str', 'cagr_wage']],
    on=['date', 'date_str'],
    how='inner'
)

# Spread between annualized wage growth and annualized price growth from each date to the latest data.
df_plot['Wage Growth in Excess of Inflation (CAGR)'] = df_plot['cagr_wage'] - df_plot['cagr_inflation']

# Keep only start dates more than one year before now.
# DateOffset(years=1) is used instead of datetime.replace(year=year - 1) because the
# latter raises ValueError when the notebook is run on Feb 29; DateOffset rolls back to Feb 28.
# (presumably the last year is dropped because annualizing over very short spans is noisy -- confirm)
one_year_ago = pd.Timestamp.now() - pd.DateOffset(years=1)
df_plot = df_plot[df_plot['date'] < one_year_ago]

# y is passed as a one-element list, as in the original call, so the generated hover text is unchanged.
fig = px.line(df_plot, x='date_str', y=['Wage Growth in Excess of Inflation (CAGR)'], color='item')

fig.update_layout(

    # Canvas: fixed height, width left to the container.
    template='plotly_white',
    width=None,
    height=700,
    autosize=True,
    margin=dict(l=60, r=40, t=100, b=80),

    # Centered two-line title (headline plus grey subtitle).
    title={
        'text': "📈 <b>The Good Ol' Days Index</b><br><span style='font-size:16px; color:#6b7280'>Median Wage CAGR, in excess of inflation (from date, to now)</span>",
        'x': 0.5,
        'font': {'size': 20}
    },

    # The x axis carries no title text.
    xaxis=dict(
        title={'text': ''},
    ),

    # CAGR values are fractions; show them as percentages with one decimal.
    yaxis=dict(
        title={'text': 'CAGR', 'font': {'size': 16, 'color': '#2E4057'}},
        tickformat='.1%'
    ),

    # Legend - moved to top, without a title
    legend=dict(
        title={'text': ''},
        orientation='h',
        yanchor='top',
        y=1.0,  # Position below title
        xanchor='center',
        x=0.45,
        font={'size': 12}
    ),
)

# Export through the project helper when figure saving is switched on.
if save_figs:
    save_plotly_figure(
        fig=fig,
        filename='01_cagr_chart_median_wage_vs_all_housing',
        for_blog=True
    )

go.FigureWidget(fig)


📝 Blog version saved: /Users/annebode/Documents/selfevidence.github.io/docs/assets/charts/01_cagr_chart_median_wage_vs_all_housing.html
📊 Plotly figure saved: html: /Users/annebode/Documents/selfevidence.github.io/data/output/figures/01_cagr_chart_median_wage_vs_all_housing.html
✨ To embed in Jekyll post, use:
<iframe src="{{ site.baseurl }}/assets/charts/01_cagr_chart_median_wage_vs_all_housing.html" width="100%" height="700" frameborder="0"></iframe>


FigureWidget({
    'data': [{'hovertemplate': ('item=Housing<br>variable=Wage ' ... '}<br>value=%{y}<extra></extra>'),
              'legendgroup': 'Housing',
              'line': {'color': '#636efa', 'dash': 'solid'},
              'marker': {'symbol': 'circle'},
              'mode': 'lines',
              'name': 'Housing',
              'orientation': 'v',
              'showlegend': True,
              'type': 'scatter',
              'uid': 'e548378a-6ae3-49e0-bcf2-1ec2d5b34991',
              'x': array(['1984-01-01', '1984-04-01', '1984-07-01', '1984-10-01', '1985-01-01',
                          '1985-04-01', '1985-07-01', '1985-10-01', '1986-01-01', '1986-04-01',
                          '1986-07-01', '1986-10-01', '1987-01-01', '1987-04-01', '1987-07-01',
                          '1987-10-01', '1988-01-01', '1988-04-01', '1988-07-01', '1988-10-01',
                          '1989-01-01', '1989-04-01', '1989-07-01', '1989-10-01', '1990-01-01',
                          '1