In [3]:
import pandas as pd
import altair as alt
# Load the dataset from a local pickle file. Later cells read its 'date',
# 'Category', 'value' and 'periodName' columns.
# NOTE(review): unpickling can execute arbitrary code, so only load a pickle
# that comes from a trusted source.
df = pd.read_pickle('backup_data.pkl')

### Hypothesis: Medical care inflation was more rapid from 2019 to 2021 than in any other two-year period between 2000 and 2021, possibly due to the COVID-19 pandemic.

In [56]:
# We only need to look at the medical care category.
df = df[df['Category'] == 'Medical care']

# Build a new dataframe of year-over-year changes: index by date, average the
# value within each calendar year, then take the percent change between
# consecutive yearly means.
# Only the numeric 'value' column is averaged. The frame also carries text
# columns (e.g. 'Category'), and pandas >= 2.0 raises a TypeError when asked
# to take the mean of non-numeric columns.
# NOTE(review): pandas >= 2.2 deprecates the 'Y' alias in favour of 'YE';
# 'Y' is kept here so the cell still runs on older pandas versions.
df_y = (
    df.set_index('date')[['value']]
      .resample('Y')
      .mean()
      .pct_change()
      .reset_index()
)
df_y['year'] = pd.DatetimeIndex(df_y['date']).year

# First chart - bar chart of yearly inflation rates.
# dropna() removes the first year, which has no prior year to compare against.
# sort='y' orders the bars by the size of the change rather than chronologically.
alt.Chart(df_y.dropna(), title='Yearly Price Changes for Medical Care from 2000 to 2021').mark_bar().encode(
    x = alt.X('year:O', title=None, sort='y'),
    y = alt.Y('value', title='Change', axis=alt.Axis(format='%'))
)

Yet another incorrect hypothesis. With 2021 showing the lowest inflation rate and 2020 somewhere in the middle, there is no way our hypothesis is true. 2001 and 2002 had the two highest inflation rates, so 2000-2002 seems to be the period with the most inflation. Still, our graph does not tell this story explicitly, so there is work to be done.

In [64]:
# Changing the pct_change calculation to take 2-year chunks.
# Average only the numeric 'value' column per calendar year. The frame also
# carries text columns (e.g. 'Category'), and pandas >= 2.0 raises a TypeError
# when asked to take the mean of non-numeric columns.
# NOTE(review): pandas >= 2.2 deprecates the 'Y' alias in favour of 'YE';
# 'Y' is kept here so the cell still runs on older pandas versions.
df_y = (
    df.set_index('date')[['value']]
      .resample('Y')
      .mean()
      .reset_index()
)
df_y['year'] = pd.DatetimeIndex(df_y['date']).year

# Change of each yearly mean relative to the yearly mean two rows (years) earlier.
df_y['pct_change'] = df_y['value'].pct_change(periods=2)

# Human-readable window for each row, e.g. '2019 to 2021'.
df_y['label'] = (df_y['year'] - 2).astype(str) + ' to ' + df_y['year'].astype(str)

# Second chart - bar chart of bi-yearly inflation rates.
# dropna() removes the first two years, which have no value two years back.
alt.Chart(df_y.dropna(), title='Bi-yearly Price Changes for Medical Care from 2000 to 2021').mark_bar().encode(
    x = alt.X('label:O', title=None, sort='y'),
    y = alt.Y('pct_change', title='Change', axis=alt.Axis(format='%'))
)

The data now aligns with our two-year window task, but I want to highlight the bar relating to our hypothesis.

In [65]:
# Third chart - the same bi-yearly bar chart, with the '2019 to 2021' bar
# drawn in red and every other bar in light grey.
highlight_target_window = alt.condition(
    alt.datum.label == '2019 to 2021',
    alt.value('red'),
    alt.value('lightgrey'),
)

two_year_bars = alt.Chart(
    pd.DataFrame(df_y).dropna(),
    title='Bi-yearly Price Changes for Medical Care from 2000 to 2021',
).mark_bar()

two_year_bars.encode(
    x=alt.X('label:O', title=None, sort='y'),
    y=alt.Y('pct_change', title='Change', axis=alt.Axis(format='%')),
    color=highlight_target_window,
)

In all of the charts above, each year is represented by the mean of its values. We could instead check every possible 24-month window to see whether any of them had a greater change.

In [138]:
# Order the rows by date and compare each value with the one 24 rows earlier.
# NOTE(review): this is a 24-month change only if there is exactly one row per
# month with no gaps - TODO confirm against the source data.
df_m = (
    df.set_index('date')
      .sort_values('date')
      .assign(pct_change=lambda frame: frame['value'].pct_change(periods=24))
      .reset_index()
)
df_m['year'] = pd.DatetimeIndex(df_m['date']).year

# Label each row with its period name and the two-year span ending in its year.
span_start = (df_m['year'] - 2).astype(str)
span_end = df_m['year'].astype(str)
df_m['label'] = df_m['periodName'].astype(str) + ' ' + span_start + ' to ' + span_end

# Fourth chart - bar chart of rolling 24-month inflation rates.
rolling_bars = alt.Chart(
    pd.DataFrame(df_m).dropna(),
    title='Rolling 24-month Price Changes for Medical Care since 2002',
).mark_bar()

rolling_bars.encode(
    x=alt.X('label:O', title=None, sort='y'),
    y=alt.Y('pct_change', title='Change', axis=alt.Axis(format='%')),
)

Having this many bars is just silly. Let's try a different approach.

In [142]:
# Going to use months to help space out our index, and add hover labels.
df_m['month'] = pd.DatetimeIndex(df_m['date']).month

# Decimal-year x position. Months are numbered 1-12, so subtract 1 before
# dividing: January lands exactly on its year's tick and December stays inside
# its own year (year + month / 12 would place December on the next year's tick).
df_m['spacing'] = df_m['year'] + (df_m['month'] - 1) / 12

# Hover text: the window label followed by the change as a percentage with
# two decimals.
df_m['mouseover'] = df_m['label'] + ': ' + (df_m['pct_change'] * 100).round(2).astype(str) + '%'

# Fifth chart - scatter of 24-month rolling changes.
alt.Chart(df_m.dropna(), title='Rolling 24-month Price Changes for Medical Care since 2002').mark_point().encode(
    y = alt.Y('pct_change', title='Change', axis=alt.Axis(format='%')),
    x = alt.X('spacing', title='Year', axis = alt.Axis(format='0'), scale=alt.Scale(domain=[2002, 2022])),
    tooltip='mouseover'
).interactive()

Let's adopt the highlight trick from our previous iterations.

In [150]:
# Sixth chart - the rolling 24-month scatter again, with every point whose
# 'year' is 2021 drawn in red and all other points in light grey.
point_color = alt.condition(
    alt.datum.year == 2021,
    alt.value('red'),
    alt.value('lightgrey'),
)

rolling_scatter = alt.Chart(
    pd.DataFrame(df_m).dropna(),
    title='Rolling 24-Month Price Changes for Medical Care since 2002 with 2021 in Red',
).mark_point().encode(
    x=alt.X('spacing', title='Year', axis=alt.Axis(format='0'), scale=alt.Scale(domain=[2002, 2022])),
    y=alt.Y('pct_change', title='Change', axis=alt.Axis(format='%')),
    tooltip='mouseover',
    color=point_color,
)

rolling_scatter.interactive().properties(height=300, width=400)

### Conclusion: The hypothesis is false.

Medical expense inflation was surprisingly low during 2019-2021, despite the ongoing COVID-19 pandemic. Paradoxically, this was one of the lowest periods of medical expense inflation since 2000. Medical Care frequently topped 8% inflation for most of the first decade of the millennium. Later years saw annualized inflation hovering in the range of ~3% to ~8%.