# BLS inflation data for select commodities

In [1]:
import io
import os

import altair as alt
import altair_stiles as altstiles
import numpy as np
import pandas as pd
import requests

# Register the Stiles chart theme under a name, then make it the active theme.
# Enabling stays the last expression so the notebook shows its confirmation repr.
theme_name = "stiles"
alt.themes.register(theme_name, altstiles.theme)
alt.themes.enable(theme_name)

ThemeRegistry.enable('stiles')

In [2]:
# Let pandas render wide/long frames and full cell text while inspecting data.
for option_name, option_value in {
    "display.max_columns": 1000,
    "display.max_rows": 1000,
    "display.max_colwidth": None,
}.items():
    pd.set_option(option_name, option_value)

In [3]:
# Year-month stamp for the current local date, formatted as "YYYY-MM".
month = f"{pd.Timestamp('today'):%Y-%m}"

In [4]:
# Human-readable run date: abbreviated month, zero-padded day, four-digit year.
now = f"{pd.Timestamp.now():%b. %d, %Y}"

In [5]:
# Display the formatted run date as a quick sanity check.
now

'Apr. 10, 2024'

---

## Get Codes

#### Read BLS' inflation and price codes

In [6]:
# Headers sent with every BLS download in this notebook.
# NOTE(review): the browser-style User-Agent is presumably there so the server
# does not reject the default client identifier — confirm before removing.
headers = {
    "Content-type": "application/json",
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
}

# Download the average-price series listing (one row per series id).
# The timeout stops a stalled connection from hanging the notebook, and
# raise_for_status() stops an HTTP error page from being parsed as data.
codes_data = requests.get(
    "https://download.bls.gov/pub/time.series/ap/ap.series",
    headers=headers,
    timeout=60,
)
codes_data.raise_for_status()

In [7]:
# Wrap the downloaded text in an in-memory buffer so pandas can read it like a file.
codes_data_text = io.StringIO(codes_data.text)

# The series listing is tab-delimited.
codes_src = pd.read_csv(filepath_or_buffer=codes_data_text, sep="\t")

#### Just the codes for U.S. city averages (area code `0000`) with monthly values (series whose `begin_period` is `M01`)

In [8]:
# Keep only series with area code "0000" whose begin_period is "M01".
# Header names are normalised *before* filtering so the column lookups do not
# depend on whether the raw file pads its header cells with whitespace.
# NOTE(review): begin_period == "M01" keeps series whose first observation is
# in period M01; a series starting in another month would be excluded — confirm
# that is the intent.
codes_df = (
    codes_src.rename(columns=str.strip)
    .loc[
        lambda frame: (frame["area_code"] == "0000")
        & (frame["begin_period"] == "M01")
    ]
    .copy()
)

In [9]:
# Trim stray whitespace from the header names and from the series ids so they
# can be matched exactly later on.
codes_df = codes_df.rename(columns=str.strip).assign(
    series_id=lambda frame: frame["series_id"].str.strip()
)

In [10]:
# Spot-check: show the series whose title mentions "Flour".
codes_df[codes_df["series_title"].str.contains("Flour")]

Unnamed: 0,series_id,area_code,item_code,series_title,footnote_codes,begin_year,begin_period,end_year,end_period
0,APU0000701111,0,701111,"Flour, white, all purpose, per lb. (453.6 gm) in U.S. city average, average price, not seasonally adjusted",,1980,M01,2024,M03


---

## Get data about all prices

#### Read BLS inflation and price measures

In [11]:
# Series ids to track, keyed by a short human-readable label.
series_by_commodity = {
    "gasoline": "APU00007471A",
    "bread": "APU0000702111",
    "chicken": "APU0000706111",
    "bacon": "APU0000704111",
    "eggs": "APU0000708111",
    "coffee": "APU0000717311",
    "steak": "APU0000703613",
    "chicken legs": "APU0000706212",
    "potato chips": "APU0000718311",
    "ground beef": "APU0000703113",
    "sugar": "APU0000715211",
    "cookies": "APU0000702421",
    "spaghetti and macaroni": "APU0000701322",
    "rice": "APU0000701312",
    "ice cream": "APU0000710411",
    "tomatoes": "APU0000712311",
}

# Flat list of ids (insertion order preserved) used to filter the price table.
our_codes = list(series_by_commodity.values())

---

In [12]:
# Download the current average-price observations, reusing the request headers
# defined earlier in the notebook. The timeout stops a stalled connection from
# hanging the run, and raise_for_status() stops an HTTP error page from being
# parsed as data.
prices_data = requests.get(
    "https://download.bls.gov/pub/time.series/ap/ap.data.0.Current",
    headers=headers,
    timeout=60,
)
prices_data.raise_for_status()

# Wrap the response text in an in-memory buffer so pandas can read it like a file.
prices_data_text = io.StringIO(prices_data.text)

In [13]:
# Parse the tab-delimited price observations.
# The buffer is created in the previous cell, and reading leaves its cursor at
# the end; rewinding first lets this cell be re-run on its own without hitting
# an empty stream.
prices_data_text.seek(0)
prices_src = pd.read_csv(prices_data_text, sep="\t", low_memory=False)

In [14]:
# Trim whitespace padding from the header names.
prices_src = prices_src.rename(columns=str.strip)

In [15]:
# Preview the first rows of the raw price table.
prices_src.head()

Unnamed: 0,series_id,year,period,value,footnote_codes
0,APU0000701111,1995,M01,0.238,
1,APU0000701111,1995,M02,0.242,
2,APU0000701111,1995,M03,0.242,
3,APU0000701111,1995,M04,0.236,
4,APU0000701111,1995,M05,0.244,


In [16]:
# Footnote codes are not used downstream. errors="ignore" keeps the cell
# re-runnable: without it a second run raises KeyError because the column is
# already gone.
prices_src = prices_src.drop(columns=["footnote_codes"], errors="ignore")

In [17]:
# Trim whitespace from every field. `year` and `value` are cast to text first
# so the string accessor works on them.
prices_src = prices_src.assign(
    series_id=lambda frame: frame["series_id"].str.strip(),
    year=lambda frame: frame["year"].astype(str).str.strip(),
    period=lambda frame: frame["period"].str.strip(),
    value=lambda frame: frame["value"].astype(str).str.strip(),
)

In [18]:
# A lone "-" is treated as a missing observation: blank it out, then convert
# the column to floats.
is_placeholder = prices_src["value"].eq("-")
prices_src["value"] = prices_src["value"].mask(is_placeholder).astype(float)

In [19]:
# Keep only the observations for the tracked series; copy so later column
# assignments do not touch the source table.
is_tracked_series = prices_src["series_id"].isin(our_codes)
prices_df = prices_src.loc[is_tracked_series].copy()

---

## Merge

#### Combine prices and code dataframes

In [20]:
# Attach each series' title to its price rows. This is an inner join, so a
# price row whose series_id is absent from codes_df is dropped.
series_titles = codes_df[["series_id", "series_title"]]
df = prices_df.merge(series_titles, how="inner", on="series_id")

In [21]:
# Drop the first character of the period code (e.g. "M03" -> "03").
df["month"] = df["period"].str.slice(start=1)

In [22]:
# Build a first-of-month date from the year and month text columns.
first_of_month_text = df["year"].astype(str).str.cat(df["month"], sep="-") + "-01"
df["date"] = pd.to_datetime(first_of_month_text)

In [23]:
# Newest observations first.
df = df.sort_values(by=["date"], ascending=[False])

In [24]:
# Commodity key: the text before the first ", " in the title, with spaces
# turned into underscores (it is used in output file names later).
df["commodity"] = (
    df["series_title"].str.split(", ").str[0].str.replace(" ", "_", regex=False)
)

In [25]:
# Shorten the titles by removing the boilerplate suffix and the metric weight.
# Removing the suffix leaves the space that preceded it, so the result is
# stripped to avoid trailing whitespace in labels and exported files.
df["series_title"] = (
    df["series_title"]
    .str.replace(
        "in U.S. city average, average price, not seasonally adjusted", "", regex=False
    )
    .str.replace(" (453.6 gm)", "", regex=False)
    .str.strip()
)

In [26]:
# Store dates as plain text; later cells compare them against ISO-formatted
# date strings and export them as-is.
df = df.assign(date=df["date"].astype(str))

---

#### Individual commodities

In [27]:
# Write one CSV per commodity. The output directory is created first so the
# notebook also runs on a fresh checkout where it does not exist yet.
os.makedirs("data/processed/commodities", exist_ok=True)

for commodity in df["commodity"].unique():
    commodity_rows = df.loc[df["commodity"] == commodity]
    commodity_rows.to_csv(
        f"data/processed/commodities/{commodity.lower()}_latest.csv", index=False
    )

In [28]:
# Write one JSON file (a list of row records) per commodity.
for commodity in df["commodity"].unique():
    commodity_rows = df.loc[df["commodity"] == commodity]
    commodity_rows.to_json(
        f"data/processed/commodities/{commodity.lower()}_latest.json",
        orient="records",
        indent=4,
    )

In [29]:
# List the commodity keys, one per line, in order of first appearance.
for commodity_name in pd.unique(df["commodity"]):
    print(commodity_name)

Gasoline
Potato_chips
Bacon
Ice_cream
Coffee
Spaghetti_and_macaroni
Steak
Sugar
Ground_beef
Chicken_legs
Rice
Bread
Cookies
Eggs
Tomatoes
Chicken


---

#### Calculate year-over-year change by commodity and month

In [30]:
# Order each commodity's observations chronologically (oldest first).
df = df.sort_values(by=["commodity", "date"], ascending=True)

In [31]:
# Year-over-year change: compare each price with the same commodity's price in
# the same period one calendar year earlier.
# Matching on (commodity, year - 1, period) rather than "12 rows back" keeps the
# comparison correct when a series has missing months. A missing prior-year
# price yields NaN instead of being compared against a forward-filled value.
# Requires one row per (commodity, year, period); reindex raises otherwise.
_year_number = df["year"].astype(int)
_price_by_key = pd.Series(
    df["value"].to_numpy(),
    index=pd.MultiIndex.from_arrays([df["commodity"], _year_number, df["period"]]),
)
_prior_year_price = _price_by_key.reindex(
    pd.MultiIndex.from_arrays([df["commodity"], _year_number - 1, df["period"]])
).to_numpy()
df["annual_change"] = df["value"] / pd.Series(_prior_year_price, index=df.index) - 1

In [32]:
# Observations from January 2018 onward. `date` holds ISO-formatted text, so a
# string comparison orders correctly.
df_recent = df[df["date"] >= "2018-01-01"]

In [33]:
# Preview the first 12 rows of the recent window.
df_recent.head(12)

Unnamed: 0,series_id,year,period,value,series_title,month,date,commodity,annual_change
2357,APU0000704111,2018,M01,5.654,"Bacon, sliced, per lb.",1,2018-01-01,Bacon,0.091506
2358,APU0000704111,2018,M02,5.533,"Bacon, sliced, per lb.",2,2018-02-01,Bacon,0.037308
2359,APU0000704111,2018,M03,5.527,"Bacon, sliced, per lb.",3,2018-03-01,Bacon,-0.037946
2360,APU0000704111,2018,M04,5.421,"Bacon, sliced, per lb.",4,2018-04-01,Bacon,-0.061299
2361,APU0000704111,2018,M05,5.452,"Bacon, sliced, per lb.",5,2018-05-01,Bacon,-0.043341
2362,APU0000704111,2018,M06,5.247,"Bacon, sliced, per lb.",6,2018-06-01,Bacon,-0.073786
2363,APU0000704111,2018,M07,5.418,"Bacon, sliced, per lb.",7,2018-07-01,Bacon,-0.068912
2364,APU0000704111,2018,M08,5.577,"Bacon, sliced, per lb.",8,2018-08-01,Bacon,-0.106393
2365,APU0000704111,2018,M09,5.501,"Bacon, sliced, per lb.",9,2018-09-01,Bacon,-0.135878
2366,APU0000704111,2018,M10,5.37,"Bacon, sliced, per lb.",10,2018-10-01,Bacon,-0.114738


In [34]:
# Small-multiple bar charts of year-over-year price change, one panel per
# commodity. Eggs are left out of this chart.
# NOTE(review): presumably eggs are excluded because their swings would dwarf
# the other panels — confirm.
chart_rows = df_recent[df_recent["commodity"] != "Eggs"]

# Bars are coloured by the sign of the change.
bar_color = alt.condition(
    alt.datum.annual_change > 0,
    alt.value("#f18851"),  # The positive color
    alt.value("#53a796"),  # The negative color
)

(
    alt.Chart(chart_rows)
    .mark_bar()
    .encode(
        x=alt.X("date:T", title="", axis=alt.Axis(format="%Y", tickCount=4)),
        y=alt.Y("annual_change:Q", title="", axis=alt.Axis(format="%", tickCount=4)),
        color=bar_color,
        facet=alt.Facet("commodity", columns=4, title=""),
    )
    .properties(width=200, height=100, title="Year over year change in average price")
)

---

#### Calculate change since Covid

In [35]:
# Observations from February 2020 onward. `date` holds ISO-formatted text, so a
# string comparison orders correctly.
df_covid = df[df["date"] >= "2020-02-01"]

In [None]:
# Number of observations per commodity in the window.
df_covid["commodity"].value_counts()

In [59]:
# Preview only the first rows: with display.max_rows raised to 1000, showing
# the bare frame would dump every observation into the notebook output.
df_covid.head(10)

Unnamed: 0,series_id,year,period,value,series_title,month,date,commodity,annual_change
2382,APU0000704111,2020,M02,5.497,"Bacon, sliced, per lb.",2,2020-02-01,Bacon,-0.000909
2383,APU0000704111,2020,M03,5.257,"Bacon, sliced, per lb.",3,2020-03-01,Bacon,-0.062589
2384,APU0000704111,2020,M04,5.346,"Bacon, sliced, per lb.",4,2020-04-01,Bacon,-0.037104
2385,APU0000704111,2020,M05,5.348,"Bacon, sliced, per lb.",5,2020-05-01,Bacon,-0.079835
2386,APU0000704111,2020,M06,5.772,"Bacon, sliced, per lb.",6,2020-06-01,Bacon,-0.018033
2387,APU0000704111,2020,M07,5.776,"Bacon, sliced, per lb.",7,2020-07-01,Bacon,0.013156
2388,APU0000704111,2020,M08,5.559,"Bacon, sliced, per lb.",8,2020-08-01,Bacon,-0.003049
2389,APU0000704111,2020,M09,5.619,"Bacon, sliced, per lb.",9,2020-09-01,Bacon,0.008616
2390,APU0000704111,2020,M10,5.722,"Bacon, sliced, per lb.",10,2020-10-01,Bacon,0.012206
2391,APU0000704111,2020,M11,5.754,"Bacon, sliced, per lb.",11,2020-11-01,Bacon,0.045232


In [37]:
# Endpoints of the comparison: period M03 in 2020 and in 2024. Years are text
# because the `year` column was converted to strings earlier.
years, periods = ["2020", "2024"], ["M03"]

In [38]:
# One row per series title, with the selected-period price for each of the two
# years side by side.
in_comparison_window = df_covid["period"].isin(periods) & df_covid["year"].isin(years)
covid_change = (
    df_covid.loc[in_comparison_window]
    .pivot_table(index=["series_title"], columns="year", values="value")
    .reset_index()
    .rename(columns={"2020": "mar_2020", "2024": "mar_2024"})
)

In [39]:
# Round both price columns to two decimal places.
price_columns = ["mar_2020", "mar_2024"]
covid_change[price_columns] = covid_change[price_columns].round(2)

In [40]:
# Percent change from the 2020 price to the 2024 price, to one decimal place.
# NOTE(review): the inputs were already rounded to two decimals in the previous
# cell, so this percentage is computed from rounded prices — confirm that is
# intended.
covid_change["pct_change_since_covid"] = (
    covid_change["mar_2024"]
    .sub(covid_change["mar_2020"])
    .div(covid_change["mar_2020"])
    .mul(100)
    .round(1)
)

In [None]:
# Short label: the part of the title before the first comma.
covid_change["series_title_short"] = covid_change["series_title"].str.split(",").str[0]

In [54]:
# Display the comparison table (one row per series).
covid_change

year,series_title,mar_2020,mar_2024,pct_change_since_covid,series_title_short
0,"Bacon, sliced, per lb.",5.26,6.61,25.7,Bacon
1,"Bread, white, pan, per lb.",1.37,2.0,46.0,Bread
2,"Chicken legs, bone-in, per lb.",1.54,1.82,18.2,Chicken legs
3,"Chicken, fresh, whole, per lb.",1.4,1.99,42.1,Chicken
4,"Coffee, 100%, ground roast, all sizes, per lb.",4.33,5.96,37.6,Coffee
5,"Cookies, chocolate chip, per lb.",3.63,5.14,41.6,Cookies
6,"Eggs, grade A, large, per doz.",1.52,2.99,96.7,Eggs
7,"Gasoline, all types, per gallon/3.785 liters",2.33,3.71,59.2,Gasoline
8,"Ground beef, lean and extra lean, per lb.",5.64,6.73,19.3,Ground beef
9,"Ice cream, prepackaged, bulk, regular, per 1/2 gal. (1.9 lit)",4.92,5.73,16.5,Ice cream


In [55]:
# Short labels of the items to single out in the comparison export.
staples_list = [
    "Bacon",
    "Chicken",
    "Eggs",
    "Rice",
    "Bread",
    "Cookies",
]

In [56]:
# Rows of the comparison table whose short label is in the staples list.
covid_change[covid_change["series_title_short"].isin(staples_list)]

year,series_title,mar_2020,mar_2024,pct_change_since_covid,series_title_short
0,"Bacon, sliced, per lb.",5.26,6.61,25.7,Bacon
1,"Bread, white, pan, per lb.",1.37,2.0,46.0,Bread
3,"Chicken, fresh, whole, per lb.",1.4,1.99,42.1,Chicken
5,"Cookies, chocolate chip, per lb.",3.63,5.14,41.6,Cookies
6,"Eggs, grade A, large, per doz.",1.52,2.99,96.7,Eggs
11,"Rice, white, long grain, uncooked, per lb.",0.73,1.01,38.4,Rice


In [57]:
# Reshape the staples so each item is a column and each price column is a row.
staple_rows = covid_change[covid_change["series_title_short"].isin(staples_list)]
staple_rows.pivot_table(
    values=["mar_2020", "mar_2024"], columns="series_title_short"
).reset_index()

series_title_short,year,Bacon,Bread,Chicken,Cookies,Eggs,Rice
0,mar_2020,5.26,1.37,1.4,3.63,1.52,0.73
1,mar_2024,6.61,2.0,1.99,5.14,2.99,1.01


In [58]:
# Export the reshaped staples table (items as columns, price columns as rows).
staple_rows = covid_change[covid_change["series_title_short"].isin(staples_list)]
staple_prices_wide = staple_rows.pivot_table(
    values=["mar_2020", "mar_2024"], columns="series_title_short"
).reset_index()
staple_prices_wide.to_csv("data/processed/price_change_mar2020_mar2024.csv", index=False)

---

## Exports

#### All commodities

In [44]:
# Write the combined table for every tracked commodity as CSV, without the index.
all_commodities_csv = "data/processed/commodities/latest.csv"
df.to_csv(all_commodities_csv, index=False)

In [45]:
# Write the combined table for every tracked commodity as a JSON list of row
# records, indented for readability.
all_commodities_json = "data/processed/commodities/latest.json"
df.to_json(all_commodities_json, orient="records", indent=4)