# BLS price data

In [1]:
# Load the `lab_black` IPython extension for this kernel session.
# NOTE(review): presumably this auto-formats each executed cell with Black — confirm
# against the extension's documentation.
%load_ext lab_black

In [2]:
import altair as alt
import altair_stiles as altstiles
import pandas as pd
import numpy as np

In [3]:
# Register the theme object exposed by altair_stiles under the name "stiles" and
# make it the active Altair theme for every chart in this notebook.
alt.themes.register("stiles", altstiles.theme)
alt.themes.enable("stiles")

# Loosen pandas' display limits so wide/long frames and long strings are shown
# rather than truncated.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)

In [4]:
# Current date formatted as MM/DD/YYYY, captured once when this cell runs.
today = pd.Timestamp.today().strftime("%m/%d/%Y")

---

## Get Codes

### What's available from BLS?

In [7]:
# Series listing for the BLS "ap" time-series dataset; it is read below as a
# tab-separated file.
series_url = "https://download.bls.gov/pub/time.series/ap/ap.series"

In [8]:
# Handed to pandas as `storage_options` when the series file is fetched, i.e. sent
# as HTTP request headers.
# NOTE(review): presumably BLS rejects requests that lack an identifying
# User-Agent — confirm.
headers = {"User-Agent": "matt.stiles@warnermedia.com"}

In [9]:
# Download the tab-delimited series listing, sending `headers` with the request.
series_src = pd.read_table(series_url, storage_options=headers)

In [10]:
# Trim surrounding whitespace from every column label (the raw header appears to
# be padded) so columns can be referenced by their clean names.
series_src.columns = series_src.columns.map(str.strip)

In [11]:
# Keep only the series for area code "0000" whose data runs through 2023. The
# copy gives the cleanup cells an independent frame rather than a view tied to
# series_src.
main_series = series_src[
    series_src["area_code"].eq("0000") & series_src["end_year"].eq(2023)
].copy()

In [12]:
# Strip surrounding whitespace from the series identifiers.
main_series = main_series.assign(series_id=main_series["series_id"].str.strip())

In [13]:
# Shorten the titles by removing the boilerplate measure suffix. The match is a
# literal substring, not a regular expression.
main_series = main_series.assign(
    series_title=main_series["series_title"].str.replace(
        " in U.S. city average, average price, not seasonally adjusted",
        "",
        regex=False,
    )
)

In [14]:
# Record the measure description in its own constant-valued column.
main_series = main_series.assign(
    measure="U.S. city average, average price, not seasonally adjusted"
)

In [15]:
# Discard the columns that the rest of the notebook does not use.
main_series = main_series.drop(
    columns=["footnote_codes", "area_code", "item_code", "begin_period", "end_period"]
)

In [16]:
# Preview the first rows of the cleaned series listing.
main_series.head()

Unnamed: 0,series_id,series_title,begin_year,end_year,measure
0,APU0000701111,"Flour, white, all purpose, per lb. (453.6 gm)",1980,2023,"U.S. city average, average price, not seasonally adjusted"
2,APU0000701312,"Rice, white, long grain, uncooked, per lb. (453.6 gm)",1980,2023,"U.S. city average, average price, not seasonally adjusted"
4,APU0000701322,"Spaghetti and macaroni, per lb. (453.6 gm)",1984,2023,"U.S. city average, average price, not seasonally adjusted"
5,APU0000702111,"Bread, white, pan, per lb. (453.6 gm)",1980,2023,"U.S. city average, average price, not seasonally adjusted"
8,APU0000702212,"Bread, whole wheat, pan, per lb. (453.6 gm)",1980,2023,"U.S. city average, average price, not seasonally adjusted"


In [18]:
# Look up the series whose title contains "Egg" (case-sensitive match).
main_series[main_series["series_title"].str.contains("Egg")]

Unnamed: 0,series_id,series_title,begin_year,end_year,measure
55,APU0000708111,"Eggs, grade A, large, per doz.",1980,2023,"U.S. city average, average price, not seasonally adjusted"


---

In [None]:
# JSON inflation dataset; later cells rely on its `commodity`, `period`, `year`
# and `value` columns.
inflation_url = "https://ix.cnn.io/data/inflation/latest.json"
src_df = pd.read_json(inflation_url)

In [None]:
# Distinct commodity names, in order of first appearance.
items = list(src_df["commodity"].drop_duplicates())
items

In [None]:
# Rows with period code "M01" (treated as January by this notebook); copied so
# the result is independent of src_df.
january = src_df.loc[src_df["period"] == "M01"].copy()

In [None]:
# Narrow the January rows to the two years being compared.
recent_january = january[january["year"].isin([2020, 2023])]

In [None]:
# One line per commodity connecting its 2020 and 2023 January values.
# To draw small multiples instead, add
# `facet=alt.Facet("commodity", columns=4)` to the encoding.
(
    alt.Chart(recent_january)
    .mark_line()
    .encode(
        x=alt.X("year:O"),
        y=alt.Y("value", axis=alt.Axis(format="$")),
        color=alt.Color("commodity", scale=alt.Scale(scheme="category20")),
    )
    .properties(width=200, height=600)
)

In [None]:
# Reshape to one row per commodity with one column per year. pivot_table's
# default aggregation (mean) applies if a commodity/year pair occurs more than
# once. reset_index turns `commodity` back into an ordinary column.
recent_january_pivot = pd.pivot_table(
    recent_january, index="commodity", columns="year", values="value"
).reset_index()

In [None]:
# Convert every column label to a string so the year columns can be addressed
# as "2020" and "2023".
recent_january_pivot = recent_january_pivot.rename(columns=str)

In [None]:
# Percentage change from the 2020 value to the 2023 value, rounded to two
# decimals: (2023 - 2020) / 2020 * 100.
recent_january_pivot = recent_january_pivot.assign(
    pct_change=lambda prices: prices["2023"]
    .sub(prices["2020"])
    .div(prices["2020"])
    .mul(100)
    .round(2)
)

In [None]:
# Show commodities ordered from largest to smallest percentage change. This is
# display only: the sorted result is not assigned, so the frame keeps its order.
recent_january_pivot.sort_values("pct_change", ascending=False)

In [None]:
# Replace underscores in the commodity names with spaces (literal replacement).
recent_january_pivot = recent_january_pivot.assign(
    commodity=recent_january_pivot["commodity"].str.replace("_", " ", regex=False)
)

In [None]:
# Write the comparison table to CSV without the row index. The path is relative,
# so data/processed/ must already exist under the working directory.
recent_january_pivot.to_csv("data/processed/recent_january_pivot.csv", index=False)