# Get, plot CPI data from the [BLS API](https://www.bls.gov/developers/)

#### Load Python tools and environment variables

In [1]:
%load_ext lab_black

In [2]:
import os
import json
import warnings
import requests
import pandas as pd
import altair as alt
import altair_cnn as altcnn
from datawrapper import Datawrapper

In [3]:
dw_token = os.environ.get("dw_api")
dw = Datawrapper(access_token=dw_token)

In [4]:
alt.themes.register("cnn", altcnn.theme)
alt.themes.enable("cnn")

ThemeRegistry.enable('cnn')

In [5]:
today = pd.Timestamp("today").strftime("%Y_%m_%d")

In [6]:
pd.options.display.max_columns = 1000
pd.options.display.max_rows = 1000
warnings.simplefilter("ignore")

In [7]:
api_key = os.getenv("BLS_API")

---

## Historical data

#### The API will only deliver limited years, so [import historical](https://data.bls.gov/timeseries/CUUR0000SA0), too

In [8]:
historical_src = (
    pd.read_excel(
        "data/raw/SeriesReport-20240212182933_65134c.xlsx",
        skiprows=11,
        dtype={"Year": str},
    )
    .query('~Period.str.startswith("S")')
    .rename(columns={"Series ID": "variable"})
)

In [9]:
historical_src.columns = historical_src.columns.str.lower().str.replace(" ", "_")

In [10]:
historical_src.tail()

Unnamed: 0,variable,year,period,value
1081,CUUR0000SA0,2023,M08,307.026
1082,CUUR0000SA0,2023,M09,307.789
1083,CUUR0000SA0,2023,M10,307.671
1084,CUUR0000SA0,2023,M11,307.051
1085,CUUR0000SA0,2023,M12,306.746


---

## Current data

In [11]:
variable = "CUUR0000SA0"

#### Start and end dates for request

In [12]:
start = "2022"
end = "2024"

#### Request params

In [13]:
headers = {"Content-type": "application/json"}

data = json.dumps(
    {
        # This is the CPI variable
        "seriesid": [variable],
        "startyear": start,
        "endyear": end,
        "registrationkey": api_key,
    }
)

In [14]:
variable

'CUUR0000SA0'

#### Get data from API

In [15]:
p = requests.post(
    "https://api.bls.gov/publicAPI/v2/timeseries/data/", data=data, headers=headers
)
json_data = json.loads(p.text)

#### Into a dataframe

In [16]:
src = pd.DataFrame(json_data["Results"]["series"][0]["data"])[
    ["year", "period", "periodName", "value"]
].rename(columns={"periodName": "month"})

In [17]:
src["variable"] = "CUUR0000SA0"

#### Concatenate the historical data with the most recent data

In [18]:
src_df = (
    pd.concat([src, historical_src])
    .sort_values(["year", "period"])
    .reset_index(drop=True)
    .drop_duplicates(subset=["year", "period"])
)

#### Clean up dates

In [19]:
def process_bls_data(df):
    df["date"] = pd.to_datetime(df["year"] + " " + str(df["period"].replace("M", "")))
    df["year"] = df["date"].year
    df["month"] = df["date"].month_name()
    df["month_year"] = pd.to_datetime(df["date"]).to_period("M")
    df = df[["date", "value", "month", "month_year", "variable"]]
    df.rename({"variable": "series"}, inplace=True)
    return df

In [20]:
df = src_df.apply(process_bls_data, axis=1)

#### The result:

In [21]:
df.tail()

Unnamed: 0,date,value,month,month_year,series
1026,2023-10-01,307.671,October,2023-10,CUUR0000SA0
1028,2023-11-01,307.051,November,2023-11,CUUR0000SA0
1030,2023-12-01,306.746,December,2023-12,CUUR0000SA0
1032,2024-01-01,308.417,January,2024-01,CUUR0000SA0
1033,2024-02-01,310.326,February,2024-02,CUUR0000SA0


#### Calculate 12-month change

In [22]:
df["change"] = (df.value.astype(float).pct_change(12) * 100).round(1)

---

## Recessions

#### Snag date ranges and metadata from wikipedia

In [23]:
recessions_src = pd.read_csv(
    "https://bit.ly/recession-dates", parse_dates=["start", "end"]
)

In [24]:
recessions_src["label"] = "Recessions"

In [25]:
recessions_src.tail()

Unnamed: 0,start,end,label
29,1981-07-01,1982-11-01,Recessions
30,1990-07-01,1991-03-01,Recessions
31,2001-03-01,2001-11-01,Recessions
32,2007-12-01,2009-06-01,Recessions
33,2020-02-01,2020-04-01,Recessions


---

## Chart

#### Annotated line chart with trend (and recessions) since 2000

In [26]:
df["month_year"] = df["month_year"].astype(str)

In [27]:
chart_df = df.query('date > "2000-01-01"')
chart_df["text"] = "Historical mean: " + (str(chart_df["change"].mean().round(1))) + "%"

In [28]:
# Measure trend line
line = (
    alt.Chart(chart_df)
    .mark_line(size=1.5)
    .encode(
        x=alt.X("date:T", axis=alt.Axis(tickCount=7), title=""),
        y=alt.Y("change:Q", axis=alt.Axis(tickCount=7), title=""),
    )
)

# Average line
rule = (
    alt.Chart(chart_df).mark_rule(color="#a3a3a3", size=0.5).encode(y="mean(change):Q")
)

# Anno for average value on line
text = rule.mark_text(
    align="left", baseline="middle", dy=-10, dx=40, color="#262626"
).encode(text="text")

# Display recessions
rect = (
    alt.Chart(recessions_src.query('start > "2000-01-01"'))
    .mark_rect(color="#f9f9f9")
    .encode(
        x="start:T",
        x2="end:T",
    )
)

recessions_text = rect.mark_text(
    align="left", baseline="middle", color="#262626"
).encode(text="label")

# Display final value on line
label = (
    alt.Chart(chart_df.query("date == date.max()"))
    .mark_text(dy=-12, dx=10, color="#262626")
    .encode(
        x=alt.X("date", title=" ", axis=alt.Axis(tickCount=7, format="%Y")),
        y=alt.Y("change", axis=alt.Axis(tickCount=6), title=""),
        text=alt.Text("change:Q"),
    )
)

# Display line symbol at the final value
points = (
    alt.Chart(chart_df.query("date == date.max()"))
    .mark_circle(color="#7C4EA5", size=40)
    .encode(
        x=alt.X("date", title="", axis=alt.Axis(tickCount=7, format="%Y")),
        y=alt.Y("change"),
    )
)

# Render the chart
(rect + rule + line + text + label + points).properties(
    width=800,
    title="CPI for all urban consumers, 12-month percent change",
)

In [35]:
# Render the chart for mobile
(rect + line + label + points).properties(
    width=320,
    title="Tracking US annual inflation",
)

---

## Exports

#### CSV

In [30]:
df.to_csv(f"data/processed/cpi_12_month_change_latest.csv", index=False)
df.to_csv(f"data/processed/cpi_12_month_change_latest_{today}.csv", index=False)

#### Out to Datawrapper

In [31]:
# df_dw = df.query('date > "2000-01-01"').reset_index(drop=True)[["date", "change"]]
# dw.add_data(chart_id="exBq3", data=df_dw)

<Response [204]>

In [32]:
round(df_dw["change"].mean(), 2)

2.57