# Get, plot unemployment data from the [BLS API](https://www.bls.gov/developers/)

#### Load python tools

In [1]:
%load_ext lab_black

In [2]:
import requests
import json
import pandas as pd
import glob
from pathlib import Path
import numpy as np
import os

In [3]:
import altair as alt
import altair_stiles as altstiles

alt.themes.register("stiles", altstiles.theme)
alt.themes.enable("stiles")

ThemeRegistry.enable('stiles')

In [4]:
pd.options.display.max_columns = 1000
pd.options.display.max_rows = 1000

In [5]:
api_key = os.getenv("BLS_API")

---

## Current data via API

#### Which BLS series are we retrieving?

In [6]:
start = "2015"
end = "2023"

#### Request params

In [7]:
headers = {"Content-type": "application/json"}

data = json.dumps(
    {
        "seriesid": ["CUUR0000SA0"],
        "startyear": start,
        "endyear": end,
        "registrationkey": api_key,
    }
)

#### Get data from API

In [8]:
p = requests.post(
    "https://api.bls.gov/publicAPI/v2/timeseries/data/", data=data, headers=headers
)
json_data = json.loads(p.text)

#### Put the results in a list of dataframes and assign the series code as a reference column

In [9]:
dataframes = []

for series in json_data["Results"]["series"]:
    dataframes.append(pd.DataFrame(series["data"]).assign(variable=series["seriesID"]))

#### One long dataframe with all the series

In [10]:
src_df = pd.concat(dataframes).drop(["footnotes"], axis=1)

In [14]:
src_df.head()

Unnamed: 0,date,value,month,month_year,series
0,2023-11-01,307.051,November,2023-11,CUUR0000SA0
1,2023-10-01,307.671,October,2023-10,CUUR0000SA0
2,2023-09-01,307.789,September,2023-09,CUUR0000SA0
3,2023-08-01,307.026,August,2023-08,CUUR0000SA0
4,2023-07-01,305.691,July,2023-07,CUUR0000SA0


----

## Clean up API data

#### Process dates, slim dataframe and rename columns

In [12]:
def process_bls_data(df):
    df["date"] = pd.to_datetime(df["year"] + " " + df["periodName"])
    df["year"] = df["date"].year
    df["month"] = df["date"].month_name()
    df["month_year"] = pd.to_datetime(df["date"]).to_period("M")
    df = df[["date", "value", "month", "month_year", "variable"]]
    df.rename({"variable": "series"}, inplace=True)
    return df


src_df = src_df.apply(process_bls_data, axis=1)

#### Map the series codes to their names in the dataframe

In [13]:
src_df["series_name"] = src_df["series"].map(series_lookup).str.lower()

NameError: name 'series_lookup' is not defined

#### Set unit definition for each series

In [None]:
def series_units(df):
    if df["series_name"] == "unemployment rate":
        return "rate"
    else:
        return "number in thousands"


src_df["unit"] = src_df.apply(series_units, axis=1)

#### Make a copy of the dataframe

In [None]:
df = src_df.copy()

---

## Isolate just unemployment

In [None]:
unemployment_rate = df[df["series_name"] == "unemployment rate"][["date", "value"]]

In [None]:
unemployment_rate["value"] = unemployment_rate["value"].astype(float)

#### Limit historical dataframe to what the API doesn't provide

In [None]:
historical_unemployment_rate = historical_df[
    historical_df["date"] < unemployment_rate.date.min()
][["date", "value"]]

#### Concatenate the two dataframes

In [None]:
unemploy_rate_all = pd.concat(
    [historical_unemployment_rate, unemployment_rate]
).sort_values("date", ascending=False)

In [None]:
unemploy_rate_all["text"] = "Historical mean: " + (
    str(unemploy_rate_all["value"].mean().round(2))
)

---

## Recessions

#### Snag date ranges and metadata from wikipedia

In [None]:
wiki_src = pd.read_html(
    "https://en.wikipedia.org/wiki/List_of_recessions_in_the_United_States"
)[2]

In [None]:
wiki_src.columns = (
    wiki_src.columns.str.lower()
    .str.replace(" ", "_")
    .str.replace("_(peak_to_trough)", "", regex=False)
)

In [None]:
wiki_slim = wiki_src[["name", "period_range", "gdp_decline"]].copy()

In [None]:
wiki_slim[["start", "end"]] = (
    wiki_slim["period_range"].str.split("[").str[0].str.split("–", expand=True)
)

In [None]:
wiki_slim["gdp_decline"] = (
    wiki_slim["gdp_decline"]
    .str.split("[")
    .str[0]
    .str.replace("%", "", regex=False)
    .str.replace("−", "", regex=False)
    .astype(float)
) * -1

In [None]:
wiki_slim.drop(["period_range"], axis=1, inplace=True)

In [None]:
wiki_slim["start"] = pd.to_datetime(wiki_slim["start"])
wiki_slim["end"] = pd.to_datetime(wiki_slim["end"])

In [None]:
wiki_slim["start_year"] = wiki_slim["start"].dt.year

In [None]:
wiki_slim["type"] = "Recessions"

In [None]:
wiki_slim["gdp_decline_pos"] = wiki_slim["gdp_decline"] * -1

In [None]:
wiki_slim_labels = wiki_slim[wiki_slim["name"].str.contains("Great Recession|COVID")]

In [None]:
wiki_slim_labels

---

## Chart

In [None]:
line = (
    alt.Chart(unemploy_rate_all)
    .mark_line(size=2)
    .encode(
        x=alt.X("date:T", axis=alt.Axis(tickCount=7), title=""),
        y=alt.Y("value:Q", axis=alt.Axis(tickCount=7), title=" "),
    )
)

rule = alt.Chart(unemploy_rate_all).mark_rule(color="black").encode(y="mean(value):Q")

text = rule.mark_text(align="left", baseline="middle", dy=10, dx=-70).encode(
    text="text"
)

rect = (
    alt.Chart(wiki_slim[4:])
    .mark_rect(color="#e6e6e6")
    .encode(
        x="start:T",
        x2="end:T",
    )
)

rect_label = (
    alt.Chart(wiki_slim_labels)
    .mark_text(dy=-200, color="black")
    .encode(
        x=alt.X("start"),
        text=alt.Text("name"),
    )
)

label = (
    alt.Chart(unemploy_rate_all.query("date == date.max()"))
    .mark_text(dy=15, color="black")
    .encode(
        x=alt.X("date", title="", axis=alt.Axis(tickCount=7, format="%Y")),
        y=alt.Y("value", axis=alt.Axis(tickCount=6), title=" "),
        text=alt.Text("value:Q"),
    )
)

points = (
    alt.Chart(unemploy_rate_all.query("date == date.max()"))
    .mark_point(color="black")
    .encode(
        x=alt.X("date", title="", axis=alt.Axis(tickCount=7, format="%Y")),
        y=alt.Y("value"),
    )
)

(rect + line + rule + text + label + points + rect_label).properties(
    width=900,
    title="U.S. unemployment rate (%), seasonally adjusted, with historical recessions",
)

---

## Export

In [None]:
# for frame in df["series_name"].unique():
#     df.to_csv(
#         f'data/processed/bls_{frame.replace(" ", "_")}_{start}_{end}.csv',
#         index=False,
#     )

In [None]:
unemployment_rate.to_csv(f"data/processed/bls_unemployment_rate.csv", index=False)