# Get and plot unemployment data from the [BLS API](https://www.bls.gov/developers/)

#### Load python tools

In [1]:
%load_ext lab_black

In [2]:
import requests
import json
import pandas as pd
from credentials import api_key
import altair as alt
import altair_stiles as altstiles
import glob
from pathlib import Path
import numpy as np

In [3]:
# Register the altair_stiles theme under the name "stiles" so it can be enabled by name.
alt.themes.register("stiles", altstiles.theme)
# NOTE(review): the theme enabled here is "grid", not the "stiles" theme
# registered on the line above — confirm that is intentional.
alt.themes.enable("grid")

ThemeRegistry.enable('grid')

In [4]:
# Raise the pandas display limits so wide and long frames print in full.
pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_rows", 1000)

# Turn off Altair's maximum-rows check for chart datasets.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

## Historical data

#### The API only delivers 20 years of data per request, so import the historical series, too

In [5]:
# (Seas) Unemployment Rate
# Unemployment rate
# Percent or rate
# 16 years and over
# 1948 to 2022

In [6]:
# Month-abbreviation columns of the raw file, in calendar order.
month_columns = [
    "Jan",
    "Feb",
    "Mar",
    "Apr",
    "May",
    "Jun",
    "Jul",
    "Aug",
    "Sep",
    "Oct",
    "Nov",
    "Dec",
]

# Read the wide table, keeping `year` as text so it can be concatenated with
# the month name when dates are parsed.
historical_wide = pd.read_csv(
    "data/raw/bls_unemployment_rate_LNS14000000_historical.csv",
    dtype={"year": str},
)

# Reshape to long form: one row per (year, month) with the reading in `value`.
historical_src = historical_wide.melt(
    id_vars=["year"],
    value_vars=month_columns,
    var_name="month",
)

In [7]:
# Inspect the column names of the reshaped (long-form) frame.
historical_src.columns

Index(['year', 'month', 'value'], dtype='object')

In [8]:
# Parse the "<year> <month>" strings once and derive every calendar field
# from that single result.
parsed_dates = pd.to_datetime(historical_src["year"] + " " + historical_src["month"])

historical_src["date"] = parsed_dates
historical_src["year"] = parsed_dates.dt.year
historical_src["month"] = parsed_dates.dt.month_name()
historical_src["month_year"] = parsed_dates.dt.to_period("M")

In [9]:
# Tag every historical row with the same variable name and unit.
historical_src = historical_src.assign(variable="unemployment rate", unit="rate")

In [10]:
# Discard rows with no value, then order newest first.
rows_with_values = historical_src.dropna(subset=["value"])
historical_df = rows_with_values.sort_values("date", ascending=False).copy()

In [11]:
# Preview the first 12 rows (the frame is sorted newest first).
historical_df.head(12)

Unnamed: 0,year,month,value,date,month_year,variable,unit
224,2022,March,3.6,2022-03-01,2022-03,unemployment rate,rate
149,2022,February,3.8,2022-02-01,2022-02,unemployment rate,rate
74,2022,January,4.0,2022-01-01,2022-01,unemployment rate,rate
898,2021,December,3.9,2021-12-01,2021-12,unemployment rate,rate
823,2021,November,4.2,2021-11-01,2021-11,unemployment rate,rate
748,2021,October,4.6,2021-10-01,2021-10,unemployment rate,rate
673,2021,September,4.7,2021-09-01,2021-09,unemployment rate,rate
598,2021,August,5.2,2021-08-01,2021-08,unemployment rate,rate
523,2021,July,5.4,2021-07-01,2021-07,unemployment rate,rate
448,2021,June,5.9,2021-06-01,2021-06,unemployment rate,rate


---

## Current data via API

#### Which BLS series are we retrieving?

In [12]:
# These are all seasonally adjusted

# Maps each BLS series ID to a human-readable name.
series_lookup = {
    "LNS11000000": "Civilian Labor Force",
    "LNS12000000": "Civilian Employment",
    "LNS13000000": "Civilian Unemployment",
    "LNS14000000": "Unemployment Rate",
}

In [13]:
# Year range for the API request, kept as strings; the same values are
# reused in the export filenames.
startyear = "2003"
endyear = "2022"

#### Request params

In [14]:
# The request body is sent as JSON, so declare that content type.
headers = {"Content-type": "application/json"}

# Request exactly the series listed in `series_lookup` instead of repeating
# the IDs here, so the request and the ID-to-name mapping cannot drift apart.
data = json.dumps(
    {
        "seriesid": list(series_lookup),
        "startyear": startyear,
        "endyear": endyear,
        "registrationkey": api_key,
    }
)

#### Get data from API

In [15]:
# POST the query. The timeout keeps the cell from hanging indefinitely if the
# API never answers (requests waits forever by default).
p = requests.post(
    "https://api.bls.gov/publicAPI/v2/timeseries/data/",
    data=data,
    headers=headers,
    timeout=60,
)

# Fail loudly on an HTTP error instead of trying to parse an error page.
p.raise_for_status()
json_data = json.loads(p.text)

# The API also reports request-level failures inside the JSON body, so check
# its own status field before the results are used downstream.
if json_data.get("status") != "REQUEST_SUCCEEDED":
    raise RuntimeError(
        f"BLS API request failed: {json_data.get('status')} {json_data.get('message')}"
    )

#### Put the results in a list of dataframes and assign the series code as a reference column

In [16]:
# One frame per returned series, each tagged with its series ID in `variable`.
dataframes = [
    pd.DataFrame(entry["data"]).assign(variable=entry["seriesID"])
    for entry in json_data["Results"]["series"]
]

#### One long dataframe with all the series

In [17]:
# Stack the per-series frames and discard the columns that are not used later.
unused_columns = ["footnotes", "latest", "period"]
src_df = pd.concat(dataframes).drop(columns=unused_columns)

----

## Clean up API data

#### Process dates, slim dataframe and rename columns

In [18]:
def process_bls_data(df):
    """Convert one raw BLS observation into a slim record with parsed dates.

    Intended for row-wise use via ``DataFrame.apply(..., axis=1)``.

    Args:
        df: A single row (``pd.Series``) holding at least the labels
            ``year``, ``periodName``, ``value`` and ``variable``. ``year``
            and ``periodName`` must be strings (they are concatenated).

    Returns:
        A new ``pd.Series`` with the labels ``date``, ``value``, ``month``,
        ``month_year`` and ``series``. The input row is left untouched.
    """
    # Build a fresh Series rather than writing into the row: pandas does not
    # support apply functions that mutate the object they are handed.
    date = pd.to_datetime(df["year"] + " " + df["periodName"])
    return pd.Series(
        {
            "date": date,
            "value": df["value"],
            "month": date.month_name(),
            "month_year": date.to_period("M"),
            # The series ID arrives as `variable` and leaves as `series`.
            "series": df["variable"],
        },
        name=df.name,
    )


# Row-wise apply (axis=1): each row is handed to process_bls_data as a Series
# and the returned records are reassembled into the new frame.
src_df = src_df.apply(process_bls_data, axis=1)

#### Map the series codes to their names in the dataframe

In [19]:
# Translate series IDs into their readable names, lower-cased.
readable_names = src_df["series"].map(series_lookup)
src_df["series_name"] = readable_names.str.lower()

#### Set unit definition for each series

In [20]:
def series_units(df):
    """Return the unit label for one row, chosen by its ``series_name``.

    A row whose ``series_name`` is exactly "unemployment rate" is labelled
    "rate"; any other row is labelled "number in thousands".
    """
    is_rate = df["series_name"] == "unemployment rate"
    return "rate" if is_rate else "number in thousands"


# Row-wise apply (axis=1): series_units receives each row and returns its unit label.
src_df["unit"] = src_df.apply(series_units, axis=1)

#### Make a copy of the dataframe

In [21]:
# Continue with an independent copy of the cleaned frame.
df = src_df.copy()

---

## Isolate just unemployment

In [22]:
# Select rows and columns in a single .loc call and take an explicit copy:
# the `value` column of this frame is reassigned in a later cell, and
# assigning into the result of chained indexing triggers pandas'
# SettingWithCopyWarning.
unemployment_rate = df.loc[
    df["series_name"] == "unemployment rate", ["date", "value"]
].copy()

In [23]:
# Cast the `value` column to float.
unemployment_rate = unemployment_rate.astype({"value": float})

#### Limit historical dataframe to what the API doesn't provide

In [24]:
# Keep only the historical months that precede the earliest API month.
api_start = unemployment_rate["date"].min()
predates_api = historical_df["date"] < api_start
historical_unemployment_rate = historical_df.loc[predates_api, ["date", "value"]]

#### Concatenate the two dataframes

In [25]:
# Join both sources into one series, newest month first.
combined_rates = pd.concat([historical_unemployment_rate, unemployment_rate])
unemploy_rate_all = combined_rates.sort_values("date", ascending=False)

In [26]:
# The same label goes on every row; the chart's mean rule uses it as its text.
historical_mean = unemploy_rate_all["value"].mean().round(2)
unemploy_rate_all["text"] = "Historical mean: " + str(historical_mean)

---

## Recessions

#### Snag date ranges and metadata from Wikipedia

In [27]:
# read_html returns a list of the tables parsed from the page; keep the one at index 2.
# NOTE(review): a positional index will silently select a different table if
# the page layout changes — confirm it still points at the recessions table.
wiki_src = pd.read_html(
    "https://en.wikipedia.org/wiki/List_of_recessions_in_the_United_States"
)[2]

In [28]:
# Normalise the headers: lower-case, underscores for spaces, and drop the
# "(peak to trough)" suffix.
lowered = wiki_src.columns.str.lower()
underscored = lowered.str.replace(" ", "_")
wiki_src.columns = underscored.str.replace("_(peak_to_trough)", "", regex=False)

In [29]:
# Work on an independent copy holding just the three columns needed here.
kept_columns = ["name", "period_range", "gdp_decline"]
wiki_slim = wiki_src.loc[:, kept_columns].copy()

In [30]:
# Cut everything from the first "[" onward (presumably footnote markers),
# then split the remaining range on the en dash into start and end.
range_text = wiki_slim["period_range"].str.split("[").str[0]
wiki_slim[["start", "end"]] = range_text.str.split("–", expand=True)

In [31]:
# Reduce the text to a bare number: cut everything from the first "[" onward,
# then remove the percent sign and the Unicode minus sign.
decline_text = wiki_slim["gdp_decline"].str.split("[").str[0]
decline_text = decline_text.str.replace("%", "", regex=False)
decline_text = decline_text.str.replace("−", "", regex=False)

# The sign was stripped above, so multiply by -1 to store declines as negatives.
wiki_slim["gdp_decline"] = decline_text.astype(float) * -1

In [32]:
# The raw range text has been split into start/end, so drop it.
wiki_slim = wiki_slim.drop(columns=["period_range"])

In [33]:
# Parse both range boundaries into datetimes.
for boundary in ("start", "end"):
    wiki_slim[boundary] = pd.to_datetime(wiki_slim[boundary])

In [34]:
# Calendar year in which each period starts.
wiki_slim["start_year"] = wiki_slim["start"].dt.year

In [35]:
# Constant label column: every row gets the value "Recessions".
wiki_slim["type"] = "Recessions"

In [36]:
# Sign-flipped copy of gdp_decline (declines expressed as positive numbers).
wiki_slim["gdp_decline_pos"] = wiki_slim["gdp_decline"].mul(-1)

In [37]:
# Only rows whose name matches this pattern get a text label on the chart.
label_pattern = "Great Recession|COVID"
wiki_slim_labels = wiki_slim.loc[wiki_slim["name"].str.contains(label_pattern)]

In [38]:
# Display the rows selected for labelling.
wiki_slim_labels

Unnamed: 0,name,gdp_decline,start,end,start_year,type,gdp_decline_pos
13,Great Recession,-5.1,2007-12-01,2009-06-01,2007,Recessions,5.1
14,COVID-19 recession,-19.2,2020-02-01,2020-04-01,2020,Recessions,19.2


---

## Chart

In [39]:
# The most recent observation feeds both the end-of-line marker and its label.
latest_reading = unemploy_rate_all.query("date == date.max()")

# Grey bands spanning each period's start and end (rows from position 4 onward).
rect = (
    alt.Chart(wiki_slim[4:])
    .mark_rect(color="#e6e6e6")
    .encode(x="start:T", x2="end:T")
)

# Names of the selected periods, placed at their start dates.
rect_label = (
    alt.Chart(wiki_slim_labels)
    .mark_text(dy=-200, color="black")
    .encode(x=alt.X("start"), text=alt.Text("name"))
)

# The unemployment-rate line itself.
line = (
    alt.Chart(unemploy_rate_all)
    .mark_line(size=2)
    .encode(
        x=alt.X("date:T", axis=alt.Axis(tickCount=7), title=""),
        y=alt.Y("value:Q", axis=alt.Axis(tickCount=7), title=" "),
    )
)

# Horizontal rule at the mean of all values, plus its text annotation.
rule = alt.Chart(unemploy_rate_all).mark_rule(color="black").encode(y="mean(value):Q")
text = rule.mark_text(align="left", baseline="middle", dy=10, dx=-70).encode(
    text="text"
)

# Marker and value label for the latest reading.
points = (
    alt.Chart(latest_reading)
    .mark_point(color="black")
    .encode(
        x=alt.X("date", title="", axis=alt.Axis(tickCount=7, format="%Y")),
        y=alt.Y("value"),
    )
)
label = (
    alt.Chart(latest_reading)
    .mark_text(dy=15, color="black")
    .encode(
        x=alt.X("date", title="", axis=alt.Axis(tickCount=7, format="%Y")),
        y=alt.Y("value", axis=alt.Axis(tickCount=6), title=" "),
        text=alt.Text("value:Q"),
    )
)

# Layer order matters: the bands go first so everything else is drawn over them.
(rect + line + rule + text + label + points + rect_label).properties(
    width=900,
    title="U.S. unemployment rate (%), seasonally adjusted, with historical recessions",
)

---

## Export

In [40]:
# Write one CSV per series. Each file is named after its series, so it gets
# only that series' rows rather than the whole combined frame.
for frame in df["series_name"].unique():
    df[df["series_name"] == frame].to_csv(
        f'data/processed/bls_{frame.replace(" ", "_")}_{startyear}_{endyear}.csv',
        index=False,
    )

In [41]:
# Save the unemployment-rate frame without its index.
unemployment_rate.to_csv("data/processed/bls_unemployment_rate.csv", index=False)