# US Bureau of Labor Statistics: Employment situation

#### Import Python tools

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd

In [3]:
# Raise pandas' display limits to 1000 columns and 1000 rows so frames shown
# in this notebook are not truncated when rendered.
pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_rows", 1000)

In [4]:
# Year-month stamp ("YYYY-MM") for the moment this cell runs, from the local clock.
# NOTE(review): no cell visible in this notebook reads `month` — confirm it is
# still needed (e.g. for month-stamped export names).
month = pd.Timestamp.today().strftime("%Y-%m")

---

## Read data

#### Import table from the bureau's [CES page](https://www.bls.gov/web/empsit/ceseesummary.htm)

In [5]:
# Parse every HTML table on the BLS CES summary page and keep the first one.
# The row labelled 0 is discarded (presumably a secondary header row — TODO
# confirm against the live page) and only the first 23 remaining rows are kept.
src = (
    pd.read_html("https://www.bls.gov/web/empsit/ceseesummary.htm")[0]
    .drop(index=0)
    .iloc[:23]
)

In [6]:
# Tidy the column labels: remove every literal "." first, then every literal
# "(p)" marker. Both replacements are plain-text (non-regex) substitutions.
without_dots = src.columns.str.replace(".", "", regex=False)
src.columns = without_dots.str.replace("(p)", "", regex=False)

In [7]:
# Remove the literal "(1)" marker from the category labels so they can be
# matched by exact name in later cells.
category_labels = src["Category"].str.replace("(1)", "", regex=False)
src["Category"] = category_labels

In [8]:
# Independent copy of `src` without its first two rows (by position), so later
# work on `df` cannot touch `src`.
df = src.iloc[2:, :].copy()

---

#### Just the topline `Total nonfarm` category time series

In [9]:
# Long-format series for the "Total nonfarm" row only: one output row per
# month column, using the last 13 columns of the table as the months.
# NOTE(review): unlike `df_melted`, the "jobs" column is not cast to float
# here, so it keeps whatever dtype the scraped table had — confirm intended.
total_nonfarm = (
    src.loc[src["Category"] == "Total nonfarm"]
    .melt(id_vars="Category", value_vars=list(df.columns[-13:]))
    .rename(columns={"Category": "category", "variable": "month", "value": "jobs"})
)

In [10]:
# The month labels mix three-letter and longer month names ("Jan", "June",
# "July", "Sept"). pandas >= 2.0 infers a single format from the first label
# and raises on the ones that do not fit, so reduce every label to
# "<3-letter month> <year>" and parse with an explicit format. Labels that do
# not look like "<month> <year>" are left untouched and fail loudly below.
# The resulting dates are the first day of each month.
month_labels = total_nonfarm["month"].str.replace(
    r"^\s*([A-Za-z]{3})[A-Za-z]*\.?\s*(\d{4})\s*$", r"\1 \2", regex=True
)
total_nonfarm["date"] = pd.to_datetime(month_labels, format="%b %Y")

In [11]:
# Display the total-nonfarm series to eyeball the months, values and parsed dates.
total_nonfarm

Unnamed: 0,category,month,jobs,date
0,Total nonfarm,Jan 2022,364,2022-01-01
1,Total nonfarm,Feb 2022,904,2022-02-01
2,Total nonfarm,Mar 2022,414,2022-03-01
3,Total nonfarm,Apr 2022,254,2022-04-01
4,Total nonfarm,May 2022,364,2022-05-01
5,Total nonfarm,June 2022,370,2022-06-01
6,Total nonfarm,July 2022,568,2022-07-01
7,Total nonfarm,Aug 2022,352,2022-08-01
8,Total nonfarm,Sept 2022,350,2022-09-01
9,Total nonfarm,Oct 2022,324,2022-10-01


---

#### Melt the dataframe so it's long (one row per category and month)

In [12]:
# Reshape the table from wide to long: the last 13 columns are treated as
# months and unpivoted into one row per (category, month) pair.
df_melted = (
    df.melt(id_vars="Category", value_vars=list(df.columns[-13:]))
    .rename(columns={"Category": "category", "variable": "month", "value": "jobs"})
)

In [13]:
# Convert the jobs figures to floats; the other columns are left as they are.
df_melted = df_melted.astype({"jobs": float})

In [14]:
# The month labels mix three-letter and longer month names ("Jan", "June",
# "July", "Sept"). pandas >= 2.0 infers a single format from the first label
# and raises on the ones that do not fit, so reduce every label to
# "<3-letter month> <year>" and parse with an explicit format. Labels that do
# not look like "<month> <year>" are left untouched and fail loudly below.
# The resulting dates are the first day of each month.
melted_month_labels = df_melted["month"].str.replace(
    r"^\s*([A-Za-z]{3})[A-Za-z]*\.?\s*(\d{4})\s*$", r"\1 \2", regex=True
)
df_melted["date"] = pd.to_datetime(melted_month_labels, format="%b %Y")

In [15]:
# Display the long-format frame to check the reshape, the float cast and the dates.
df_melted

Unnamed: 0,category,month,jobs,date
0,Goods-producing,Jan 2022,28.0,2022-01-01
1,Mining and logging,Jan 2022,-1.0,2022-01-01
2,Construction,Jan 2022,-4.0,2022-01-01
3,Manufacturing,Jan 2022,33.0,2022-01-01
4,Durable goods,Jan 2022,21.0,2022-01-01
5,Motor vehicles and parts,Jan 2022,0.3,2022-01-01
6,Nondurable goods,Jan 2022,12.0,2022-01-01
7,Private service-providing,Jan 2022,317.0,2022-01-01
8,Wholesale trade,Jan 2022,23.8,2022-01-01
9,Retail trade,Jan 2022,30.1,2022-01-01


---

#### Just the individual industries

In [26]:
# Category labels kept for the per-industry export. Labels must match the
# cleaned "category" values exactly, since they are used with `.isin(...)`.
# Rows such as "Goods-producing", "Private service-providing", "Durable goods"
# or "Nondurable goods" that appear in `df_melted` are deliberately not listed.
# Note that "Government" is also part of `topline_categories`.
industries = [
    "Mining and logging",
    "Construction",
    "Manufacturing",
    "Wholesale trade",
    "Retail trade",
    "Transportation and warehousing",
    "Utilities",
    "Information",
    "Financial activities",
    "Professional and business services",
    "Private education and health services",
    "Leisure and hospitality",
    "Other services",
    "Government",
]

In [27]:
# Keep only the rows whose category is one of the listed industries, then
# renumber the index from zero (the old index is discarded).
industries_df = (
    df_melted.loc[df_melted["category"].isin(industries)]
    .reset_index(drop=True)
)

In [29]:
# Display the per-industry subset to confirm the filter kept the expected categories.
industries_df

Unnamed: 0,category,month,jobs,date
0,Mining and logging,Jan 2022,-1.0,2022-01-01
1,Construction,Jan 2022,-4.0,2022-01-01
2,Manufacturing,Jan 2022,33.0,2022-01-01
3,Wholesale trade,Jan 2022,23.8,2022-01-01
4,Retail trade,Jan 2022,30.1,2022-01-01
5,Transportation and warehousing,Jan 2022,44.8,2022-01-01
6,Utilities,Jan 2022,2.1,2022-01-01
7,Information,Jan 2022,12.0,2022-01-01
8,Financial activities,Jan 2022,6.0,2022-01-01
9,Professional and business services,Jan 2022,95.0,2022-01-01


---

#### Just the topline industry categories

In [19]:
# Category labels kept for the topline export; matched exactly against the
# cleaned "category" values.
topline_categories = [
    "Goods-producing",
    "Private service-providing",
    "Government",
]

In [20]:
# Keep only the rows for the topline categories, then renumber the index from
# zero (the old index is discarded).
toplines_df = (
    df_melted.loc[df_melted["category"].isin(topline_categories)]
    .reset_index(drop=True)
)

In [21]:
# Preview the first rows of the topline subset.
toplines_df.head()

Unnamed: 0,category,month,jobs,date
0,Goods-producing,Jan 2022,28.0,2022-01-01
1,Private service-providing,Jan 2022,317.0,2022-01-01
2,Government,Jan 2022,19.0,2022-01-01
3,Goods-producing,Feb 2022,121.0,2022-02-01
4,Private service-providing,Feb 2022,776.0,2022-02-01


--- 

## Export

In [22]:
# Convert the "date" column of each export frame to plain strings before
# writing JSON. Each frame is modified in place.
for export_frame in (toplines_df, industries_df, total_nonfarm):
    export_frame["date"] = export_frame["date"].astype(str)

In [23]:
# Write the topline-category rows as an indented JSON array of records.
# The path is a plain string: it has no placeholders, so the former f-prefix
# was dropped. The path itself is unchanged.
toplines_df.to_json(
    "data/processed/monthly_employment_situation_by_industry_toplines.json",
    indent=4,
    orient="records",
)

In [24]:
# Write the per-industry rows as an indented JSON array of records.
# The path is a plain string: it has no placeholders, so the former f-prefix
# was dropped. The path itself is unchanged.
industries_df.to_json(
    "data/processed/monthly_employment_situation_by_industries.json",
    indent=4,
    orient="records",
)

In [25]:
# Write the total-nonfarm series as an indented JSON array of records.
# The path is a plain string: it has no placeholders, so the former f-prefix
# was dropped. The path itself is unchanged.
total_nonfarm.to_json(
    "data/processed/monthly_employment_situation_total_nonfarm_latest.json",
    indent=4,
    orient="records",
)