# BLS: Employment situation

#### Import Python tools

In [1]:
import pandas as pd

In [2]:
# Raise pandas' display limits to 1000 columns / 1000 rows so wide or long
# frames are rendered without truncation.
pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_rows", 1000)

In [3]:
# "YYYY-MM" stamp of the day the notebook is run (local clock); it is used
# further down to name the dated JSON export.
month = pd.Timestamp.today().strftime("%Y-%m")

---

In [4]:
# Download the BLS Employment Situation feed and parse its two timestamp
# fields as datetimes.
feed = pd.read_xml(
    "https://www.bls.gov/feed/empsit.rss",
    parse_dates=["published", "updated"],
)

# Discard the rows labelled 0-7 (presumably feed-level metadata rather than
# individual releases -- TODO confirm against the live feed), keep the four
# fields the rest of the notebook uses, and renumber the rows from 0.
df = (
    feed.drop(index=list(range(8)))
    .loc[:, ["title", "published", "updated", "content"]]
    .reset_index(drop=True)
)

In [5]:
# Unemployment rate = the percentage that closes each headline.
# The pattern is anchored on the trailing "%" instead of slicing a fixed four
# characters, so double-digit rates such as "14.7%" are read in full rather
# than truncated to 4.7. Headlines that do not end in a percentage give NaN.
df["rate"] = (
    df["title"]
    .str.extract(r"(\d+(?:\.\d+)?)\s*%\s*$", expand=False)
    .astype(float)
)

In [6]:
# Headlines have the form "<payroll clause>; <unemployment clause>".
# n=1 splits on the first semicolon only, so the result can never have more
# than the two columns being assigned (an extra ";" later in a headline would
# otherwise produce a third column and make the assignment raise).
df[["employ_desc", "rate_desc"]] = df["title"].str.split(";", n=1, expand=True)

In [7]:
# Separate the reference month from the payroll clause.
# The clause is re-derived from the headline (the text before the first ";")
# rather than read back from "employ_desc": this cell overwrites
# "employ_desc", so on a second run that column no longer contains " in " and
# the two-column assignment would raise. Reading from "title" keeps the cell
# re-runnable and gives the same result on the first run.
# rsplit(n=1) cuts at the last " in ", which is where the month name sits.
headline_clause = df["title"].str.split(";", n=1).str[0]
df[["employ_desc", "month"]] = headline_clause.str.rsplit(" in ", n=1, expand=True)

In [8]:
# Convert the payroll clause into a signed integer change in jobs.
# The previous version only stripped "increases by" / "rises by", so any other
# wording ("falls by", "declines by", "20.5 million", ...) crashed in
# astype(int). Here the first number in the clause is the magnitude, an
# optional "million" scales it, and a loss verb or a leading minus sign makes
# it negative. A clause with no number at all still raises in astype(int), so
# an unparseable headline fails loudly instead of being silently dropped.
clause = df["employ_desc"]
figure = clause.str.extract(
    r"(?i)(?P<minus>-)?(?P<amount>\d[\d,]*(?:\.\d+)?)(?P<scale>\s*million)?"
)
magnitude = figure["amount"].str.replace(",", "", regex=False).astype(float)
magnitude = magnitude.where(figure["scale"].isna(), magnitude * 1_000_000)
is_loss = figure["minus"].notna() | clause.str.contains(
    r"(?i)\b(?:falls?|fell|declin|decreas|drops?|down|los[est])", na=False
)
df["employ_total"] = magnitude.where(~is_loss, -magnitude).round().astype(int)

In [9]:
# Lookup from English month name to its zero-padded two-digit number,
# e.g. "January" -> "01", "December" -> "12".
m = {
    name: f"{number:02d}"
    for number, name in enumerate(
        [
            "January",
            "February",
            "March",
            "April",
            "May",
            "June",
            "July",
            "August",
            "September",
            "October",
            "November",
            "December",
        ],
        start=1,
    )
}

In [10]:
# Calendar fields for each release.
published = pd.to_datetime(df["published"])  # parsed once, reused below
df["date"] = published.dt.date
df["year"] = published.dt.year
df["month_num"] = df["month"].map(m)

# "year" must be the year of the reference month, not of publication.
# If the reference month falls later in the calendar than the publication
# month, the report describes the previous year. That covers the usual
# December report released in January, and also any delayed release that
# slips past New Year (the old check only handled December).
# Unmapped months (NaN) compare False and are left untouched.
reference_month = pd.to_numeric(df["month_num"], errors="coerce")
released_next_year = reference_month > published.dt.month
df.loc[released_next_year, "year"] = df["year"] - 1

df["month_year"] = df["year"].astype(str) + "-" + df["month_num"]

In [11]:
# Preview the first five parsed releases.
df.head(n=5)

Unnamed: 0,title,published,updated,content,rate,employ_desc,rate_desc,month,employ_total,date,year,month_num,month_year
0,"Payroll employment increases by 223,000 in Dec...",2023-01-06T08:30:00Z,2023-01-06T08:30:00Z,Total nonfarm payroll employment increased by ...,3.5,"Payroll employment increases by 223,000",unemployment rate edges down to 3.5%,December,223000,2023-01-06,2022,12,2022-12
1,"Payroll employment increases by 263,000 in Nov...",2022-12-02T08:30:00Z,2022-12-02T08:30:00Z,Total nonfarm payroll employment increased by ...,3.7,"Payroll employment increases by 263,000",unemployment rate unchanged at 3.7%,November,263000,2022-12-02,2022,11,2022-11
2,"Payroll employment increases by 261,000 in Oct...",2022-11-04T08:30:00Z,2022-11-04T08:30:00Z,Total nonfarm payroll employment increased by ...,3.7,"Payroll employment increases by 261,000",unemployment rate rises to 3.7%,October,261000,2022-11-04,2022,10,2022-10
3,"Payroll employment increases by 263,000 in Sep...",2022-10-07T08:30:00Z,2022-10-07T08:30:00Z,Total nonfarm payroll employment increased by ...,3.5,"Payroll employment increases by 263,000",unemployment rate edges down to 3.5%,September,263000,2022-10-07,2022,9,2022-09
4,"Payroll employment increases by 315,000 in Aug...",2022-09-02T08:30:00Z,2022-09-02T08:30:00Z,Total nonfarm payroll employment increased by ...,3.7,"Payroll employment increases by 315,000",unemployment rate rises to 3.7%,August,315000,2022-09-02,2022,8,2022-08


In [12]:
# "notable" = the text that follows the sentence ending in "percent." in the
# summary, with the comma before a closing "and" removed.
# The dot is escaped because a multi-character split pattern is interpreted as
# a regular expression, where a bare "." would also match "percent," or
# "percent " and cut the text in the wrong place.
# .str.get(1) gives NaN for a summary without that sentence, instead of the
# KeyError that indexing the expanded frame raises when no row contains it.
df["notable"] = (
    df["content"]
    .str.split(r"percent\.")
    .str.get(1)
    .str.replace(", and", " and", regex=False)
)

In [13]:
# Trim the text columns and upper-case their first letter only.
# str.capitalize() is deliberately avoided: it also lower-cases every other
# character, which mangles acronyms/proper nouns and the first word of any
# later sentence in "notable".
for column in ["notable", "employ_desc", "rate_desc"]:
    trimmed = df[column].str.strip()
    df[column] = trimmed.str[:1].str.upper() + trimmed.str[1:]

In [14]:
# Write the headline fields of every release in the frame to the undated
# JSON file, one object per row.
export = df.loc[
    :,
    ["date", "month", "employ_total", "rate", "notable", "employ_desc", "rate_desc"],
]
export.to_json(
    path_or_buf="data/processed/monthly_employment_situation_last_year.json",
    orient="records",
    indent=4,
)

In [15]:
# Independent copy of the export fields plus the numeric month string.
slim_columns = [
    "date",
    "month",
    "month_num",
    "employ_total",
    "rate",
    "notable",
    "employ_desc",
    "rate_desc",
]
slim = df.loc[:, slim_columns].copy()

In [16]:
# Preview the first five rows of the slim table.
slim.head(n=5)

Unnamed: 0,date,month,month_num,employ_total,rate,notable,employ_desc,rate_desc
0,2023-01-06,December,12,223000,3.5,Notable job gains occurred in leisure and hosp...,"Payroll employment increases by 223,000",Unemployment rate edges down to 3.5%
1,2022-12-02,November,11,263000,3.7,Notable job gains occurred in leisure and hosp...,"Payroll employment increases by 263,000",Unemployment rate unchanged at 3.7%
2,2022-11-04,October,10,261000,3.7,"Notable job gains occurred in health care, pro...","Payroll employment increases by 261,000",Unemployment rate rises to 3.7%
3,2022-10-07,September,9,263000,3.5,Notable job gains occurred in leisure and hosp...,"Payroll employment increases by 263,000",Unemployment rate edges down to 3.5%
4,2022-09-02,August,8,315000,3.7,Notable job gains occurred in professional and...,"Payroll employment increases by 315,000",Unemployment rate rises to 3.7%


In [17]:
# Archive the slim table under a file name stamped with the run's "YYYY-MM".
archive_path = f"data/processed/monthly_employment_situation_last_year_{month}.json"
slim.to_json(archive_path, orient="records", indent=4)