## Unemployment Data by month for 2015 - 2023, Draft 001
- Data is sourced from the Bureau of Labor Statistics (BLS) API
- Includes unemployment rate for CA, FL, NY, TX, WA and National.

In [2]:
# Import dependencies
import json
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import requests

from config import api_key

In [3]:
# Endpoint of the BLS public API (v2) that every time-series request is posted to
url = "https://api.bls.gov/publicAPI/v2/timeseries/data/"

In [4]:
# The request body is sent as JSON, so the content type is declared in the headers
headers = {"Content-type": "application/json"}

# BLS series ID -> human-readable name.
# The IDs are what the request asks for; the names keep each unemployment-rate
# series easy to identify.
series_dict = {
    "LASST060000000000003": "California",
    "LASST120000000000003": "Florida",
    "LASST480000000000003": "Texas",
    "LASST530000000000003": "Washington",
    "LASST360000000000003": "New York",
    "LNS14000000": "National",
}

In [5]:
# Request body for the BLS API, serialized to a JSON string before sending.
# NOTE(review): the request starts at 2015, while the exported CSV file name
# says 2020-2023 -- confirm which year range is intended.
request_fields = {
    "seriesid": list(series_dict),  # every series ID defined in series_dict
    "startyear": 2015,
    "endyear": 2023,
    "registrationkey": api_key,
}
data = json.dumps(request_fields)

In [6]:
# Submit the request to the BLS API and keep the list of returned series.
# A timeout stops the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors directly.
response = requests.post(url, headers=headers, data=data, timeout=30)
response.raise_for_status()
payload = response.json()

# Fail with the API's own status/message when no series came back, instead of
# a bare KeyError here or an IndexError in a later cell.
bls = (payload.get("Results") or {}).get("series")
if not bls:
    raise RuntimeError(
        "BLS API returned no series "
        f"(status={payload.get('status')!r}, message={payload.get('message')!r})"
    )

In [7]:
# Dump the response with sorted keys and a 4-space indent so its structure is easy to read
formatted_response = json.dumps(bls, sort_keys=True, indent=4)
print(formatted_response)

[
    {
        "data": [
            {
                "footnotes": [
                    {}
                ],
                "period": "M12",
                "periodName": "December",
                "value": "5.1",
                "year": "2023"
            },
            {
                "footnotes": [
                    {}
                ],
                "period": "M11",
                "periodName": "November",
                "value": "5.1",
                "year": "2023"
            },
            {
                "footnotes": [
                    {}
                ],
                "period": "M10",
                "periodName": "October",
                "value": "5.1",
                "year": "2023"
            },
            {
                "footnotes": [
                    {}
                ],
                "period": "M09",
                "periodName": "September",
                "value": "5.0",
                "year": "2023"
            },
            {


In [8]:
# Collect every reported rate (stored under the 'value' key) for one series
def BLS_Unemployment(name, series=None):
    """Return the values reported for one BLS series as a list of floats.

    Args:
        name: Series ID to look up; compared against each entry's 'seriesID'.
        series: Optional list of series entries, each a dict with a
            'seriesID' key and a 'data' list whose items carry a 'value'
            key. Defaults to the module-level ``bls`` response.

    Returns:
        The 'value' of every observation in the matching entry, converted to
        float and kept in the order the observations appear. An empty list
        when no entry matches ``name``.

    Raises:
        ValueError: If a 'value' string cannot be converted to float.
    """
    if series is None:
        series = bls  # fall back to the API response stored by the notebook

    values = []
    for entry in series:
        if entry['seriesID'] == name:
            values.extend(float(obs['value']) for obs in entry['data'])
    return values

In [9]:
# Extract the rate values of each series; the IDs below are listed in the same
# order as the variable names they are unpacked into
(
    california,
    florida,
    texas,
    washington,
    new_york,
    national,
) = [
    BLS_Unemployment(series_id)
    for series_id in (
        'LASST060000000000003',  # California
        'LASST120000000000003',  # Florida
        'LASST480000000000003',  # Texas
        'LASST530000000000003',  # Washington
        'LASST360000000000003',  # New York
        'LNS14000000',           # National
    )
]

# Build one "YYYY-MM-01" date string per observation of the first series.
# A period code looks like "M12"; dropping its first character leaves the month number.
# NOTE(review): dates are taken from bls[0] only, which assumes every series
# covers the same periods in the same order -- confirm.
first_series_observations = bls[0]['data']
month_string = [
    f"{obs['year']}-{obs['period'][1:]}-01" for obs in first_series_observations
]

In [10]:
# Assemble the table: parsed month dates plus one column of rates per series
column_data = {
    "Month": pd.to_datetime(month_string),  # date strings -> datetime values
    "California": california,
    "Florida": florida,
    "Texas": texas,
    "Washington": washington,
    "New York": new_york,
    "National": national,
}
unemployment_df = pd.DataFrame(column_data)

# Put the rows in chronological order, then renumber the index from zero
unemployment_df = unemployment_df.sort_values(by="Month", ascending=True)
unemployment_df = unemployment_df.reset_index(drop=True)

# Show the first rows as a preview
unemployment_df.head()

Unnamed: 0,Month,California,Florida,Texas,Washington,New York,National
0,2015-01-01,6.8,5.9,4.6,5.6,5.7,5.7
1,2015-02-01,6.7,5.8,4.5,5.5,5.6,5.5
2,2015-03-01,6.6,5.8,4.5,5.5,5.6,5.4
3,2015-04-01,6.5,5.7,4.5,5.4,5.5,5.4
4,2015-05-01,6.4,5.7,4.4,5.4,5.4,5.6


In [11]:
# Export unemployment_df to a CSV file. The output folder is created first
# because to_csv does not create missing directories.
# NOTE(review): the file name says 2020-2023 -- confirm it matches the year
# range actually requested from the API.
csv_path = Path("output/unemployment_rates_2020-2023.csv")
csv_path.parent.mkdir(parents=True, exist_ok=True)
unemployment_df.to_csv(csv_path, index=False)