###  The Bureau of Labor Statistics' (BLS) Public Data Application Programming Interface (API) 

More information at https://www.bls.gov/developers/


Using the Local Area Unemployment Statistics, the SeriesId can be defined as a combination of the following:
`'LA'+'U'+area_code+measure_code`
- `'LA'` = Local Area Unemployment Statistics
- `'U'` = Unadjusted
- area_code
 - EX:`'MT4812420000000'` = Austin-Round Rock, TX Metropolitan Statistical Area
- measure_code:
 - `'03'` = unemployment rate
 - `'04'` = unemployment
 - `'05'` = employment
 - `'06'` = labor force


Source: https://www.bls.gov/help/hlpforma.htm#LA

In [2]:
import requests
import json
import pandas as pd

### Austin Metro Area

In [4]:
headers = {'Content-type': 'application/json'}
data = json.dumps({"seriesid": ['LAUMT481242000000003', 
                                'LAUMT481242000000004',
                                'LAUMT481242000000005',
                                'LAUMT481242000000006'],
                   "startyear":"2011", "endyear":"2020"})
p = requests.post('https://api.bls.gov/publicAPI/v2/timeseries/data/', data=data, headers=headers)
json_data = json.loads(p.text)

for series in json_data['Results']['series']:
    df = pd.DataFrame()
    seriesID = series['seriesID']
    years = []
    periods = []
    values = []
    dates = []
    for val in series['data']:
        years.append(val["year"])
        periods.append(val["period"])
        values.append(val["value"])
        dates.append(val["period"][-2:]+"/"+val["year"])
    
    df['series_id'] = [seriesID for _ in range(len(series['data']))]
    df['date'] = dates
    df['year'] = years
    df['period'] = periods
    df['value'] = values

    df.to_csv('./'+ seriesID +'.csv', index=False)

In [5]:
## Combine Austin Metro Data

df_unemploymentrate = pd.read_csv('./LAUMT481242000000003.csv')
df_unemployment = pd.read_csv('./LAUMT481242000000004.csv')
df_employment = pd.read_csv('./LAUMT481242000000005.csv')
df_laborforce = pd.read_csv('./LAUMT481242000000006.csv')

df_all = pd.concat([df_unemploymentrate, df_unemployment, df_employment, df_laborforce])

In [6]:
## Set Identifying name for series

df_all['series_name'] = [value[-1] for value in df_all['series_id']]

series_dict = {'3': 'unemployment_rate',
               '4': 'unemployment',
               '5': 'employment',
               '6': 'labor_force'}

df_all['series_name'] = df_all['series_name'].map(series_dict)

In [7]:
df_all

Unnamed: 0,series_id,date,year,period,value,series_name
0,LAUMT481242000000003,07/2020,2020,M07,6.7,unemployment_rate
1,LAUMT481242000000003,06/2020,2020,M06,7.3,unemployment_rate
2,LAUMT481242000000003,05/2020,2020,M05,11.4,unemployment_rate
3,LAUMT481242000000003,04/2020,2020,M04,12.2,unemployment_rate
4,LAUMT481242000000003,03/2020,2020,M03,3.8,unemployment_rate
...,...,...,...,...,...,...
110,LAUMT481242000000006,05/2011,2011,M05,957600.0,labor_force
111,LAUMT481242000000006,04/2011,2011,M04,957449.0,labor_force
112,LAUMT481242000000006,03/2011,2011,M03,952924.0,labor_force
113,LAUMT481242000000006,02/2011,2011,M02,949603.0,labor_force


### All US Cities and Towns 

In [3]:
# Get the area_code for all US cities and towns
r = requests.get('https://download.bls.gov/pub/time.series/la/la.area')

area_data = [row.split('\t') for row in r.text.split('\n')]
area_header = area_data[0]
area_body = area_data[1:]

areas_ls = []
for row in area_body:
    a_dict= {}
    for col_num, val in enumerate(row):
        a_dict[area_header[col_num]] = val
    areas_ls.append(a_dict)
    
df_area = pd.DataFrame(areas_ls).dropna()

# G = Cities and towns above 25,000 population --> https://data.bls.gov/PDQWeb/la
city_codes = df_area[df_area['area_type_code'] == 'G'].area_code.values 
city_names = df_area[df_area['area_type_code'] == 'G'].area_text.values 


In [4]:
df_area.head()

Unnamed: 0,area_type_code,area_code,area_text,display_level,selectable,sort_sequence\r
0,A,ST0100000000000,Alabama,0,T,1\r
1,A,ST0200000000000,Alaska,0,T,146\r
2,A,ST0400000000000,Arizona,0,T,188\r
3,A,ST0500000000000,Arkansas,0,T,252\r
4,A,ST0600000000000,California,0,T,378\r


In [63]:
# Create the SeriesID for all US cities and towns
cities_dict = {}
for i in range(len(city_codes)):
    key = city_codes[i]
    value = city_names[i]
    cities_dict[key] = value    

ls_series_ids = []
for city in city_codes:
    ls_series_ids.append('LAU'+city+'03') # unemploymentrate_series
    ls_series_ids.append('LAU'+city+'04') # unemployment_series
    ls_series_ids.append('LAU'+city+'05') # employment_series
    ls_series_ids.append('LAU'+city+'06') # laborforce_series

# Create list of lists with 50 seriesIds each since that is the max per API request
ls_50_series = [ls_series_ids[i:i + 50] for i in range(0, len(ls_series_ids), 50)]

In [91]:
# Pull data from API

with open('../../misc/api_keys/bureaulaborstats.txt') as f:
    apikey = f.read()

ls_all_df = [] 

for series_list in ls_50_series:
    headers = {'Content-type': 'application/json'}
    data = json.dumps({"seriesid": series_list,
                       "startyear":"2011", "endyear":"2020",
                       "registrationkey": apikey})
    p = requests.post('https://api.bls.gov/publicAPI/v2/timeseries/data/', data=data, headers=headers)
    json_data = json.loads(p.text)

    for series in json_data['Results']['series']:
        df = pd.DataFrame()
        seriesID = series['seriesID']
        citycode = seriesID[3:-2]
        years = []
        periods = []
        values = []
        dates = []
        for val in series['data']:
            years.append(val["year"])
            periods.append(val["period"])
            values.append(val["value"])
            dates.append(val["period"][-2:]+"/"+val["year"])

        df['series_id'] = [seriesID for _ in range(len(series['data']))]
        df['date'] = dates
        df['year'] = years
        df['period'] = periods
        df['value'] = values
        df['city'] = [cities_dict[citycode] for _ in range(len(series['data']))]
        df['series_type'] = [value[-1] for value in df['series_id']]
        df['series_type'] = df['series_type'].map({'3': 'unemployment_rate',
                                                   '4': 'unemployment',
                                                   '5': 'employment',
                                                   '6': 'labor_force'})
        
        ls_all_df.append(df)

pd.concat(ls_all_df).to_csv('./all_data_by_city.csv', index=False)

In [92]:
# Verify that data was pulled and saved
pd.read_csv('./all_data_by_city.csv')

Unnamed: 0,series_id,date,year,period,value,city,series_type
0,LAUCS090731000000003,07/2020,2020,M07,9.1,"Branford town, CT",unemployment_rate
1,LAUCS090731000000003,06/2020,2020,M06,9.1,"Branford town, CT",unemployment_rate
2,LAUCS090731000000003,05/2020,2020,M05,8.9,"Branford town, CT",unemployment_rate
3,LAUCS090731000000003,04/2020,2020,M04,8.0,"Branford town, CT",unemployment_rate
4,LAUCS090731000000003,03/2020,2020,M03,3.1,"Branford town, CT",unemployment_rate
...,...,...,...,...,...,...,...
850627,LAUCT564505000000006,05/2011,2011,M05,18591.0,"Laramie city, WY",labor_force
850628,LAUCT564505000000006,04/2011,2011,M04,18682.0,"Laramie city, WY",labor_force
850629,LAUCT564505000000006,03/2011,2011,M03,18533.0,"Laramie city, WY",labor_force
850630,LAUCT564505000000006,02/2011,2011,M02,18525.0,"Laramie city, WY",labor_force
