# Unemployment rate in U.S.

In this work I'm going to made use of the API from U.S. Bureau of Labor Statistics, to get data about the unemployment in each State.

In [67]:
import pandas as pd
import numpy as np
import requests
import json

# import table with states and codes
state_code = pd.read_csv('state_code.csv')
state_code.head()

Unnamed: 0,state_code,state_name,state_id
0,1,Alabama,AL
1,2,Alaska,AK
2,4,Arizona,AZ
3,5,Arkansas,AR
4,6,California,CA


In [68]:
# obtain the seriesid for each state from the list of state code
seriesid_list = []
for code in state_code['state_code']:
    if code < 10:
        seriesid_list.append('LASST0' + str(code) + '0000000000003')
    else: 
        seriesid_list.append('LASST' + str(code) + '0000000000003')
state_code['series_id'] = seriesid_list
# U.S. Bureau of Labor Statistics consents to pull API up to 50 seriesID
# So we drop the value of D.C. from the list of seriesID, leaving the IDs for the 50 states
state_code[state_code['state_name'] == 'District of Columbia']

Unnamed: 0,state_code,state_name,state_id,series_id
8,11,District of Columbia,DC,LASST110000000000003


In [69]:
# removing the seriesID from the list
seriesid_list.remove('LASST110000000000003')
len(seriesid_list)

50

In [70]:
# BLS allows only 25 seriesID's requests, so we split the list in two parts 
seriesid_listA = []
seriesid_listB = []
for i in range(len(seriesid_list)):
    if i < len(seriesid_list)/2:
        seriesid_listA.append(seriesid_list[i])
    else:
        seriesid_listB.append(seriesid_list[i])

In [71]:
# pull data from U.S. Bureau of Labor Statistics API 
# procede for seriesID_listA
headers = {'Content-type': 'application/json'}
parameters = json.dumps({"seriesid" : seriesid_listA, "startyear":"2019", "endyear":"2020"})
responseA = requests.post('https://api.bls.gov/publicAPI/v2/timeseries/data/?registrationkey=b205f2551a34471eae93f557017a8da6', data = parameters, headers=headers)
print(responseA.status_code) 
#print(response.content)

200


In [72]:
# pull data from U.S. Bureau of Labor Statistics API 
# procede for seriesID_listB
headers = {'Content-type': 'application/json'}
parameters = json.dumps({"seriesid" : seriesid_listB, "startyear":"2019", "endyear":"2020"})
responseB = requests.post('https://api.bls.gov/publicAPI/v2/timeseries/data/?registrationkey=b205f2551a34471eae93f557017a8da6', data = parameters, headers=headers)
print(responseB.status_code) 

200


In [73]:
# pull data from U.S. Bureau of Labor Statistics API 
# procede for D.C. seriesID
headers = {'Content-type': 'application/json'}
parameters = json.dumps({"seriesid" : ["LASST110000000000003"], "startyear":"2019", "endyear":"2020"})
responseDC = requests.post('https://api.bls.gov/publicAPI/v2/timeseries/data/?registrationkey=b205f2551a34471eae93f557017a8da6', data = parameters, headers=headers)
print(responseDC.status_code)
#print(responseDC.content)

200


In [74]:
df_list = [responseA.json(), responseB.json(), responseDC.json()]

In [75]:
# create a list of dataframe from json results
df_list = [responseA.json(), responseB.json(), responseDC.json()]

# create a pandas dataframe merging all the values in the dataframes
state = []
year = []
period = []
value = []
for i in range(len(df_list)):
    for serie in df_list[i]['Results']['series']:
         for x in serie['data']:
            state.append(state_code['state_name'][state_code['series_id'] == serie['seriesID']].item())
            #state.append(serie['seriesID'])
            year.append(x['year'])
            period.append(x['period'])
            value.append(x['value'])

un_data = pd.DataFrame(list(zip(state, year, period, value)), 
               columns =['state', 'year', 'period', 'value']) 
        
un_data['ID'] = data['state']+data['year']+data['period']

un_data

  if sys.path[0] == '':


Unnamed: 0,state,year,period,value,ID
0,Alabama,2020,M06,7.5,Alabama2020M06
1,Alabama,2020,M05,9.6,Alabama2020M05
2,Alabama,2020,M04,13.8,Alabama2020M04
3,Alabama,2020,M03,3.0,Alabama2020M03
4,Alabama,2020,M02,2.7,Alabama2020M02
...,...,...,...,...,...
913,District of Columbia,2019,M05,5.5,District of Columbia2019M05
914,District of Columbia,2019,M04,5.6,District of Columbia2019M04
915,District of Columbia,2019,M03,5.7,District of Columbia2019M03
916,District of Columbia,2019,M02,5.8,District of Columbia2019M02


In [76]:
# obtain the seriesid for LABOR FORCE per each state from the list of state code
LF_seriesid_list = []
for code in state_code['state_code']:
    if code < 10:
        LF_seriesid_list.append('LASST0' + str(code) + '0000000000006')
    else: 
        LF_seriesid_list.append('LASST' + str(code) + '0000000000006')
state_code['LF_series_id'] = LB_seriesid_list
# U.S. Bureau of Labor Statistics consents to pull API up to 50 seriesID
# So we drop the value of D.C. from the list of seriesID, leaving the IDs for the 50 states
state_code[state_code['state_name'] == 'District of Columbia']

Unnamed: 0,state_code,state_name,state_id,series_id,LF_series_id
8,11,District of Columbia,DC,LASST110000000000003,LASST110000000000006


In [77]:
# removing the seriesID from the list
LF_seriesid_list.remove('LASST110000000000006')
len(LF_seriesid_list)

50

In [78]:
# BLS allows only 25 seriesID's requests, so we split the list in two parts 
LF_seriesid_listA = []
LF_seriesid_listB = []
for i in range(len(LF_seriesid_list)):
    if i < len(LF_seriesid_list)/2:
        LF_seriesid_listA.append(LF_seriesid_list[i])
    else:
        LF_seriesid_listB.append(LF_seriesid_list[i])

In [79]:
# pull data from U.S. Bureau of Labor Statistics API 
# procede for seriesID_listA
headers = {'Content-type': 'application/json'}
parameters = json.dumps({"seriesid" : LF_seriesid_listA, "startyear":"2019", "endyear":"2020"})
LF_responseA = requests.post('https://api.bls.gov/publicAPI/v2/timeseries/data/?registrationkey=b205f2551a34471eae93f557017a8da6', data = parameters, headers=headers)
print(LF_responseA.status_code) 
#print(LF_responseA.content)

200


In [80]:
# pull data from U.S. Bureau of Labor Statistics API 
# procede for seriesID_listB
headers = {'Content-type': 'application/json'}
parameters = json.dumps({"seriesid" : LF_seriesid_listB, "startyear":"2019", "endyear":"2020"})
LF_responseB = requests.post('https://api.bls.gov/publicAPI/v2/timeseries/data/?registrationkey=b205f2551a34471eae93f557017a8da6', data = parameters, headers=headers)
print(LF_responseB.status_code) 

200


In [81]:
# pull data from U.S. Bureau of Labor Statistics API 
# procede for D.C. seriesID
headers = {'Content-type': 'application/json'}
parameters = json.dumps({"seriesid" : ["LASST110000000000006"], "startyear":"2019", "endyear":"2020"})
LF_responseDC = requests.post('https://api.bls.gov/publicAPI/v2/timeseries/data/?registrationkey=b205f2551a34471eae93f557017a8da6', data = parameters, headers=headers)
print(LF_responseDC.status_code)
#print(LF_responseDC.content)

200


In [82]:
# create a list of dataframe from json results
LF_df_list = [LF_responseA.json(), LF_responseB.json(), LF_responseDC.json()]

# create a pandas dataframe merging all the values in the dataframes
ID = []
LF_value = []
for i in range(len(LF_df_list)):
    for serie in LF_df_list[i]['Results']['series']:
         for x in serie['data']:
            ID.append(state_code['state_name'][state_code['LF_series_id'] == serie['seriesID']].item() \
                      + str(x['year']) + str(x['period']))
            LF_value.append(x['value'])

LF_data = pd.DataFrame(list(zip(ID, LF_value)), 
               columns =['ID', 'LF_value']) 
        
LF_data

  # This is added back by InteractiveShellApp.init_path()


Unnamed: 0,ID,LF_value
0,Alabama2020M06,2195843
1,Alabama2020M05,2239608
2,Alabama2020M04,2198485
3,Alabama2020M03,2218590
4,Alabama2020M02,2243960
...,...,...
913,District of Columbia2019M05,409126
914,District of Columbia2019M04,408698
915,District of Columbia2019M03,408166
916,District of Columbia2019M02,407526


In [83]:
data = pd.merge(un_data,
               LF_data,
               on = 'ID')
data

Unnamed: 0,state,year,period,value,ID,LF_value
0,Alabama,2020,M06,7.5,Alabama2020M06,2195843
1,Alabama,2020,M05,9.6,Alabama2020M05,2239608
2,Alabama,2020,M04,13.8,Alabama2020M04,2198485
3,Alabama,2020,M03,3.0,Alabama2020M03,2218590
4,Alabama,2020,M02,2.7,Alabama2020M02,2243960
...,...,...,...,...,...,...
913,District of Columbia,2019,M05,5.5,District of Columbia2019M05,409126
914,District of Columbia,2019,M04,5.6,District of Columbia2019M04,408698
915,District of Columbia,2019,M03,5.7,District of Columbia2019M03,408166
916,District of Columbia,2019,M02,5.8,District of Columbia2019M02,407526


In [84]:
# obtain the seriesid for EMPLOYMENT per each state from the list of state code
EMP_seriesid_list = []
for code in state_code['state_code']:
    if code < 10:
        EMP_seriesid_list.append('LASST0' + str(code) + '0000000000005')
    else: 
        EMP_seriesid_list.append('LASST' + str(code) + '0000000000005')
state_code['EMP_series_id'] = EMP_seriesid_list
# U.S. Bureau of Labor Statistics consents to pull API up to 50 seriesID
# So we drop the value of D.C. from the list of seriesID, leaving the IDs for the 50 states
state_code[state_code['state_name'] == 'District of Columbia']

Unnamed: 0,state_code,state_name,state_id,series_id,LF_series_id,EMP_series_id
8,11,District of Columbia,DC,LASST110000000000003,LASST110000000000006,LASST110000000000005


In [85]:
# removing the seriesID from the list
EMP_seriesid_list.remove('LASST110000000000005')
len(EMP_seriesid_list)

50

In [86]:
# BLS allows only 25 seriesID's requests, so we split the list in two parts 
EMP_seriesid_listA = []
EMP_seriesid_listB = []
for i in range(len(EMP_seriesid_list)):
    if i < len(EMP_seriesid_list)/2:
        EMP_seriesid_listA.append(EMP_seriesid_list[i])
    else:
        EMP_seriesid_listB.append(EMP_seriesid_list[i])

In [87]:
# pull data from U.S. Bureau of Labor Statistics API 
# procede for seriesID_listA
headers = {'Content-type': 'application/json'}
parameters = json.dumps({"seriesid" : EMP_seriesid_listA, "startyear":"2019", "endyear":"2020"})
EMP_responseA = requests.post('https://api.bls.gov/publicAPI/v2/timeseries/data/?registrationkey=b205f2551a34471eae93f557017a8da6', data = parameters, headers=headers)
print(EMP_responseA.status_code) 
#print(EMP_responseA.content)

200


In [88]:
# pull data from U.S. Bureau of Labor Statistics API 
# procede for seriesID_listB
headers = {'Content-type': 'application/json'}
parameters = json.dumps({"seriesid" : EMP_seriesid_listB, "startyear":"2019", "endyear":"2020"})
EMP_responseB = requests.post('https://api.bls.gov/publicAPI/v2/timeseries/data/?registrationkey=b205f2551a34471eae93f557017a8da6', data = parameters, headers=headers)
print(EMP_responseB.status_code) 

200


In [89]:
# pull data from U.S. Bureau of Labor Statistics API 
# procede for D.C. seriesID
headers = {'Content-type': 'application/json'}
parameters = json.dumps({"seriesid" : ["LASST110000000000005"], "startyear":"2019", "endyear":"2020"})
EMP_responseDC = requests.post('https://api.bls.gov/publicAPI/v2/timeseries/data/?registrationkey=b205f2551a34471eae93f557017a8da6', data = parameters, headers=headers)
print(EMP_responseDC.status_code)
#print(EMP_responseDC.content)

200


In [90]:
# create a list of dataframe from json results
EMP_df_list = [EMP_responseA.json(), EMP_responseB.json(), EMP_responseDC.json()]

# create a pandas dataframe merging all the values in the dataframes
ID = []
EMP_value = []
for i in range(len(EMP_df_list)):
    for serie in EMP_df_list[i]['Results']['series']:
         for x in serie['data']:
            ID.append(state_code['state_name'][state_code['EMP_series_id'] == serie['seriesID']].item() \
                      + str(x['year']) + str(x['period']))
            EMP_value.append(x['value'])

EMP_data = pd.DataFrame(list(zip(ID, EMP_value)), 
               columns =['ID', 'EMP_value']) 
        
EMP_data

  # This is added back by InteractiveShellApp.init_path()


Unnamed: 0,ID,EMP_value
0,Alabama2020M06,2030073
1,Alabama2020M05,2023565
2,Alabama2020M04,1895950
3,Alabama2020M03,2151586
4,Alabama2020M02,2184305
...,...,...
913,District of Columbia2019M05,386492
914,District of Columbia2019M04,385659
915,District of Columbia2019M03,384810
916,District of Columbia2019M02,384015


In [91]:
data = pd.merge(data,
               EMP_data,
               on = 'ID')
data

Unnamed: 0,state,year,period,value,ID,LF_value,EMP_value
0,Alabama,2020,M06,7.5,Alabama2020M06,2195843,2030073
1,Alabama,2020,M05,9.6,Alabama2020M05,2239608,2023565
2,Alabama,2020,M04,13.8,Alabama2020M04,2198485,1895950
3,Alabama,2020,M03,3.0,Alabama2020M03,2218590,2151586
4,Alabama,2020,M02,2.7,Alabama2020M02,2243960,2184305
...,...,...,...,...,...,...,...
913,District of Columbia,2019,M05,5.5,District of Columbia2019M05,409126,386492
914,District of Columbia,2019,M04,5.6,District of Columbia2019M04,408698,385659
915,District of Columbia,2019,M03,5.7,District of Columbia2019M03,408166,384810
916,District of Columbia,2019,M02,5.8,District of Columbia2019M02,407526,384015


In [92]:
import datetime
data['date'] = (data['period'].str[1:]+'/'+data['year']).astype('datetime64[M]')
#data['date'] = pd.to_datetime(data['date']).dt.to_period('M')
data['value'] = data['value'].astype('float64')
data.dtypes

state                object
year                 object
period               object
value               float64
ID                   object
LF_value             object
EMP_value            object
date         datetime64[ns]
dtype: object

In [93]:
data.to_csv('unemployemnt.csv')