In [466]:
import json
import os

import fiona
import geopandas as gpd
import pandas as pd
import prettytable
import requests

pd.set_option('display.max_columns', None)

In [467]:
# Inputs: the MSA centroid points and the sheet of BLS series IDs per MSA.
msa = gpd.read_file('../../../data/spatial/msa/usMsaCentroids.geojson')
blsSeries = pd.read_excel(io='../msaBlsSeries.xlsx', sheet_name="blsSeries")

In [468]:
# Full English month name -> calendar month number (1-12).
# Used to turn the `periodName` strings collected from the API into numeric months.
monthNames = ['January', 'February', 'March', 'April', 'May', 'June',
              'July', 'August', 'September', 'October', 'November', 'December']
months = {name: number for number, name in enumerate(monthNames, start=1)}

In [469]:
# Plain Python lists of the series IDs to request, one list per measure.
uRateList, lfList = (
    blsSeries[column].tolist()
    for column in ('unemploymentRateSeries', 'laborForceSeries')
)

In [470]:
# Download the 2018-2020 monthly observations for every unemployment-rate series.
#
# The BLS registration key is read from the BLS_API_KEY environment variable so
# that the credential is never stored in (or shared with) the notebook.
blsApiKey = os.environ.get('BLS_API_KEY')
if not blsApiKey:
    raise RuntimeError('Set the BLS_API_KEY environment variable to your BLS registration key')
url = 'https://api.bls.gov/publicAPI/v2/timeseries/data/?registrationkey=' + blsApiKey

frames = []
chunkSize = 50  # number of series IDs sent per request

# Walk the whole list in chunkSize-wide slices; the number of requests follows
# from the list length, so no series is dropped if the list grows.
for chunkNumber, start in enumerate(range(0, len(uRateList), chunkSize), start=1):
    end = start + chunkSize
    print(chunkNumber, start, end)

    chunk = uRateList[start:end]
    areas = ','.join(chunk)
    response = requests.post(url, data = {"seriesid":areas,"startyear":"2018","endyear":"2020"})
    response.raise_for_status()  # surface HTTP errors instead of parsing an error page

    json_data = response.json()
    results = json_data.get('Results') or {}
    if 'series' not in results:
        # The payload carries no data; report what the API said rather than a bare KeyError.
        raise RuntimeError('BLS API request for series {}-{} returned no data: {} {}'.format(
            start, end, json_data.get('status'), json_data.get('message')))

    # One record per (series, observation); values stay strings exactly as received.
    records = [
        {'seriesId': series['seriesID'],
         'month': observation['periodName'],
         'year': observation['year'],
         'value': observation['value']}
        for series in results['series']
        for observation in series['data']
    ]

    df = pd.DataFrame(records, columns=['seriesId', 'month', 'year', 'value'])
    frames.append(df)

1 0 50
2 50 100
3 100 150
4 150 200
5 200 250
6 250 300
7 300 350
8 350 400


In [471]:
# Stack the per-request frames into one long table of observations.
uRateData = pd.concat(objs=frames)

In [472]:
# Number of distinct series that came back from the API.
uRateData.seriesId.nunique()

381

In [473]:
# Attach the MSA name to every observation, build a first-of-month `date`,
# order by series and date, then keep a 3-letter month label and a numeric value.
uRate = blsSeries[['MSA','unemploymentRateSeries']]
uRateData = (
    uRateData
    .merge(uRate, how='left', left_on='seriesId', right_on='unemploymentRateSeries')
    # Day/Month are helper columns so to_datetime can assemble a date from parts.
    .assign(Day=1, Month=lambda frame: frame['month'].map(months))
    .assign(date=lambda frame: pd.to_datetime(frame[['Month','Day','year']]))
    .sort_values(['seriesId','date'])
    # Month is overwritten with the abbreviated name once the date has been built.
    .assign(
        Month=lambda frame: frame['month'].str[:3],
        value=lambda frame: frame['value'].astype(float),
    )
)

# Repeat that process but with the total labor force

In [474]:
# Download the 2018-2020 monthly observations for every labor-force series.
#
# The BLS registration key is read from the BLS_API_KEY environment variable so
# that the credential is never stored in (or shared with) the notebook.
blsApiKey = os.environ.get('BLS_API_KEY')
if not blsApiKey:
    raise RuntimeError('Set the BLS_API_KEY environment variable to your BLS registration key')
url = 'https://api.bls.gov/publicAPI/v2/timeseries/data/?registrationkey=' + blsApiKey

frames = []
chunkSize = 50  # number of series IDs sent per request

# Walk the whole list in chunkSize-wide slices; the number of requests follows
# from the list length, so no series is dropped if the list grows.
for chunkNumber, start in enumerate(range(0, len(lfList), chunkSize), start=1):
    end = start + chunkSize
    print(chunkNumber, start, end)

    chunk = lfList[start:end]
    areas = ','.join(chunk)
    response = requests.post(url, data = {"seriesid":areas,"startyear":"2018","endyear":"2020"})
    response.raise_for_status()  # surface HTTP errors instead of parsing an error page

    json_data = response.json()
    results = json_data.get('Results') or {}
    if 'series' not in results:
        # The payload carries no data; report what the API said rather than a bare KeyError.
        raise RuntimeError('BLS API request for series {}-{} returned no data: {} {}'.format(
            start, end, json_data.get('status'), json_data.get('message')))

    # One record per (series, observation); values stay strings exactly as received.
    records = [
        {'seriesId': series['seriesID'],
         'month': observation['periodName'],
         'year': observation['year'],
         'value': observation['value']}
        for series in results['series']
        for observation in series['data']
    ]

    df = pd.DataFrame(records, columns=['seriesId', 'month', 'year', 'value'])
    frames.append(df)

1 0 50
2 50 100
3 100 150
4 150 200
5 200 250
6 250 300
7 300 350
8 350 400


In [475]:
# Stack the per-request frames, then show how many distinct series came back.
lfData = pd.concat(objs=frames)
lfData.seriesId.nunique()

381

In [476]:
# Attach the MSA name to every observation, build a first-of-month `date`,
# order by series and date, then keep a 3-letter month label and a numeric value.
lfSeries = blsSeries[['MSA','laborForceSeries']]
lfData = (
    lfData
    .merge(lfSeries, how='left', left_on='seriesId', right_on='laborForceSeries')
    # Day/Month are helper columns so to_datetime can assemble a date from parts.
    .assign(Day=1, Month=lambda frame: frame['month'].map(months))
    .assign(date=lambda frame: pd.to_datetime(frame[['Month','Day','year']]))
    .sort_values(['seriesId','date'])
    # Month is overwritten with the abbreviated name once the date has been built.
    .assign(
        Month=lambda frame: frame['month'].str[:3],
        value=lambda frame: frame['value'].astype(float),
    )
)

In [477]:
# One row per series: the October 2020 rate next to the October 2019 rate.
uRateIsOctober = uRateData['month'] == 'October'

uRateCurr = (
    uRateData
    .loc[uRateIsOctober & (uRateData['year'] == '2020'), ['MSA','seriesId','value']]
    .rename(columns={'value':'October_2020_URate'})
)

uRateLast = (
    uRateData
    .loc[uRateIsOctober & (uRateData['year'] == '2019'), ['seriesId','value']]
    .rename(columns={'value':'October_2019_URate'})
)

uRateSum = uRateCurr.merge(uRateLast, how='left', on='seriesId')
# geoid is characters 5-11 of the series ID.
uRateSum['geoid'] = uRateSum['seriesId'].str.slice(5, 12)

In [478]:
# One row per series: the October 2020 labor force next to the October 2019 value.
lfIsOctober = lfData['month'] == 'October'

lfCurr = (
    lfData
    .loc[lfIsOctober & (lfData['year'] == '2020'), ['MSA','seriesId','value']]
    .rename(columns={'value':'October_2020_LaborForce'})
)

lfLast = (
    lfData
    .loc[lfIsOctober & (lfData['year'] == '2019'), ['seriesId','value']]
    .rename(columns={'value':'October_2019_LaborForce'})
)

lfSum = lfCurr.merge(lfLast, how='left', on='seriesId')
# geoid is characters 5-11 of the series ID.
lfSum['geoid'] = lfSum['seriesId'].str.slice(5, 12)

In [479]:
# Peek at the centroid table to see which key columns it offers.
msa.head(n=2)

Unnamed: 0,geoid,cbsa,area,geometry
0,1312020,12020,"Athens-Clarke County, GA",POINT (-83.21379 33.94901)
1,1312060,12060,"Atlanta-Sandy Springs-Alpharetta, GA",POINT (-84.39957 33.69277)


In [484]:
# Add the join key shared with the centroid table: the last five characters of geoid.
for summary in (lfSum, uRateSum):
    summary['cbsa'] = summary['geoid'].str[-5:]

In [485]:
# Preview the labor-force summary with its new cbsa key.
lfSum.head(n=2)

Unnamed: 0,MSA,seriesId,October_2020_LaborForce,October_2019_LaborForce,geoid,cbsa
0,"Anniston-Oxford, AL",LAUMT011150000000006,44790.0,46508.0,111500,11500
1,"Auburn-Opelika, AL",LAUMT011222000000006,76179.0,77850.0,112220,12220


In [486]:
# Preview the unemployment-rate summary with its new cbsa key.
uRateSum.head(n=2)

Unnamed: 0,MSA,seriesId,October_2020_URate,October_2019_URate,geoid,cbsa
0,"Anniston-Oxford, AL",LAUMT011150000000003,6.6,3.0,111500,11500
1,"Auburn-Opelika, AL",LAUMT011222000000003,4.6,2.2,112220,12220


In [487]:
# (rows, columns) of the labor-force summary.
lfSum.shape

(381, 6)

In [488]:
# (rows, columns) of the unemployment-rate summary.
uRateSum.shape

(381, 6)

In [489]:
# Combine both measures per MSA; columns present in both frames get _x / _y suffixes.
sumStats = lfSum.merge(uRateSum, how='left', on='cbsa')

In [492]:
# Keep the key, the labor-force side's geoid/name, and the two October 2020 measures.
sumStatsColumns = ['cbsa','geoid_x','MSA_x','October_2020_LaborForce','October_2020_URate']
sumStats = sumStats.loc[:, sumStatsColumns]

In [494]:
# Preview the combined summary.
sumStats.head(n=2)

Unnamed: 0,cbsa,geoid_x,MSA_x,October_2020_LaborForce,October_2020_URate
0,11500,111500,"Anniston-Oxford, AL",44790.0,6.6
1,12220,112220,"Auburn-Opelika, AL",76179.0,4.6


In [498]:
# Bring in the centroid geometry (and the centroid table's own geoid) for each MSA.
msaSum = sumStats.merge(msa, how='left', on='cbsa')

In [500]:
# Derive the state abbreviation from the MSA name.
# Taking the last two characters returns the LAST-listed state for a multi-state
# name (e.g. 'IN' for 'Cincinnati, OH-KY-IN'). Use the first two-letter code after
# the final comma instead, and fall back to the last two characters when the name
# does not follow the "<name>, <ST>[-<ST>...]" pattern, so single-state names are
# unchanged.
# NOTE(review): assumes the first-listed state is the one wanted here - confirm.
msaName = msaSum['MSA_x']
msaSum['state'] = (
    msaName.str.extract(r',\s*([A-Z]{2})[^,]*$', expand=False)
    .fillna(msaName.str[-2:])
)

# Final column set for export; `geoid` here is the centroid table's column.
msaSum = (
    msaSum[['cbsa','geoid','MSA_x','state','October_2020_LaborForce','October_2020_URate','geometry']]
    .rename(columns={'MSA_x':'msa'})
)


In [502]:
# Promote the merged table to a GeoDataFrame in WGS 84.
# The CRS is given as an authority string: the {'init': 'epsg:4326'} dict form is
# the deprecated pyproj "+init" syntax and emits a warning when it is parsed.
crs = 'EPSG:4326'
msaSum = gpd.GeoDataFrame(msaSum, crs=crs, geometry=msaSum['geometry'])

  return _prepare_from_string(" ".join(pjargs))


In [503]:
# Preview the export-ready GeoDataFrame.
msaSum.head(n=5)

Unnamed: 0,cbsa,geoid,msa,state,October_2020_LaborForce,October_2020_URate,geometry
0,11500,111500,"Anniston-Oxford, AL",AL,44790.0,6.6,POINT (-85.82603 33.77143)
1,12220,112220,"Auburn-Opelika, AL",AL,76179.0,4.6,POINT (-85.35556 32.60114)
2,13820,113820,"Birmingham-Hoover, AL",AL,549137.0,5.3,POINT (-86.72819 33.40368)
3,19300,119300,"Daphne-Fairhope-Foley, AL",AL,95660.0,5.0,POINT (-87.74984 30.66097)
4,19460,119460,"Decatur, AL",AL,71437.0,4.1,POINT (-87.10264 34.49064)


In [507]:
# Rows with no October 2020 labor-force value.
# NOTE(review): this column comes from the left side of the centroid merge, so this
# check does not reveal MSAs that failed to match a centroid - consider also
# checking `geometry`.
missingLaborForce = msaSum['October_2020_LaborForce'].isna()
nulls = msaSum.loc[missingLaborForce]

In [508]:
# Show any rows flagged above (an empty frame means none were found).
nulls.head(n=5)

Unnamed: 0,cbsa,geoid,msa,state,October_2020_LaborForce,October_2020_URate,geometry


In [509]:
# Write the per-MSA summary with centroid geometry as GeoJSON.
msaSum.to_file(filename='../../../data/spatial/msa/usMsaData.geojson', driver="GeoJSON")

In [None]:
# summarize to get latest and previous year

In [414]:
# Read the MSA list and the BLS series lookup from the same workbook in one pass.
workbookSheets = pd.read_excel('../msaBlsSeries.xlsx', sheet_name=['msaList', 'blsSeries'])

msaList = workbookSheets['msaList']
series = workbookSheets['blsSeries']

In [415]:
# Add the series-ID columns to every MSA in the list, matching on the MSA name.
msaList = msaList.merge(series, how='left', on='MSA')

In [417]:
# Save the joined list to a workbook in the current directory, without the index column.
msaList.to_excel(excel_writer='deteleMsaList.xlsx', index=False)