# BLS Unemployment Data Analysis

From the [BLS Data site](https://www.bls.gov/data/), click on one-screen search to find the series IDs and format you are looking for.

## Changes in Unemployment by State

Local Area Unemployment Statistics

In [1]:
import pandas as pd
import blspandas
import api_key
import requests
import json

# Pull a list of state fips Ids.
bls_fips = blspandas.get_state_fips()

# Create a dictionary of BLS query Ids and States
bls_dict1, bls_dict2 = blspandas.get_bls_id_dicts(bls_fips,'LAUST','0000000000003')

# API key in config.py which contains: bls_key = 'key'
key = '?registrationkey={}'.format(api_key.bls_key)

# Query the BLS API using batches
query1 = blspandas.query_bls(bls_dict1,key)
query2 = blspandas.query_bls(bls_dict2,key)

# Combine the queries horizontally
combined_queries = pd.concat([query1,query2],axis=1)

# Clean the dataframe and melt to vertical format
df = blspandas.clean_bls_data(combined_queries)

df.head()

Unnamed: 0,State,Date,Pct_Unemployed
0,Montana,2020-06-01,7.0
1,Nebraska,2020-06-01,6.9
2,Nevada,2020-06-01,15.2
3,New Hampshire,2020-06-01,11.7
4,New Jersey,2020-06-01,16.4


In [164]:
import pandas as pd

df = pd.read_csv('BLS - Monthly state unemployment.csv')
df.head()

Unnamed: 0,Date,State,Pct_Unemployed
0,1/1/19,Alabama,4.2
1,2/1/19,Alabama,3.8
2,3/1/19,Alabama,3.4
3,4/1/19,Alabama,2.6
4,5/1/19,Alabama,2.5


### Change in unemployment

In [165]:
jan_emp = df[df['Date'].str.contains('1/1/20')][['State','Pct_Unemployed']].set_index('State')
jan_emp.head()

Unnamed: 0_level_0,Pct_Unemployed
State,Unnamed: 1_level_1
Alabama,3.2
Alaska,6.6
Arizona,4.6
Arkansas,4.1
California,4.3


In [166]:
june_emp = df[df['Date'].str.contains('6/1/20')][['State','Pct_Unemployed']].set_index('State')
june_emp.head()

Unnamed: 0_level_0,Pct_Unemployed
State,Unnamed: 1_level_1
Alabama,8.0
Alaska,12.3
Arizona,10.3
Arkansas,8.2
California,15.1


In [167]:
chg_emp = june_emp.subtract(jan_emp)
chg_emp.to_csv('Change in unemployment by STATE.csv')

In [168]:
chg_emp.head()

Unnamed: 0_level_0,Pct_Unemployed
State,Unnamed: 1_level_1
Alabama,4.8
Alaska,5.7
Arizona,5.7
Arkansas,4.1
California,10.8


__Find the top ten states with highest changes in unemployment__

In [155]:
top_chg_emp = chg_emp.sort_values(by='Pct_Unemployed',ascending=False)[:10]
top_chg_emp

Unnamed: 0_level_0,Pct_Unemployed
State,Unnamed: 1_level_1
Massachusetts,14.1
New Jersey,12.0
Hawaii,11.5
New York,11.5
Nevada,11.4
California,10.8
Michigan,10.6
Illinois,10.6
New Hampshire,8.6
Delaware,8.5


__Now we want to select only these states from our original dataset of monthly unemployment by state__

In [156]:
states = top_chg_emp.index

# df[df['A'].isin([3, 6])]
top_states_df = df[df['State'].isin(states)]
top_states_df

Unnamed: 0,Date,State,Pct_Unemployed
72,1/1/19,California,4.8
73,2/1/19,California,4.5
74,3/1/19,California,4.5
75,4/1/19,California,3.8
76,5/1/19,California,3.6
...,...,...,...
571,2/1/20,New York,3.9
572,3/1/20,New York,4.2
573,4/1/20,New York,15.1
574,5/1/20,New York,14.2


In [157]:
top_states_df.to_csv('States with highest changes in unemployment.csv',index=False)

## Unemployment and Race
- Go back to the bls.gov one-screen finder for [Labor Force Statistics including the National Unemployment Rate](https://www.bls.gov/data/)

In [16]:
import pandas as pd
import blspandas
import api_key
import requests
import json

bls_dict = {
    'LNU04076977': 'White',
    'LNU04076978': 'Black',
    'LNU04076979': 'Asian'
}

# API key in config.py which contains: bls_key = 'key'
key = '?registrationkey={}'.format(api_key.bls_key)

# Query the BLS API using batches
query = blspandas.query_bls(bls_dict,key)
'''
# Clean the dataframe and melt to vertical format
df = blspandas.clean_bls_data(combined_queries)

df.head()
'''

'\n# Clean the dataframe and melt to vertical format\ndf = blspandas.clean_bls_data(combined_queries)\n\ndf.head()\n'

In [23]:
query.to_csv('Racial unemployment stats.csv')

In [1]:
import pandas as pd

df = pd.read_csv('Racial unemployment stats.csv')

df.head()

Unnamed: 0,Date,White,Black,Asian
0,1/1/19,3.9,7.0,3.2
1,2/1/19,3.4,7.1,3.1
2,3/1/19,3.3,6.6,2.9
3,4/1/19,2.8,5.9,2.1
4,5/1/19,2.9,5.7,2.3


__Let's find some rolling percent changes for each race__

In [2]:
df['White_pct_change'] = df.White.pct_change()
df['Black_pct_change'] = df.Black.pct_change()
df['Asian_pct_change'] = df.Asian.pct_change()
df.head()

Unnamed: 0,Date,White,Black,Asian,White_pct_change,Black_pct_change,Asian_pct_change
0,1/1/19,3.9,7.0,3.2,,,
1,2/1/19,3.4,7.1,3.1,-0.128205,0.014286,-0.03125
2,3/1/19,3.3,6.6,2.9,-0.029412,-0.070423,-0.064516
3,4/1/19,2.8,5.9,2.1,-0.151515,-0.106061,-0.275862
4,5/1/19,2.9,5.7,2.3,0.035714,-0.033898,0.095238


In [4]:
df.to_csv('Racial unemployment stats - pct change.csv')

In [7]:
# Melt the dataframe into vertical format.
melted_pct_change = pd.melt(df, id_vars=['Date'],
                   value_vars=df.columns[4:],
                   var_name='Race',
                   value_name='Unemployment Rate Percent Change')

melted_unemp = pd.melt(df, id_vars=['Date'],
                   value_vars=df.columns[1:4],
                   var_name='Race',
                   value_name='Unemployment Rate')

joined_race_unemp_df = pd.concat([melted_pct_change,melted_unemp],axis=1)
joined_race_unemp_df.to_csv('Racial Stats unemployment rate and percent change.csv')

In [29]:
melted_df.to_csv('Race and unemployed percent change.csv')

Index(['White_pct_change', 'Black_pct_change', 'Asian_pct_change'], dtype='object')

# Unemployment by industry

Employment, Hours, and Earnings from the Current Employment Statistics survey (National)

__Goal: dataframe with Date | Employment Ct | Change | Percent Change__

In [1]:
import pandas as pd
import blspandas
import api_key
import requests
import json

bls_dict = {
    'CEU0600000001': 'Goods producing',
    'CEU0700000001': 'Service providing',
    'CEU0800000001': 'Private service providing',
    'CEU1000000001': 'Mining and logging',
    'CEU2000000001': 'Construction',
    'CEU3000000001': 'Manufacturing',
    'CEU3100000001': 'Durable goods',
    'CEU3200000001': 'Nondurable goods',
    'CEU4000000001': 'Trade Transportation and Utilities',
    'CEU4142000001': 'Wholesale Trade',
    'CEU4200000001': 'Retail Trade',
    'CEU4300000001': 'Transportation and Warehousing',
    'CEU4422000001': 'Utilities',
    'CEU5000000001': 'Information',
    'CEU5500000001': 'Financial activities',
    'CEU6000000001': 'Professional and business services',
    'CEU6500000001': 'Education and health services',
    'CEU7000000001': 'Leisure and hospitality',
    'CEU8000000001': 'Other services',
    'CEU9000000001': 'Government' 
}

# API key in config.py which contains: bls_key = 'key'
key = '?registrationkey={}'.format(api_key.bls_key)

# Query the BLS API using batches
query = blspandas.query_bls(bls_dict,key).reset_index().rename(columns={'index':'Date'})

query.to_csv('Industry unemployment stats.csv')

In [141]:
import pandas as pd
df = pd.read_csv('Industry unemployment stats.csv')

'''
We are going to have three dataframes:

1. Count
2. Change
3. Percent changes

We will perform changes, melt them to vertical format, and combine them.
'''

# Let's prep pure count by melting to our desired format
# pure count
ind_ct = df.copy()
ind_ct.head()

Unnamed: 0,Date,Goods producing,Service providing,Private service providing,Mining and logging,Construction,Manufacturing,Durable goods,Nondurable goods,Trade Transportation and Utilities,...,Retail Trade,Transportation and Warehousing,Utilities,Information,Financial activities,Professional and business services,Education and health services,Leisure and hospitality,Other services,Government
0,1/1/19,20539,127340,104962,733,7069,12737,8023,4714,27598,...,15628.1,5582.6,549.7,2799,8618,20709,23724,15739,5775,22378
1,2/1/19,20554,128130,105337,730,7062,12762,8035,4727,27351,...,15429.4,5521.1,547.1,2830,8633,20841,24044,15841,5797,22793
2,3/1/19,20675,128684,105798,731,7170,12774,8039,4735,27358,...,15425.8,5525.3,549.4,2838,8658,20908,24113,16090,5833,22886
3,4/1/19,20884,129546,106598,732,7377,12775,8038,4737,27425,...,15474.7,5525.7,546.5,2832,8678,21169,24206,16406,5882,22948
4,5/1/19,21089,130020,107164,739,7540,12810,8052,4758,27554,...,15540.7,5564.7,548.2,2842,8707,21239,24121,16788,5913,22856


In [142]:
melted_ct = pd.melt(ind_ct, id_vars=['Date'],
                   value_vars=ind_ct.columns[1:],
                   var_name='Industry',
                   value_name='Employment')

melted_ct.head()

Unnamed: 0,Date,Industry,Employment
0,1/1/19,Goods producing,20539.0
1,2/1/19,Goods producing,20554.0
2,3/1/19,Goods producing,20675.0
3,4/1/19,Goods producing,20884.0
4,5/1/19,Goods producing,21089.0


In [143]:
melted_ct.to_csv('employment per industry.csv', index=False)

__Change down the column / per industry__

In [144]:
# Calculate change down each column
ind_ct_chg = ind_ct.set_index('Date').diff(axis=0).reset_index()
ind_ct_chg.head()

Unnamed: 0,Date,Goods producing,Service providing,Private service providing,Mining and logging,Construction,Manufacturing,Durable goods,Nondurable goods,Trade Transportation and Utilities,...,Retail Trade,Transportation and Warehousing,Utilities,Information,Financial activities,Professional and business services,Education and health services,Leisure and hospitality,Other services,Government
0,1/1/19,,,,,,,,,,...,,,,,,,,,,
1,2/1/19,15.0,790.0,375.0,-3.0,-7.0,25.0,12.0,13.0,-247.0,...,-198.7,-61.5,-2.6,31.0,15.0,132.0,320.0,102.0,22.0,415.0
2,3/1/19,121.0,554.0,461.0,1.0,108.0,12.0,4.0,8.0,7.0,...,-3.6,4.2,2.3,8.0,25.0,67.0,69.0,249.0,36.0,93.0
3,4/1/19,209.0,862.0,800.0,1.0,207.0,1.0,-1.0,2.0,67.0,...,48.9,0.4,-2.9,-6.0,20.0,261.0,93.0,316.0,49.0,62.0
4,5/1/19,205.0,474.0,566.0,7.0,163.0,35.0,14.0,21.0,129.0,...,66.0,39.0,1.7,10.0,29.0,70.0,-85.0,382.0,31.0,-92.0


In [145]:
# Melt to vertical format
melted_ind_ct_chg = pd.melt(ind_ct_chg, id_vars=['Date'],
                   value_vars=ind_ct_chg.columns[1:],
                   var_name='Industry',
                   value_name='Change in Unemployment')

melted_ind_ct_chg = melted_ind_ct_chg.dropna()

melted_ind_ct_chg.head()

Unnamed: 0,Date,Industry,Change in Unemployment
1,2/1/19,Goods producing,15.0
2,3/1/19,Goods producing,121.0
3,4/1/19,Goods producing,209.0
4,5/1/19,Goods producing,205.0
5,6/1/19,Goods producing,278.0


In [147]:
melted_ind_ct_chg.to_csv('Change in employment by industry.csv',index=False)

__Calculate percent change down the column__ <br>
[Pandas percent change](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.pct_change.html)

In [37]:
# Calculate percent change down each column
ind_pct_chg = ind_ct.set_index('Date').pct_change().reset_index()
ind_pct_chg.head()

Unnamed: 0,Date,Goods producing,Service providing,Private service providing,Mining and logging,Construction,Manufacturing,Durable goods,Nondurable goods,Trade Transportation and Utilities,...,Retail Trade,Transportation and Warehousing,Utilities,Information,Financial activities,Professional and business services,Education and health services,Leisure and hospitality,Other services,Government
0,1/1/19,,,,,,,,,,...,,,,,,,,,,
1,2/1/19,0.00073,0.006204,0.003573,-0.004093,-0.00099,0.001963,0.001496,0.002758,-0.00895,...,-0.012714,-0.011016,-0.00473,0.011075,0.001741,0.006374,0.013488,0.006481,0.00381,0.018545
2,3/1/19,0.005887,0.004324,0.004376,0.00137,0.015293,0.00094,0.000498,0.001692,0.000256,...,-0.000233,0.000761,0.004204,0.002827,0.002896,0.003215,0.00287,0.015719,0.00621,0.00408
3,4/1/19,0.010109,0.006699,0.007562,0.001368,0.02887,7.8e-05,-0.000124,0.000422,0.002449,...,0.00317,7.2e-05,-0.005278,-0.002114,0.00231,0.012483,0.003857,0.01964,0.0084,0.002709
4,5/1/19,0.009816,0.003659,0.00531,0.009563,0.022096,0.00274,0.001742,0.004433,0.004704,...,0.004265,0.007058,0.003111,0.003531,0.003342,0.003307,-0.003512,0.023284,0.00527,-0.004009


In [62]:
# Melt to vertical format
melted_ind_pct_chg = pd.melt(ind_pct_chg, id_vars=['Date'],
                   value_vars=ind_pct_chg.columns[1:],
                   var_name='Industry',
                   value_name='Percent Change in Unemployment')

melted_ind_pct_chg['Percent Change in Unemployment'] = round(melted_ind_pct_chg['Percent Change in Unemployment'],6)
melted_ind_pct_chg = melted_ind_pct_chg.dropna()
melted_ind_pct_chg.head()

Unnamed: 0,Date,Industry,Percent Change in Unemployment
1,2/1/19,Goods producing,0.00073
2,3/1/19,Goods producing,0.005887
3,4/1/19,Goods producing,0.010109
4,5/1/19,Goods producing,0.009816
5,6/1/19,Goods producing,0.013182


In [63]:
melted_ind_pct_chg.to_csv('Percent Change in Unemployment by Industry.csv', index=False)

### Combine the three dataframes into a new dataframe

In [56]:
# Merge the three dataframe
industry_emp_chg = melted_ct.merge(melted_ind_ct_chg, on='Date',).merge(melted_ind_pct_chg, on='Date')
industry_emp_chg.head()

Unnamed: 0,Date,Industry_x,Unemployment,Industry_y,Change in Unemployment,Industry,Percent Change in Unemployment
0,1/1/19,Goods producing,20539.0,Goods producing,,Goods producing,
1,1/1/19,Goods producing,20539.0,Goods producing,,Service providing,
2,1/1/19,Goods producing,20539.0,Goods producing,,Private service providing,
3,1/1/19,Goods producing,20539.0,Goods producing,,Mining and logging,
4,1/1/19,Goods producing,20539.0,Goods producing,,Construction,


In [57]:
# Choose only the columns we need, removing the unnecessary Date and Industry columns
industry_emp_chg = industry_emp_chg.loc[:,('Date','Industry','Unemployment','Change in Unemployment','Percent Change in Unemployment')]
industry_emp_chg.head()

Unnamed: 0,Date,Industry,Unemployment,Change in Unemployment,Percent Change in Unemployment
0,1/1/19,Goods producing,20539.0,,
1,1/1/19,Service providing,20539.0,,
2,1/1/19,Private service providing,20539.0,,
3,1/1/19,Mining and logging,20539.0,,
4,1/1/19,Construction,20539.0,,


In [58]:
industry_emp_chg.to_csv('Industry employment changes.csv',index=False)

# Jobs lost to COVID-19 by Industry

In [122]:
from datetime import datetime

jobs_lost = ind_ct.copy()

# Extract Year and Month from the Date column
jobs_lost['Month'] = pd.to_datetime(jobs_lost.Date).dt.month
jobs_lost['Year'] = pd.to_datetime(jobs_lost.Date).dt.year

# Subset January 2020 and June 2020
jobs_lost = jobs_lost[(jobs_lost.Year == 2020) & (jobs_lost.Month.isin([1,6]))]
jobs_lost

Unnamed: 0,Date,Goods producing,Service providing,Private service providing,Mining and logging,Construction,Manufacturing,Durable goods,Nondurable goods,Trade Transportation and Utilities,...,Utilities,Information,Financial activities,Professional and business services,Education and health services,Leisure and hospitality,Other services,Government,Month,Year
12,1/1/20,20707,129446,106849,701,7240,12766,8019,4747,27734,...,545.2,2857,8762,21139,24403,16092,5862,22597,1,2020
17,6/1/20,20156,118357,97204,628,7359,12169,7616,4553,25753,...,540.0,2586,8650,19836,22569,12556,5254,21153,6,2020


In [123]:
jobs_lost = jobs_lost.set_index('Date').diff(axis=0).iloc[-1,1:-2]
jobs_lost = pd.DataFrame(jobs_lost).reset_index()
jobs_lost = jobs_lost.rename(columns={'index':'Industry','6/1/20': 'Jobs lost'})

In [170]:
jobs_lost.head()

Unnamed: 0,Industry,Jobs lost
0,Service providing,-11089.0
1,Private service providing,-9645.0
2,Mining and logging,-73.0
3,Construction,119.0
4,Manufacturing,-597.0


In [169]:
jobs_lost.to_csv('jobs lost.csv',index=False)