# Unemployment and Labor Markets over the Business Cycle

We will take a look at data on unemployment and the functioning of labor markets in the United States.

In [None]:
%matplotlib inline

import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt

from pandas_datareader.data import DataReader

In [None]:
from matplotlib import rcParams

# Restore the old behavior of rounding default axis ranges and
# remove the margins around the data on both axes
rcParams.update({
    'axes.autolimit_mode': 'round_numbers',
    'axes.xmargin': 0,
    'axes.ymargin': 0,
})

Below we are downloading two datasets. 

The first dataset, `fred`, contains monthly data on: 

- whether in a given month the US economy was in a recession state (`1`) or not (`0`) - `USREC`
- number of people in labor force in thousands - `CLF16OV`
- number of unemployed people in thousands - `UNEMPLOY`
- unemployment rate in percent - `UNRATE`
- number of job openings (vacancies) in thousands - `JTSJOL`
- job vacancy rate - `JTSJOR`

The second dataset, `hours`, contains quarterly data on:

- real GDP in billions of 2009 dollars - `GDPC1`
- total hours worked in the nonfarm business sector (index) - `HOANBS`
- average hours worked per employee in the nonfarm business sector (index) - `PRS85006023`
- number of employees in the nonfarm business sector (index) - `PRS85006013`

In [None]:
# Sample window (year-month strings) used when downloading the data
start, end = '1945-01', '2019-12'

In [None]:
# Get FRED data: monthly labor-market series
fred_series = ['USREC', 'CLF16OV', 'UNEMPLOY', 'UNRATE', 'JTSJOL', 'JTSJOR']
fred = DataReader(fred_series, 'fred', start=start, end=end)

# Quarterly output, hours and employment series from the same source
hours_series = ['GDPC1', 'HOANBS', 'PRS85006023', 'PRS85006013']
hours = DataReader(hours_series, 'fred', start=start, end=end)

Separate trend and cyclical components of GDP, hours and employment

In [None]:
# Containers for the cyclical and trend components produced by each filter
hp_cycle, hp_trend = pd.DataFrame(), pd.DataFrame()
cf_cycle, cf_trend = pd.DataFrame(), pd.DataFrame()

for series_name in hours.columns:
    # Log level scaled by 100 (missing observations dropped before filtering)
    log_level = 100*np.log(hours[series_name]).dropna()

    # Hodrick-Prescott filter with smoothing parameter 1600
    hp_cycle[series_name], hp_trend[series_name] = sm.tsa.filters.hpfilter(log_level, lamb=1600)

    # Christiano-Fitzgerald band-pass filter keeping periods of 6 to 32 observations
    cf_cycle[series_name], cf_trend[series_name] = sm.tsa.filters.cffilter(log_level, low=6, high=32)

Compare cyclical components of total hours worked vs its components: hours per employee and number of employees

In [None]:
# Human-readable labels, in the same order as the downloaded columns
component_labels = ['Output','Total Hours','Hours per Employee','Employment']
hp_cycle.columns = component_labels
cf_cycle.columns = component_labels

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12,5))

# Each panel compares total hours with one of its two components
panels = [(ax1, 'Employment'), (ax2, 'Hours per Employee')]

for axis, component in panels:
    cf_cycle[['Total Hours', component]].to_period('D').plot(ax=axis, style=['k-','r-'], lw=2)

    # Read the limits after plotting so the shading below spans the full height
    ylim = axis.get_ylim()

    # Zero line and grey bands wherever the recession indicator is non-zero
    axis.hlines(0, cf_cycle.index[0], cf_cycle.index[-1], linewidth=0.5)
    axis.fill_between(fred.index, ylim[0], ylim[1], fred['USREC'], facecolor='lightgrey', edgecolor='lightgrey')

    # Opaque legend box without a border
    l = axis.legend(loc='upper right')
    l.get_frame().set_linewidth(0)
    l.get_frame().set_alpha(1)

# plt.savefig('Hours_CF.pdf', bbox_inches='tight', pad_inches=0.05)
plt.show()

Calculate the variance-covariance matrix of total hours worked and its components

In [None]:
# Covariance matrix of the HP-filtered cyclical components
hours_components = ['Total Hours','Employment','Hours per Employee']
hp_cycle[hours_components].cov()

## Constructing the vacancy rate time series

The statistics on job openings (vacancies) from the `JOLTS` program are available only from December 2000 onwards. However, there are data on the `Help Wanted Index`, gathered by private companies, that cover earlier years. Thanks to the work of Regis Barnichon, we can use them.

In [None]:
# Work on a deep copy so the downloaded `fred` frame is left untouched
dta = fred.copy(deep=True)

# Show the last five rows
dta.tail(5)

In [None]:
# Job openings rate and unemployment rate from December 2000 onwards
jolts_sample = dta.loc['2000-12':, ['JTSJOR','UNRATE']]
jolts_sample.plot(lw=2)

plt.legend(frameon=False)
plt.show()

Read in Regis Barnichon's Composite Help Wanted Index and join the two datasets

In [None]:
# Read the Composite Help Wanted Index (tab-separated; the first five lines
# of the file are skipped)
hwi = pd.read_csv('data/HWI_index_old.txt', delimiter='\t', skiprows=5)

# Manage dates: each entry starts with a four-digit year and ends with a
# two-digit month. Build ISO 'YYYY-MM-01' strings so that parsing does not
# depend on month-first/day-first inference.
dates = [date[:4] + '-' + date[-2:] + '-01' for date in hwi['Date ']]

hwi.index = pd.to_datetime(dates)
hwi.index.name = 'DATE'

# Cleanup: name the column explicitly, because passing `axis` positionally
# (`drop('Date ', 1)`) raises a TypeError in pandas 2.0 and later
hwi = hwi.drop(columns='Date ')
hwi.columns = ['HWI']
hwi.tail()

In [None]:
# Join datasets
df = dta.join(hwi)
df.tail()

Adjust the index to observed levels and splice the data from two sources

In [None]:
# Rescale the help-wanted index so that it equals the JOLTS number of job
# openings in the benchmark month
benchmark = pd.Timestamp('2014-01-01')
df['Vacancies'] = df.loc[benchmark, 'JTSJOL'] * df['HWI'] / df.loc[benchmark, 'HWI']

# From 2005 onwards use the JOLTS series itself. The assignment goes through
# a single .loc call: the chained form df['Vacancies'][...] = ... triggers
# SettingWithCopyWarning and, under pandas copy-on-write, writes to a
# temporary object and leaves df unchanged.
splice_start = '2005-01-01'
df.loc[splice_start:, 'Vacancies'] = df.loc[splice_start:, 'JTSJOL']

df[['Vacancies','JTSJOL']].plot(lw=2)
plt.legend(frameon=False)
plt.show()

Construct time series for unemployment and vacancy rates

In [None]:
# Express unemployment and vacancies as a percentage of the labor force
labor_force = df['CLF16OV']

df['Unemployment rate'] = 100 * df['UNEMPLOY'] / labor_force
df['Vacancy rate'] = 100 * df['Vacancies'] / labor_force

In [None]:
fig, ax = plt.subplots()

# Vacancy rate in black, unemployment rate in red
for column, line_style in [('Vacancy rate', 'k'), ('Unemployment rate', 'r')]:
    df[column].to_period('D').plot(ax=ax, style=line_style, lw=2)

# Read the limits after plotting so the shading below spans the full height
ylim = ax.get_ylim()

# Grey bands wherever the recession indicator is non-zero
ax.fill_between(fred.index, ylim[0], ylim[1], fred['USREC'], facecolor='lightgrey', edgecolor='lightgrey')

# Opaque legend box without a border
l = ax.legend(loc='upper left')
legend_frame = l.get_frame()
legend_frame.set_linewidth(0)
legend_frame.set_alpha(1)

plt.title('US vacancy and unemployment rates (%)')
# plt.savefig('VU.pdf')
plt.show()

## Behavior of unemployment and vacancy rates in the United States

Below I plot the scatterplot of unemployment and vacancy rates, with colors reflecting different decades. 

The resulting negative relationship is known as the Beveridge curve

In [None]:
# Quarterly averages of the monthly data
dfq = df.resample('QS').mean()

# One connected scatter per decade: the 1950s to 2000s take colours from the
# default cycle, the 2010s are drawn in black
for decade in range(1950, 2020, 10):
    first = '{}-01-01'.format(decade)
    last = '{}-12-01'.format(decade + 9)
    fmt = 'ko-' if decade == 2010 else 'o-'

    plt.plot(dfq['Unemployment rate'][first:last],
             dfq['Vacancy rate'][first:last],
             fmt, label='{}s'.format(decade))

plt.legend(frameon=False)

# Fixed axis ranges, with integer ticks on the vacancy-rate axis
plt.xlim(2, 12)
plt.ylim(1, 5)
plt.yticks(np.arange(1, 6))

plt.xlabel('Unemployment rate (%)')
plt.ylabel('Vacancy rate (%)')

plt.title('Shifts in the US Beveridge curve')
# plt.savefig('BC.pdf')
plt.show()

Separate trend from cycle to eliminate structural shifts of the Beveridge curve. Note that the filter parameters are adjusted to the monthly frequency of the data.

In [None]:
# Containers for the cyclical and trend components produced by each filter
hp_cycle_uv, hp_trend_uv = pd.DataFrame(), pd.DataFrame()
cf_cycle_uv, cf_trend_uv = pd.DataFrame(), pd.DataFrame()

for rate_name in ['Vacancy rate','Unemployment rate']:
    # Log of the rate scaled by 100 (missing observations dropped before filtering)
    log_rate = 100*np.log(df[rate_name]).dropna()

    # Hodrick-Prescott filter with smoothing parameter 1600 * 3^4
    hp_cycle_uv[rate_name], hp_trend_uv[rate_name] = sm.tsa.filters.hpfilter(log_rate, lamb=1600*3**4)

    # Christiano-Fitzgerald band-pass filter keeping periods of 1.5*12 to 8*12 observations
    cf_cycle_uv[rate_name], cf_trend_uv[rate_name] = sm.tsa.filters.cffilter(log_rate, low=1.5*12, high=8*12)

Plot cyclical components of unemployment and vacancy rates vs cyclical component of output

In [None]:
fig, ax = plt.subplots()

# Quarterly averages of the cyclical components of the vacancy and
# unemployment rates, plotted together with the cyclical component of output
uv_quarterly = cf_cycle_uv.resample('QS').mean()
uv_quarterly.to_period('D').plot(ax=ax, style=['k','r'], lw=2)
cf_cycle['Output'].plot(ax=ax, style=['b'], lw=2)

ax.set_ylim(-60, 60)
ylim = ax.get_ylim()

# Zero line over the span of the `hours` data
sample_start, sample_end = hours.index[0], hours.index[-1]
ax.hlines(0, sample_start, sample_end, linewidth=0.5)

# Grey bands wherever the recession indicator is non-zero
ax.fill_between(fred.index, ylim[0], ylim[1], fred['USREC'], facecolor='lightgrey', edgecolor='lightgrey')

ax.set_xlim('1950-01', sample_end)

# Opaque legend box without a border
l = ax.legend(loc='upper right')
legend_frame = l.get_frame()
legend_frame.set_linewidth(0)
legend_frame.set_alpha(1)

plt.title('Deviations from Christiano-Fitzgerald trend (%)')
# plt.savefig('VU_CF.pdf')
plt.show()

Run a (very simplified) linear regression on the cyclical components of the unemployment and vacancy rates. The slope is very close to -1.

In [None]:
# Cyclical components: unemployment on the horizontal axis, vacancies on the vertical
x = hp_cycle_uv['Unemployment rate']
y = hp_cycle_uv['Vacancy rate']

# First-degree least-squares fit; coefficients are returned highest power first
slope, intercept = np.polyfit(x, y, deg=1)

print(slope)
print(intercept)

fig, ax = plt.subplots(figsize=(5, 5))

fitted = intercept + slope*x
plt.scatter(x, y, alpha=0.25)
plt.plot(x, fitted, 'r-', lw=2)

# Symmetric square window with axis lines through the origin
bound = 45
plt.xlim(-bound, bound)
plt.ylim(-bound, bound)

plt.hlines(0, -bound, bound, linewidth=0.5)
plt.vlines(0, -bound, bound, linewidth=0.5)

plt.title('Deviations from Hodrick-Prescott trend (%)')
plt.xlabel('Unemployment rate')
plt.ylabel('Vacancy rate')

# plt.savefig('BC_HP.pdf')

plt.show()

Generate the 'estimated' Beveridge curve without structural shifts

In [None]:
# Sample means of the quarterly rates: the point the curve passes through
u, v = (np.mean(dfq[name]) for name in ('Unemployment rate', 'Vacancy rate'))

print(u, v)

# Deviations of unemployment from its mean on the 100*log scale; the
# matching vacancy deviations follow from the estimated slope
scale = np.linspace(-40, 60, 101)
curve_u = u*np.exp(scale/100)
curve_v = v*np.exp(slope*scale/100)

plt.plot(curve_u, curve_v, 'r', lw=2)

# Mark the sample means
plt.plot(u, v, 'ko')

plt.xlim(2, 12)
plt.ylim(1, 5)
plt.yticks(np.arange(1, 6))

# plt.hlines(v, 2, u, linestyle='--', lw=1)
# plt.vlines(u, 1, v, linestyle='--', lw=1)

plt.xlabel('Unemployment rate (%)')
plt.ylabel('Vacancy rate (%)')
plt.title('US Beveridge curve without structural shifts')

# plt.savefig('BC_est.pdf')

plt.show()

In [None]:
# Sample means of the quarterly rates: the point the curve passes through
u, v = (np.mean(dfq[name]) for name in ('Unemployment rate', 'Vacancy rate'))

print(u, v)

# Deviations of unemployment from its mean on the 100*log scale; the
# matching vacancy deviations follow from the estimated slope
scale = np.linspace(-40, 60, 101)
curve_u = u*np.exp(scale/100)
curve_v = v*np.exp(slope*scale/100)

plt.plot(curve_u, curve_v, 'r', lw=2)

# Cyclical components mapped back to levels around the sample means
plt.scatter(u*np.exp(x/100), v*np.exp(y/100), alpha=0.25)

# Mark the sample means
plt.plot(u, v, 'ko')

x_range, y_range = (2, 12), (1, 5)
plt.xlim(*x_range)
plt.ylim(*y_range)
plt.yticks(np.arange(1, 6))

# Thin guide lines through the sample means across the whole window
plt.hlines(v, *x_range, linewidth=0.5)
plt.vlines(u, *y_range, linewidth=0.5)

plt.xlabel('Unemployment rate (%)')
plt.ylabel('Vacancy rate (%)')
plt.title('US Beveridge curve without structural shifts')

# plt.savefig('BC_est_2.pdf')

plt.show()