# BUG: Beveridgean Unemployment Gap
This notebook demonstrates a Python implementation of the "Beveridgean Unemployment Gap" by Pascal Michaillat and Emmanuel Saez. The original code was written in MATLAB; see the [original GitHub repository](https://github.com/pascalmichaillat/unemployment-gap).

## Section 2: Beveridgean model of the labor market

## Import packages

In [None]:
import pandas as pd
import numpy as np
from scipy.optimize import root

In [None]:
import matplotlib
import matplotlib.pyplot as plt

# Render figures inline, inside the notebook's output cells.
%matplotlib inline
# Apply matplotlib's 'fivethirtyeight' style sheet to the figures drawn below.
matplotlib.style.use('fivethirtyeight')

In [None]:
import sys
# Put '../bug' at the front of the module search path so that `import bug`
# below is resolved from that directory first.
# NOTE(review): presumably '../bug' holds the Python port this notebook
# demonstrates — confirm the relative path matches the repository layout.
sys.path.insert(0, '../bug')
import bug

## Read the data
Here, we read from the [Excel file](https://github.com/pascalmichaillat/unemployment-gap/blob/main/code/data.xlsx) provided with the unemployment-gap MATLAB package.

The goal of this notebook is to re-create the analysis and some of the figures from the Unemployment Gap paper, so that we can verify we are getting the *same* outputs (allowing for small differences between the two language implementations).

#### Recession information

In [None]:
# Recession peaks and troughs live on the 'Recession dates' sheet; header=1
# tells pandas that the column names sit on the sheet's second row.
df = pd.read_excel(
    '../../code/data.xlsx',
    sheet_name='Recession dates',
    header=1,
    usecols=['Peak month', 'Trough month'],
)

# Parse both columns as timestamps: `starts` holds the peak months and
# `ends` holds the trough months.
starts, ends = (
    pd.to_datetime(df[column]) for column in ('Peak month', 'Trough month')
)

#### Unemployment rate

In [None]:
# Read the unemployment-rate column, together with the Year/Month columns
# needed to date each observation, from the 'Monthly data' sheet
# (header=1: column names are on the sheet's second row).
df = pd.read_excel(
    '../../code/data.xlsx',
    sheet_name='Monthly data',
    header=1,
    usecols=['Unemployment rate (percent)', 'Year', 'Month'],
)

In [None]:
# set the index 
dates = pd.PeriodIndex(pd.to_datetime(dict(year=df.Year, month=df.Month, day=15)).dt.to_period('m') ) 
unempl_rate = pd.Series(data=df['Unemployment rate (percent)'].values,
                       index=dates, name='unempl_rate')

#### Vacancy info
For 1951–2000, we use the vacancy proxy constructed by Barnichon (2010).

For 2001–2019, we use the number of job openings measured by the Bureau of
Labor Statistics (2020b) in the Job Opening and Labor Turnover Survey,
divided by the civilian labor force constructed by the Bureau of Labor
Statistics (2020a) from the Current Population Survey. 

We then splice
the two series to obtain a vacancy rate for 1951–2019 (Fig. 1(b)).

In [None]:
# Read the vacancy-rate column, together with the Year/Month columns needed to
# date each observation, from the 'Monthly data' sheet
# (header=1: column names are on the sheet's second row).
df = pd.read_excel(
    '../../code/data.xlsx',
    sheet_name='Monthly data',
    header=1,
    usecols=['Vacancy rate (thousands)', 'Year', 'Month'],
)

In [None]:
# set the index 
dates = pd.PeriodIndex(pd.to_datetime(dict(year=df.Year, month=df.Month, day=15)).dt.to_period('m') ) 
vac_rate_proxy = pd.Series(data=df['Vacancy rate (thousands)'].values,
                       index=dates, name='vacancy_rate_proxy')

#### Labor force level

In [None]:
# Read the labor-force level, together with the Year/Month columns needed to
# date each observation, from the 'Monthly data' sheet
# (header=1: column names are on the sheet's second row).
df = pd.read_excel(
    '../../code/data.xlsx',
    sheet_name='Monthly data',
    header=1,
    usecols=['Labor force level (thousands of persons)', 'Year', 'Month'],
)

In [None]:
# set the index 
dates = pd.PeriodIndex(pd.to_datetime(dict(year=df.Year, month=df.Month, day=15)).dt.to_period('m') ) 
labor_level = pd.Series(data=df['Labor force level (thousands of persons)'].values,
                       index=dates, name='labor_force_level')

#### Vacancies

In [None]:
# Read the vacancy level, together with the Year/Month columns needed to date
# each observation, from the 'Monthly data' sheet
# (header=1: column names are on the sheet's second row).
df = pd.read_excel(
    '../../code/data.xlsx',
    sheet_name='Monthly data',
    header=1,
    usecols=['Vacancy level (thousands)', 'Year', 'Month'],
)

In [None]:
# set the index 
dates = pd.PeriodIndex(pd.to_datetime(dict(year=df.Year, month=df.Month, day=15)).dt.to_period('m') ) 
vacancy_level = pd.Series(data=df['Vacancy level (thousands)'].values,
                       index=dates, name='vacancy_level')

In [None]:
# Vacancies per labor-force participant, as a fraction (both inputs share the
# same monthly index, so the division aligns month by month).
vacancy_rate_2001 = vacancy_level.div(labor_level)

# Splice the two sources: the proxy series through December 2000, then the
# level-based rate (scaled by 100) from January 2001 onwards.
vacancy_rate_splice = pd.concat(
    [
        vac_rate_proxy.loc[:'2000-12'],
        vacancy_rate_2001.loc['2001-01':] * 100,
    ]
)

### Figure 1A

In [None]:
# Figure 1A: monthly unemployment rate with recessions shaded in grey.
ax = unempl_rate.plot(figsize=(9, 6), linewidth=2, color='purple')

# Shade every recession (peak -> trough) whose peak falls after January 1951.
# Peaks and troughs are paired positionally with zip, so the loop does not
# depend on the index labels of `starts`/`ends`; the cutoff is built once.
sample_start = pd.to_datetime('1951-1')
for peak, trough in zip(starts, ends):
    if peak > sample_start:
        # Very low zorder keeps the shading behind the plotted line.
        ax.axvspan(peak, trough, facecolor='grey', alpha=0.5, zorder=-100)

# No explicit visibility is passed, so this toggles the x-axis grid lines.
ax.grid(axis='x')
# Start the y-axis at zero and leave 10% headroom above the series maximum.
ax.set_ylim(0, unempl_rate.max() * 1.1)

# Emphasise the bottom and left axes with a thicker black spine.
for side in ('bottom', 'left'):
    ax.spines[side].set_linewidth(1.5)
    ax.spines[side].set_color('k')

ax.set_ylabel('Unemployment Rate', fontsize=12)
# Trailing semicolon suppresses the Text(...) repr in the cell output.
ax.set_title('Unemployment Rate', fontsize=14);

### Figure 1B

In [None]:
# Figure 1B: spliced monthly vacancy rate with recessions shaded in grey.
ax = vacancy_rate_splice.plot(figsize=(9, 6), linewidth=2, color='purple')

# Shade every recession (peak -> trough) whose peak falls after January 1951.
# Peaks and troughs are paired positionally with zip, so the loop does not
# depend on the index labels of `starts`/`ends`; the cutoff is built once.
sample_start = pd.to_datetime('1951-1')
for peak, trough in zip(starts, ends):
    if peak > sample_start:
        # Very low zorder keeps the shading behind the plotted line.
        ax.axvspan(peak, trough, facecolor='grey', alpha=0.5, zorder=-100)

# No explicit visibility is passed, so this toggles the x-axis grid lines.
ax.grid(axis='x')
# Start the y-axis at zero and leave 10% headroom above the series maximum.
ax.set_ylim(0, vacancy_rate_splice.max() * 1.1)

# Emphasise the bottom and left axes with a thicker black spine.
for side in ('bottom', 'left'):
    ax.spines[side].set_linewidth(1.5)
    ax.spines[side].set_color('k')

ax.set_ylabel('Vacancy Rate', fontsize=12)
# Trailing semicolon suppresses the Text(...) repr in the cell output.
ax.set_title('Vacancy Rate', fontsize=14);


### Figure 1C-1F

In [None]:

# Monthly Beveridge curve: log vacancy rate against log unemployment rate.
# Each series is divided by 100 before taking logs.
plt.figure(figsize=(8, 8))
plt.plot(
    np.log(unempl_rate / 100),
    np.log(vacancy_rate_splice / 100),
    linewidth=1,
)

# Emphasise the bottom and left axes with a thicker black spine.
current_axes = plt.gca()
for side in ("bottom", "left"):
    current_axes.spines[side].set_linewidth(1.5)
    current_axes.spines[side].set_color('k')

# Same fixed axis limits as the other Beveridge-curve figures in this section.
plt.xlim(-3.8, -2.1)
plt.ylim(-4.3, -2.95)
plt.ylabel('Log Vacancy Rate', fontsize=12)
plt.xlabel('Log Unemployment Rate', fontsize=12)
plt.title('Beveridge Curve (monthly)', fontsize=14)

In [None]:
# Quarterly means of the monthly series, divided by 100 and then logged; these
# feed the quarterly Beveridge-curve figures.
log_unempl_rate_q = unempl_rate.resample('Q').mean().div(100).pipe(np.log)
log_vacancy_rate_q = vacancy_rate_splice.resample('Q').mean().div(100).pipe(np.log)

In [None]:

# Quarterly Beveridge curve: log vacancy rate against log unemployment rate,
# using the quarterly-averaged series.
plt.figure(figsize=(8, 8))
plt.plot(log_unempl_rate_q, log_vacancy_rate_q, linewidth=1)

# Emphasise the bottom and left axes with a thicker black spine.
current_axes = plt.gca()
for side in ("bottom", "left"):
    current_axes.spines[side].set_linewidth(1.5)
    current_axes.spines[side].set_color('k')

# Same fixed axis limits as the other Beveridge-curve figures in this section.
plt.xlim(-3.8, -2.1)
plt.ylim(-4.3, -2.95)
plt.ylabel('Log Vacancy Rate', fontsize=12)
plt.xlabel('Log Unemployment Rate', fontsize=12)
plt.title('Beveridge Curve (quarterly)', fontsize=14)

In [None]:

# Beveridge curve with the 1951Q1-1969Q4 window highlighted.
plt.figure(figsize=(8, 8))

# Full-sample curve in thin grey as background, highlighted window on top.
plt.plot(log_unempl_rate_q, log_vacancy_rate_q, linewidth=1, color='grey')
window = slice('1951Q1', '1969Q4')
plt.plot(
    log_unempl_rate_q.loc[window],
    log_vacancy_rate_q.loc[window],
    linewidth=3,
    color='orange',
)

# Label the first and last quarter of the window with their year.
for quarter in ('1951Q1', '1969Q4'):
    plt.annotate(
        quarter[:4],
        (log_unempl_rate_q.loc[quarter], log_vacancy_rate_q.loc[quarter]),
    )

# Emphasise the bottom and left axes with a thicker black spine.
current_axes = plt.gca()
for side in ("bottom", "left"):
    current_axes.spines[side].set_linewidth(1.5)
    current_axes.spines[side].set_color('k')

# Same fixed axis limits as the other Beveridge-curve figures in this section.
plt.xlim(-3.8, -2.1)
plt.ylim(-4.3, -2.95)
plt.ylabel('Log Vacancy Rate', fontsize=12)
plt.xlabel('Log Unemployment Rate', fontsize=12)
plt.title('Beveridge Curve 1951-1969', fontsize=14)

In [None]:

# Beveridge curve with the 1970Q1-1989Q4 window highlighted.
plt.figure(figsize=(8, 8))

# Full-sample curve in thin grey as background, highlighted window on top.
plt.plot(log_unempl_rate_q, log_vacancy_rate_q, linewidth=1, color='grey')
window = slice('1970Q1', '1989Q4')
plt.plot(
    log_unempl_rate_q.loc[window],
    log_vacancy_rate_q.loc[window],
    linewidth=3,
    color='orange',
)

# Label the first and last quarter of the window with their year.
for quarter in ('1970Q1', '1989Q4'):
    plt.annotate(
        quarter[:4],
        (log_unempl_rate_q.loc[quarter], log_vacancy_rate_q.loc[quarter]),
    )

# Emphasise the bottom and left axes with a thicker black spine.
current_axes = plt.gca()
for side in ("bottom", "left"):
    current_axes.spines[side].set_linewidth(1.5)
    current_axes.spines[side].set_color('k')

# Same fixed axis limits as the other Beveridge-curve figures in this section.
plt.xlim(-3.8, -2.1)
plt.ylim(-4.3, -2.95)
plt.ylabel('Log Vacancy Rate', fontsize=12)
plt.xlabel('Log Unemployment Rate', fontsize=12)
plt.title('Beveridge Curve 1970-1989', fontsize=14)

In [None]:

# Beveridge curve with the 1990Q1-2009Q4 window highlighted.
plt.figure(figsize=(8, 8))

# Full-sample curve in thin grey as background, highlighted window on top.
plt.plot(log_unempl_rate_q, log_vacancy_rate_q, linewidth=1, color='grey')
window = slice('1990Q1', '2009Q4')
plt.plot(
    log_unempl_rate_q.loc[window],
    log_vacancy_rate_q.loc[window],
    linewidth=3,
    color='orange',
)

# Label the first and last quarter of the window with their year.
for quarter in ('1990Q1', '2009Q4'):
    plt.annotate(
        quarter[:4],
        (log_unempl_rate_q.loc[quarter], log_vacancy_rate_q.loc[quarter]),
    )

# Emphasise the bottom and left axes with a thicker black spine.
current_axes = plt.gca()
for side in ("bottom", "left"):
    current_axes.spines[side].set_linewidth(1.5)
    current_axes.spines[side].set_color('k')

# Same fixed axis limits as the other Beveridge-curve figures in this section.
plt.xlim(-3.8, -2.1)
plt.ylim(-4.3, -2.95)
plt.ylabel('Log Vacancy Rate', fontsize=12)
plt.xlabel('Log Unemployment Rate', fontsize=12)
plt.title('Beveridge Curve 1990-2009', fontsize=14)

In [None]:

# Beveridge curve with the 2010Q1-2019Q4 window highlighted.
plt.figure(figsize=(8, 8))

# Full-sample curve in thin grey as background, highlighted window on top.
plt.plot(log_unempl_rate_q, log_vacancy_rate_q, linewidth=1, color='grey')
window = slice('2010Q1', '2019Q4')
plt.plot(
    log_unempl_rate_q.loc[window],
    log_vacancy_rate_q.loc[window],
    linewidth=3,
    color='orange',
)

# Label the first and last quarter of the window with their year.
for quarter in ('2010Q1', '2019Q4'):
    plt.annotate(
        quarter[:4],
        (log_unempl_rate_q.loc[quarter], log_vacancy_rate_q.loc[quarter]),
    )

# Emphasise the bottom and left axes with a thicker black spine.
current_axes = plt.gca()
for side in ("bottom", "left"):
    current_axes.spines[side].set_linewidth(1.5)
    current_axes.spines[side].set_color('k')

# Same fixed axis limits as the other Beveridge-curve figures in this section.
plt.xlim(-3.8, -2.1)
plt.ylim(-4.3, -2.95)
plt.ylabel('Log Vacancy Rate', fontsize=12)
plt.xlabel('Log Unemployment Rate', fontsize=12)
plt.title('Beveridge Curve 2010-2019', fontsize=14)