In [60]:
# Yash Singh 
# Date: 9/6/24 
# goal: this script generates our historical data set used later for further analysis 
# the raw data consists of stock of vacancies, employed, unemployed workers, and the price index 

####################################
# 1) vacancy rate: V / (E + U)
# 2) unemployment rate: U / (E + U)
# 3) inflation (12 month change)
#######################################

# Specify directories
# The defaults are the original author's local folders. Set the environment
# variables INFLATION_DATA_DIR / INFLATION_OUTPUT_DIR to run the notebook on
# another machine without editing this cell.
import os

data_dir = os.environ.get(
    "INFLATION_DATA_DIR",
    "C:/Users/singhy/Desktop/Chicago/cps_data/inflation/raw_data",
)
output_dir = os.environ.get(
    "INFLATION_OUTPUT_DIR",
    "C:/Users/singhy/Desktop/Chicago/cps_data/inflation/output",
)


# Necessary Packages 

import numpy as np 
import pandas as pd 

########################################################################################
# Necessary Raw Data Sets 

# Raw inputs: one DataFrame per source file, read in the order they are used.
# (The earlier .xls exports of the employment, JOLTS and CPI series are no
# longer read; the CSV files below take their place.)

# Vacancies: composite help-wanted workbook
hwi_path = f"{data_dir}/barnichon/CompositeHWI.xlsx"
vacancy = pd.read_excel(hwi_path)

# Employed / unemployed stocks
employment_path = f"{data_dir}/fred_employment/employment_v2.csv"
stocks = pd.read_csv(employment_path)

# JOLTS levels
jolts_path = f"{data_dir}/JOLTS/jolts_level_v2.csv"
jolts = pd.read_csv(jolts_path)

# Consumer Price Index
cpi_path = f"{data_dir}/CPI/CPIAUCSL.csv"
cpi = pd.read_csv(cpi_path)


In [61]:
# Preview the JOLTS frame read from jolts_level_v2.csv.
# In this file the JTSQUL column comes before JTSJOL.
jolts

Unnamed: 0,observation_date,JTSQUL,JTSJOL,JTSHIL,JTSLDL
0,2000-12-01,2882,5088,5426,2018
1,2001-01-01,3245,5234,5722,2220
2,2001-02-01,3053,5097,5303,1855
3,2001-03-01,3054,4762,5528,2133
4,2001-04-01,3163,4615,5204,1883
...,...,...,...,...,...
283,2024-07-01,3243,7711,5416,1713
284,2024-08-01,3178,7861,5435,1668
285,2024-09-01,3098,7372,5582,1802
286,2024-10-01,3283,7839,5394,1748


In [62]:
# Replace the v2 JOLTS frame with the v3 extract. Per the previews in this
# notebook, v3 lists JTSJOL first (v2 had JTSQUL first) and runs one month
# longer (through 2024-11 instead of 2024-10).
jolts = pd.read_csv(f"{data_dir}/JOLTS/jolts_level_v3.csv")

In [63]:
# Preview the reloaded (v3) JOLTS frame to check column order and date range.
jolts

Unnamed: 0,observation_date,JTSJOL,JTSQUL,JTSHIL,JTSLDL
0,2000-12-01,5088,2882,5426,2018
1,2001-01-01,5234,3245,5722,2220
2,2001-02-01,5097,3053,5303,1855
3,2001-03-01,4762,3054,5528,2133
4,2001-04-01,4615,3163,5204,1883
...,...,...,...,...,...
284,2024-08-01,7861,3178,5435,1668
285,2024-09-01,7372,3098,5582,1802
286,2024-10-01,7839,3283,5394,1748
287,2024-11-01,8156,3130,5373,1800


In [50]:
# Tidy the composite vacancy sheet: name its three columns, skip the first
# eight rows (presumably non-data header rows -- confirm against the workbook),
# drop observations missing a date or level, and keep only (date, V) with V
# stored as float.
vacancy.columns = ['date', 'V', 'V_rate']
vacancy = (
    vacancy
    .iloc[8:]
    .reset_index(drop=True)
    .dropna(subset=['date', 'V'])
    .drop(columns=['V_rate'])
    .astype({'V': float})
)

In [51]:
# Convert decimal year to datetime
def decimal_to_datetime(decimal_year):
    """Map a decimal year (e.g. 1951.5) to the first day of a calendar month.

    The fractional part is scaled by 12 and rounded to a whole number of
    months past January. A fraction that rounds to a full 12 months rolls
    over to January of the following year.

    Parameters
    ----------
    decimal_year : float
        Year with the month encoded in the fractional part.

    Returns
    -------
    pandas.Timestamp
        Day 1 of the resolved month.
    """
    base_year = int(decimal_year)
    months_past_january = int(round((decimal_year - base_year) * 12))
    if months_past_january >= 12:
        # Rounded up to a whole year: January of the next year.
        return pd.Timestamp(year=base_year + 1, month=1, day=1)
    return pd.Timestamp(year=base_year, month=months_past_january + 1, day=1)

# Convert each decimal-year entry in the date column to a month-start Timestamp.
vacancy['date'] = vacancy['date'].apply(decimal_to_datetime)

In [52]:
# Spot-check the first rows after the date conversion.
vacancy.head(12)

Unnamed: 0,date,V
0,1951-02-01,2295.689006
1,1951-03-01,2554.677439
2,1951-04-01,2502.487739
3,1951-05-01,2534.983959
4,1951-06-01,2392.152707
5,1951-07-01,2388.251431
6,1951-08-01,2404.265856
7,1951-09-01,2478.510656
8,1951-10-01,2472.520638
9,1951-11-01,2430.5735


In [53]:

# Build the historical panel: clean each raw series, merge them on the monthly
# date, derive labour-market rates and tightness, and save the result as CSV.

# --- Employment / unemployment stocks ---------------------------------------
# Columns are named by position; this assumes the CSV is ordered
# (date, employed, unemployed) -- TODO confirm against employment_v2.csv.
stocks.columns = ['date', 'E', 'U']
stocks = stocks.dropna(subset=['date', 'E', 'U']).assign(
    date=lambda df: pd.to_datetime(df['date']),
    # Cast both stocks (previously only U was cast) so E + U is a float sum.
    E=lambda df: df['E'].astype(float),
    U=lambda df: df['U'].astype(float),
)

# --- CPI-U --------------------------------------------------------------------
# No leading rows are skipped: the CSV header is already parsed (the rename
# below relies on 'observation_date' / 'CPIAUCSL'). The former `cpi.iloc[10:]`,
# apparently a leftover from the .xls layout, dropped ten real observations and
# ten more on every re-run of this cell.
cpi = cpi.rename(columns={'observation_date': 'date', 'CPIAUCSL': 'P'})
cpi['date'] = pd.to_datetime(cpi['date'])
cpi['P'] = pd.to_numeric(cpi['P'], errors='coerce')
# 12-month percent change; assumes one row per consecutive month, in date order.
cpi['P_12m_change'] = cpi['P'].pct_change(periods=12) * 100

# --- JOLTS --------------------------------------------------------------------
# Rename by series ID rather than by position: the v2 and v3 extracts order
# JTSJOL / JTSQUL differently, and positional naming would silently label
# quits as vacancies when the v2 file is loaded.
jolts = jolts.rename(columns={
    'observation_date': 'date',
    'JTSJOL': 'vacancy_stock',
    'JTSQUL': 'tot_quits',
    'JTSHIL': 'tot_hires',
    'JTSLDL': 'tot_layoffs',
})
# Selecting explicitly fixes the column order and raises KeyError if a series
# is missing, instead of carrying on with mislabeled data.
jolts = jolts[['date', 'vacancy_stock', 'tot_quits', 'tot_hires', 'tot_layoffs']]
jolts['date'] = pd.to_datetime(jolts['date'])

# --- Merge --------------------------------------------------------------------
# Stocks and CPI must both be present (inner); JOLTS and the composite vacancy
# series are attached with outer joins so neither date range truncates the other.
temp = stocks.merge(cpi, on=['date'], how='inner')
temp2 = temp.merge(jolts, on=['date'], how='outer')
final = temp2.merge(vacancy, on=['date'], how='outer')
# .copy() so the column assignments below write to an independent frame rather
# than a filtered slice (avoids SettingWithCopyWarning).
final = final[final['date'] >= '1951-01-01'].copy()

# Use the composite vacancy level where available, JOLTS openings otherwise.
final['V'] = final['V'].fillna(final['vacancy_stock'])

# --- Key variables ------------------------------------------------------------
final['L'] = final['E'] + final['U']

# Rates are expressed in percent of L = E + U.
final['U_rate'] = (final['U'] / final['L']) * 100
final['V_rate'] = (final['V'] / final['L']) * 100

final['tightness'] = final['V'] / final['U']
final['ln_tightness'] = np.log(final['V']) - np.log(final['U'])

# --- Keep main variables and save ---------------------------------------------
keep = ['date', 'P_12m_change', 'V', 'U', 'U_rate', 'V_rate', 'tightness', 'ln_tightness']
final = final[keep]

final.to_csv(f"{output_dir}/data/historical_data_feb.csv", index=False)

In [54]:
# Restrict the panel to observations dated 2000-12-01 or later (the first
# month shown in the JOLTS previews) for inspection below.
from_dec_2000 = final['date'] >= '2000-12-01'
final = final.loc[from_dec_2000]

In [55]:
# Inspect the first rows of the restricted sample (dates from 2000-12-01 on,
# per the preceding cell).
final.head(24)

Unnamed: 0,date,P_12m_change,V,U,U_rate,V_rate,tightness,ln_tightness
635,2000-12-01,3.436019,5088.0,5634.0,3.933039,3.551882,0.903088,-0.101935
636,2001-01-01,3.721205,5234.0,6023.0,4.188427,3.639752,0.869002,-0.14041
637,2001-02-01,3.529412,5097.0,6089.0,4.23727,3.546948,0.837083,-0.177832
638,2001-03-01,2.982456,4762.0,6141.0,4.266835,3.308691,0.775444,-0.25432
639,2001-04-01,3.218256,4615.0,6271.0,4.367904,3.21446,0.735927,-0.306624
640,2001-05-01,3.563084,4425.0,6226.0,4.344186,3.08754,0.710729,-0.341464
641,2001-06-01,3.193961,4361.0,6484.0,4.522974,3.042056,0.672579,-0.396636
642,2001-07-01,2.721482,4447.0,6583.0,4.582539,3.095633,0.675528,-0.392261
643,2001-08-01,2.721482,4024.0,7042.0,4.914749,2.808428,0.571429,-0.559616
644,2001-09-01,2.592166,4071.0,7142.0,4.960136,2.827319,0.570008,-0.562104
