In [None]:
import os

import numpy as np
import pandas as pd

In [None]:
# Project root. Defaults to the original Dropbox location but can be pointed
# elsewhere by setting the DEBT_COVENANT_DIR environment variable, so the
# notebook also runs on machines with a different directory layout.
# Joining with '' guarantees a trailing separator, because later cells build
# file names by plain string concatenation (e.g. rawdir + 'file.xlsx').
maindir = os.path.join(
    os.environ.get('DEBT_COVENANT_DIR', '/Users/kenteoh/Dropbox/debt_covenant/'),
    '',
)
# Raw Capital IQ inputs are read from here.
rawdir = os.path.join(maindir, 'rawdata_jfi_fin', 'capiq', '')
# Processed data sets are written here.
outdir = os.path.join(maindir, 'data_jfi_fin', '')

In [None]:
## read data -------------------
# Load the "IQ_BR" sheet; header=2 tells pandas that the column names are on
# the third row of the sheet.
filepath = rawdir + 'CapIQ_Bankruptcy_Dates.xlsx'
df = pd.read_excel(filepath, sheet_name="IQ_BR", header=2)

# keep relevant rows: all three identifiers must be present
mask = df[['SP_ENTITY_ID', 'IQ_GVKEY', 'SP_CIQ_ID']].notnull().all(axis=1)
df = df[mask].reset_index(drop=True)

# Expand out GVKEY: a cell may list several comma-separated gvkeys; give each
# one its own row. The index is reset so every exploded row has a unique label.
df['gvkey'] = df['IQ_GVKEY'].str.split(',')
df_expanded = df.explode('gvkey').drop(['IQ_GVKEY'], axis=1).reset_index(drop=True)
# Strip surrounding whitespace so lists separated by ", " do not produce keys
# with a leading space.
df_expanded['gvkey'] = df_expanded['gvkey'].str.strip()

## get bankruptcy dates---------
# Columns holding the petition dates (voluntary / involuntary) and the
# possible resolution dates (emerged, liquidated, dismissed).
varlist = ['IQ_TR_BNKY_VOL_PETITION_FILED_DATE','IQ_TR_BNKY_INVOL_PETITION_FILED_DATE','IQ_TR_BNKY_EMERGED_REORG_DATE','IQ_TR_BNKY_LIQUIDATED_DATE','IQ_TR_BNKY_DISMISSED_DATE']

# Parse the dates BEFORE filtering: errors='coerce' turns unparseable cells
# into NaT, and such cells must not count as "has a date" in the filter below.
for v in varlist:
    df_expanded[v] = pd.to_datetime(df_expanded[v], errors='coerce')

# keep only observations with at least one valid bankruptcy date
mask = df_expanded[varlist].notnull().any(axis=1)
df_expanded = df_expanded[mask].reset_index(drop=True)

# get date bankruptcy petition filed and resolved (if any):
# the earliest petition date and the earliest resolution date per row.
df_expanded['bankruptcy_filing_date'] = df_expanded[['IQ_TR_BNKY_VOL_PETITION_FILED_DATE', 'IQ_TR_BNKY_INVOL_PETITION_FILED_DATE']].min(axis=1)
df_expanded['bankruptcy_resolution_date'] = df_expanded[['IQ_TR_BNKY_EMERGED_REORG_DATE', 'IQ_TR_BNKY_LIQUIDATED_DATE', 'IQ_TR_BNKY_DISMISSED_DATE']].min(axis=1)

In [None]:
## handle duplicates ----------------
# Narrow the frame to the identifier and the two derived event dates.
keep_cols = ['gvkey', 'bankruptcy_filing_date', 'bankruptcy_resolution_date']
df_expanded = df_expanded[keep_cols]


def _pick_aggregator(column):
    """Return 'max' for numeric columns and 'min' for every other dtype."""
    if np.issubdtype(column.dtype, np.number):
        return 'max'
    return 'min'


# Collapse repeated gvkeys into one row each (none in this case). Non-numeric
# columns -- here the datetime columns -- keep their smallest value, i.e. the
# earliest date within the group.
aggregation_rules = {
    name: _pick_aggregator(df_expanded[name])
    for name in df_expanded.columns.drop('gvkey')
}
grouped = df_expanded.groupby('gvkey')
df_unique = grouped.agg(aggregation_rules).reset_index()

# Drop the first three characters of every key
# (presumably a "GV_"-style prefix -- TODO confirm against the raw file).
df_unique['gvkey'] = df_unique['gvkey'].str.slice(3)

In [None]:
## split to two datasets----------
# Stata encodes quarterly (%tq) dates as the number of quarters elapsed since
# 1960q1 (1960q1 -> 0, 1960q2 -> 1, ...).
STATA_EPOCH_YEAR = 1960


def _stata_quarter(dates):
    """Encode a datetime Series as Stata quarterly date numbers.

    Parameters
    ----------
    dates : pandas.Series
        Datetime values; missing entries (NaT) are allowed.

    Returns
    -------
    pandas.Series
        ``(year - 1960) * 4 + quarter - 1`` for each row, missing where the
        date is missing.
    """
    return (dates.dt.year - STATA_EPOCH_YEAR) * 4 + dates.dt.quarter - 1


def _event_quarter_table(events, date_col, yq_col):
    """Build a two-column (gvkey, quarter) table from one date column.

    ``assign`` returns a new frame, so nothing is written into a slice of
    `events`; this avoids pandas' SettingWithCopyWarning and leaves `events`
    untouched.

    Parameters
    ----------
    events : pandas.DataFrame
        Must contain 'gvkey' (digit strings) and `date_col` (datetime).
    date_col : str
        Name of the event-date column to convert.
    yq_col : str
        Name of the resulting quarter column.

    Returns
    -------
    pandas.DataFrame
        Columns ``['gvkey', yq_col]`` with gvkey converted to int.
    """
    return (
        events[['gvkey', date_col]]
        .assign(**{
            'gvkey': lambda d: d['gvkey'].astype(int),
            yq_col: lambda d: _stata_quarter(d[date_col]),
        })
        .drop(columns=[date_col])
    )


# Filing quarters: every gvkey is kept, so a firm without a filing date gets a
# missing quarter value.
df_filing = _event_quarter_table(
    df_unique, 'bankruptcy_filing_date', 'bankruptcy_filing_yq'
)

# Resolution quarters: only firms that have a resolution date.
has_resolution = df_unique['bankruptcy_resolution_date'].notnull()
df_resolution = _event_quarter_table(
    df_unique[has_resolution], 'bankruptcy_resolution_date', 'bankruptcy_resolution_yq'
)

# save data
df_filing.to_stata(outdir + 'CapIQ_bankruptcy_filing.dta', write_index=False)
df_resolution.to_stata(outdir + 'CapIQ_bankruptcy_resolution.dta', write_index=False)


In [None]:
# Report how many rows each output table contains (filing first, then
# resolution), one count per line.
print(len(df_filing), len(df_resolution), sep='\n')