In [None]:
import pandas as pd
import numpy as np

In [None]:
# Project root; the input and output folders below are derived from it.
maindir = '/path/to/project/'
# Folder holding the raw Capital IQ Excel workbook (input).
rawdir = f'{maindir}rawdata_jfi_fin/capiq/'
# Folder that receives the processed data set (output).
outdir = f'{maindir}data_jfi_fin/'

In [None]:
def clean_capiq(filepath, var):
    """Load one Capital IQ worksheet and reshape it to a gvkey x fiscal-quarter panel.

    The worksheet is read with its header on the third row (``header=2``).
    Rows missing any of ``SP_ENTITY_ID``, ``IQ_GVKEY`` or ``SP_CIQ_ID`` are
    dropped, comma-separated ``IQ_GVKEY`` values are expanded to one row per
    key, rows sharing a key are collapsed to a single row, and the wide
    ``FQ<digits>`` columns are stacked into long format.

    Parameters
    ----------
    filepath : str or path-like
        Excel workbook to read.
    var : str
        Worksheet name; also used as the name of the value column in the
        returned frame.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: ``gvkey``, ``iq_acquired``, ``iq_liquidated``,
        ``<var>``, ``fyearq``, ``fqtr``.  ``fyearq`` and ``fqtr`` are strings.

    Raises
    ------
    KeyError
        If the worksheet lacks one of the identifier or status columns.
    """
    raw = pd.read_excel(filepath, sheet_name=var, header=2)

    # keep relevant rows: all three identifiers must be present
    id_cols = ['SP_ENTITY_ID', 'IQ_GVKEY', 'SP_CIQ_ID']
    df = raw.dropna(subset=id_cols).reset_index(drop=True)

    # Expand out GVKEY: one row per comma-separated key.
    df_expanded = (
        df.assign(gvkey=df['IQ_GVKEY'].str.split(','))
          .explode('gvkey')
          .drop(columns=['IQ_GVKEY'])
    )
    # Strip the whitespace that follows a comma ("A, B" -> "A", "B"); without
    # this the positional slice applied to gvkey further down cuts the wrong
    # characters.  Empty tokens left by stray commas are discarded.
    df_expanded['gvkey'] = df_expanded['gvkey'].str.strip()
    df_expanded = (
        df_expanded[df_expanded['gvkey'].str.len() > 0]
        .reset_index(drop=True)
    )

    # Create indicators for company status
    acquired_status = ['Acquired']
    liquidated_status = ['Out of Business', 'Liquidating', 'Reorganizing', 'No Longer Investing']

    status = df_expanded['SP_COMPANY_STATUS']
    df_expanded = df_expanded.assign(
        iq_acquired=status.isin(acquired_status).astype(int),
        iq_liquidated=status.isin(liquidated_status).astype(int),
    )

    # Define aggregation rules: numeric columns take the group maximum, all
    # other columns take the first non-null value.  The pandas dtype helpers
    # are used because they also accept extension dtypes (string, nullable
    # integers), which np.issubdtype cannot interpret.  Booleans are kept on
    # the 'first' rule.
    def _agg_rule(series):
        is_number = (
            pd.api.types.is_numeric_dtype(series)
            and not pd.api.types.is_bool_dtype(series)
        )
        return 'max' if is_number else 'first'

    aggregation_rules = {
        col: _agg_rule(df_expanded[col])
        for col in df_expanded.columns if col != 'gvkey'
    }

    # Group by GVKEY and aggregate so that each key appears exactly once
    # (wide_to_long requires a unique id column).
    df_unique = df_expanded.groupby('gvkey').agg(aggregation_rules).reset_index()

    # Reshape to long format, then select and rename relevant columns.
    # Chaining (instead of inplace edits on a column subset) avoids
    # SettingWithCopyWarning.
    df_long = (
        pd.wide_to_long(df_unique, stubnames='FQ', i='gvkey', j='fyq_str')
        .reset_index()
        .loc[:, ['gvkey', 'fyq_str', 'iq_acquired', 'iq_liquidated', 'FQ']]
        .rename(columns={'FQ': var})
    )

    # Construct fiscal quarter variables: the column suffix is read as
    # <quarter digit><year>, so the first character is the quarter and the
    # remainder is the fiscal year.
    fyq = df_long['fyq_str'].astype(str)
    df_long = df_long.assign(
        fyearq=fyq.str[1:],
        fqtr=fyq.str[0],
        # Drops the first three characters of every key -- presumably a
        # "GV_"-style prefix; TODO confirm against the raw worksheet.
        gvkey=df_long['gvkey'].str[3:],
    ).drop(columns=['fyq_str'])

    return df_long

In [None]:
# extract excel files: one long-format frame per worksheet of the workbook

filepath = f'{rawdir}CapIQ_CreditLine_Vars.xlsx'
(
    iq_rc,
    iq_cp,
    iq_tl,
    iq_undrawn_credit,
    iq_undrawn_rc,
    iq_undrawn_tl,
    iq_undrawn_cp,
) = [
    clean_capiq(filepath, sheet)
    for sheet in (
        'IQ_RC',
        'IQ_CP',
        'IQ_TERM_LOANS',
        'IQ_UNDRAWN_CREDIT',
        'IQ_UNDRAWN_RC',
        'IQ_UNDRAWN_TL',
        'IQ_UNDRAWN_CP',
    )
]

In [None]:
# merge files: start from the IQ_RC frame (which also supplies the status
# flags) and left-join every other variable on firm / fiscal year / quarter.
# validate='1:1' makes pandas raise if the key is duplicated on either side.

merge_keys = ['gvkey', 'fyearq', 'fqtr']

dfmain = (
    iq_rc
    .merge(iq_cp[merge_keys + ['IQ_CP']], on=merge_keys, validate='1:1', how='left')
    .merge(iq_tl[merge_keys + ['IQ_TERM_LOANS']], on=merge_keys, validate='1:1', how='left')
    .merge(iq_undrawn_credit[merge_keys + ['IQ_UNDRAWN_CREDIT']], on=merge_keys, validate='1:1', how='left')
    .merge(iq_undrawn_rc[merge_keys + ['IQ_UNDRAWN_RC']], on=merge_keys, validate='1:1', how='left')
    .merge(iq_undrawn_tl[merge_keys + ['IQ_UNDRAWN_TL']], on=merge_keys, validate='1:1', how='left')
    .merge(iq_undrawn_cp[merge_keys + ['IQ_UNDRAWN_CP']], on=merge_keys, validate='1:1', how='left')
)


In [None]:
# reorder: key columns and status flags first, then the CapIQ variables
key_and_flag_cols = ['gvkey', 'fyearq', 'fqtr', 'iq_acquired', 'iq_liquidated']
capiq_value_cols = [
    'IQ_RC', 'IQ_CP', 'IQ_TERM_LOANS', 'IQ_UNDRAWN_CREDIT',
    'IQ_UNDRAWN_RC', 'IQ_UNDRAWN_TL', 'IQ_UNDRAWN_CP',
]
dfmain = dfmain[key_and_flag_cols + capiq_value_cols]

In [None]:
# formatting: lower-case the column names, then cast the CapIQ variables to
# float and the fiscal year / quarter to int.
collist = [
    'IQ_RC','IQ_CP','IQ_TERM_LOANS','IQ_UNDRAWN_CREDIT',
    'IQ_UNDRAWN_RC','IQ_UNDRAWN_TL','IQ_UNDRAWN_CP'
]

# Renaming first and casting by the lower-cased names keeps this cell
# re-runnable: on a second execution the rename is a no-op and the casts still
# find their columns.
dfmain = dfmain.rename(columns=str.lower)

# A single astype call returns a new frame, so no column is assigned on a
# subset of another frame (which would emit SettingWithCopyWarning).
dfmain = dfmain.astype({
    **{name.lower(): float for name in collist},
    'fyearq': int,
    'fqtr': int,
})

In [None]:
# Print column dtypes, non-null counts and memory usage of the final frame.
dfmain.info()

In [None]:
# Write the processed panel to a Stata file in the output folder; the
# DataFrame index is not exported.
filepath = f'{outdir}CapIQ_CreditLine_Vars_processed.dta'
dfmain.to_stata(filepath, write_index=False)

In [None]:
# Display the first rows of the final frame.
dfmain.head()