Builds a borrower–lender panel from Dealscan to examine covenant-related lending patterns.


In [None]:
# this file analyzes Dealscan at the borrower-lender level

import joblib
import pandas as pd
import warnings
import re
from tqdm.notebook import tqdm
import numpy as np
import matplotlib.pyplot as plt
from fuzzywuzzy import fuzz
# Show every column when a DataFrame is rendered in the notebook.
pd.set_option('display.max_columns', None)
# pd.set_option('display.max_colwidth', None)
# pd.set_option('display.max_rows', None)
# Silence all warnings for the rest of the session.
# NOTE(review): this also hides pandas deprecation / SettingWithCopy warnings.
warnings.filterwarnings('ignore')
# Enable the tqdm-backed `progress_apply` that later cells call on dscan_df.
tqdm.pandas()

# Root folder: every input and output path below is built as datdir + relative path.
datdir = '/path/to/project/'

In [None]:
# read Dealscan data

# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code;
# only load pickles that come from a trusted source.
dat_df = joblib.load(datdir+'rawdata/dealscan/dealscan_new.pkl')

# Order the rows by tranche id and tranche active date. The round trip through the
# index also moves these two columns to the front of the frame.
dat_df.set_index(['lpc_tranche_id','tranche_active_date'], inplace=True)
dat_df.sort_index(inplace=True)
dat_df.reset_index(inplace=True)

In [None]:
# keep relevant variables
# Take an explicit copy: the following cells sort, de-duplicate and add columns to
# dscan_df in place, which must not operate on (or warn about) a slice of dat_df.
dscan_df = dat_df[[
        'lender_id','borrower_id','lpc_deal_id','deal_input_date','deal_active_date','lpc_tranche_id','tranche_active_date',
        'lender_parent_name','lender_parent_id','number_of_lenders','lender_name','tranche_maturity_date', 
        'deal_active', 'primary_role', 'additional_roles','deal_amount','deal_amount_converted','tranche_amount',
        'deal_purpose','lender_commit','lender_share','lead_arranger','lead_left','number_of_lead_arrangers','number_of_lead_left',
        'tranche_o_a', 'lender_institution_type', 'tranche_type', 'sic_code','market_segment',
        'base_reference_rate','all_in_spread_drawn_bps','all_in_spread_undrawn_bps'
        ]].copy()
# del dat_df

In [None]:
# for multiple deal_input_dates, keep the earliest
# Columns that identify one lender-borrower-deal-tranche record.
event_key = ['lender_id','borrower_id','lpc_deal_id','deal_active_date','lpc_tranche_id','tranche_active_date']

# Sort so the earliest deal_input_date comes first within each record, keep that
# row only, then discard the input date, which is not used afterwards.
dscan_df = (
    dscan_df
    .sort_values(event_key + ['deal_input_date'])
    .drop_duplicates(event_key, keep='first')
    .reset_index(drop=True)
    .drop(['deal_input_date'], axis=1)
)

In [None]:
# dscan_df['ncount'] = dscan_df.groupby(['lender_id','borrower_id','lpc_deal_id','deal_input_date','deal_active_date','lpc_tranche_id'])['lpc_tranche_id'].transform('count')
# count the number of times the column 'tranche_o_a' takes the value 'Origination' within each group
# dscan_df['norig'] = dscan_df.groupby(['lender_id','borrower_id','lpc_deal_id','deal_input_date','deal_active_date','lpc_tranche_id'])['tranche_o_a'].transform(lambda x: (x=='Origination').sum())

In [None]:
# housekeeping
# Keep the first run of digits found in the SIC code string. The pattern is a raw
# string so that \d is read as a regex class, not as an invalid string escape.
dscan_df['sic_code'] = dscan_df['sic_code'].str.extract(r'(\d+)', expand=False)

# Missing labels become '' so the string helpers in later cells (split / re.search)
# always receive a str.
for text_col in ('lender_institution_type', 'tranche_type', 'market_segment'):
    dscan_df[text_col] = dscan_df[text_col].fillna('')

In [None]:
## identify lead arranger

# check 1: identify lead arranger based on whether name of company is in list of "lead arranger"
def clean_string(s):
    """Normalise a company name before fuzzy comparison.

    Parenthesised ``(...)`` and bracketed ``[...]`` qualifiers are removed first,
    then every character that is not an ASCII letter or whitespace is dropped,
    and runs of whitespace are collapsed to a single space.

    Parameters
    ----------
    s : str
        Raw name.

    Returns
    -------
    str
        Cleaned name without leading or trailing whitespace.
    """
    # The qualifier patterns have to run before the character filter: once the
    # delimiters themselves have been stripped these patterns can never match.
    s = re.sub(r'\([^)]*\)', '', s)   # Remove terms in parentheses
    s = re.sub(r'\[[^\]]*\]', '', s)  # Remove terms in brackets
    s = re.sub(r'[^A-Za-z\s]', '', s)  # Remove numbers and punctuations
    # Removing a qualifier from the middle of a name leaves a double space behind.
    s = re.sub(r'\s+', ' ', s)
    return s.strip()

def check_lead(row, threshold=90):
    """Flag whether the row's lender is one of the listed lead arrangers.

    ``row['lead_arranger']`` is split on ``', '`` and each entry is compared with
    ``row['lender_name']`` (both passed through ``clean_string`` and lower-cased)
    using ``fuzz.ratio``.

    Parameters
    ----------
    row : mapping
        Must provide ``'lender_name'`` and ``'lead_arranger'``.
    threshold : int, optional
        Minimum ``fuzz.ratio`` score that counts as a match (default 90).

    Returns
    -------
    int
        1 if any listed lead arranger matches the lender name, else 0.
    """
    lender_raw = row['lender_name']
    leads_raw = row['lead_arranger']

    # Missing values arrive as None or float NaN. NaN is truthy, so a plain truth
    # test is not enough to keep it away from .split() / re.sub().
    if not isinstance(lender_raw, str) or not isinstance(leads_raw, str) or not leads_raw:
        return 0

    lender_name = clean_string(lender_raw).lower()
    # NOTE(review): an arranger name that itself contains ', ' is split into
    # several pieces here — confirm against the data.
    for lead in leads_raw.split(', '):
        if fuzz.ratio(lender_name, clean_string(lead).lower()) >= threshold:
            return 1  # one match is enough
    return 0

# Evaluate check_lead on every row (with a progress bar) and store the 1/0 result.
is_lead_flags = dscan_df.progress_apply(check_lead, axis=1)
dscan_df['is_lead'] = is_lead_flags

# # check 2: identify based on role (removed - Apr 22, 2024)
# # # relevant_terms = ["Mandated Lead arranger","Lead arranger","Lead manager","Co-lead manager","Co-lead arranger","Expanded lead manager","Lead bank","Lead Left","Senior co-lead manager","Joint lead manager","Extended lead manager"]
# relevant_terms = ["Mandated Lead arranger","Lead arranger","Co-lead arranger","Mandated arranger","Coordinating arranger"]
# dscan_df['primary_role'] = dscan_df['primary_role'].fillna('')
# dscan_df['additional_roles'] = dscan_df['additional_roles'].fillna('')
# dscan_df['all_roles'] = dscan_df['primary_role']+', '+dscan_df['additional_roles']
# dscan_df.drop(columns=['primary_role','additional_roles'], inplace=True)
# dscan_df['is_lead_2'] = dscan_df.progress_apply(lambda x: 1 if re.search('|'.join(relevant_terms), x['all_roles']) else 0, axis=1)

# final classification is the max of both labels
# dscan_df['is_lead'] = dscan_df[['is_lead_1', 'is_lead_2']].max(axis=1)
# Count the rows flagged as lead within each tranche / active-date pair and attach
# that count to every row as `is_lead_sum`.
tranche_event = ['lpc_tranche_id','tranche_active_date']
sum_df = (
    dscan_df
    .groupby(tranche_event)['is_lead']
    .sum()
    .reset_index()
)
dscan_df = dscan_df.merge(sum_df, on=tranche_event, suffixes=('', '_sum'))
# dscan_df.drop(columns=['is_lead_1','is_lead_2'], inplace=True)


In [None]:
#  check dealid 21507 -- how many lead arrangers?

# dscan_df.loc[dscan_df['lpc_deal_id']=='21507','lead_arranger'].values[0]
# dscan_df.loc[dscan_df['lpc_deal_id']=='21507']

In [None]:
## identify banks vs non-banks (see Elliot, Meisenzahl, Peydro, 2021)

# check lender_institution_type 
bank_terms = ['African Bank', 'Asia-Pacific Bank', 'Asian Bank', 'Australian/New Zealand Bank', 'Canadian Bank', 'East. Europe/Russian Bank', 'European Bank', 'Export-Import Bank/ECA', 
         'Farm Credit Bank', 'Foreign Bank', 'Japanese Bank', 'Latin America & Caribbean Bank', 'Middle Eastern Bank',  'Mortgage Bank', 'Thrift / S&L', 'US Bank', 'Western European Bank']

def check_nonbank(row):
    """Derive bank / institutional-investor / mutual-fund flags from the type label.

    ``row['lender_institution_type']`` is a comma-separated list of type labels.

    Parameters
    ----------
    row : mapping
        Must provide ``'lender_institution_type'`` as a str ('' when unknown).

    Returns
    -------
    dict
        ``is_bank``     1 if any label is in ``bank_terms``, else 0;
        ``is_instinv``  1 if any label contains 'Inst. Invest.', else 0;
        ``is_mf``       1 if any label contains 'Mutual Fund', else 0;
        ``institution_type``  the list of stripped labels.
        When the label is '' all three flags are -1 and the list is empty.
    """
    raw_label = row['lender_institution_type']
    if raw_label == '':
        return {'is_bank':-1, 'is_instinv':-1, 'is_mf':-1, 'institution_type': []}

    type_terms = [piece.strip() for piece in raw_label.split(',')]

    # if any bank term is present, then it is a bank
    is_bank = int(any(term in bank_terms for term in type_terms))
    # Literal substring tests: the labels contain '.', which a regex pattern would
    # interpret as "any character".
    is_institution = int(any('Inst. Invest.' in term for term in type_terms))
    is_mutual_fund = int(any('Mutual Fund' in term for term in type_terms))

    return {'is_bank':is_bank, 'is_instinv':is_institution, 'is_mf':is_mutual_fund, 'institution_type': type_terms}

# Expand the per-row dicts into columns. The new frame reuses dscan_df's index so
# the column-wise concat pairs each flag row with the row it was computed from,
# even when dscan_df does not carry a default 0..n-1 index.
out = dscan_df.progress_apply(check_nonbank, axis=1)
out = pd.DataFrame(out.tolist(), index=dscan_df.index)
dscan_df = pd.concat([dscan_df, out], axis=1)

def unique_values(x):
    """Return the distinct elements found across an iterable of iterables.

    The result is a list built from a set, so its order is unspecified.
    """
    return list({element for sublist in x for element in sublist})

# aggregate non_bank indicator by lender id (to handle inconsistent labeling of lender_institution_type)
# One row per lender_id: the first lender name, the maximum of each flag over the
# lender's rows, the number of rows (n_tranche), and the union of all type labels.
lender_rows = dscan_df[['lender_id','lender_name','is_bank','is_instinv','is_mf','institution_type']]
aux = lender_rows.groupby('lender_id').agg(
    lender_name=('lender_name', 'first'),
    is_bank=('is_bank', 'max'),
    n_tranche=('is_bank', 'size'),
    is_instinv=('is_instinv', 'max'),
    is_mf=('is_mf', 'max'),
    institution_type=('institution_type', unique_values),
).reset_index()
# aux['lender_institution_type'] = aux['lender_institution_type'].str.split(',').apply(lambda x : ', '.join(set([e.strip() for e in x])))
# aux.to_csv(datdir+'temp.txt', index=False, sep='|')
# aux.drop(['lender_institution_type'],axis=1).to_csv(datdir+'/data/dealscan_lenderlist.txt', index=False, sep='|')

# classify lenders as bank/nonbank using aggregated list
# Swap the row-level labels for the lender-level ones held in `aux`.
row_level_cols = ['is_bank','is_instinv','is_mf','lender_institution_type','institution_type']
lender_level = aux[['lender_id','is_bank','is_instinv','is_mf','institution_type']]
dscan_df = dscan_df.drop(row_level_cols, axis=1).merge(lender_level, on='lender_id', how='left')


In [None]:
# identify type of loan (see Demiroglu and James, 2016)

# 1. traditional bank loans are term A loans or credit lines funded only by commercial banks or insurance companies
trancheA_terms = ['Term Loan A', 'Revolver/Line >= 1 Yr.', 'Revolver/Line < 1 Yr.']
commercial_lender_terms = ['African Bank', 'Asia-Pacific Bank', 'Asian Bank', 'Australian/New Zealand Bank', 'Canadian Bank', 'East. Europe/Russian Bank', 'European Bank', 'Export-Import Bank/ECA', 
         'Farm Credit Bank', 'Foreign Bank', 'Japanese Bank', 'Latin America & Caribbean Bank', 'Middle Eastern Bank',  'Mortgage Bank', 'Thrift / S&L', 'US Bank', 'Western European Bank', "Insurance Company"]
inst_lender_terms = ['Inst. Invest. CDO', 'Inst. Invest. Hedge Fund', 'Private Equity','Mutual Fund']
private_lender_terms = ['Investment Bank', 'Finance Company']
def check_loan_type(row):
    """Classify one lender-tranche row by lender category and tranche type.

    Reads ``row['institution_type']`` (list of type labels) and
    ``row['tranche_type']`` (str). Returns a dict of 1/0 flags:

    - ``termA``: tranche type matches one of ``trancheA_terms``
    - ``termB``: tranche type contains 'Term Loan B'
    - ``trad_bank_loan``: termA and every lender label is in ``commercial_lender_terms``
    - ``inst_loan``: termB, or an institutional-investor label with no
      investment-bank / finance-company label
    - ``private_loan``: termA with an investment-bank / finance-company label and
      no institutional-investor label
    - ``inst_loan_ib``: termB, or an investment-bank / finance-company label with
      no institutional-investor label
    - ``inst_loan_bny``: tranche type matches 'Term Loan [B-D]'
    """
    lender_labels = row['institution_type']
    tranche_label = row['tranche_type']

    # 0. lender category
    # NOTE(review): all() is True for an empty list, so a lender with no recorded
    # type counts as commercial here — confirm this is intended.
    is_commercial_bank = int(all(label in commercial_lender_terms for label in lender_labels))
    is_inst_inv = int(any(label in inst_lender_terms for label in lender_labels))
    is_inv_bank = int(any(label in private_lender_terms for label in lender_labels))

    # 1. tranche category (the terms are used as regex patterns)
    is_termA = int(any(re.search(pattern, tranche_label) for pattern in trancheA_terms))
    is_termB = 1 if re.search('Term Loan B', tranche_label) else 0

    # 2. loan classes; inside the right-hand side of each `or`, termB is already
    # known to be 0, so it is not tested again
    trad_bank_loan = 1 if (is_termA and is_commercial_bank) else 0
    inst_loan = 1 if (is_termB or (is_inst_inv and not is_inv_bank)) else 0
    private_loan = 1 if (is_termA and is_inv_bank and not is_inst_inv) else 0
    inst_loan_ib = 1 if (is_termB or (is_inv_bank and not is_inst_inv)) else 0

    # 3. Berlin, Nini, Yu institutional classification: Term Loan B, C, or D
    inst_loan_bny = 1 if re.search('Term Loan [B-D]', tranche_label) else 0

    return {
        'trad_bank_loan': trad_bank_loan,
        "inst_loan": inst_loan,
        "inst_loan_ib": inst_loan_ib,
        "private_loan": private_loan,
        "termA": is_termA,
        "termB": is_termB,
        "inst_loan_bny": inst_loan_bny,
    }

# Expand the per-row dicts into columns. The new frame reuses dscan_df's index so
# the column-wise concat pairs each flag row with the row it was computed from,
# even when dscan_df does not carry a default 0..n-1 index.
out = dscan_df.progress_apply(check_loan_type, axis=1)
out = pd.DataFrame(out.tolist(), index=dscan_df.index)
dscan_df = pd.concat([dscan_df, out], axis=1)

# # use value at origination
# def replace_values(group):
#     origination_row = group[group['tranche_o_a'] == 'Origination']
#     if not origination_row.empty:
#         group['trad_bank_loan'] = origination_row['trad_bank_loan'].values[0]
#         group['inst_loan'] = origination_row['inst_loan'].values[0]
#         group['inst_loan_ib'] = origination_row['inst_loan_ib'].values[0]
#         group['private_loan'] = origination_row['private_loan'].values[0]
#         group['termA'] = origination_row['termA'].values[0]
#         group['termB'] = origination_row['termB'].values[0]
#         group['inst_loan_bny'] = origination_row['inst_loan_bny'].values[0]
#     else:
#         group['trad_bank_loan'] = 0
#         group['inst_loan'] = 0
#         group['inst_loan_ib'] = 0
#         group['private_loan'] = 0
#         group['termA'] = 0
#         group['termB'] = 0
#         group['inst_loan_bny'] = 0
#     return group
# Apply the custom function within each group
# dscan_df = dscan_df.groupby(['lpc_tranche_id']).progress_apply(replace_values).reset_index(drop=True)

In [None]:
# extract covlite indicator

def read_marketsegment(row_in):
    """Turn the market-segment label into three 1/0 indicators.

    Each indicator is 1 when its keyword occurs in ``row_in['market_segment']``
    (case-insensitive search), else 0:
    ``levloan`` <- 'leverage', ``covlite`` <- 'covenant lite',
    ``institutional`` <- 'institutional'.
    """
    segment = row_in['market_segment']

    def _mentions(keyword):
        # 1 if the keyword appears anywhere in the label, ignoring case
        return 1 if re.search(keyword, segment, re.IGNORECASE) else 0

    return {
        'levloan': _mentions('leverage'),
        'covlite': _mentions('covenant lite'),
        'institutional': _mentions('institutional'),
    }

# Expand the per-row dicts into columns. The new frame reuses dscan_df's index so
# the column-wise concat pairs each flag row with the row it was computed from,
# even when dscan_df does not carry a default 0..n-1 index.
out = dscan_df.progress_apply(read_marketsegment, axis=1)
out = pd.DataFrame(out.tolist(), index=dscan_df.index)
dscan_df = pd.concat([dscan_df, out], axis=1)

In [None]:
## merge with lpc connector crosswalk

id_df = joblib.load(datdir+'rawdata/dealscan/lpc_loanconnector_company_id_map.pkl')
# id_df = joblib.load(datdir+'\\lpc_loanconnector_company_id_map.pkl')
# The crosswalk's company id is the key called borrower_id in dscan_df.
id_df = id_df.rename({'loanconnector_company_id':'borrower_id'}, axis=1)

# adjust data type: values that cannot be parsed as dates become NaT
for date_col in ('tranche_active_date', 'tranche_maturity_date', 'deal_active_date'):
    dscan_df[date_col] = pd.to_datetime(dscan_df[date_col], errors='coerce')

# merge with old company identifiers; the outer join plus indicator is only used
# to report how many rows are unmatched on either side
dscan_df = dscan_df.merge(id_df, on='borrower_id', how='outer', indicator=True, validate='many_to_one')
print(dscan_df._merge.value_counts())

# keep only those with exact merges, then discard the indicator column
matched = dscan_df._merge=='both'
dscan_df = dscan_df[matched].reset_index(drop=True).drop(['_merge'],axis=1)

In [None]:
## merge with Chava-Roberts link table

# cv_df = pd.read_excel(datdir+"\\Dealscan-Compustat_Linking_Database.xlsx", sheet_name='link_data')
link_raw = pd.read_excel(datdir+"rawdata/dealscan/Dealscan-Compustat_Linking_Database.xlsx", sheet_name='link_data')

# One link row per company / facility start date, renamed to dscan_df's key names.
cv_df = (
    link_raw[['bcoid','gvkey','facstartdate','fic']]
    .drop_duplicates(subset=['bcoid','facstartdate'])
    .rename({'bcoid': 'lpc_company_id','facstartdate':'tranche_active_date'}, axis=1)
)

# Cast the key to int before merging on it.
dscan_df['lpc_company_id'] = dscan_df['lpc_company_id'].astype(int)
dscan_df = dscan_df.merge(
    cv_df,
    on=['lpc_company_id','tranche_active_date'],
    how='left',
    indicator=True,
    validate='many_to_one',
)

# Diagnostics: match counts and number of rows still without a gvkey.
print(dscan_df._merge.value_counts())
print(dscan_df.gvkey.isna().sum())

In [None]:
# Propagate gvkey within each company (assume the link does not change):
#   1. carry the most recent matched gvkey forward to later rows,
#   2. back-fill rows that precede the company's first match.
# Rows are ordered by tranche_active_date inside each company so that "forward"
# and "backward" follow time rather than the incidental row order.
dscan_df = dscan_df.sort_values(['lpc_company_id', 'tranche_active_date'])
dscan_df = dscan_df.set_index(['lpc_company_id'])

# GroupBy.ffill / bfill replace fillna(method=...), which newer pandas deprecates.
dscan_df['gvkey'] = dscan_df['gvkey'].groupby(level=0).ffill()
dscan_df['gvkey'] = dscan_df['gvkey'].groupby(level=0).bfill()

# formatting: company id back to a column, merge indicator no longer needed
dscan_df = dscan_df.reset_index()
dscan_df = dscan_df.drop(['_merge'], axis=1)

# number of rows that still have no gvkey (displayed as the cell output)
dscan_df.gvkey.isna().sum()

In [None]:
# adjust contract maturity date for loan amendments

dscan_df = dscan_df.sort_values(by=['lender_id','borrower_id','lpc_deal_id','deal_active_date','lpc_tranche_id', 'tranche_active_date'])

# Active date of the next record of the same lender-tranche (NaT when there is none).
tranche_key = ['lender_id','borrower_id','lpc_deal_id','deal_active_date','lpc_tranche_id']
next_active_date = dscan_df.groupby(tranche_key)['tranche_active_date'].shift(-1)

# The contract ends at whichever is earlier: the stated maturity or the next record.
# The comparison is False whenever either date is missing, so those rows keep
# tranche_maturity_date — the same outcome as a row-wise min() with a null check,
# computed on whole columns instead of one Python call per row.
amended_early = next_active_date < dscan_df['tranche_maturity_date']
dscan_df['adjusted_maturity_date'] = next_active_date.where(amended_early, dscan_df['tranche_maturity_date'])

# 1 when the next record arrives strictly before the stated maturity date.
dscan_df['early_nego'] = np.where(amended_early, 1, 0)

In [None]:
# flag deals with revolver tranches

# 1/0 flags for whether the tranche type label mentions a revolver / a term loan.
for flag_col, keyword in (('has_revolver', 'Revolver'), ('has_termloan', 'Term Loan')):
    dscan_df[flag_col] = dscan_df['tranche_type'].apply(
        lambda label, kw=keyword: 1 if re.search(kw, label) else 0
    )

In [None]:
# Spot check: list every record of deal 21507 without truncating the rows.
pd.set_option('display.max_rows', None)
inspect_cols = [
    'lender_id','lender_name','lpc_deal_id','lpc_tranche_id','tranche_o_a',
    'tranche_active_date','tranche_maturity_date','tranche_type',
    'adjusted_maturity_date','early_nego','is_lead','has_revolver','has_termloan',
    'all_in_spread_drawn_bps',
]
deal_21507 = dscan_df.loc[dscan_df['lpc_deal_id']=='21507', inspect_cols]
deal_21507.sort_values(by=['tranche_type','lender_id','tranche_active_date'])

In [None]:
# deal-lender dataset: one row per lender x borrower x deal x tranche_active_date
dscan_df = dscan_df.sort_values(by=['lender_id','borrower_id','lpc_deal_id','deal_active_date','lpc_tranche_id', 'tranche_active_date'])

# output column -> (source column, aggregation); dict order fixes the column order
deal_lender_aggs = {
    # 'earliest_active_date': ('tranche_active_date', 'min'),  # earliest tranche lender participates
    'latest_maturity_date': ('adjusted_maturity_date', 'max'),  # latest maturity lender participates
    'early_nego': ('early_nego', 'max'),  # whether lender participated in early negotiation
    'lender_parent_name': ('lender_parent_name', 'first'),
    'lender_parent_id': ('lender_parent_id', 'first'),
    'lender_name': ('lender_name', 'first'),
    'gvkey': ('gvkey', 'first'),
    'sic_code': ('sic_code', 'first'),
    'deal_active': ('deal_active', 'last'),  # whether last deal is active or not
    'lead_arranger': ('is_lead', 'max'),
    'is_bank': ('is_bank', 'max'),
    'is_instinv': ('is_instinv', 'max'),
    'is_mf': ('is_mf', 'max'),
    'deal_amount': ('deal_amount', 'first'),
    'deal_amount_converted': ('deal_amount_converted', 'first'),
    'tranche_o_a': ('tranche_o_a', 'first'),
    'has_revolver': ('has_revolver', 'max'),
}
deal_lender_df = (
    dscan_df
    .groupby(['lender_id','borrower_id','lpc_deal_id','tranche_active_date'])
    .agg(**deal_lender_aggs)
    .reset_index()
)

In [None]:
# borrower-loan dataset
# One row per deal x tranche_active_date.
# output column -> (source column, aggregation); dict order fixes the column order
loan_aggs = {
    # 'earliest_active_date': ('tranche_active_date', 'min'),  # earliest tranche lender participates
    'latest_maturity_date': ('adjusted_maturity_date', 'max'),  # latest maturity lender participates
    'orig_maturity_date': ('tranche_maturity_date', 'min'),  # original maturity date
    'early_nego': ('early_nego', 'max'),  # whether deal was renegotiated early maturity date
    'gvkey': ('gvkey', 'first'),
    'borrower_id': ('borrower_id', 'first'),
    'sic_code': ('sic_code', 'first'),
    'trad_bank_loan': ('trad_bank_loan', 'max'),
    'inst_loan': ('inst_loan', 'max'),
    'private_loan': ('private_loan', 'max'),
    'inst_loan_ib': ('inst_loan_ib', 'max'),
    'termA': ('termA', 'max'),
    'termB': ('termB', 'max'),
    'levloan': ('levloan', 'max'),
    'covlite': ('covlite', 'max'),
    'nlenders': ('lender_id', 'nunique'),
    'institutional': ('institutional', 'max'),
    'inst_loan_bny': ('inst_loan_bny', 'max'),
    'deal_amount': ('deal_amount', 'first'),
    'deal_amount_converted': ('deal_amount_converted', 'first'),
    'tranche_o_a': ('tranche_o_a', 'first'),
    'deal_active': ('deal_active', 'last'),  # whether last deal is active or not
    'has_revolver': ('has_revolver', 'max'),
    'has_termloan': ('has_termloan', 'max'),
    'base_reference_rate': ('base_reference_rate', 'first'),
    'all_in_spread_drawn_bps': ('all_in_spread_drawn_bps', 'first'),
    'all_in_spread_undrawn_bps': ('all_in_spread_undrawn_bps', 'first'),
}
loan_df = (
    dscan_df
    .groupby(['lpc_deal_id','tranche_active_date'])
    .agg(**loan_aggs)
    .reset_index()
)

In [None]:
# borrower-tranche dataset 
# One row per tranche x tranche_active_date.
# output column -> (source column, aggregation); dict order fixes the column order
tranche_aggs = {
    # 'earliest_active_date': ('tranche_active_date', 'min'),  # earliest tranche lender participates
    'latest_maturity_date': ('adjusted_maturity_date', 'max'),  # latest maturity lender participates
    'orig_maturity_date': ('tranche_maturity_date', 'min'),  # original maturity date
    'early_nego': ('early_nego', 'max'),  # whether deal was renegotiated early maturity date
    'gvkey': ('gvkey', 'first'),
    'borrower_id': ('borrower_id', 'first'),
    'sic_code': ('sic_code', 'first'),
    'trad_bank_loan': ('trad_bank_loan', 'max'),
    'inst_loan': ('inst_loan', 'max'),
    'private_loan': ('private_loan', 'max'),
    'inst_loan_ib': ('inst_loan_ib', 'max'),
    'termA': ('termA', 'max'),
    'termB': ('termB', 'max'),
    'levloan': ('levloan', 'max'),
    'covlite': ('covlite', 'max'),
    'nlenders': ('lender_id', 'nunique'),
    'institutional': ('institutional', 'max'),
    'inst_loan_bny': ('inst_loan_bny', 'max'),
    'deal_amount': ('deal_amount', 'first'),
    'deal_amount_converted': ('deal_amount_converted', 'first'),
    'tranche_amount': ('tranche_amount', 'first'),
    'tranche_o_a': ('tranche_o_a', 'first'),
    'deal_active': ('deal_active', 'last'),  # whether last deal is active or not
    'has_revolver': ('has_revolver', 'max'),
    'has_termloan': ('has_termloan', 'max'),
    'base_reference_rate': ('base_reference_rate', 'first'),
    'all_in_spread_drawn_bps': ('all_in_spread_drawn_bps', 'first'),
    'all_in_spread_undrawn_bps': ('all_in_spread_undrawn_bps', 'first'),
}
tranche_df = (
    dscan_df
    .groupby(['lpc_tranche_id','tranche_active_date'])
    .agg(**tranche_aggs)
    .reset_index()
)

In [None]:
# construct borrower-lender level dataset
# One row per lender x borrower pair.
# output column -> (source column, aggregation); dict order fixes the column order
borrower_lender_aggs = {
    'earliest_active_date': ('tranche_active_date', 'min'),  # earliest tranche lender participates
    'latest_maturity_date': ('adjusted_maturity_date', 'max'),  # latest maturity lender participates
    'early_nego': ('early_nego', 'max'),  # whether lender has negotiated early maturity date
    'lender_parent_name': ('lender_parent_name', 'first'),
    'lender_parent_id': ('lender_parent_id', 'first'),
    'lender_name': ('lender_name', 'first'),
    'gvkey': ('gvkey', 'first'),
    'sic_code': ('sic_code', 'first'),
    'lead_arranger': ('is_lead', 'max'),
    'is_bank': ('is_bank', 'max'),
    'is_instinv': ('is_instinv', 'max'),
    'is_mf': ('is_mf', 'max'),
    # NOTE(review): 'last' follows dscan_df's current row order — confirm that
    # order puts the intended deal last within each pair.
    'deal_active': ('deal_active', 'last'),
}
borrower_lender_df = (
    dscan_df
    .groupby(['lender_id','borrower_id'])
    .agg(**borrower_lender_aggs)
    .reset_index()
)

In [None]:
# save dataframe
# Relative output path -> frame to export; written in this order.
stata_outputs = {
    'data/dealscan_deal_lender.dta': deal_lender_df,
    'data/dealscan_borrower_lender.dta': borrower_lender_df,
    'data/dealscan_borrower_deal.dta': loan_df,
    'data/dealscan_borrower_tranche.dta': tranche_df,
}
for rel_path, frame in stata_outputs.items():
    frame.to_stata(datdir+rel_path, write_index=False)

In [None]:
# del dat_df