In [1]:
import pandas as pd
import numpy as np
import scipy as sp
import os
import dask
import dask.dataframe as dd
import itertools
from itertools import chain
from math import sqrt, floor, ceil, isnan
import multiprocess
import multiprocessing
import importlib
from importlib import reload
from collections import Counter
from fuzzywuzzy import process, fuzz
import time
import seaborn as sns
import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import warnings
warnings.filterwarnings("error")

pd.options.display.max_columns = 500
pd.options.display.max_rows = 1000
pd.options.display.max_colwidth = 400

# A customized winsorisation function that handles missing (NaN) values correctly.
# The percentiles are taken, and the clipping is done, on the non-missing values only.
def winsor2(series,cutoffs):
    """Winsorise the non-NaN entries of `series` and leave NaN entries in place.

    Parameters
    ----------
    series : numeric numpy array or pandas Series
        Anything that `np.isnan` and boolean-mask indexing accept.
    cutoffs : sequence of two floats
        (lower, upper) fractions, forwarded to `winsorize` as `limits`.

    Returns
    -------
    A copy of `series` in which the non-NaN entries are winsorised. The input
    itself is not modified. If `series` has no non-NaN entry, an unchanged
    copy is returned.
    """

    # Imported inside the function, as in the original, so that the function carries its own
    # dependencies. The submodule is imported explicitly rather than reached through
    # "sp.stats", which only works when scipy has already loaded scipy.stats.
    import numpy as np
    from scipy.stats import mstats

    is_missing = np.isnan(series)
    is_present = np.logical_not(is_missing)
    series_new = series.copy()

    # Nothing to winsorise. winsorize() indexes into the sorted data and fails on an empty input.
    if not np.any(is_present):
        return series_new

    # Missing entries are already NaN in the copy, so only the present ones are overwritten
    series_new[is_present] = mstats.winsorize(series[is_present],limits=(cutoffs[0],cutoffs[1]))

    return series_new


# 1. Import data

In [2]:
# GPF
GPF = pd.read_csv("../CleanData/SDC/0A_GPF.csv", low_memory=False)

# Groups of GPF columns picked by naming pattern. The second list matches on the prefix only,
# so the "raw_name_GPF_*" columns are not picked up a second time.
raw_name_GPF_colnames = [col for col in GPF.columns if 'raw_name_GPF_' in col]
name_GPF_colnames = [col for col in GPF.columns if col.startswith('name_GPF_')]
parent_name_GPF_colnames = [col for col in GPF.columns if 'parent_name_' in col]

# "has_ratings" is later used to decide whether a bond issue falls within the expertise of the
# merging underwriters. It is stored as a string so that it is handled like the other
# issue-category variables.
GPF['has_ratings'] = GPF['has_ratings'].astype(str)

# Parent relationship
GPF_names = pd.read_parquet('../CleanData/SDC/0H_GPF_Parent.parquet')

# HHI and market share of each underwriter, by CSA
HHI_byCSA = pd.read_csv('../CleanData/SDC/1A_HHI_byCSA.csv')
market_share_all_markets_byCSA = pd.read_csv('../CleanData/SDC/1A_market_share_all_markets_byCSA.csv')

# HHI and market share of each underwriter, by CBSA
HHI_byCBSA = pd.read_csv('../CleanData/SDC/1A_HHI_byCBSA.csv')
market_share_all_markets_byCBSA = pd.read_csv('../CleanData/SDC/1A_market_share_all_markets_byCBSA.csv')

# Portfolio weights of CSAs within underwriter
csa_share_withinbank = pd.read_csv('../CleanData/SDC/1A_csa_share_withinbank.csv')

# All M&As, re-indexed 0..n-1 whatever index the parquet file carried
MA = pd.read_parquet('../CleanData/SDC/0B_M&A.parquet').reset_index(drop=True)

# Withdrawn M&As
MA_withdrawn = pd.read_csv("../CleanData/SDC/0I_MA_withdrawn.csv")

# Quantity of issuance, by state x county and one further dimension each
StateXCountyXBid = pd.read_parquet("../CleanData/SDC/0A_StateXCountyXBid.parquet")
StateXCountyXUsageBB = pd.read_parquet("../CleanData/SDC/0A_StateXCountyXUsageBB.parquet")
StateXCountyXUsageGeneral = pd.read_parquet("../CleanData/SDC/0A_StateXCountyXUsageGeneral.parquet")
StateXCountyXUsageMain = pd.read_parquet("../CleanData/SDC/0A_StateXCountyXUsageMain.parquet")
StateXCountyXIssuerType = pd.read_parquet("../CleanData/SDC/0A_StateXCountyXIssuerType.parquet")

# Total amount by state x county x year, obtained by summing the bid-level table.
# The aggregation is named by the string 'sum' rather than by the builtin sum: recent pandas
# versions deprecate passing the builtin, and since this notebook turns warnings into errors
# the deprecation warning would stop the cell.
StateXCounty = StateXCountyXBid.groupby(['State','County','sale_year']).agg({'amount':'sum'})
StateXCounty = StateXCounty.reset_index()

# Demographics
CSA_POP = pd.read_csv("../CleanData/Demographics/0C_CSA_Pop.csv")
CSA_INC = pd.read_csv("../CleanData/Demographics/0C_CSA_Inc.csv")
CBSA_POP = pd.read_csv("../CleanData/Demographics/0C_CBSA_Pop.csv")
CBSA_INC = pd.read_csv("../CleanData/Demographics/0C_CBSA_Inc.csv")

#-------------#
# Import CBSA #
#-------------#

# Full state / territory name -> two-letter abbreviation (50 states, DC and five territories).
# NOTE: later in this cell the same name is rebound to a two-column DataFrame
# ('State Name', 'State') built from this dict, so after the cell has run
# "us_state_to_abbrev" is no longer a dict.
us_state_to_abbrev = {
    "Alabama": "AL",
    "Alaska": "AK",
    "Arizona": "AZ",
    "Arkansas": "AR",
    "California": "CA",
    "Colorado": "CO",
    "Connecticut": "CT",
    "Delaware": "DE",
    "Florida": "FL",
    "Georgia": "GA",
    "Hawaii": "HI",
    "Idaho": "ID",
    "Illinois": "IL",
    "Indiana": "IN",
    "Iowa": "IA",
    "Kansas": "KS",
    "Kentucky": "KY",
    "Louisiana": "LA",
    "Maine": "ME",
    "Maryland": "MD",
    "Massachusetts": "MA",
    "Michigan": "MI",
    "Minnesota": "MN",
    "Mississippi": "MS",
    "Missouri": "MO",
    "Montana": "MT",
    "Nebraska": "NE",
    "Nevada": "NV",
    "New Hampshire": "NH",
    "New Jersey": "NJ",
    "New Mexico": "NM",
    "New York": "NY",
    "North Carolina": "NC",
    "North Dakota": "ND",
    "Ohio": "OH",
    "Oklahoma": "OK",
    "Oregon": "OR",
    "Pennsylvania": "PA",
    "Rhode Island": "RI",
    "South Carolina": "SC",
    "South Dakota": "SD",
    "Tennessee": "TN",
    "Texas": "TX",
    "Utah": "UT",
    "Vermont": "VT",
    "Virginia": "VA",
    "Washington": "WA",
    "West Virginia": "WV",
    "Wisconsin": "WI",
    "Wyoming": "WY",
    "District of Columbia": "DC",
    "American Samoa": "AS",
    "Guam": "GU",
    "Northern Mariana Islands": "MP",
    "Puerto Rico": "PR",
    "United States Minor Outlying Islands": "UM",
    "U.S. Virgin Islands": "VI",
}

# "CSA" is for metropolitan and "CBSA" includes also those micropolitan
CBSAData = pd.read_excel("../RawData/MSA/CBSA.xlsx",skiprows=[0,1])
# Keep only rows that carry a county
CBSAData = CBSAData[CBSAData['County/County Equivalent'].notna()]

# Add state abbreviations. The dict is turned into a two-column lookup table under the same name.
us_state_to_abbrev = pd.DataFrame(
    list(us_state_to_abbrev.items()),
    columns=['State Name','State'])
# Outer merge with indicator, then keep matched rows only
CBSAData = (
    CBSAData
    .rename(columns={'County/County Equivalent':'County'})
    .merge(us_state_to_abbrev,on='State Name',how='outer',indicator=True)
    .loc[lambda merged: merged['_merge']=='both']
    .drop(columns=['_merge'])
)
# Merge is perfect

# Normalise county names: upper case, no " COUNTY" suffix, "&" for " AND ", no periods
CBSAData['County'] = (
    CBSAData['County']
    .str.upper()
    .str.replace(' COUNTY','')
    .str.replace(' AND ',' & ')
    .str.replace('.','',regex=False)
)
CBSAData = CBSAData.astype({'CSA Code':float,'CBSA Code':float})

# CSA characteristics to be used in matching: average gross spread and average yield, by CSA and year.
# Each average only uses the deals where that variable is available, and the two are then combined
# with an outer merge so that a CSA-year with only one of them is kept.

CSACharsForMatchSpread = (
    GPF[['CSA Code','sale_year','avg_yield','gross_spread']]
    .dropna(subset=['CSA Code','gross_spread'])
    .groupby(['CSA Code','sale_year'])
    .agg({'gross_spread':'mean'})
    .reset_index()
)

CSACharsForMatchYield = (
    GPF[['CSA Code','sale_year','avg_yield','gross_spread']]
    .dropna(subset=['CSA Code','avg_yield'])
    .groupby(['CSA Code','sale_year'])
    .agg({'avg_yield':'mean'})
    .reset_index()
)

CSACharsForMatch = pd.merge(
    CSACharsForMatchSpread,CSACharsForMatchYield,
    on=['CSA Code','sale_year'],how='outer')

# Importance of an underwriter within US or within a CSA, used to determine underwriter expertise.
# The grouped versions give per-underwriter access to each table.
RankBankWithinCategoryUS = pd.read_csv("../CleanData/SDC/0D_Rank_Bank_Within_Category_US.csv")
RankBankWithinCategoryUS_gb = RankBankWithinCategoryUS.groupby('underwriter')
RankBankWithinCategoryCSA = pd.read_csv("../CleanData/SDC/0D_Rank_Bank_Within_Category_CSA.csv")
RankBankWithinCategoryCSA_gb = RankBankWithinCategoryCSA.groupby('underwriter')


## 1.1. Find Episodes Less Confounded by Commercial Bank M&A

In [3]:
%%time

# The output of this cell is used to construct a "cleaner" sample that is not affected by concurrent
# commercial bank (CB) M&A, which could also affect the local economy and, through it, yields

#----------------------------------#
# Identify CSAs affected by CB M&A #
#----------------------------------#

# The module and the function in it share one name. Drop whatever the name is currently bound to,
# import and reload the module, then bind the name to the function.
try:
    del FUN_1B_Get_Delta_CB_HHI
except:
    pass
import FUN_1B_Get_Delta_CB_HHI
importlib.reload(FUN_1B_Get_Delta_CB_HHI)
from FUN_1B_Get_Delta_CB_HHI import FUN_1B_Get_Delta_CB_HHI

# CBs in SOD. Deposits come as strings with thousands separators.
SOD = pd.read_csv('../CleanData/FDIC/0I_SOD.csv')
SOD['DEPSUMBR'] = SOD['DEPSUMBR'].str.replace(',','').astype(int)

CSAs = [code for code in SOD['CSA Code'].unique() if str(code)!='nan']

# Ten interleaved slices of the CSA list, one per worker. Each slice is wrapped in a list
# because starmap unpacks every entry into the positional arguments of the function.
divided_list = [[CSAs[offset::10]] for offset in range(10)]

if __name__ == '__main__':
    with multiprocessing.Pool(processes = 10) as p:
        Delta_CB_HHI = p.starmap(FUN_1B_Get_Delta_CB_HHI, divided_list)
Delta_CB_HHI = pd.concat(Delta_CB_HHI)

# Generate lagged ("m") and forward ("p") versions of the variable, within CSA
Delta_CB_HHI = Delta_CB_HHI.sort_values(by=['CSA Code','year'])
for suffix,periods in [('m1',1),('m2',2),('m3',3),('m4',4),('p1',-1),('p2',-2),('p3',-3),('p4',-4)]:
    Delta_CB_HHI['CB_hhi_dif_'+suffix] = Delta_CB_HHI.groupby('CSA Code')['CB_hhi_dif'].shift(periods)


CPU times: user 2.65 s, sys: 563 ms, total: 3.22 s
Wall time: 18.7 s


# 2. Sample description & Correlation

## 2.1 All M&As

In [4]:
# Total number of M&As
n_MA = len(MA)

MA['both_active'] = False
MA['both_active_overlap_CSA'] = False

# A firm counts as active when its name appears either as an underwriter or as the parent of an
# underwriter, so both groups of columns are searched together
all_name_colnames = list(name_GPF_colnames)+list(parent_name_GPF_colnames)

# Names found in GPF in a given year: nationwide, and separately for every CSA. Cached by year so
# that mergers sharing a sale year do not rescan GPF.
active_names_cache = {}

for idx,row in MA.iterrows():

    # Activity is measured in the year right before the merger
    year_m1 = row['sale_year']-1
    cached = active_names_cache.get(year_m1)
    if cached is None:
        GPF_oneyear = GPF[GPF['sale_year']==year_m1]
        names_US = set(GPF_oneyear[all_name_colnames].to_numpy().ravel())
        # groupby leaves out deals with a missing CSA Code
        names_byCSA = [
            set(GPF_oneyearCSA[all_name_colnames].to_numpy().ravel())
            for _,GPF_oneyearCSA in GPF_oneyear.groupby('CSA Code')]
        cached = active_names_cache[year_m1] = (names_US,names_byCSA)
    names_US,names_byCSA = cached

    target,acquiror = row['target'],row['acquiror']

    # M&As where both sides underwrite municipal bonds right before the merger
    if target in names_US and acquiror in names_US:
        MA.at[idx,'both_active'] = True

    # M&As where both sides underwrite municipal bonds before the merger and overlap in at least
    # one CSA. any() stops at the first CSA in which both appear.
    if any(target in names and acquiror in names for names in names_byCSA):
        MA.at[idx,'both_active_overlap_CSA'] = True

n_MA_both_active = np.sum(MA['both_active']==True)
n_MA_both_active_overlap_CSA = np.sum(MA['both_active_overlap_CSA']==True)


In [5]:
# Rows of the summary table, as (label, count)
table_rows = [
    ("\\# of M\\&As",n_MA),
    ("\\# of M\\&As, both sides active",n_MA_both_active),
    ("\\# of M\\&As, both sides active and with CSA overlap",n_MA_both_active_overlap_CSA),
]
latex_table = "".join(label+"&"+f"{count:.0f}"+"\\\\\n" for label,count in table_rows)
with open("../Draft/tabs/sum_stats_MA.tex", "w") as file:
    file.write(latex_table)

# Number: Number of M&As where both sides are active #
with open('../Draft/nums/n_MA_both_active.tex','w') as file:
    file.write(str(n_MA_both_active))

# Number: Number of M&As where both sides are active and have geographic overlap #
with open('../Draft/nums/n_MA_both_active_overlap_CSA.tex','w') as file:
    file.write(str(n_MA_both_active_overlap_CSA))

# Number: Number of M&As where both sides are active post 1990 #
# (only computed here; this cell does not write it to a file)
n_MA_both_active_1990on = int(((MA['both_active']==True)&(MA['sale_year']>=1990)).sum())

## 2.2 Withdrawn M&As

In [6]:
# Align with the completed-M&A table: the announcement year plays the role of the sale year
MA_withdrawn = MA_withdrawn.rename(columns={'announce_year':'sale_year'})[['target','acquiror','sale_year']]

name_GPF_colnames = [column for column in GPF.columns if column.startswith('name_GPF_')]
parent_name_GPF_colnames = [column for column in GPF.columns if 'parent_name_' in column]

MA_withdrawn['both_active'] = False
MA_withdrawn['both_active_overlap_CSA'] = False

# A firm counts as active when its name appears either as an underwriter or as the parent of an
# underwriter, so both groups of columns are searched together
all_name_colnames = name_GPF_colnames+parent_name_GPF_colnames

# Names found in GPF in a given year: nationwide, and separately for every CSA. Cached by year so
# that deals sharing a year do not rescan GPF.
active_names_cache = {}

for idx,row in MA_withdrawn.iterrows():

    # Activity is measured in the year right before the (withdrawn) merger
    year_m1 = row['sale_year']-1
    cached = active_names_cache.get(year_m1)
    if cached is None:
        GPF_oneyear = GPF[GPF['sale_year']==year_m1]
        names_US = set(GPF_oneyear[all_name_colnames].to_numpy().ravel())
        # groupby leaves out deals with a missing CSA Code
        names_byCSA = [
            set(GPF_oneyearCSA[all_name_colnames].to_numpy().ravel())
            for _,GPF_oneyearCSA in GPF_oneyear.groupby('CSA Code')]
        cached = active_names_cache[year_m1] = (names_US,names_byCSA)
    names_US,names_byCSA = cached

    target,acquiror = row['target'],row['acquiror']

    # M&As where both sides underwrite municipal bonds right before the merger
    if target in names_US and acquiror in names_US:
        MA_withdrawn.at[idx,'both_active'] = True

    # M&As where both sides underwrite municipal bonds before the merger and overlap in at least
    # one CSA. any() stops at the first CSA in which both appear.
    if any(target in names and acquiror in names for names in names_byCSA):
        MA_withdrawn.at[idx,'both_active_overlap_CSA'] = True


In [7]:
# Counts of withdrawn M&As by activity flag
n_MA_withdrawn_both_active = np.sum(MA_withdrawn['both_active']==True)
n_MA_withdrawn_both_active_overlap_CSA = np.sum(MA_withdrawn['both_active_overlap_CSA']==True)

# Each number goes into its own file:
#   - withdrawn M&As where both sides are active
#   - withdrawn M&As where both sides are active and have geographic overlap
for path,number in [
    ('../Draft/nums/n_MA_withdrawn_both_active.tex',n_MA_withdrawn_both_active),
    ('../Draft/nums/n_MA_withdrawn_both_active_overlap_CSA.tex',n_MA_withdrawn_both_active_overlap_CSA),
]:
    with open(path,'w') as file:
        file.write(str(number))


## 2.3 Distribution of number of underwriters

In [8]:
%%script false --no-raise-error

# Deals with a known CSA and sale year, grouped by CSA x year
GPF_gb = GPF[(~pd.isnull(GPF['CSA Code']))&(~pd.isnull(GPF['sale_year']))].groupby(['CSA Code','sale_year'])
keys = list(GPF_gb.groups.keys())

# One record per CSA x year: number of distinct parent-level underwriters and total amount issued.
# Only the deals of that single year are used. The original code had placeholders for pooling the
# adjacent years, but they were commented out and contributed empty frames only.
n_banks_byCSAyear = []
for group in keys:

    GPF_oneCSAyear = GPF_gb.get_group(group)

    banks = set(GPF_oneCSAyear[parent_name_GPF_colnames].to_numpy().ravel())
    # Unused underwriter slots show up as NaN
    banks = [item for item in banks if str(item)!='nan']

    # Record data (appended in place instead of rebuilding the list on every iteration)
    n_banks_byCSAyear.append({
        'CSA Code':group[0],
        'year':group[1],
        'n_banks':len(banks),
        'amount_total':np.sum(GPF_oneCSAyear['amount']),
        })

n_banks_byCSAyear = pd.DataFrame(n_banks_byCSAyear)

# Within each year, split CSAs into three size groups by the rank of their total issuance:
# 1 = bottom third, 2 = middle third, 3 = top third
n_banks_byCSAyear_oneyear_withgroup = []
years = list(n_banks_byCSAyear['year'].unique())
for year in years:
    n_banks_byCSAyear_oneyear = n_banks_byCSAyear[n_banks_byCSAyear['year']==year].copy()
    n_banks_byCSAyear_oneyear['rank'] = n_banks_byCSAyear_oneyear['amount_total'].rank()
    n_CSAs = len(n_banks_byCSAyear_oneyear)
    rank = n_banks_byCSAyear_oneyear['rank']
    n_banks_byCSAyear_oneyear['group'] = None
    n_banks_byCSAyear_oneyear.loc[rank<=n_CSAs/3,'group'] = 1
    n_banks_byCSAyear_oneyear.loc[(rank>n_CSAs/3)&(rank<=n_CSAs/3*2),'group'] = 2
    n_banks_byCSAyear_oneyear.loc[(rank>n_CSAs/3*2)&(rank<=n_CSAs),'group'] = 3
    n_banks_byCSAyear_oneyear_withgroup.append(n_banks_byCSAyear_oneyear)
n_banks_byCSAyear = pd.concat(n_banks_byCSAyear_oneyear_withgroup)

In [9]:
%%script false --no-raise-error

# One density of the number of active underwriters per size group
for group,color,linestyle,label in [
    (1,'blue','-','Small CSAs'),
    (2,'green','--','Medium CSAs'),
    (3,'red',':','Large CSAs'),
]:
    sns.kdeplot(n_banks_byCSAyear[n_banks_byCSAyear['group']==group]['n_banks'],
        fill=False,color=color, linestyle=linestyle,label=label)

plt.xlim(0, 60)
plt.legend(fontsize=12)
plt.xlabel('Number of active underwriters',fontsize=12)
plt.ylabel('Density',fontsize=12)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)

# Silence warnings while saving only. catch_warnings() puts the previous filters back on exit,
# so the notebook-wide "error" policy set in the first cell stays in force afterwards. The
# original reset the filters to 'default' here, which silently dropped that policy.
with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    plt.savefig('../Draft/figs/NumBanks.eps', format='eps', bbox_inches='tight')

plt.show()

# Number: median number of active underwriters across CSA x year #
with open('../Draft/nums/n_banks_median.tex','w') as file:
    file.write(str(round(np.median(n_banks_byCSAyear['n_banks']))))


# 3. Construct Events of M&As, Using CSAs

## 3.1. Find CSA X Year affected by merger

Notes:
- Go over each merger. Check the CSAs affected by the merger (i.e., either side has business in the CSA in the year prior to the merger). Check if the merger affects just one underwriter or affects multiple underwriters in this CSA.
- For the "market share of other targets" columns, the ideal measure would be the market share of each other target on its own. The code instead uses the market share of the other target's parent, which should make only a minimal difference.


In [10]:
%%script false --no-raise-error

# %%time

def proc_list(MA_frag):
    """List, for every merger in `MA_frag`, the CSAs it affects and the parties' prior market shares.

    A CSA is recorded for a merger when the acquiror, the target, or another target bought by the
    same acquiror in the same year is found among the underwriters (or their parents; for acquiror
    and target also among the subsidiaries of those parents) of deals in that CSA during the three
    years before the merger.

    Reads the notebook-level tables GPF, MA, GPF_names and market_share_all_markets_byCSA.

    Parameters
    ----------
    MA_frag : DataFrame
        A slice of MA with at least the columns 'acquiror', 'target' and 'sale_year'.

    Returns
    -------
    DataFrame with one row per (merger, affected CSA): 'CSA Code', 'sale_year', 'acquiror',
    'target', 'other_targets', 'acquiror_parent', 'target_parent', and market shares by number of
    deals one, two and three years before the merger ('*_market_share_N_m1' .. '_m3') for the
    acquiror, the target and the other targets. A share is 0 when the firm has no entry for that
    CSA and year. The frame has no columns at all if no CSA is affected.
    """

    raw_name_GPF_colnames = [column for column in GPF.columns if 'raw_name_GPF_' in column]
    name_GPF_colnames = ['name_GPF_'+str(i) for i in range(0,len(raw_name_GPF_colnames))]
    parent_name_colnames = ['parent_name_'+str(i) for i in range(0,len(raw_name_GPF_colnames))]

    def _names_in(frame):
        # Distinct entries of a block of name columns, dropping None
        return {item for item in frame.to_numpy().ravel() if item is not None}

    def _parent_of(firm,year):
        # Parent of `firm` according to GPF_names in `year`. A firm without an entry (e.g. one
        # that is itself a parent) is its own parent.
        matches = GPF_names.loc[
            (GPF_names['name_GPF']==firm)&(GPF_names['sale_year']==year),'parent_name']
        return matches.iloc[0] if len(matches)>0 else firm

    def _shares_in(shares,CSA,year):
        # Market shares (by number of deals) in one CSA and calendar year
        return shares.loc[(shares['CSA Code']==CSA)&(shares['calendar_year']==year),'market_share_N']

    def _first_share(shares,CSA,year):
        # Share of a single firm: first matching entry, 0 if there is none (no presence)
        found = _shares_in(shares,CSA,year)
        return found.iloc[0] if len(found)>0 else 0

    def _total_share(shares,CSA,year):
        # Combined share of several firms: sum of the matching entries, 0 if there is none
        found = _shares_in(shares,CSA,year)
        return np.sum(found) if len(found)>0 else 0

    CSA_affected = []
    MA_frag = MA_frag.reset_index(drop=True)

    for idx,row in MA_frag.iterrows():

        sale_year = row['sale_year']

        # Find CSAs that this merger affects
        # Determine if an underwriter is active in a CSA based on activity of PRIOR years
        GPF_prioryears = GPF[(GPF['sale_year']>=sale_year-3)&(GPF['sale_year']<=sale_year-1)]

        # Also check other targets of the acquiror in that year. This accounts for cases where post merger the newly formed
        # entity appears under a name that was not in the sample before. Note that "MA_frag" cannot be used here or the other
        # firm involved in the merger will be missed. Instead, use the whole sample "MA"
        other_targets = \
            list(MA[(MA['acquiror']==row['acquiror'])&
            (MA['sale_year']==sale_year)&
            (MA['target']!=row['target'])]['target'])

        # Parents of the parties. "market_share_all_markets_byCSA" is at the parent level, and the
        # parent-subsidiary mapping must be the one of the year prior to the M&A. None of this
        # depends on the CSA, so it is computed once per merger.
        acquiror_parent = _parent_of(row['acquiror'],sale_year-1)
        target_parent = _parent_of(row['target'],sale_year-1)
        # Other targets can be either a subsidiary or a standalone firm / parent
        other_targets_parents = \
            list(GPF_names[(GPF_names['name_GPF'].isin(other_targets))
            &(GPF_names['sale_year']==sale_year-1)]['parent_name'])+\
            list(other_targets)
        other_targets_parents = list(set(other_targets_parents))

        # Market share entries of each party (all CSAs and years), narrowed by CSA and year below
        parent_col = market_share_all_markets_byCSA['parent_name']
        shares_by_party = (
            ('acquiror',market_share_all_markets_byCSA[parent_col==acquiror_parent],_first_share),
            ('target',market_share_all_markets_byCSA[parent_col==target_parent],_first_share),
            ('other_targets',market_share_all_markets_byCSA[parent_col.isin(other_targets_parents)],_total_share),
        )

        # Parent-subsidiary links in the three years prior to the M&A
        GPF_names_prioryears = GPF_names[
            (GPF_names['sale_year']>=sale_year-3)&
            (GPF_names['sale_year']<=sale_year-1)]

        # Deals with a missing CSA Code belong to no group; they never produced a record in the
        # original either. sort=False keeps CSAs in order of first appearance.
        for CSA,GPF_prioryears_oneCSA in GPF_prioryears.groupby('CSA Code',sort=False):

            # Underwriters in this CSA
            underwriters_priorMA = _names_in(GPF_prioryears_oneCSA[name_GPF_colnames])
            # Parents of underwriters in this CSA
            parents_priorMA = _names_in(GPF_prioryears_oneCSA[parent_name_colnames])
            # Subsidiaries of parents in this CSA (using data of PRIOR years)
            subsidiaries_priorMA = set(GPF_names_prioryears[
                GPF_names_prioryears['parent_name'].isin(list(parents_priorMA))]['name_GPF'])

            # Determine if merger affects the CSA, and which sides have business there
            active_names = underwriters_priorMA|parents_priorMA
            IF_acquiror_active = (row['acquiror'] in active_names) or (row['acquiror'] in subsidiaries_priorMA)
            IF_target_active = (row['target'] in active_names) or (row['target'] in subsidiaries_priorMA)
            # Other targets are matched against underwriters and parents only
            IF_other_target_active = any(other_target in active_names for other_target in other_targets)

            # Only affected CSAs are recorded, so skip the market share lookups for all others
            if not (IF_acquiror_active or IF_target_active or IF_other_target_active):
                continue

            # Record data
            record = {
                'CSA Code':CSA,
                'sale_year':sale_year,
                'acquiror':row['acquiror'],
                'target':row['target'],
                'other_targets':other_targets,
                'acquiror_parent':acquiror_parent,
                'target_parent':target_parent,
            }

            #-------------------------#
            # Market share by N deals #
            #-------------------------#

            # Market shares in each of the three years prior to the M&A. There are many cases
            # where the market share of a firm in an area is unavailable because it has no
            # presence there; those are set to 0.
            for party,shares,lookup in shares_by_party:
                for lag in (1,2,3):
                    record[party+'_market_share_N_m'+str(lag)] = lookup(shares,CSA,sale_year-lag)

            CSA_affected.append(record)

    CSA_affected = pd.DataFrame(CSA_affected)
    return CSA_affected

# Columns returned by proc_list, in order; dask needs them up front as "meta"
share_parties = ('acquiror','target','other_targets')
CSA_affected_meta = pd.DataFrame(columns=
    ['CSA Code','sale_year','acquiror','target',
    'other_targets','acquiror_parent','target_parent']+
    [party+'_market_share_N_m'+str(lag) for party in share_parties for lag in (1,2,3)])

# Split the M&As into 40 partitions and process them in 40 worker processes
MA_dd = dd.from_pandas(MA, npartitions=40)
with dask.config.set(scheduler='processes',num_workers=40):
    CSA_affected = MA_dd.map_partitions(proc_list,meta=CSA_affected_meta).compute()

# Average market share over past three years
for party in share_parties:
    CSA_affected[party+'_market_share_N_avg'] = \
        (CSA_affected[party+'_market_share_N_m1']+\
        CSA_affected[party+'_market_share_N_m2']+\
        CSA_affected[party+'_market_share_N_m3'])/3

# As this step takes significant time, export output
CSA_affected.to_parquet('../CleanData/MAEvent/1B_CSA_affected.parquet')

In [11]:
# Load the mergers-by-affected-CSA table that the preceding cell exports to this path. That cell
# is switched off with "%%script false", so this read is what defines CSA_affected on a normal run.
CSA_affected = pd.read_parquet('../CleanData/MAEvent/1B_CSA_affected.parquet')

## 3.2 Identify merger episodes

### 3.2.1 Method 1: By market share

In [12]:
#----------------------------#
# Market share by N of deals #
#----------------------------#

# Identify merger episodes at the CSA level.
#
# Within each CSA, go through the years that have a merger event in chronological order. For each candidate year, pool the M&As of
# that year and of the three following years and add up the merging parties' three-year-average market shares. The window is recorded
# as an episode when the acquirors' total and the targets' total both exceed 5%. Note that "CSA_affected" can contain duplicates:
# when two firms merge into a new one, both get recorded.

# Not used in this cell, but later cells read this list
parent_name_colnames = ['parent_name_'+str(i) for i in range(0,len(raw_name_GPF_colnames))]

episodes_marketshare = []

for CSA in list(CSA_affected['CSA Code'].unique()):

    # Keep mergers in this CSA where the acquiror and at least one target side had positive market share
    mergers_in_CSA = CSA_affected[CSA_affected['CSA Code']==CSA]
    acquiror_active = mergers_in_CSA['acquiror_market_share_N_avg']>0
    any_target_active = \
        (mergers_in_CSA['target_market_share_N_avg']>0)|\
        (mergers_in_CSA['other_targets_market_share_N_avg']>0)
    CSA_affected_part = mergers_in_CSA[acquiror_active&any_target_active].sort_values('sale_year')

    episode_start_year = 1900
    for sale_year in sorted(CSA_affected_part['sale_year'].unique()):

        # Skip years that are at most four years after the start of the last identified episode
        # NOTE(review): the window built below covers sale_year to sale_year+3, so this also skips the first year
        # after that window. Confirm the extra gap year is intended.
        if sale_year<=episode_start_year+4:
            continue

        # M&As of this year and the three following years
        in_window = (CSA_affected_part['sale_year']>=sale_year)&(CSA_affected_part['sale_year']<=sale_year+3)
        # When a firm acquires multiple firms, the shares of the other firms are in "other_targets_market_share_N_avg",
        # so one record per acquiror and year is sufficient
        CSA_affected_episode = CSA_affected_part[in_window].drop_duplicates(['acquiror','sale_year'])

        acquiror_share = CSA_affected_episode['acquiror_market_share_N_avg']
        target_share = CSA_affected_episode['target_market_share_N_avg']
        other_targets_share = CSA_affected_episode['other_targets_market_share_N_avg']
        combined_share = acquiror_share+target_share+other_targets_share

        # Episode-level totals. Alternative aggregation methods might be more reasonable. Also, this does not account for
        # targets tending to be smaller, so their threshold should arguably be smaller too. Thresholding on the implied
        # HHI change of the merger (based on historical data) would be more powerful.
        acquiror_total = acquiror_share.sum()
        target_total = target_share.sum()
        other_targets_total = other_targets_share.sum()

        # Out of all mergers in this episode:
        # (1) the max of the sum of market shares of merging entities
        max_sum_share = combined_share.max()
        # (2) the max of the min of market shares of merging entities
        max_min_share = np.minimum(acquiror_share,target_share+other_targets_share).max()
        # (3) the mean of the sum of market shares of merging entities
        mean_sum_share = combined_share.mean()

        # An episode is identified only if both sides' total market share is high enough
        if acquiror_total>0.05 and target_total+other_targets_total>0.05:
            episodes_marketshare.append({
                'episode_start_year':sale_year,
                'CSA Code':CSA,
                'mergers':CSA_affected_episode,
                'acquiror_market_share_N_avg':acquiror_total,
                'target_market_share_N_avg':target_total,
                'other_targets_market_share_N_avg':other_targets_total,
                'max_sum_share':max_sum_share,
                'max_min_share':max_min_share,
                'mean_sum_share':mean_sum_share,
                })
            episode_start_year = sale_year

CSA_episodes_marketshare_N = pd.DataFrame(episodes_marketshare)

print(len(CSA_episodes_marketshare_N))

321


### 3.2.2 Method 2: By implied rise in HHI due to merger

In [13]:
#-----------------------------#
# Change in HHI by N of deals #
#-----------------------------#

# Identify merger episodes at the CSA level by the rise in HHI that the mergers imply.
#
# Within each CSA, go through the years that have a merger event in chronological order. For each candidate year, take the M&As of
# that year and of the three following years. Compute the HHI (by parent firm, on number of deals) from the deals sold in the CSA
# between "sale_year-3" and "sale_year", then recompute it after relabelling every target as its acquiror's parent. The window is
# recorded as an episode when the implied rise in HHI exceeds 0.01. Note that "CSA_affected" can contain duplicates: when two firms
# merge into a new one, both get recorded.

# Defined before the loops, so the cell does not rely on a definition left over from an earlier cell
parent_name_colnames = ['parent_name_'+str(i) for i in range(0,len(raw_name_GPF_colnames))]

episodes_impliedHHI = []

for CSA in list(CSA_affected['CSA Code'].unique()):

    CSA_affected_part = CSA_affected[CSA_affected['CSA Code']==CSA]
    CSA_affected_part = CSA_affected_part[
        (CSA_affected_part['acquiror_market_share_N_avg']>0)&
        ((CSA_affected_part['target_market_share_N_avg']>0)|
        (CSA_affected_part['other_targets_market_share_N_avg']>0))].sort_values('sale_year')

    # Deals sold in this CSA; filtered once per CSA instead of once per candidate year
    GPF_oneCSA = GPF[GPF['CSA Code']==CSA]

    episode_start_year = 1900
    # "CSA_affected_part" is sorted by year, so the unique years come out in chronological order
    for sale_year in CSA_affected_part['sale_year'].unique():

        # Skip years that are at most four years after the start of the last identified episode
        if sale_year<=episode_start_year+4:
            continue

        # M&As of this year and the three following years
        CSA_affected_episode = CSA_affected_part[
            (CSA_affected_part['sale_year']>=sale_year)&
            (CSA_affected_part['sale_year']<=sale_year+3)].reset_index(drop=True)
        # Deals used to measure the market before the mergers
        # NOTE(review): this window runs from sale_year-3 through sale_year itself (four years); confirm that is intended.
        GPF_oneCSA_priorMA = GPF_oneCSA[(GPF_oneCSA['sale_year']>=sale_year-3)&(GPF_oneCSA['sale_year']<=sale_year)]

        # Number of deals per parent firm, with each deal split equally among its underwriters.
        # Firms are recorded in order of first appearance, which makes the table order reproducible.
        deal_counts = {}
        for names_onedeal in GPF_oneCSA_priorMA[parent_name_colnames].to_numpy():
            underwriters_onedeal = [name for name in names_onedeal if name is not None and str(name)!='nan']
            for name in underwriters_onedeal:
                deal_counts[name] = deal_counts.get(name,0)+1/len(underwriters_onedeal)
        # Explicit dtypes keep the table well-formed even when the window has no deals
        n_deals_prior = pd.DataFrame({
            'underwriter':pd.Series(list(deal_counts.keys()),dtype=object),
            'n_deals':pd.Series(list(deal_counts.values()),dtype=float),
            })

        # HHI prior to merger
        hhi_prior = np.sum((n_deals_prior['n_deals']/np.sum(n_deals_prior['n_deals']))**2)

        # Implied HHI post merger: relabel each target as the acquiror's parent and re-aggregate.
        # Work on a copy, so the relabelling does not overwrite the names in the stored "n_deals_prior".
        n_deals_post = n_deals_prior.copy()
        for target,acquiror_parent in zip(CSA_affected_episode['target'],CSA_affected_episode['acquiror_parent']):
            n_deals_post.loc[n_deals_post['underwriter']==target,'underwriter'] = acquiror_parent
        # The string 'sum' is used because passing the built-in sum raises a FutureWarning in recent pandas,
        # which this notebook turns into an error
        n_deals_post = n_deals_post.groupby('underwriter').agg({'n_deals':'sum'}).reset_index()
        hhi_predicted = np.sum((n_deals_post['n_deals']/np.sum(n_deals_post['n_deals']))**2)

        hhi_dif = hhi_predicted-hhi_prior

        # Market shares used in summary statistics: those of the merger that sorts last by the smaller side's share
        CSA_affected_episode['min_share'] = np.minimum(CSA_affected_episode['acquiror_market_share_N_avg'],
            CSA_affected_episode['target_market_share_N_avg']+\
            CSA_affected_episode['other_targets_market_share_N_avg'])
        CSA_affected_episode = CSA_affected_episode.sort_values('min_share')
        CSA_affected_episode_topshare = CSA_affected_episode[-1:]
        acquiror_market_share_N_max = np.max(CSA_affected_episode_topshare['acquiror_market_share_N_avg'])
        target_market_share_N_max = np.max(CSA_affected_episode_topshare['target_market_share_N_avg'])
        other_targets_market_share_N_max = np.max(CSA_affected_episode_topshare['other_targets_market_share_N_avg'])

        # An episode is identified only if the implied rise in HHI is high enough
        if hhi_dif>0.01:
            episodes_impliedHHI.append({
                'episode_start_year':sale_year,
                'CSA Code':CSA,
                'mergers':CSA_affected_episode,
                'hhi_dif':hhi_dif,
                'n_deals_prior':n_deals_prior,
                'n_deals_post':n_deals_post,
                'acquiror_market_share_N_max':acquiror_market_share_N_max,
                'target_market_share_N_max':target_market_share_N_max,
                'other_targets_market_share_N_max':other_targets_market_share_N_max,
                })
            episode_start_year = sale_year

CSA_episodes_impliedHHI_N = pd.DataFrame(episodes_impliedHHI)

print(len(CSA_episodes_impliedHHI_N))

219


In [14]:
# Export the episode-level columns of the implied-HHI episodes that feed the summary statistics
sumstats_columns = [
    'episode_start_year','CSA Code','hhi_dif',
    'acquiror_market_share_N_max','target_market_share_N_max','other_targets_market_share_N_max',
]
sumstats_data = CSA_episodes_impliedHHI_N[sumstats_columns]
sumstats_data.to_csv('../CleanData/MAEvent/1B_CSA_episodes_impliedHHI_SumStats.csv')

### 3.2.3 Method 3: By implied rise in top 5 share due to merger

In [15]:
#-------------------------------------#
# Change in top 5 share by N of deals #
#-------------------------------------#

# Identify merger episodes at the CSA level by the rise in the top 5 share that the mergers imply.
#
# Within each CSA, go through the years that have a merger event in chronological order. For each candidate year, take the M&As of
# that year and of the three following years. Compute the combined share of the five largest parent firms (on number of deals) from
# the deals sold in the CSA between "sale_year-3" and "sale_year", then recompute it after relabelling every target as its acquiror's
# parent. The window is recorded as an episode when the implied rise in the top 5 share exceeds 0.05. Note that "CSA_affected" can
# contain duplicates: when two firms merge into a new one, both get recorded.

# Defined before the loops, so the cell does not rely on a definition left over from an earlier cell
parent_name_colnames = ['parent_name_'+str(i) for i in range(0,len(raw_name_GPF_colnames))]

episodes_top5share = []

for CSA in list(CSA_affected['CSA Code'].unique()):

    CSA_affected_part = CSA_affected[CSA_affected['CSA Code']==CSA]
    CSA_affected_part = CSA_affected_part[
        (CSA_affected_part['acquiror_market_share_N_avg']>0)&
        ((CSA_affected_part['target_market_share_N_avg']>0)|
        (CSA_affected_part['other_targets_market_share_N_avg']>0))].sort_values('sale_year')

    # Deals sold in this CSA; filtered once per CSA instead of once per candidate year
    GPF_oneCSA = GPF[GPF['CSA Code']==CSA]

    episode_start_year = 1900
    # "CSA_affected_part" is sorted by year, so the unique years come out in chronological order
    for sale_year in CSA_affected_part['sale_year'].unique():

        # Skip years that are at most four years after the start of the last identified episode
        if sale_year<=episode_start_year+4:
            continue

        # M&As of this year and the three following years
        CSA_affected_episode = CSA_affected_part[
            (CSA_affected_part['sale_year']>=sale_year)&
            (CSA_affected_part['sale_year']<=sale_year+3)].reset_index(drop=True)
        # Deals used to measure the market before the mergers
        # NOTE(review): this window runs from sale_year-3 through sale_year itself (four years); confirm that is intended.
        GPF_oneCSA_priorMA = GPF_oneCSA[(GPF_oneCSA['sale_year']>=sale_year-3)&(GPF_oneCSA['sale_year']<=sale_year)]

        # Number of deals per parent firm, with each deal split equally among its underwriters.
        # Firms are recorded in order of first appearance, which makes the table order reproducible.
        deal_counts = {}
        for names_onedeal in GPF_oneCSA_priorMA[parent_name_colnames].to_numpy():
            underwriters_onedeal = [name for name in names_onedeal if name is not None and str(name)!='nan']
            for name in underwriters_onedeal:
                deal_counts[name] = deal_counts.get(name,0)+1/len(underwriters_onedeal)
        # Explicit dtypes keep the table well-formed even when the window has no deals
        n_deals_prior = pd.DataFrame({
            'underwriter':pd.Series(list(deal_counts.keys()),dtype=object),
            'n_deals':pd.Series(list(deal_counts.values()),dtype=float),
            })

        # Top 5 share prior to merger. The stored prior table keeps its "marketshare" column and its unsorted order.
        n_deals_prior['marketshare'] = n_deals_prior['n_deals']/np.sum(n_deals_prior['n_deals'])
        n_deals_ranked = n_deals_prior.sort_values(by=['n_deals'],ascending=False).reset_index(drop=True)
        if len(n_deals_ranked)<=5:
            top5share_prior = 1
        else:
            top5share_prior = np.sum(n_deals_ranked['marketshare'][:5])

        # Implied top 5 share post merger: relabel each target as the acquiror's parent and re-aggregate.
        # "n_deals_ranked" is a separate table from "n_deals_prior", so relabelling it leaves the prior table intact.
        n_deals_post = n_deals_ranked
        for target,acquiror_parent in zip(CSA_affected_episode['target'],CSA_affected_episode['acquiror_parent']):
            n_deals_post.loc[n_deals_post['underwriter']==target,'underwriter'] = acquiror_parent
        # The string 'sum' is used because passing the built-in sum raises a FutureWarning in recent pandas,
        # which this notebook turns into an error
        n_deals_post = n_deals_post.groupby('underwriter').agg({'n_deals':'sum'}).reset_index()
        n_deals_post['marketshare'] = n_deals_post['n_deals']/np.sum(n_deals_post['n_deals'])
        n_deals_post = n_deals_post.sort_values(by=['n_deals'],ascending=False).reset_index(drop=True)
        if len(n_deals_post)<=5:
            top5share_post = 1
        else:
            top5share_post = np.sum(n_deals_post['marketshare'][:5])

        top5share_dif = top5share_post-top5share_prior

        # An episode is identified only if the implied rise in the top 5 share is high enough
        if top5share_dif>0.05:
            episodes_top5share.append({
                'episode_start_year':sale_year,
                'CSA Code':CSA,
                'mergers':CSA_affected_episode,
                'top5share_dif':top5share_dif,
                'n_deals_prior':n_deals_prior,
                'n_deals_post':n_deals_post,
                })
            episode_start_year = sale_year

CSA_episodes_top5share_N = pd.DataFrame(episodes_top5share)

print(len(CSA_episodes_top5share_N))

171


### 3.2.4 Add characteristics of events

Another characteristic, whether there is a concurrent CB M&A, is added later, at the sample-construction stage.

#### 3.2.4.1 Share of CSA within underwriter's business portfolio

In [16]:
episodes_files = ['CSA_episodes_marketshare_N','CSA_episodes_impliedHHI_N','CSA_episodes_top5share_N']
episodes_by_file = {
    'CSA_episodes_marketshare_N':CSA_episodes_marketshare_N,
    'CSA_episodes_impliedHHI_N':CSA_episodes_impliedHHI_N,
    'CSA_episodes_top5share_N':CSA_episodes_top5share_N,
}

# For each side of a merger: (column with the parent firm, per-merger weight column, episode-level output column)
weight_specs = [
    ('acquiror_parent','acquiror_weight','max_acquiror_weight'),
    ('target_parent','target_weight','max_target_weight'),
]

for episodes_file in episodes_files:

    CSA_episodes = episodes_by_file[episodes_file].copy()

    # Add the maximum share of the treated CSA within the acquiror or the target firm's portfolio
    CSA_episodes['max_acquiror_weight'] = None
    CSA_episodes['max_target_weight'] = None
    for idx,row in CSA_episodes.iterrows():
        for parent_column,weight_column,max_column in weight_specs:
            # Work on a fresh copy of the episode's mergers; the weight stays missing when no match is found
            mergers = row['mergers'].reset_index(drop=True)
            mergers[weight_column] = None
            for sub_idx,sub_row in mergers.iterrows():
                # Share of this CSA within the parent firm's business in the year of the merger
                is_match = \
                    (csa_share_withinbank['underwriter']==sub_row[parent_column])&\
                    (csa_share_withinbank['CSA Code']==row['CSA Code'])&\
                    (csa_share_withinbank['year']==sub_row['sale_year'])
                matched_share = csa_share_withinbank[is_match].reset_index(drop=True)
                if len(matched_share)>0:
                    mergers.loc[sub_idx,weight_column] = matched_share['csa_share'][0]
            CSA_episodes.at[idx,max_column] = np.max(mergers[weight_column])

    episodes_by_file[episodes_file] = CSA_episodes.copy()

# Hand the augmented tables back to their notebook-level names
CSA_episodes_marketshare_N = episodes_by_file['CSA_episodes_marketshare_N']
CSA_episodes_impliedHHI_N = episodes_by_file['CSA_episodes_impliedHHI_N']
CSA_episodes_top5share_N = episodes_by_file['CSA_episodes_top5share_N']


In [17]:
# Number: implied-HHI episodes in which both the maximum acquiror weight and the maximum target weight are below 5% #
acquiror_weight_small = CSA_episodes_impliedHHI_N['max_acquiror_weight']<0.05
target_weight_small = CSA_episodes_impliedHHI_N['max_target_weight']<0.05
n_bothweight_lt5 = np.sum(acquiror_weight_small&target_weight_small)

with open('../Draft/nums/n_bothweight_lt5.tex','w') as file:
    file.write(str(n_bothweight_lt5))


#### 3.2.4.2 Mark out M&As whose stated reason could be related to local economic conditions

In [18]:
episodes_files = ['CSA_episodes_marketshare_N','CSA_episodes_impliedHHI_N','CSA_episodes_top5share_N']
episodes_by_file = {
    'CSA_episodes_marketshare_N':CSA_episodes_marketshare_N,
    'CSA_episodes_impliedHHI_N':CSA_episodes_impliedHHI_N,
    'CSA_episodes_top5share_N':CSA_episodes_top5share_N,
}

# The reason tables are the same for every episode definition, so read each file once instead of once per pass
MA_local_reasons = pd.read_csv('SCRIPT_M&A_local_reasons.csv')
MA_nonfound_reasons = pd.read_csv('SCRIPT_M&A_nonfound_reasons.csv')
MA_all_reasons = pd.read_csv('SCRIPT_M&A_all_reasons.csv')

# M&As whose stated reason is financial stress; used for the "any merger in the episode" flag
MA_fin_stress_reasons = MA_all_reasons[MA_all_reasons['reason']=='financial stress']

# (flag column, stated reason), in the order in which the flag columns are added
specific_reasons = [
    ('reasonMA_local_dom','local dominance'),
    ('reasonMA_expand_geo','expand geographically'),
    ('reasonMA_ind_dom','industry dominance'),
    ('reasonMA_syn_comb_lines','synergy from combining business lines'),
    ('reasonMA_fin_stress','financial stress'),
    ('reasonMA_syn_cost','synergy from cost management'),
    ('reasonMA_diversify','diversify revenue sources'),
]
MA_reasons_by_flag = {
    flag_column:MA_all_reasons[MA_all_reasons['reason']==reason]
    for flag_column,reason in specific_reasons}

for episodes_file in episodes_files:

    CSA_episodes = episodes_by_file[episodes_file].copy()

    # All flags are stored as strings and start out as "False"
    CSA_episodes['reasonMA_endo_possible'] = "False"
    CSA_episodes['reasonMA_fin_stress_any'] = "False"
    for flag_column,_ in specific_reasons:
        CSA_episodes[flag_column] = "False"

    for idx,row in CSA_episodes.iterrows():
        mergers = row['mergers']

        #---------------------#
        # Whether endogeneous #
        #---------------------#

        # "True" if any merger of the episode is in the local-reasons table
        if len(mergers.merge(MA_local_reasons,on=['target','acquiror']))>=1:
            CSA_episodes.at[idx,'reasonMA_endo_possible'] = "True"
        # "Unsure" if the match against the no-reason-found table has as many rows as the episode has mergers.
        # This check comes second, so it overrides "True".
        if len(mergers.merge(MA_nonfound_reasons,on=['target','acquiror']))==len(mergers):
            CSA_episodes.at[idx,'reasonMA_endo_possible'] = "Unsure"

        # "True" if any merger of the episode has financial stress as a stated reason
        if len(mergers.merge(MA_fin_stress_reasons,on=['target','acquiror']))>=1:
            CSA_episodes.at[idx,'reasonMA_fin_stress_any'] = "True"

        #------------------------#
        # Specific reason of M&A #
        #------------------------#

        # For these specific reasons of M&A, I mark out the reason based on the reason of the most important M&A.
        # This way, the heterogeneity by them makes more sense. Otherwise, they are not mutually exclusive, and
        # in each category I might end up getting those bigger episodes that involve more M&As, rather than those
        # that really reflect a certain specific reason of the M&A.

        # The most important M&A is the one with the largest smaller-side market share. The helper column is written
        # onto the stored "mergers" table itself, as before, so these tables keep the same columns for later cells.
        mergers['min_acquiror_target_share'] = np.minimum(mergers['acquiror_market_share_N_avg'],mergers['target_market_share_N_avg'])
        top_merger = mergers.sort_values(['min_acquiror_target_share'],ascending=False).reset_index(drop=True)[:1]
        for flag_column,_ in specific_reasons:
            if len(top_merger.merge(MA_reasons_by_flag[flag_column],on=['target','acquiror']))>=1:
                CSA_episodes.at[idx,flag_column] = "True"

    # Number: episodes whose endogeneity flag is still "False" #
    # NOTE(review): this file is rewritten on every pass, so it ends up holding the count for the last entry of
    # "episodes_files". Confirm that this is the episode definition intended for the draft.
    n_reasonMA_endo_not_possible = np.sum(CSA_episodes['reasonMA_endo_possible']=="False")
    with open('../Draft/nums/n_reasonMA_endo_not_possible.tex','w') as file:
        file.write(str(n_reasonMA_endo_not_possible))

    episodes_by_file[episodes_file] = CSA_episodes.copy()

# Hand the augmented tables back to their notebook-level names
CSA_episodes_marketshare_N = episodes_by_file['CSA_episodes_marketshare_N']
CSA_episodes_impliedHHI_N = episodes_by_file['CSA_episodes_impliedHHI_N']
CSA_episodes_top5share_N = episodes_by_file['CSA_episodes_top5share_N']


In [19]:
# Export share of M&As for each reason (in percent, rounded to one decimal).
# The denominator is the M&As with no reason found plus the distinct M&As that have a reason.
n_total = len(MA_nonfound_reasons)+len(MA_all_reasons.drop_duplicates(['target','acquiror','year']))

# (output file, stated reason)
share_exports = [
    ('../Draft/nums/share_reasonMA_local_dom.tex','local dominance'),
    ('../Draft/nums/share_reasonMA_expand_geo.tex','expand geographically'),
    ('../Draft/nums/share_reasonMA_ind_dom.tex','industry dominance'),
    ('../Draft/nums/share_reasonMA_syn_comb_lines.tex','synergy from combining business lines'),
    ('../Draft/nums/share_reasonMA_fin_stress.tex','financial stress'),
    ('../Draft/nums/share_reasonMA_syn_cost.tex','synergy from cost management'),
    ('../Draft/nums/share_reasonMA_diversify.tex','diversify revenue sources'),
]
for output_path,reason in share_exports:
    with open(output_path,'w') as file:
        n_with_reason = np.sum(MA_all_reasons['reason']==reason)
        file.write(str(round(n_with_reason/n_total*100,1)))


### 3.2.5 Export a table of M&As used

In [20]:
%%script false --no-raise-error

# Export a table of M&As used
MAs_Used = []
for idx,row in CSA_episodes_impliedHHI_N.iterrows():
    MAs_Used = MAs_Used+[row['mergers'][['acquiror','target','sale_year']]]
MAs_Used = pd.concat(MAs_Used).drop_duplicates()
MAs_Used = MAs_Used.sort_values(['sale_year','acquiror'])
MAs_Used = MAs_Used.reset_index(drop=True)
MAs_Used['acquiror'] = MAs_Used['acquiror'].str.replace('&','\\&')
MAs_Used['target'] = MAs_Used['target'].str.replace('&','\\&')

# MA_SDC = pd.read_csv('SCRIPT_SDC_deals_cleaned.csv')
# MAs_Used = MAs_Used.merge(MA_SDC,on=['target','acquiror','sale_year'],how='outer',indicator=True)
# MAs_Used = MAs_Used[MAs_Used['_merge']=='left_only']
# MAs_Used = MAs_Used.drop(columns=['_merge'])

# Adjusting the format
MAs_Used['sale_year'] = MAs_Used['sale_year'].astype(int)
MAs_Used = MAs_Used.rename(columns={'acquiror':'Acquiror','target':'Target','sale_year':'Year'})
MAs_Used['Acquiror'] = MAs_Used['Acquiror'].str.replace('SHEARSONAMERICAN EXPRESS','SHEARSON/AMERICAN EXPRESS')
MAs_Used['Target'] = MAs_Used['Target'].str.replace('SHEARSONAMERICAN EXPRESS','SHEARSON/AMERICAN EXPRESS')
MAs_Used['Acquiror'] = MAs_Used['Acquiror'].str.title()
MAs_Used['Target'] = MAs_Used['Target'].str.title()
MAs_Used['Acquiror'] = MAs_Used['Acquiror'].str.replace(' Of ',' of ')
MAs_Used['Target'] = MAs_Used['Target'].str.replace(' Of ',' of ')
MAs_Used['Acquiror'] = MAs_Used['Acquiror'].str.replace('Ag Becker','AG Becker')
MAs_Used['Target'] = MAs_Used['Target'].str.replace('Ag Becker','AG Becker')
MAs_Used['Acquiror'] = MAs_Used['Acquiror'].str.replace("American Ntnl Bank \\& Tr","American National Bank \\& Trust",regex=False)
MAs_Used['Target'] = MAs_Used['Target'].str.replace("American Ntnl Bank \\& Tr","American National Bank \\& Trust",regex=False)
MAs_Used['Acquiror'] = MAs_Used['Acquiror'].str.replace('Mcdonald','McDonald')
MAs_Used['Target'] = MAs_Used['Target'].str.replace('Mcdonald','McDonald')
MAs_Used['Acquiror'] = MAs_Used['Acquiror'].str.replace('Pnc Bank','PNC Bank')
MAs_Used['Target'] = MAs_Used['Target'].str.replace('Pnc Bank','PNC Bank')
MAs_Used['Acquiror'] = MAs_Used['Acquiror'].str.replace('Us Bank','US Bank')
MAs_Used['Target'] = MAs_Used['Target'].str.replace('Us Bank','US Bank')
MAs_Used['Acquiror'] = MAs_Used['Acquiror'].str.replace('Onbank','OnBank')
MAs_Used['Target'] = MAs_Used['Target'].str.replace('Onbank','OnBank')
MAs_Used['Acquiror'] = MAs_Used['Acquiror'].str.replace('Bb\\&T','BB\\&T',regex=False)
MAs_Used['Target'] = MAs_Used['Target'].str.replace('Bb\\&T','BB\\&T',regex=False)
MAs_Used['Acquiror'] = MAs_Used['Acquiror'].str.replace('Bok Financial','BOK Financial')
MAs_Used['Target'] = MAs_Used['Target'].str.replace('Bok Financial','BOK Financial')
MAs_Used['Acquiror'] = MAs_Used['Acquiror'].str.replace('Ubs Financial Services','UBS Financial Services')
MAs_Used['Target'] = MAs_Used['Target'].str.replace('Ubs Financial Services','UBS Financial Services')
MAs_Used['Acquiror'] = MAs_Used['Acquiror'].str.replace('Bb\\&T','BB\\&T',regex=False)
MAs_Used['Target'] = MAs_Used['Target'].str.replace('Bb\\&T','BB\\&T',regex=False)
MAs_Used['Acquiror'] = MAs_Used['Acquiror'].str.replace('Natwest Bank','NatWest Bank')
MAs_Used['Target'] = MAs_Used['Target'].str.replace('Natwest Bank','NatWest Bank')
MAs_Used['Acquiror'] = MAs_Used['Acquiror'].str.replace('West One Bank Oregon Sb','West One Bank Oregon')
MAs_Used['Target'] = MAs_Used['Target'].str.replace('West One Bank Oregon Sb','West One Bank Oregon')
MAs_Used['Acquiror'] = MAs_Used['Acquiror'].str.replace('Johnston Brown Barnett\\&Knight','Johnston Brown Barnett \\& Knight',regex=False)
MAs_Used['Target'] = MAs_Used['Target'].str.replace('Johnston Brown Barnett\\&Knight','Johnston Brown Barnett \\& Knight',regex=False)
MAs_Used['Acquiror'] = MAs_Used['Acquiror'].str.replace('Jp Morgan','JP Morgan')
MAs_Used['Target'] = MAs_Used['Target'].str.replace('Jp Morgan','JP Morgan')
# Restore the intended capitalisation / spelling of firm names in both the
# 'Acquiror' and the 'Target' column of MAs_Used.
# Keys are the substrings as they currently appear; values are the
# replacements. Every key is matched literally (regex=False), so the result
# does not depend on the pandas version's default for `regex`.
_MA_NAME_FIXES = {
    'Rbc Bank': 'RBC Bank',
    'Suntrust Bank': 'SunTrust Bank',
    'Rrz Public Markets': 'RRZ Public Markets',
    'Interstatejohnson Lane': 'Interstate/Johnson Lane',
    'Nationsbank': 'NationsBank',
    'Bankboston': 'BankBoston',
    'Nbc Capital Markets Group': 'NBC Capital Markets Group',
    'Southtrust Securities': 'SouthTrust Securities',
    'Td Bank': 'TD Bank',
    'Bmo Bank': 'BMO Bank',
    'Stonex Group': 'StoneX Group',
    'Nbh Bank': 'NBH Bank',
    'Jjb Hilliard Wl Lyons': 'JJB Hilliard WL Lyons',
    'Lj Hart': 'LJ Hart',
    'Bbva Compass': 'BBVA Compass',
    'Peoples United Bank': "People's United Bank",
    'Peoples National Bank': "People's National Bank",
    'Tgh Securities': 'TGH Securities',
    'Adams Mcentee': 'Adams McEntee',
    'Mbank Capital Mkts Dallasna': 'MBank Capital Mkts Dallas NA',
    'Raffenspergerhughes & Coinc': 'Raffensperger Hughes & Co Inc',
    'Corestates Bank': 'CoreStates Bank',
    'Piper Sandler': 'Piper Jaffray',
    'Parkerhunter': 'Parker Hunter',
}

for _name_col in ('Acquiror', 'Target'):
    for _shown, _wanted in _MA_NAME_FIXES.items():
        MAs_Used[_name_col] = MAs_Used[_name_col].str.replace(_shown, _wanted, regex=False)
    # Expand the abbreviation "Sec" only where it is not already spelled out;
    # a plain substring replacement would turn an existing
    # "Seattle Northwest Securities" into "Seattle Northwest Securitiesurities".
    MAs_Used[_name_col] = MAs_Used[_name_col].str.replace(
        r'Seattle Northwest Sec(?!urities)', 'Seattle Northwest Securities', regex=True)

# Cut the M&A list into four consecutive pieces: three of 45 rows each and a
# last piece holding whatever remains after row 135.
MAs_Used_part1, MAs_Used_part2, MAs_Used_part3, MAs_Used_part4 = (
    MAs_Used[:45],
    MAs_Used[45:90],
    MAs_Used[90:135],
    MAs_Used[135:],
)

# Export every piece as its own LaTeX table (index hidden, booktabs-style rules).
for _part_no, _part in enumerate(
        (MAs_Used_part1, MAs_Used_part2, MAs_Used_part3, MAs_Used_part4), start=1):
    latex_table = _part.style.hide(axis="index").to_latex(hrules=True)
    with open(f'../Draft/tabs/MAs_Used_part{_part_no}.tex', 'w') as f:
        f.write(latex_table)


### 3.2.6 Plot a map of treatment frequency

In [31]:
%%script false --no-raise-error

# Number of treatment episodes per CSA.
# The counts Series is named explicitly so that the column is called
# 'frequency' regardless of how the running pandas version labels the output
# of value_counts() (0 in pandas 1.x, 'count' in pandas 2.x).
treat_frequency = (
    CSA_episodes_impliedHHI_N.value_counts(['CSA Code'])
    .rename('frequency')
    .reset_index()
)

# Attach the frequency to every episode. A 'frequency' column left over from
# a previous run of this cell is dropped first, so re-running does not
# produce 'frequency_x' / 'frequency_y'.
CSA_episodes_impliedHHI_N = (
    CSA_episodes_impliedHHI_N
    .drop(columns='frequency', errors='ignore')
    .merge(treat_frequency,on=['CSA Code'])
)

# Read the US counties GeoJSON file and drop the polygons whose state code is
# 02 (Alaska), 15 (Hawaii) or 72 (Puerto Rico)
gdf = gpd.read_file('../RawData/MSA/US-counties.geojson')
gdf = gdf[~gdf['STATE'].isin(['02','15','72'])]
gdf = gdf.rename(columns={'STATE':'FIPS State Code','COUNTY':'FIPS County Code'})
gdf['FIPS State Code'] = gdf['FIPS State Code'].astype(int)
gdf['FIPS County Code'] = gdf['FIPS County Code'].astype(int)

# Map CSA-level frequencies to counties, then to county polygons. The outer
# merge keeps counties without any treatment; they get frequency 0.
treat_frequency = treat_frequency.merge(CBSAData[['CSA Code','FIPS State Code','FIPS County Code']],
    on=['CSA Code'])
treat_frequency = gdf.merge(treat_frequency,on=['FIPS State Code','FIPS County Code'],how='outer')
treat_frequency.loc[pd.isnull(treat_frequency['frequency']),'frequency'] = 0

fig, ax = plt.subplots(1, 1, figsize=(12, 8))

# One colour per number of treatments (0 = never treated)
color_dict = {0:'azure', 1:"#fef0d9", 2:"#fdcc8a", 3:"#fc8d59", 4:"#e34a33", 5:"#b30000"}
frequency_cmap = colors.ListedColormap(list(color_dict.values()))
# Bin edges at -0.5, 0.5, ..., 5.5 so that frequency k always gets colour k.
# Without an explicit norm the colours are stretched between the observed
# minimum and maximum, which only matches the legend when the maximum is 5.
frequency_norm = colors.BoundaryNorm(np.arange(-0.5, len(color_dict) + 0.5), frequency_cmap.N)

treat_frequency.plot(ax=ax, column='frequency', cmap=frequency_cmap, norm=frequency_norm,
    edgecolor='0.9', legend=False)

# Legend entries for 1..5 treatments, taken from the same colour table
treated_counts = [n_treated for n_treated in color_dict if n_treated > 0]
legend_labels = [
    'Treated {} Time{}'.format(n_treated, 's' if n_treated > 1 else '')
    for n_treated in treated_counts
    ]
legend_handles = [
    plt.Line2D([0], [0], marker='o', color='w', markersize=10, markerfacecolor=color_dict[n_treated])
    for n_treated in treated_counts
    ]
ax.legend(legend_handles, legend_labels,loc='lower center',bbox_to_anchor=(0.5, -0.15),ncol=3,fontsize='11.5')
ax.axis("off")
# The notebook turns all warnings into errors at the top; from this line on
# DeprecationWarning is ignored instead (this filter stays active for the
# rest of the session). Presumably needed for the EPS export below - TODO confirm.
warnings.filterwarnings("ignore", category=DeprecationWarning)
fig.savefig('../Draft/figs/TreatedFrequency.eps', format='eps', bbox_inches='tight')


### 3.2.7 Illustrate the idea of Sunderam and Scharfstein

In [105]:
%%script false --no-raise-error

# M&As that are most frequently involved
# Stack the per-episode M&A tables with a single concat: this avoids
# re-copying the accumulated frame on every iteration and avoids
# concatenating onto an empty DataFrame.
_merger_frames = list(CSA_episodes_impliedHHI_N['mergers'])
mergers = pd.concat(_merger_frames) if len(_merger_frames)>0 else pd.DataFrame()
mergers_ranked = mergers.value_counts(['acquiror','target','sale_year']).reset_index()

# Pick out a case
# The index is reset because the stacked frame may carry repeated index
# labels, and label-based writes would then hit several rows at once.
_is_case = (mergers['acquiror']=='RBC BANK')&(mergers['target']=='DAIN BOSWORTH')
mergers_onecase = mergers.loc[_is_case,['CSA Code','sale_year','acquiror','target']].reset_index(drop=True)


def _mean_csa_share(underwriter, csa_code, sale_year, lookback=5):
    """Mean 'csa_share' of `underwriter` in `csa_code` over the years
    [sale_year - lookback, sale_year], read from csa_share_withinbank.

    Returns NaN when there is no matching row.
    """
    matched = csa_share_withinbank[
        (csa_share_withinbank['underwriter']==underwriter)&
        (csa_share_withinbank['CSA Code']==csa_code)&
        (csa_share_withinbank['year']<=sale_year)&
        (csa_share_withinbank['year']>=sale_year-lookback)
        ]
    return matched['csa_share'].mean()


# Share of each affected CSA for the acquiror and for the target
for _side in ('acquiror','target'):
    mergers_onecase[_side+'_share'] = [
        _mean_csa_share(_firm, _csa, _year)
        for _firm, _csa, _year in zip(
            mergers_onecase[_side], mergers_onecase['CSA Code'], mergers_onecase['sale_year'])
        ]

mergers_onecase = mergers_onecase.sort_values(['acquiror_share','target_share'],ascending=False)

# Merge in CSA name
CSAs = CBSAData[['CSA Code','CSA Title']].drop_duplicates(['CSA Code'])
mergers_onecase = mergers_onecase.merge(CSAs,on=['CSA Code'])


In [104]:
%%script false --no-raise-error

table = '../Draft/tabs/Sunderam_illustrate.tex'

# Index labels of the rows of mergers_onecase that go into the table
_rows_to_show = [0, 1, 2, 3, 6, 10]

# Assemble the whole table before touching the file, so that a failed lookup
# (e.g. a missing row label) does not leave a half-written table behind.
_table_text = '\\begin{tabular}{lccc} \n \\toprule \n'
_table_text += '& Significance of CSA & Significance of CSA \\\\ \n'
_table_text += 'CSA & for RBC Bank & for Dain Bosworth \\\\ \n'
_table_text += '\\hline \n \\\\ \n'
for _r in _rows_to_show:
    # One line per CSA: title, then both shares in percent with one decimal
    _table_text += mergers_onecase.loc[_r,'CSA Title']
    _table_text += '&'+format(mergers_onecase.loc[_r,'acquiror_share']*100,'.1f')+'\\%'
    _table_text += '&'+format(mergers_onecase.loc[_r,'target_share']*100,'.1f')+'\\%'+'\\\\ \n'
_table_text += '\\bottomrule \n'
_table_text += '\\end{tabular}'

# Best-effort removal of a previous version; only file-system errors are
# ignored (a bare `except` would also swallow KeyboardInterrupt).
try:
    os.remove(table)
except OSError:
    pass
with open(table, 'w') as f:
    f.write(_table_text)


### 3.2.8 Flag whether an affected market neighbours a major market of a merging underwriter

In [109]:
CSA_Neighboring = pd.read_csv('../CleanData/Demographics/CSA_Neighboring.csv')

# For every episode, record the largest 'csa_share' that any neighbouring CSA
# has for the acquiror parent / target parent of the episode's M&As in the
# year of the M&A.
CSA_episodes_impliedHHI_N['max_acquiror_weight_in_neighbour'] = None
CSA_episodes_impliedHHI_N['max_target_weight_in_neighbour'] = None

# Column of the M&A table holding the firm -> output column of the episode table
_weight_columns = {
    'acquiror_parent':'max_acquiror_weight_in_neighbour',
    'target_parent':'max_target_weight_in_neighbour',
}

for idx,row in CSA_episodes_impliedHHI_N.iterrows():

    # Get neighbours
    neighbours = list(CSA_Neighboring.loc[CSA_Neighboring['CSA Code A']==row['CSA Code'],'CSA Code B'])
    # Rows of csa_share_withinbank in neighbouring CSAs; shared by both sides
    # and by all M&As of the episode
    shares_in_neighbours = csa_share_withinbank[csa_share_withinbank['CSA Code'].isin(neighbours)]

    mergers = row['mergers'].reset_index(drop=True)

    for _firm_col,_out_col in _weight_columns.items():
        # Largest neighbouring share per M&A; M&As without any matching row
        # contribute nothing
        per_merger_max = []
        for _firm,_year in zip(mergers[_firm_col],mergers['sale_year']):
            matched = shares_in_neighbours[
                (shares_in_neighbours['underwriter']==_firm)&
                (shares_in_neighbours['year']==_year)]
            if len(matched)>0:
                per_merger_max.append(matched['csa_share'].max())
        # NaN when no M&A of the episode has a match (max of an empty/all-NaN Series)
        CSA_episodes_impliedHHI_N.at[idx,_out_col] = pd.Series(per_merger_max,dtype=float).max()


# 4. Placebo Tests, Using CSA

## 4.1 Cross-market M&A

### 4.1.1 Randomly pick one

In [22]:
# Fix random seed
np.random.seed(314)

# One placebo (CSA, year) record per episode and per side of the deal, where available
CSA_episodes_impliedHHI_AcrossMarket = []

for idx,row in CSA_episodes_impliedHHI_N.iterrows():

    # Find the "representative" single M&A of this episode: the one with the
    # largest min(acquiror share, target share + other targets' share).
    # Note that 'min_share' is written onto the frame taken from row['mergers'].
    # NOTE(review): sort_values puts NaN last, so an M&A with a missing
    # min_share would be picked here - confirm the shares are never missing.
    mergers = row['mergers']
    mergers['min_share'] = np.minimum(mergers['acquiror_market_share_N_avg'],
        mergers['target_market_share_N_avg']+\
        mergers['other_targets_market_share_N_avg'])
    mergers = mergers.sort_values('min_share')
    repre_merger = mergers[-1:].reset_index(drop=True)
    repre_year = repre_merger['sale_year'][0]

    # CSAs that are disqualified as placebo: they have an M&A with
    # sale_year in [repre_year-1, repre_year+5] in which the acquiror and the
    # (combined) targets both have market share above 0.01.
    # Original author's note: market share is on a rolling basis of every
    # three years, so the window only needs to start from year -1.
    # This set is the same for every candidate and for both sides, so it is
    # computed once per episode.
    _in_window = (CSA_affected['sale_year']>=repre_year-1)&(CSA_affected['sale_year']<=repre_year+5)
    _both_present = (CSA_affected['acquiror_market_share_N_avg']>0)&\
        ((CSA_affected['target_market_share_N_avg']>0)|
        (CSA_affected['other_targets_market_share_N_avg']>0))
    _both_material = (CSA_affected['acquiror_market_share_N_avg']>0.01)&\
        (CSA_affected['target_market_share_N_avg']+CSA_affected['other_targets_market_share_N_avg']>0.01)
    CSAs_also_withinMA = CSA_affected.loc[_in_window&_both_present&_both_material,'CSA Code'].unique()

    # Find a placebo CSA, first for the acquiror, next for the target
    for _side_col in ('acquiror_parent','target_parent'):

        # Get other CSAs where the involved firm has an average market share
        # >10% (a flexible threshold) over [repre_year-3, repre_year]
        market_share = market_share_all_markets_byCSA[
            (market_share_all_markets_byCSA['parent_name']==repre_merger[_side_col][0])&
            (market_share_all_markets_byCSA['calendar_year']<=repre_year)&
            (market_share_all_markets_byCSA['calendar_year']>=repre_year-3)
            ].sort_values(['CSA Code','calendar_year'])
        market_share = market_share.groupby(['CSA Code']).agg({'market_share_N':'mean'}).reset_index()
        market_share = market_share[market_share['market_share_N']>0.1]

        # Drop the treated CSA itself and the disqualified CSAs
        market_share = market_share[market_share['CSA Code']!=row['CSA Code']]
        market_share = market_share[np.logical_not(market_share['CSA Code'].isin(CSAs_also_withinMA))]

        # Randomly pick such a CSA. If there are too many potential candidates, narrow the thresholds above
        if len(market_share)>0:
            CSA_episodes_impliedHHI_AcrossMarket.append(
                {'CSA Code':np.random.choice(market_share['CSA Code']),'episode_start_year':repre_year})

CSA_episodes_impliedHHI_AcrossMarket = pd.DataFrame(CSA_episodes_impliedHHI_AcrossMarket)

### 4.1.2 Randomly pick one, without requiring market share

In [23]:
# Fix random seed
np.random.seed(314)

# One placebo (CSA, year) record per episode and per side of the deal, where available
CSA_episodes_impliedHHI_AcrossMarket_AnyShare = []

for idx,row in CSA_episodes_impliedHHI_N.iterrows():

    # Find the "representative" single M&A of this episode: the one with the
    # largest min(acquiror share, target share + other targets' share).
    # Note that 'min_share' is written onto the frame taken from row['mergers'].
    # NOTE(review): sort_values puts NaN last, so an M&A with a missing
    # min_share would be picked here - confirm the shares are never missing.
    mergers = row['mergers']
    mergers['min_share'] = np.minimum(mergers['acquiror_market_share_N_avg'],
        mergers['target_market_share_N_avg']+\
        mergers['other_targets_market_share_N_avg'])
    mergers = mergers.sort_values('min_share')
    repre_merger = mergers[-1:].reset_index(drop=True)
    repre_year = repre_merger['sale_year'][0]

    # CSAs that are disqualified as placebo: they have an M&A with
    # sale_year in [repre_year-1, repre_year+5] in which the acquiror and the
    # (combined) targets both have market share above 0.01.
    # Original author's note: market share is on a rolling basis of every
    # three years, so the window only needs to start from year -1.
    # This set is the same for every candidate and for both sides, so it is
    # computed once per episode.
    _in_window = (CSA_affected['sale_year']>=repre_year-1)&(CSA_affected['sale_year']<=repre_year+5)
    _both_present = (CSA_affected['acquiror_market_share_N_avg']>0)&\
        ((CSA_affected['target_market_share_N_avg']>0)|
        (CSA_affected['other_targets_market_share_N_avg']>0))
    _both_material = (CSA_affected['acquiror_market_share_N_avg']>0.01)&\
        (CSA_affected['target_market_share_N_avg']+CSA_affected['other_targets_market_share_N_avg']>0.01)
    CSAs_also_withinMA = CSA_affected.loc[_in_window&_both_present&_both_material,'CSA Code'].unique()

    # Find a placebo CSA, first for the acquiror, next for the target
    for _side_col in ('acquiror_parent','target_parent'):

        # Get other CSAs where the involved firm has any positive average
        # market share (>0) over [repre_year-3, repre_year]
        market_share = market_share_all_markets_byCSA[
            (market_share_all_markets_byCSA['parent_name']==repre_merger[_side_col][0])&
            (market_share_all_markets_byCSA['calendar_year']<=repre_year)&
            (market_share_all_markets_byCSA['calendar_year']>=repre_year-3)
            ].sort_values(['CSA Code','calendar_year'])
        market_share = market_share.groupby(['CSA Code']).agg({'market_share_N':'mean'}).reset_index()
        market_share = market_share[market_share['market_share_N']>0]

        # Drop the treated CSA itself and the disqualified CSAs
        market_share = market_share[market_share['CSA Code']!=row['CSA Code']]
        market_share = market_share[np.logical_not(market_share['CSA Code'].isin(CSAs_also_withinMA))]

        # Randomly pick such a CSA. If there are too many potential candidates, narrow the thresholds above
        if len(market_share)>0:
            CSA_episodes_impliedHHI_AcrossMarket_AnyShare.append(
                {'CSA Code':np.random.choice(market_share['CSA Code']),'episode_start_year':repre_year})

CSA_episodes_impliedHHI_AcrossMarket_AnyShare = pd.DataFrame(CSA_episodes_impliedHHI_AcrossMarket_AnyShare)

### 4.1.3 Pick one with closest population size 

In [24]:

# One placebo (CSA, year) record per episode and per side of the deal, where available
CSA_episodes_impliedHHI_AcrossMarket_SamePop = []

for idx,row in CSA_episodes_impliedHHI_N.iterrows():

    # Find the "representative" single M&A of this episode: the one with the
    # largest min(acquiror share, target share + other targets' share).
    # Note that 'min_share' is written onto the frame taken from row['mergers'].
    # NOTE(review): sort_values puts NaN last, so an M&A with a missing
    # min_share would be picked here - confirm the shares are never missing.
    mergers = row['mergers']
    mergers['min_share'] = np.minimum(mergers['acquiror_market_share_N_avg'],
        mergers['target_market_share_N_avg']+\
        mergers['other_targets_market_share_N_avg'])
    mergers = mergers.sort_values('min_share')
    repre_merger = mergers[-1:].reset_index(drop=True)
    repre_year = repre_merger['sale_year'][0]

    # CSAs that are disqualified as placebo: they have an M&A with
    # sale_year in [repre_year-1, repre_year+5] in which the acquiror and the
    # (combined) targets both have market share above 0.01.
    # Original author's note: market share is on a rolling basis of every
    # three years, so the window only needs to start from year -1.
    # This set is the same for every candidate and for both sides, so it is
    # computed once per episode.
    _in_window = (CSA_affected['sale_year']>=repre_year-1)&(CSA_affected['sale_year']<=repre_year+5)
    _both_present = (CSA_affected['acquiror_market_share_N_avg']>0)&\
        ((CSA_affected['target_market_share_N_avg']>0)|
        (CSA_affected['other_targets_market_share_N_avg']>0))
    _both_material = (CSA_affected['acquiror_market_share_N_avg']>0.01)&\
        (CSA_affected['target_market_share_N_avg']+CSA_affected['other_targets_market_share_N_avg']>0.01)
    CSAs_also_withinMA = CSA_affected.loc[_in_window&_both_present&_both_material,'CSA Code'].unique()

    # Find a placebo CSA, first for the acquiror, next for the target
    for _side_col in ('acquiror_parent','target_parent'):

        # Get other CSAs where the involved firm has an average market share
        # >10% (a flexible threshold) over [repre_year-3, repre_year]
        market_share = market_share_all_markets_byCSA[
            (market_share_all_markets_byCSA['parent_name']==repre_merger[_side_col][0])&
            (market_share_all_markets_byCSA['calendar_year']<=repre_year)&
            (market_share_all_markets_byCSA['calendar_year']>=repre_year-3)
            ].sort_values(['CSA Code','calendar_year'])
        market_share = market_share.groupby(['CSA Code']).agg({'market_share_N':'mean'}).reset_index()
        market_share = market_share[market_share['market_share_N']>0.1]

        # Drop the treated CSA itself and the disqualified CSAs
        market_share = market_share[market_share['CSA Code']!=row['CSA Code']]
        market_share = market_share[np.logical_not(market_share['CSA Code'].isin(CSAs_also_withinMA))]
        if len(market_share)==0:
            # No candidate left for this side of the deal
            continue

        # Get dif. in population size between each candidate and the treated
        # (focal) CSA in the year of the representative M&A. Candidates or
        # focal CSAs without a CSA_POP row for that year drop out of the
        # (inner) merges.
        market_share = market_share.copy()
        market_share['year'] = repre_year
        market_share['focal_CSA'] = row['CSA Code']
        market_share = market_share.merge(CSA_POP[['year','CSA Code','pop']],on=['CSA Code','year'])
        market_share = market_share.rename(columns={'pop':'pop_candidate'})
        market_share = market_share.merge(CSA_POP[['year','CSA Code','pop']].\
            rename(columns={'CSA Code':'focal_CSA'}),on=['focal_CSA','year'])
        market_share = market_share.rename(columns={'pop':'pop_focal'})
        market_share['pop_diff'] = np.absolute(market_share['pop_focal']-market_share['pop_candidate'])
        market_share = market_share.sort_values(['pop_diff']).reset_index(drop=True)
        # Drop if difference is too big
        # market_share['pop_diff_ratio'] = market_share['pop_diff']/market_share['pop_focal']
        # market_share = market_share[market_share['pop_diff_ratio']<0.5]

        # Pick the candidate whose population is closest to the focal CSA's
        if len(market_share)>0:
            CSA_episodes_impliedHHI_AcrossMarket_SamePop.append(
                {'CSA Code':market_share['CSA Code'][0],'episode_start_year':repre_year})

CSA_episodes_impliedHHI_AcrossMarket_SamePop = pd.DataFrame(CSA_episodes_impliedHHI_AcrossMarket_SamePop)

### 4.1.4 Pick one with closest population size, without requiring market share

In [25]:
def _closest_population_placebo(parent_name,focal_CSA,event_year):
    """Pick a placebo CSA for one side of a merger, matched on population size.

    Candidates are CSAs other than `focal_CSA` where `parent_name` has a positive average
    "market_share_N" in "market_share_all_markets_byCSA" over calendar years
    [event_year-3, event_year]. A candidate is dropped if "CSA_affected" holds an M&A in that CSA
    with sale year in [event_year-1, event_year+5] whose acquiror average share is above 0.01 and
    whose target plus other-targets average share is above 0.01. Among the remaining candidates,
    the one whose "CSA_POP" population in `event_year` is closest to that of `focal_CSA` is chosen.

    Returns the "CSA Code" of the chosen candidate, or None if no candidate is left.
    """

    # Get other CSAs where involved firm has market share>0% (a flexible threshold)
    firm_share = market_share_all_markets_byCSA[
        (market_share_all_markets_byCSA['parent_name']==parent_name)&
        (market_share_all_markets_byCSA['calendar_year']<=event_year)&
        (market_share_all_markets_byCSA['calendar_year']>=event_year-3)
        ]
    candidates = firm_share.groupby(['CSA Code']).agg({'market_share_N':'mean'}).reset_index()
    candidates = candidates[(candidates['market_share_N']>0)&(candidates['CSA Code']!=focal_CSA)]
    if len(candidates)==0:
        return None

    # Actual M&As in [-1,+5] around the event with both sides' market share above 0.01. Note that market share is
    # on a rolling basis of every three years, so the window only needs to start from year -1. This is the same
    # filter as checking each candidate CSA one by one, done in a single pass over "CSA_affected"
    conflicting_MA = CSA_affected[
        (CSA_affected['acquiror_market_share_N_avg']>0.01)&
        ((CSA_affected['target_market_share_N_avg']>0)|
        (CSA_affected['other_targets_market_share_N_avg']>0))&
        (CSA_affected['target_market_share_N_avg']+CSA_affected['other_targets_market_share_N_avg']>0.01)&
        (CSA_affected['sale_year']>=event_year-1)&
        (CSA_affected['sale_year']<=event_year+5)
        ]
    candidates = candidates[~candidates['CSA Code'].isin(conflicting_MA['CSA Code'])].copy()
    if len(candidates)==0:
        return None

    # Get dif. in population size between each candidate and the focal CSA in the event year
    candidates['year'] = event_year
    candidates['focal_CSA'] = focal_CSA
    candidates = candidates.merge(CSA_POP[['year','CSA Code','pop']],on=['CSA Code','year'])
    candidates = candidates.rename(columns={'pop':'pop_candidate'})
    candidates = candidates.merge(CSA_POP[['year','CSA Code','pop']].\
        rename(columns={'CSA Code':'focal_CSA'}),on=['focal_CSA','year'])
    candidates = candidates.rename(columns={'pop':'pop_focal'})
    candidates['pop_diff'] = np.absolute(candidates['pop_focal']-candidates['pop_candidate'])
    candidates = candidates.sort_values(['pop_diff']).reset_index(drop=True)
    # Drop if difference is too big
    # candidates['pop_diff_ratio'] = candidates['pop_diff']/candidates['pop_focal']
    # candidates = candidates[candidates['pop_diff_ratio']<0.5]
    if len(candidates)==0:
        # Population is unavailable for the focal CSA or for every candidate in the event year
        return None

    return candidates['CSA Code'][0]


placebo_records = []

for idx,row in CSA_episodes_impliedHHI_N.iterrows():

    # Find the "representative" single M&A of this episode: the one where the smaller of the acquiror's share and
    # the combined targets' share is the largest. Note that "min_share" is written onto the episode's own "mergers" table
    mergers = row['mergers']
    mergers['min_share'] = np.minimum(mergers['acquiror_market_share_N_avg'],
        mergers['target_market_share_N_avg']+\
        mergers['other_targets_market_share_N_avg'])
    mergers = mergers.sort_values('min_share')
    repre_merger = mergers[-1:].reset_index(drop=True)

    # Find a placebo CSA first for the acquiror, then for the target, of the representative merger
    for side in ['acquiror_parent','target_parent']:
        placebo_CSA = _closest_population_placebo(
            repre_merger[side][0],row['CSA Code'],repre_merger['sale_year'][0])
        if placebo_CSA is not None:
            placebo_records.append(
                {'CSA Code':placebo_CSA,'episode_start_year':repre_merger['sale_year'][0]})

CSA_episodes_impliedHHI_AcrossMarket_SamePop_AnyShare = pd.DataFrame(placebo_records)

## 4.2 Withdrawn M&A

In [26]:
# Withdrawn M&As: the announcement year plays the role of the sale year used for completed M&As.
# Only the target, the acquiror and that year are kept
MA_withdrawn = (
    pd.read_csv("../CleanData/SDC/0I_MA_withdrawn.csv")
    .rename(columns={'announce_year':'sale_year'})
    [['target','acquiror','sale_year']]
)

### 4.2.1 Find CSA X Year that could be affected by withdrawn merger

In [27]:

# Go over each merger. Check the CSAs affected by the merger (i.e., either side has business in the CSA in the year prior 
# to the merger). Check if the merger affects just one underwriter or affects multiple underwriters in this CSA.

# Note that for the column "market share of other targets", the optimal object to put there is the market share of the other target
# alone. Here I am instead putting in market share of the other target's parent. This should make a minimal difference.

# One underwriter-name column and one parent-name column for each raw underwriter-name column of GPF, numbered from 0
name_GPF_colnames = ['name_GPF_{}'.format(position) for position,_ in enumerate(raw_name_GPF_colnames)]
parent_name_colnames = ['parent_name_{}'.format(position) for position,_ in enumerate(raw_name_GPF_colnames)]

def _is_missing_name(item):
    """Return True if `item` is a missing-value placeholder (None or NaN) instead of a firm name."""
    return item is None or str(item)=='nan'


def _names_in(frame,colnames):
    """Return the set of non-missing names found in columns `colnames` of `frame`."""
    return {item for item in frame[colnames].to_numpy().ravel() if not _is_missing_name(item)}


def _parent_of(name,year):
    """Return the parent of `name` in "GPF_names" for `year` (first matching row).

    If there is no row for (`name`, `year`), `name` itself is returned, i.e., the firm is treated as a parent.
    """
    matches = GPF_names[(GPF_names['name_GPF']==name)&(GPF_names['sale_year']==year)]
    if len(matches)>0:
        return matches['parent_name'].iloc[0]
    return name


def _first_share_N(parent_name,CSA,year):
    """Return "market_share_N" of `parent_name` in `CSA` in `year` (first matching row), or 0 if there is no row."""
    matches = market_share_all_markets_byCSA[
        (market_share_all_markets_byCSA['parent_name']==parent_name)
        &(market_share_all_markets_byCSA['CSA Code']==CSA)
        &(market_share_all_markets_byCSA['calendar_year']==year)]
    if len(matches)>0:
        return matches['market_share_N'].iloc[0]
    return 0


def _total_share_N(parent_names,CSA,year):
    """Return the sum of "market_share_N" over `parent_names` in `CSA` in `year`, or 0 if there is no row."""
    matches = market_share_all_markets_byCSA[
        (market_share_all_markets_byCSA['parent_name'].isin(parent_names))
        &(market_share_all_markets_byCSA['CSA Code']==CSA)
        &(market_share_all_markets_byCSA['calendar_year']==year)]
    if len(matches)>0:
        return np.sum(matches['market_share_N'])
    return 0


def proc_list(MA_withdrawn_frag):
    """Find the CSAs that each withdrawn merger in `MA_withdrawn_frag` could have affected.

    For each merger (one row with "acquiror", "target" and "sale_year") and each CSA with deals in "GPF" in the
    three years before the merger, a record is produced if the acquiror, the target, or another target of the same
    acquiror in the same year is active in that CSA in those years. The record holds the names involved, the parents
    of acquiror and target, and the market share ("market_share_N" from "market_share_all_markets_byCSA", which is
    at the parent level) of each side in years -1, -2 and -3. A firm without a market share row gets 0.

    Reads the notebook-level tables "GPF", "GPF_names", "MA_withdrawn" and "market_share_all_markets_byCSA" and the
    column lists "name_GPF_colnames" and "parent_name_colnames".

    Returns a DataFrame with one row per (merger, affected CSA). It has no columns if nothing is affected.
    """

    records = []
    MA_withdrawn_frag = MA_withdrawn_frag.reset_index(drop=True)

    for idx,row in MA_withdrawn_frag.iterrows():

        year = row['sale_year']

        # Find CSAs that this merger affects
        # Determine if an underwriter is active in an CSA based on activity of PRIOR years
        GPF_prioryears = GPF[(GPF['sale_year']>=year-3)&(GPF['sale_year']<=year-1)]
        GPF_names_prioryears = GPF_names[(GPF_names['sale_year']>=year-3)&(GPF_names['sale_year']<=year-1)]

        # Also check other targets of the acquiror in that year. This accounts for cases where post merger the new formed
        # entity is new and appear as a name that was not in the sample before. Note that here the fragment
        # "MA_withdrawn_frag" cannot be used or the other firm involved in the merger will be missed. Instead, use the
        # whole sample "MA_withdrawn"
        other_targets = \
            list(MA_withdrawn[(MA_withdrawn['acquiror']==row['acquiror'])&
            (MA_withdrawn['sale_year']==year)&
            (MA_withdrawn['target']!=row['target'])]['target'])

        # Parents of the firms involved, as "market_share_all_markets_byCSA" is at parent level. Note that I must use
        # "GPF_names" (the parent-subsidiary mapping) of the year prior to the MA. None of these depends on the CSA
        acquiror_parent = _parent_of(row['acquiror'],year-1)
        target_parent = _parent_of(row['target'],year-1)
        # Account for possibility that other targets can be either a parent or a standalone firm
        other_targets_parents = \
            list(GPF_names[(GPF_names['name_GPF'].isin(other_targets))
            &(GPF_names['sale_year']==year-1)]['parent_name'])+\
            list(other_targets)
        other_targets_parents = list(set(other_targets_parents))

        for CSA in GPF_prioryears['CSA Code'].unique():

            GPF_prioryears_oneCSA = GPF_prioryears[GPF_prioryears['CSA Code']==CSA]

            # Underwriters in this CSA, and their parents. Missing names (None or NaN) are dropped, the same way as
            # when market shares are counted from the parent name columns. Keeping NaN here would make the
            # subsidiary lookup below match every firm whose parent is missing
            underwriters_priorMA = _names_in(GPF_prioryears_oneCSA,name_GPF_colnames)
            parents_priorMA = _names_in(GPF_prioryears_oneCSA,parent_name_colnames)
            # Subsidiaries of parents in this CSA (using data of PRIOR years)
            subsidiaries_priorMA = set(GPF_names_prioryears[
                GPF_names_prioryears['parent_name'].isin(list(parents_priorMA))]['name_GPF'])

            # Determine if merger affects the CSA
            IF_acquiror_active = (row['acquiror'] in parents_priorMA) or (row['acquiror'] in underwriters_priorMA) \
                or (row['acquiror'] in subsidiaries_priorMA)
            IF_target_active = (row['target'] in parents_priorMA) or (row['target'] in underwriters_priorMA) \
                or (row['target'] in subsidiaries_priorMA)
            # NOTE(review): other targets are not checked against subsidiaries, unlike acquiror and target - confirm
            # this is intended
            IF_other_target_active = any(
                (other_target in parents_priorMA) or (other_target in underwriters_priorMA)
                for other_target in other_targets)
            if not (IF_acquiror_active or IF_target_active or IF_other_target_active):
                # Nothing is recorded for this CSA, so its market shares are not needed
                continue

            # Get market share of merged banks. Note that this is the market share in the years prior to M&A. There are
            # many cases where market share of a firm in an area is unavailable, which is because of no presence.
            record = {
                'CSA Code':CSA,
                'sale_year':year,
                'acquiror':row['acquiror'],
                'target':row['target'],
                'other_targets':other_targets,
                'acquiror_parent':acquiror_parent,
                'target_parent':target_parent,
            }

            #-------------------------#
            # Market share by N deals #
            #-------------------------#

            # (1) Market share of acquiror
            for lag in (1,2,3):
                record['acquiror_market_share_N_m'+str(lag)] = _first_share_N(acquiror_parent,CSA,year-lag)
            # (2) Market share of target
            for lag in (1,2,3):
                record['target_market_share_N_m'+str(lag)] = _first_share_N(target_parent,CSA,year-lag)
            # (3) Market share of other targets in the same transaction
            for lag in (1,2,3):
                record['other_targets_market_share_N_m'+str(lag)] = _total_share_N(other_targets_parents,CSA,year-lag)

            records.append(record)

    CSA_affected_withdrawn = pd.DataFrame(records)

    return CSA_affected_withdrawn

# Split the withdrawn mergers into 40 partitions and run "proc_list" on each partition in a separate worker process.
# "meta" lists the columns of the table that "proc_list" returns
MA_withdrawn_dd = dd.from_pandas(MA_withdrawn, npartitions=40)
with dask.config.set(scheduler='processes',num_workers=40):
    CSA_affected_withdrawn = MA_withdrawn_dd.map_partitions(
        proc_list,
        meta=pd.DataFrame(columns=[
            'CSA Code','sale_year','acquiror','target',
            'other_targets','acquiror_parent','target_parent',
            'acquiror_market_share_N_m1','acquiror_market_share_N_m2','acquiror_market_share_N_m3',
            'target_market_share_N_m1','target_market_share_N_m2','target_market_share_N_m3',
            'other_targets_market_share_N_m1','other_targets_market_share_N_m2','other_targets_market_share_N_m3',
        ]),
    ).compute()

# Average market share over past three years, for the acquiror, the target, and the other targets in turn
CSA_affected_withdrawn = CSA_affected_withdrawn.assign(**{
    side+'_market_share_N_avg':
        (CSA_affected_withdrawn[side+'_market_share_N_m1']+
        CSA_affected_withdrawn[side+'_market_share_N_m2']+
        CSA_affected_withdrawn[side+'_market_share_N_m3'])/3
    for side in ['acquiror','target','other_targets']
})


### 4.2.2 Construct events

In [28]:
#-----------------------------------------------------#
# Withdrawn episodes with Delta HHI of each threshold #
#-----------------------------------------------------#

# Thresholds on the implied HHI increase, and the label each one carries in variable names and output file names
withdrawn_threshold_labels = {
    0.01:'DeltaHHI100',
    0.005:'DeltaHHI50',
    0.003:'DeltaHHI30',
    0.002:'DeltaHHI20',
    0.001:'DeltaHHI10',
}

# Defined before its first use so that this cell does not depend on an earlier cell for it
parent_name_colnames = ['parent_name_'+str(i) for i in range(0,len(raw_name_GPF_colnames))]

# Columns of the episode table. Listing them keeps the table well-formed when no episode is identified
withdrawn_episode_colnames = [
    'episode_start_year','CSA Code','mergers','hhi_dif','n_deals_prior','n_deals_post',
    'acquiror_market_share_N_max','target_market_share_N_max','other_targets_market_share_N_max']

# Actual (completed) M&As that have both sides' market share above 0.01. These do not depend on the threshold
actual_MA = CSA_affected[
    (CSA_affected['acquiror_market_share_N_avg']>0.01)&
    ((CSA_affected['target_market_share_N_avg']>0)|
    (CSA_affected['other_targets_market_share_N_avg']>0))&
    (CSA_affected['target_market_share_N_avg']+CSA_affected['other_targets_market_share_N_avg']>0.01)]

withdrawn_episodes_by_label = {}
n_withdrawn_episodes_by_label = {}

for threshold,label in withdrawn_threshold_labels.items():

    # Identify episodes of withdrawn mergers at the CSA level

    # Go over each year with merger event, and check the M&As on this year and three years afterwards. If enough consolidation, an episode is
    # identified. Whether there is enough consolidation is judged by the implied increase in HHI. Note that there can be duplicates
    # as in cases where two firms merge into a new one, both will get recorded in "CSA_affected_withdrawn"

    episode_records = []

    for CSA in CSA_affected_withdrawn['CSA Code'].unique():

        CSA_affected_part = CSA_affected_withdrawn[CSA_affected_withdrawn['CSA Code']==CSA]
        CSA_affected_part = CSA_affected_part[
            (CSA_affected_part['acquiror_market_share_N_avg']>0)&
            ((CSA_affected_part['target_market_share_N_avg']>0)|
            (CSA_affected_part['other_targets_market_share_N_avg']>0))].sort_values('sale_year')

        episode_start_year = 1900
        for sale_year in CSA_affected_part['sale_year'].unique():

            # If this year is still within the last merger episode
            if sale_year<=episode_start_year+4:
                continue

            # Withdrawn M&As in that year and three years following, and deals in the CSA in years [-3,0]
            CSA_affected_episode = CSA_affected_part[
                (CSA_affected_part['sale_year']>=sale_year)&
                (CSA_affected_part['sale_year']<=sale_year+3)].reset_index(drop=True)
            GPF_oneCSA_priorMA = GPF[(GPF['sale_year']>=sale_year-3)&(GPF['sale_year']<=sale_year)&(GPF['CSA Code']==CSA)]

            # Calculate (1) HHI (by parent firm) before the episode (2) Predicted HHI after the mergers complete

            # Record market shares before merger episode. Each deal counts as one, split equally among its underwriters
            deal_counts = {}
            for parents_onedeal in GPF_oneCSA_priorMA[parent_name_colnames].itertuples(index=False,name=None):
                underwriters_onedeal = [item for item in parents_onedeal if item is not None and str(item)!='nan']
                n_underwriters = len(underwriters_onedeal)
                for item in underwriters_onedeal:
                    deal_counts[item] = deal_counts.get(item,0)+1/n_underwriters
            n_deals_prior = pd.DataFrame({
                'underwriter':list(deal_counts.keys()),
                'n_deals':list(deal_counts.values())})

            # HHI prior to merger
            hhi_prior = np.sum((n_deals_prior['n_deals']/np.sum(n_deals_prior['n_deals']))**2)

            # Implied HHI post merger. Work on a copy, so that the table of deals prior to the mergers stays as it is
            n_deals_post = n_deals_prior.copy()
            for target,acquiror_parent in zip(CSA_affected_episode['target'],CSA_affected_episode['acquiror_parent']):
                n_deals_post.loc[n_deals_post['underwriter']==target,'underwriter'] = acquiror_parent
            # Pass the name 'sum' instead of the builtin function, which newer versions of pandas warn about
            n_deals_post = n_deals_post.groupby('underwriter').agg({'n_deals':'sum'}).reset_index()
            hhi_predicted = np.sum((n_deals_post['n_deals']/np.sum(n_deals_post['n_deals']))**2)

            hhi_dif = hhi_predicted-hhi_prior

            # Check if the implied increase in HHI in the episode is high enough
            if hhi_dif>threshold:
                # An episode is identified
                # NOTE(review): the three "_max" values were not assigned anywhere in this cell. They are taken
                # here as the maximum average market share among the mergers of the episode - confirm this is
                # the intended definition
                episode_records.append({
                    'episode_start_year':sale_year,
                    'CSA Code':CSA,
                    'mergers':CSA_affected_episode,
                    'hhi_dif':hhi_dif,
                    'n_deals_prior':n_deals_prior,
                    'n_deals_post':n_deals_post,
                    'acquiror_market_share_N_max':CSA_affected_episode['acquiror_market_share_N_avg'].max(),
                    'target_market_share_N_max':CSA_affected_episode['target_market_share_N_avg'].max(),
                    'other_targets_market_share_N_max':CSA_affected_episode['other_targets_market_share_N_avg'].max(),
                    })
                episode_start_year = sale_year

    CSA_episodes_Withdrawn = pd.DataFrame(episode_records,columns=withdrawn_episode_colnames)


    #----------------------------------------------#
    # Check and rule out if affected by actual M&A #
    #----------------------------------------------#

    # Check if there is any actual M&A in the same CSA in between [-1,+5] of the episode start year that has both sides
    # market share above 0.01. Note that market share is on a rolling basis of every three years, so I only need to
    # start from year -1
    CSA_episodes_Withdrawn['if_also_withinMA'] = pd.Series(
        [bool((
            (actual_MA['CSA Code']==episode_CSA)&
            (actual_MA['sale_year']>=start_year-1)&
            (actual_MA['sale_year']<=start_year+5)).any())
        for episode_CSA,start_year in
        zip(CSA_episodes_Withdrawn['CSA Code'],CSA_episodes_Withdrawn['episode_start_year'])],
        index=CSA_episodes_Withdrawn.index,dtype=bool)

    CSA_episodes_Withdrawn = CSA_episodes_Withdrawn[~CSA_episodes_Withdrawn['if_also_withinMA']]

    # Number: Number of withdrawn M&A episodes under this threshold #
    withdrawn_episodes_by_label[label] = CSA_episodes_Withdrawn
    n_withdrawn_episodes_by_label[label] = '{:,}'.format(len(CSA_episodes_Withdrawn))
    with open('../Draft/nums/n_CSA_episodes_Withdrawn_'+label+'.tex','w') as file:
        file.write(n_withdrawn_episodes_by_label[label])

# Episode table and formatted number of episodes under each threshold
CSA_episodes_Withdrawn_DeltaHHI100 = withdrawn_episodes_by_label['DeltaHHI100']
CSA_episodes_Withdrawn_DeltaHHI50 = withdrawn_episodes_by_label['DeltaHHI50']
CSA_episodes_Withdrawn_DeltaHHI30 = withdrawn_episodes_by_label['DeltaHHI30']
CSA_episodes_Withdrawn_DeltaHHI20 = withdrawn_episodes_by_label['DeltaHHI20']
CSA_episodes_Withdrawn_DeltaHHI10 = withdrawn_episodes_by_label['DeltaHHI10']
n_CSA_episodes_Withdrawn_DeltaHHI100 = n_withdrawn_episodes_by_label['DeltaHHI100']
n_CSA_episodes_Withdrawn_DeltaHHI50 = n_withdrawn_episodes_by_label['DeltaHHI50']
n_CSA_episodes_Withdrawn_DeltaHHI30 = n_withdrawn_episodes_by_label['DeltaHHI30']
n_CSA_episodes_Withdrawn_DeltaHHI20 = n_withdrawn_episodes_by_label['DeltaHHI20']
n_CSA_episodes_Withdrawn_DeltaHHI10 = n_withdrawn_episodes_by_label['DeltaHHI10']


# 5. Assemble a Treatment-Control Matched Sample

Notes:
- There are multiple versions of the episode definition (by market share or by HHI, with different cutoffs on the implied HHI increase, etc.). I go over each
version here.


In [30]:
# Prints a single asterisk and does nothing else.
# NOTE(review): looks like a leftover marker cell - confirm whether it is still needed
print('*')


*


In [112]:
episodes_files = [
    # ["By Market Share in terms of N deals",CSA_episodes_marketshare_N,1,
    #     '../CleanData/MAEvent/CSA_episodes_marketshareByN.csv',
    #     '../CleanData/MAEvent/CSA_episodes_marketshareByN_bondlevel.csv',
    #     '../CleanData/MAEvent/CSA_episodes_marketshareByN_Quant.csv',
    #     '../CleanData/MAEvent/CSA_episodes_marketshareByN_Quant_GeneralUse.csv',
    #     '../CleanData/MAEvent/CSA_episodes_marketshareByN_Quant_IssuerType.csv',
    #     '../CleanData/MAEvent/CSA_episodes_marketshareByN_Quant_Bid.csv',
    #     '../CleanData/MAEvent/CSA_episodes_marketshareByN_GovFin.csv',
    #     '../CleanData/MAEvent/CSA_episodes_marketshareByN_IncPop.csv',
    #     '../CleanData/MAEvent/CSA_episodes_marketshareByN_CountyInc.csv',
    # ],
    ["By Implied HHI Increase in terms of N deals, >= 0.01",CSA_episodes_impliedHHI_N,1,
        '../CleanData/MAEvent/CSA_episodes_impliedHHIByN.csv',
        '../CleanData/MAEvent/CSA_episodes_impliedHHIByN_bondlevel.csv',
        '../CleanData/MAEvent/CSA_episodes_impliedHHIByN_Quant.csv',
        '../CleanData/MAEvent/CSA_episodes_impliedHHIByN_Quant_GeneralUse.csv',
        '../CleanData/MAEvent/CSA_episodes_impliedHHIByN_Quant_IssuerType.csv',
        '../CleanData/MAEvent/CSA_episodes_impliedHHIByN_Quant_Bid.csv',
        '../CleanData/MAEvent/CSA_episodes_impliedHHIByN_GovFin.csv',
        '../CleanData/MAEvent/CSA_episodes_impliedHHIByN_IncPop.csv',
        '../CleanData/MAEvent/CSA_episodes_impliedHHIByN_CountyInc.csv',
    ],
    # ["By Implied Top 5 Share Increase in terms of N deals, >= 0.01",CSA_episodes_top5share_N,1,
    #     '../CleanData/MAEvent/CSA_episodes_top5shareByN.csv',
    #     '../CleanData/MAEvent/CSA_episodes_top5shareByN_bondlevel.csv',
    #     '../CleanData/MAEvent/CSA_episodes_top5shareByN_Quant.csv',
    #     '../CleanData/MAEvent/CSA_episodes_top5shareByN_Quant_GeneralUse.csv',
    #     '../CleanData/MAEvent/CSA_episodes_top5shareByN_Quant_IssuerType.csv',
    #     '../CleanData/MAEvent/CSA_episodes_top5shareByN_Quant_Bid.csv',
    #     '../CleanData/MAEvent/CSA_episodes_top5shareByN_GovFin.csv',
    #     '../CleanData/MAEvent/CSA_episodes_top5shareByN_IncPop.csv',
    #     '../CleanData/MAEvent/CSA_episodes_top5shareByN_CountyInc.csv',
    # ],
    # ["By Implied HHI Increase in terms of N deals, >= 0.01, two match",CSA_episodes_impliedHHI_N,2,
    #     '../CleanData/MAEvent/CSA_TwoMatch_episodes_impliedHHIByN.csv',
    #     '../CleanData/MAEvent/CSA_TwoMatch_episodes_impliedHHIByN_bondlevel.csv',
    #     '../CleanData/MAEvent/CSA_TwoMatch_episodes_impliedHHIByN_Quant.csv',
    #     '../CleanData/MAEvent/CSA_TwoMatch_episodes_impliedHHIByN_Quant_GeneralUse.csv',
    #     '../CleanData/MAEvent/CSA_TwoMatch_episodes_impliedHHIByN_Quant_IssuerType.csv',
    #     '../CleanData/MAEvent/CSA_TwoMatch_episodes_impliedHHIByN_Quant_Bid.csv',
    #     '../CleanData/MAEvent/CSA_TwoMatch_episodes_impliedHHIByN_GovFin.csv',
    #     '../CleanData/MAEvent/CSA_TwoMatch_episodes_impliedHHIByN_IncPop.csv',
    #     '../CleanData/MAEvent/CSA_TwoMatch_episodes_impliedHHIByN_CountyInc.csv',
    # ],
    # ["By Implied HHI Increase in terms of N deals, >= 0.01, three match",CSA_episodes_impliedHHI_N,3,
    #     '../CleanData/MAEvent/CSA_ThreeMatch_episodes_impliedHHIByN.csv',
    #     '../CleanData/MAEvent/CSA_ThreeMatch_episodes_impliedHHIByN_bondlevel.csv',
    #     '../CleanData/MAEvent/CSA_ThreeMatch_episodes_impliedHHIByN_Quant.csv',
    #     '../CleanData/MAEvent/CSA_ThreeMatch_episodes_impliedHHIByN_Quant_GeneralUse.csv',
    #     '../CleanData/MAEvent/CSA_ThreeMatch_episodes_impliedHHIByN_Quant_IssuerType.csv',
    #     '../CleanData/MAEvent/CSA_ThreeMatch_episodes_impliedHHIByN_Quant_Bid.csv',
    #     '../CleanData/MAEvent/CSA_ThreeMatch_episodes_impliedHHIByN_GovFin.csv',
    #     '../CleanData/MAEvent/CSA_ThreeMatch_episodes_impliedHHIByN_IncPop.csv',
    #     '../CleanData/MAEvent/CSA_ThreeMatch_episodes_impliedHHIByN_CountyInc.csv',
    # ],
    # ["Cross-market M&A, By Implied HHI Increase in terms of N deals, >= 0.01",CSA_episodes_impliedHHI_AcrossMarket,1,
    #     '../CleanData/MAEvent/CSA_AcrossMarket_episodes_impliedHHIByN.csv',
    #     '../CleanData/MAEvent/CSA_AcrossMarket_episodes_impliedHHIByN_bondlevel.csv',
    #     '../CleanData/MAEvent/CSA_AcrossMarket_episodes_impliedHHIByN_Quant.csv',
    #     '../CleanData/MAEvent/CSA_AcrossMarket_episodes_impliedHHIByN_Quant_GeneralUse.csv',
    #     '../CleanData/MAEvent/CSA_AcrossMarket_episodes_impliedHHIByN_Quant_IssuerType.csv',
    #     '../CleanData/MAEvent/CSA_AcrossMarket_episodes_impliedHHIByN_Quant_Bid.csv',
    #     '../CleanData/MAEvent/CSA_AcrossMarket_episodes_impliedHHIByN_GovFin.csv',
    #     '../CleanData/MAEvent/CSA_AcrossMarket_episodes_impliedHHIByN_IncPop.csv',
    #     '../CleanData/MAEvent/CSA_AcrossMarket_episodes_impliedHHIByN_CountyInc.csv',
    # ],
    # ["Cross-market M&A, By Implied HHI Increase in terms of N deals, >= 0.01",CSA_episodes_impliedHHI_AcrossMarket_AnyShare,1,
    #     '../CleanData/MAEvent/CSA_AcrossMarket_AnyShare_episodes_impliedHHIByN.csv',
    #     '../CleanData/MAEvent/CSA_AcrossMarket_AnyShare_episodes_impliedHHIByN_bondlevel.csv',
    #     '../CleanData/MAEvent/CSA_AcrossMarket_AnyShare_episodes_impliedHHIByN_Quant.csv',
    #     '../CleanData/MAEvent/CSA_AcrossMarket_AnyShare_episodes_impliedHHIByN_Quant_GeneralUse.csv',
    #     '../CleanData/MAEvent/CSA_AcrossMarket_AnyShare_episodes_impliedHHIByN_Quant_IssuerType.csv',
    #     '../CleanData/MAEvent/CSA_AcrossMarket_AnyShare_episodes_impliedHHIByN_Quant_Bid.csv',
    #     '../CleanData/MAEvent/CSA_AcrossMarket_AnyShare_episodes_impliedHHIByN_GovFin.csv',
    #     '../CleanData/MAEvent/CSA_AcrossMarket_AnyShare_episodes_impliedHHIByN_IncPop.csv',
    #     '../CleanData/MAEvent/CSA_AcrossMarket_AnyShare_episodes_impliedHHIByN_CountyInc.csv',
    # ],
    # ["Cross-market M&A, By Implied HHI Increase in terms of N deals, >= 0.01",CSA_episodes_impliedHHI_AcrossMarket_SamePop,1,
    #     '../CleanData/MAEvent/CSA_AcrossMarket_SamePop_episodes_impliedHHIByN.csv',
    #     '../CleanData/MAEvent/CSA_AcrossMarket_SamePop_episodes_impliedHHIByN_bondlevel.csv',
    #     '../CleanData/MAEvent/CSA_AcrossMarket_SamePop_episodes_impliedHHIByN_Quant.csv',
    #     '../CleanData/MAEvent/CSA_AcrossMarket_SamePop_episodes_impliedHHIByN_Quant_GeneralUse.csv',
    #     '../CleanData/MAEvent/CSA_AcrossMarket_SamePop_episodes_impliedHHIByN_Quant_IssuerType.csv',
    #     '../CleanData/MAEvent/CSA_AcrossMarket_SamePop_episodes_impliedHHIByN_Quant_Bid.csv',
    #     '../CleanData/MAEvent/CSA_AcrossMarket_SamePop_episodes_impliedHHIByN_GovFin.csv',
    #     '../CleanData/MAEvent/CSA_AcrossMarket_SamePop_episodes_impliedHHIByN_IncPop.csv',
    #     '../CleanData/MAEvent/CSA_AcrossMarket_SamePop_episodes_impliedHHIByN_CountyInc.csv',
    # ],
    # ["Cross-market M&A, By Implied HHI Increase in terms of N deals, >= 0.01",CSA_episodes_impliedHHI_AcrossMarket_SamePop_AnyShare,1,
    #     '../CleanData/MAEvent/CSA_AcrossMarket_SamePop_AnyShare_episodes_impliedHHIByN.csv',
    #     '../CleanData/MAEvent/CSA_AcrossMarket_SamePop_AnyShare_episodes_impliedHHIByN_bondlevel.csv',
    #     '../CleanData/MAEvent/CSA_AcrossMarket_SamePop_AnyShare_episodes_impliedHHIByN_Quant.csv',
    #     '../CleanData/MAEvent/CSA_AcrossMarket_SamePop_AnyShare_episodes_impliedHHIByN_Quant_GeneralUse.csv',
    #     '../CleanData/MAEvent/CSA_AcrossMarket_SamePop_AnyShare_episodes_impliedHHIByN_Quant_IssuerType.csv',
    #     '../CleanData/MAEvent/CSA_AcrossMarket_SamePop_AnyShare_episodes_impliedHHIByN_Quant_Bid.csv',
    #     '../CleanData/MAEvent/CSA_AcrossMarket_SamePop_AnyShare_episodes_impliedHHIByN_GovFin.csv',
    #     '../CleanData/MAEvent/CSA_AcrossMarket_SamePop_AnyShare_episodes_impliedHHIByN_IncPop.csv',
    #     '../CleanData/MAEvent/CSA_AcrossMarket_SamePop_AnyShare_episodes_impliedHHIByN_CountyInc.csv',
    # ],
    # ["Withdrawn M&A, >= 0.01",CSA_episodes_Withdrawn_DeltaHHI100,1,
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI100.csv',
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI100_bondlevel.csv',
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI100_Quant.csv',
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI100_Quant_GeneralUse.csv',
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI100_Quant_IssuerType.csv',
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI100_Quant_Bid.csv',
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI100_GovFin.csv',
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI100_IncPop.csv',
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI100_CountyInc.csv',
    # ],
    # ["Withdrawn M&A, >= 0.005",CSA_episodes_Withdrawn_DeltaHHI50,1,
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI50.csv',
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI50_bondlevel.csv',
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI50_Quant.csv',
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI50_Quant_GeneralUse.csv',
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI50_Quant_IssuerType.csv',
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI50_Quant_Bid.csv',
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI50_GovFin.csv',
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI50_IncPop.csv',
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI50_CountyInc.csv',
    # ],
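    # NOTE(review): the label (>= 0.003) and the paths (DeltaHHI30) below do not match the episodes variable (DeltaHHI20) - confirm which frame is intended before re-enabling this entry.
    # ["Withdrawn M&A, >= 0.003",CSA_episodes_Withdrawn_DeltaHHI20,1,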
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI30.csv',
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI30_bondlevel.csv',
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI30_Quant.csv',
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI30_Quant_GeneralUse.csv',
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI30_Quant_IssuerType.csv',
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI30_Quant_Bid.csv',
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI30_GovFin.csv',
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI30_IncPop.csv',
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI30_CountyInc.csv',
    # ],
    # ["Withdrawn M&A, >= 0.002",CSA_episodes_Withdrawn_DeltaHHI20,1,
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI20.csv',
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI20_bondlevel.csv',
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI20_Quant.csv',
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI20_Quant_GeneralUse.csv',
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI20_Quant_IssuerType.csv',
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI20_Quant_Bid.csv',
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI20_GovFin.csv',
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI20_IncPop.csv',
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI20_CountyInc.csv',
    # ],
    # ["Withdrawn M&A, >= 0.001",CSA_episodes_Withdrawn_DeltaHHI10,1,
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI10.csv',
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI10_bondlevel.csv',
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI10_Quant.csv',
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI10_Quant_GeneralUse.csv',
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI10_Quant_IssuerType.csv',
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI10_Quant_Bid.csv',
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI10_GovFin.csv',
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI10_IncPop.csv',
    #     '../CleanData/MAEvent/CSA_Withdrawn_DeltaHHI10_CountyInc.csv',
    # ],
    # ["By Implied HHI Increase in terms of N deals, >= 0.01, match on both level and dynamics of demographics",CSA_episodes_impliedHHI_N,1,
    #     '../CleanData/MAEvent/CSA_Dynamics_episodes_impliedHHIByN.csv',
    #     '../CleanData/MAEvent/CSA_Dynamics_episodes_impliedHHIByN_bondlevel.csv',
    #     '../CleanData/MAEvent/CSA_Dynamics_episodes_impliedHHIByN_Quant.csv',
    #     '../CleanData/MAEvent/CSA_Dynamics_episodes_impliedHHIByN_Quant_GeneralUse.csv',
    #     '../CleanData/MAEvent/CSA_Dynamics_episodes_impliedHHIByN_Quant_IssuerType.csv',
    #     '../CleanData/MAEvent/CSA_Dynamics_episodes_impliedHHIByN_Quant_Bid.csv',
    #     '../CleanData/MAEvent/CSA_Dynamics_episodes_impliedHHIByN_GovFin.csv',
    #     '../CleanData/MAEvent/CSA_Dynamics_episodes_impliedHHIByN_IncPop.csv',
    #     '../CleanData/MAEvent/CSA_Dynamics_episodes_impliedHHIByN_CountyInc.csv',
    # ],
    # ["By Implied HHI Increase in terms of N deals, >= 0.01, match on outcome variables",CSA_episodes_impliedHHI_N,1,
    #     '../CleanData/MAEvent/CSA_Outcome_episodes_impliedHHIByN.csv',
    #     '../CleanData/MAEvent/CSA_Outcome_episodes_impliedHHIByN_bondlevel.csv',
    #     '../CleanData/MAEvent/CSA_Outcome_episodes_impliedHHIByN_Quant.csv',
    #     '../CleanData/MAEvent/CSA_Outcome_episodes_impliedHHIByN_Quant_GeneralUse.csv',
    #     '../CleanData/MAEvent/CSA_Outcome_episodes_impliedHHIByN_Quant_IssuerType.csv',
    #     '../CleanData/MAEvent/CSA_Outcome_episodes_impliedHHIByN_Quant_Bid.csv',
    #     '../CleanData/MAEvent/CSA_Outcome_episodes_impliedHHIByN_GovFin.csv',
    #     '../CleanData/MAEvent/CSA_Outcome_episodes_impliedHHIByN_IncPop.csv',
    #     '../CleanData/MAEvent/CSA_Outcome_episodes_impliedHHIByN_CountyInc.csv',
    # ],
    # ["By Implied HHI Increase in terms of N deals, >= 0.01, exclude cases confounded by CB M&A based on 0.01 CB HHI",CSA_episodes_impliedHHI_N,1,
    #     '../CleanData/MAEvent/CSA_episodes_impliedHHIByN_excludeCBConfound100.csv',
    #     '../CleanData/MAEvent/CSA_episodes_impliedHHIByN_excludeCBConfound100_bondlevel.csv',
    #     '../CleanData/MAEvent/CSA_episodes_impliedHHIByN_excludeCBConfound100_Quant.csv',
    #     '../CleanData/MAEvent/CSA_episodes_impliedHHIByN_excludeCBConfound100_Quant_GeneralUse.csv',
    #     '../CleanData/MAEvent/CSA_episodes_impliedHHIByN_excludeCBConfound100_Quant_IssuerType.csv',
    #     '../CleanData/MAEvent/CSA_episodes_impliedHHIByN_excludeCBConfound100_Quant_Bid.csv',
    #     '../CleanData/MAEvent/CSA_episodes_impliedHHIByN_excludeCBConfound100_GovFin.csv',
    #     '../CleanData/MAEvent/CSA_episodes_impliedHHIByN_excludeCBConfound100_IncPop.csv',
    #     '../CleanData/MAEvent/CSA_episodes_impliedHHIByN_excludeCBConfound100_CountyInc.csv',
    # ],
    # ["By Implied HHI Increase in terms of N deals, >= 0.01, exclude cases confounded by CB M&A based on 0.005 CB HHI",CSA_episodes_impliedHHI_N,1,
    #     '../CleanData/MAEvent/CSA_episodes_impliedHHIByN_excludeCBConfound50.csv',
    #     '../CleanData/MAEvent/CSA_episodes_impliedHHIByN_excludeCBConfound50_bondlevel.csv',
    #     '../CleanData/MAEvent/CSA_episodes_impliedHHIByN_excludeCBConfound50_Quant.csv',
    #     '../CleanData/MAEvent/CSA_episodes_impliedHHIByN_excludeCBConfound50_Quant_GeneralUse.csv',
    #     '../CleanData/MAEvent/CSA_episodes_impliedHHIByN_excludeCBConfound50_Quant_IssuerType.csv',
    #     '../CleanData/MAEvent/CSA_episodes_impliedHHIByN_excludeCBConfound50_Quant_Bid.csv',
    #     '../CleanData/MAEvent/CSA_episodes_impliedHHIByN_excludeCBConfound50_GovFin.csv',
    #     '../CleanData/MAEvent/CSA_episodes_impliedHHIByN_excludeCBConfound50_IncPop.csv',
    #     '../CleanData/MAEvent/CSA_episodes_impliedHHIByN_excludeCBConfound50_CountyInc.csv',
    # ],
    # ["By Implied HHI Increase in terms of N deals, >= 0.01, use all non-treated CSAs as matches",CSA_episodes_impliedHHI_N,1000,
    #     '../CleanData/MAEvent/CSA_AllAsControl_episodes_impliedHHIByN.csv',
    #     '../CleanData/MAEvent/CSA_AllAsControl_episodes_impliedHHIByN_bondlevel.csv',
    #     '../CleanData/MAEvent/CSA_AllAsControl_episodes_impliedHHIByN_Quant.csv',
    #     '../CleanData/MAEvent/CSA_AllAsControl_episodes_impliedHHIByN_Quant_GeneralUse.csv',
    #     '../CleanData/MAEvent/CSA_AllAsControl_episodes_impliedHHIByN_Quant_IssuerType.csv',
    #     '../CleanData/MAEvent/CSA_AllAsControl_episodes_impliedHHIByN_Quant_Bid.csv',
    #     '../CleanData/MAEvent/CSA_AllAsControl_episodes_impliedHHIByN_GovFin.csv',
    #     '../CleanData/MAEvent/CSA_AllAsControl_episodes_impliedHHIByN_IncPop.csv',
    #     '../CleanData/MAEvent/CSA_AllAsControl_episodes_impliedHHIByN_CountyInc.csv',
    # ],
    # ["By Implied HHI Increase in terms of N deals, >= 0.01, require control to be never treated",CSA_episodes_impliedHHI_N,1,
    #     '../CleanData/MAEvent/CSA_episodes_impliedHHIByN_ControlNeverTreated.csv',
    #     '../CleanData/MAEvent/CSA_episodes_impliedHHIByN_ControlNeverTreated_bondlevel.csv',
    #     '../CleanData/MAEvent/CSA_episodes_impliedHHIByN_ControlNeverTreated_Quant.csv',
    #     '../CleanData/MAEvent/CSA_episodes_impliedHHIByN_ControlNeverTreated_Quant_GeneralUse.csv',
    #     '../CleanData/MAEvent/CSA_episodes_impliedHHIByN_ControlNeverTreated_Quant_IssuerType.csv',
    #     '../CleanData/MAEvent/CSA_episodes_impliedHHIByN_ControlNeverTreated_Quant_Bid.csv',
    #     '../CleanData/MAEvent/CSA_episodes_impliedHHIByN_ControlNeverTreated_GovFin.csv',
    #     '../CleanData/MAEvent/CSA_episodes_impliedHHIByN_ControlNeverTreated_IncPop.csv',
    #     '../CleanData/MAEvent/CSA_episodes_impliedHHIByN_ControlNeverTreated_CountyInc.csv',
    # ],
    ]


In [113]:
# %%script false --no-raise-error

for episodes_file in episodes_files:

    # Unpack one specification: its label, the episodes frame, the number of
    # controls to collect per episode, and nine file paths (presumably output
    # locations; they are not used in this part of the loop).
    (criteria,episodes,N_matches,
        file_path,file_path_bondlevel,
        file_path_Quant,file_path_Quant_GeneralUse,file_path_Quant_IssuerType,file_path_Quant_Bid,
        file_path_GovFin,file_path_IncPop,file_path_CountyInc) = episodes_file[:12]

    # Work on a copy so the frame referenced by the specification list is left untouched
    episodes = episodes.copy()



    ########################################
    # Find control for each merger episode #
    ########################################
    
    # Demographics used for matching: the population and income files are joined
    # on CSA code and year, then trimmed to the variables the matching uses.
    # Reloaded on every pass of the outer loop, so any later rebinding of
    # CSA_Data within a pass does not carry over to the next specification.
    CSA_POP = pd.read_csv("../CleanData/Demographics/0C_CSA_Pop.csv")
    CSA_INC = pd.read_csv("../CleanData/Demographics/0C_CSA_Inc.csv")
    CSA_Data = pd.merge(CSA_POP,CSA_INC,on=['CSA Code','year'])
    CSA_Data = CSA_Data[['CSA Code','year','inc','pop','inc_inc_rate','pop_inc_rate']]
    # CSA pairs (columns CSA_1 / CSA_2) used below to drop same-state candidates
    Same_State_CSA_pairs = pd.read_csv("../CleanData/Demographics/0C_Same_State_CSA_pairs.csv")

    #-------------------#
    # Baseline matching #
    #-------------------#
    
    def calculate_distance(row,weightingmat):
        """Mahalanobis distance between a candidate CSA and the treated CSA.

        row: mapping with the candidate's 'inc' and 'pop' and the treated
            CSA's 'treated_inc' and 'treated_pop'.
        weightingmat: inverse covariance matrix of (inc, pop), in that order.
        """
        candidate_point = (row['inc'],row['pop'])
        treated_point = (row['treated_inc'],row['treated_pop'])
        return sp.spatial.distance.mahalanobis(candidate_point,treated_point,weightingmat)
    
    # Baseline matching: for every episode, rank the CSAs observed in the
    # episode's start year by Mahalanobis distance in (inc, pop) to the treated
    # CSA, drop ineligible candidates, and keep the nearest N_matches candidates
    # that are not themselves treated in the window [start-4, start+4].
    # The result is stored as a list in episodes['control']; it stays None when
    # the treated CSA has no demographic row for that year, and may be an empty
    # list when every candidate is rejected.

    # Cutoff on the CB HHI change used to screen candidate controls. It follows
    # the variant named in `criteria` (0.005 for the "0.005 CB HHI" variant,
    # 0.01 otherwise) so that controls are screened with the same cutoff as the
    # treated episodes of that variant.
    _cb_control_cutoff = 0.005 if "based on 0.005 CB HHI" in criteria else 0.01
    _cb_dif_columns = ['CB_hhi_dif',
        'CB_hhi_dif_m1','CB_hhi_dif_m2','CB_hhi_dif_m3','CB_hhi_dif_m4',
        'CB_hhi_dif_p1','CB_hhi_dif_p2','CB_hhi_dif_p3','CB_hhi_dif_p4']

    # Years in which each CSA is itself treated, i.e. has a CSA_affected row
    # whose acquiror share and combined target share both exceed 0.01. This does
    # not depend on the episode or on the candidate, so it is built once here
    # instead of being recomputed for every candidate of every episode.
    _CSA_affected_material = CSA_affected[(CSA_affected['acquiror_market_share_N_avg']>0.01)&
        (CSA_affected['target_market_share_N_avg']+CSA_affected['other_targets_market_share_N_avg']>0.01)]
    _treated_years_by_CSA = {}
    for _CSA_code,_sale_year in zip(_CSA_affected_material['CSA Code'],_CSA_affected_material['sale_year']):
        _treated_years_by_CSA.setdefault(_CSA_code,set()).add(_sale_year)

    episodes['control'] = None
    for idx,row in episodes.iterrows():

        # Candidate pool: every CSA with demographics in the episode's start year
        CSA_Data_oneyear = CSA_Data[CSA_Data['year']==row['episode_start_year']].copy()

        # Demographic data of the treated CSA
        CSA_Data_oneyear_frag = CSA_Data_oneyear[CSA_Data_oneyear['CSA Code']==row['CSA Code']]
        if len(CSA_Data_oneyear_frag)==0:
            # No demographics for the treated CSA in this year: 'control' stays None
            continue
        episode_pop = CSA_Data_oneyear_frag['pop'].iloc[0]
        episode_inc = CSA_Data_oneyear_frag['inc'].iloc[0]

        # Find a match
        # The treated values are read before winsorisation, so they are the raw values
        CSA_Data_oneyear['treated_pop'] = episode_pop
        CSA_Data_oneyear['treated_inc'] = episode_inc
        # Get weighting matrix
        CSA_Data_oneyear['inc'] = winsor2(CSA_Data_oneyear['inc'],cutoffs=[0.05,0.05])
        CSA_Data_oneyear['pop'] = winsor2(CSA_Data_oneyear['pop'],cutoffs=[0.05,0.05])
        cov = CSA_Data_oneyear[['inc','pop']].cov()
        invcov = np.linalg.inv(cov)
        CSA_Data_oneyear['dist'] = CSA_Data_oneyear.apply(calculate_distance, axis=1,weightingmat=invcov)
        # Nearest candidates first; the filters below keep this order
        CSA_Data_oneyear = CSA_Data_oneyear.sort_values('dist').reset_index(drop=True)
        # Remove oneself from potential matches
        CSA_Data_oneyear = CSA_Data_oneyear[CSA_Data_oneyear['CSA Code']!=row['CSA Code']]
        # Remove other CSAs in the same state from potential matches
        Same_State_CSAs = list(Same_State_CSA_pairs[Same_State_CSA_pairs['CSA_1']==row['CSA Code']]['CSA_2'])
        CSA_Data_oneyear = CSA_Data_oneyear[~CSA_Data_oneyear['CSA Code'].isin(Same_State_CSAs)]

        # A version of the sample that excludes cases confounded by CB M&A. Both the
        # treated and the control CSA are required to be unaffected. This is the
        # requirement for the control; the treated episodes are screened later in
        # the loop. A candidate passes when every CB HHI change column is below the
        # cutoff or missing. The merge is an inner join, so candidates without a
        # Delta_CB_HHI row for this year are dropped as well.
        if "exclude cases confounded by CB M&A" in criteria:
            CSA_Data_oneyear = CSA_Data_oneyear.merge(Delta_CB_HHI,on=['CSA Code','year'])
            _cb_dif = CSA_Data_oneyear[_cb_dif_columns]
            CSA_Data_oneyear = CSA_Data_oneyear[((_cb_dif<_cb_control_cutoff)|pd.isnull(_cb_dif)).all(axis=1)]

        # A version of sample where I address the critique in Baker et al (2022) that estimates can be biased if
        # previously treated units act as control
        if "require control to be never treated" in criteria:
            episodes_select_columns = episodes[['CSA Code','episode_start_year']]
            episodes_select_columns = episodes_select_columns[episodes_select_columns['episode_start_year']<row['episode_start_year']]
            previous_treated = list(episodes_select_columns['CSA Code'])
            CSA_Data_oneyear = CSA_Data_oneyear[~CSA_Data_oneyear['CSA Code'].isin(previous_treated)]

        # Years around the episode in which a control must not be treated itself
        _episode_window = set(range(row['episode_start_year']-4,row['episode_start_year']+5))

        match_counter = 0
        control = []
        # iterrows is kept on purpose: the stored codes keep exactly the value
        # type that row-wise iteration yields
        for subidx,subrow in CSA_Data_oneyear.iterrows():
            # Exclude those that are treated themselves as controls
            if not _episode_window.isdisjoint(_treated_years_by_CSA.get(subrow['CSA Code'],())):
                # This potential control is treated
                continue
            # This potential control is not treated => Good control
            control = control+[subrow['CSA Code']]
            match_counter = match_counter+1
            if match_counter==N_matches:
                break

        episodes.at[idx,'control'] = control

    #-----------------------------------------------------------#
    # Matching on both level and dynamics of local demographics #
    #-----------------------------------------------------------#

    # Runs only for the specification whose label contains the phrase below.
    # It resets episodes['control'] and recomputes it, so any matches stored
    # earlier in this pass of the outer loop are discarded.
    # NOTE(review): unlike the baseline matching, no CB-confound or never-treated
    # screening of candidates is applied here - confirm this is intended.
    if "match on both level and dynamics of demographics" in criteria:
        
        # Rebinds calculate_distance for the rest of this pass of the outer loop
        def calculate_distance(row,weightingmat):
            """Mahalanobis distance between a candidate and the treated CSA.

            Uses four variables: 'inc', 'pop', 'inc_inc_rate', 'pop_inc_rate'
            against their 'treated_' counterparts in the same row.
            weightingmat is the inverse covariance matrix in that variable order.
            """
            return sp.spatial.distance.mahalanobis((row['inc'],row['pop'],row['inc_inc_rate'],row['pop_inc_rate']),\
                (row['treated_inc'],row['treated_pop'],row['treated_inc_inc_rate'],row['treated_pop_inc_rate']),weightingmat)
        
        episodes['control'] = None
        for idx,row in episodes.iterrows():

            # Income data is unavailable in recent years and is imputed
            # Episodes starting in 2020 or later use the 2020 demographics
            if row['episode_start_year']>=2020:
                episode_start_year = 2020
            else:
                episode_start_year = row['episode_start_year']
            # Demographics of all CSAs in that (capped) year: the candidate pool
            CSA_Data_oneyear = CSA_Data[CSA_Data['year']==episode_start_year].copy()
        
            # Demographic data of the treated CSA
            CSA_Data_oneyear_frag = CSA_Data_oneyear[CSA_Data_oneyear['CSA Code']==row['CSA Code']].copy()
            if len(CSA_Data_oneyear_frag)==0:
                # Treated CSA has no row in that year: 'control' stays None
                continue
            episode_pop = CSA_Data_oneyear_frag.reset_index()['pop'][0]
            episode_inc = CSA_Data_oneyear_frag.reset_index()['inc'][0]
            episode_pop_inc_rate = CSA_Data_oneyear_frag.reset_index()['pop_inc_rate'][0]
            episode_inc_inc_rate = CSA_Data_oneyear_frag.reset_index()['inc_inc_rate'][0]

            # Find a match
            # The treated values were read above, before winsorisation, so they are raw values
            CSA_Data_oneyear['treated_pop'] = episode_pop
            CSA_Data_oneyear['treated_inc'] = episode_inc
            CSA_Data_oneyear['treated_pop_inc_rate'] = episode_pop_inc_rate
            CSA_Data_oneyear['treated_inc_inc_rate'] = episode_inc_inc_rate
            # Get weighting matrix
            # Candidate values are winsorised with 0.05 cutoffs on each side before
            # the covariance is computed
            CSA_Data_oneyear['inc'] = winsor2(CSA_Data_oneyear['inc'],cutoffs=[0.05,0.05])
            CSA_Data_oneyear['pop'] = winsor2(CSA_Data_oneyear['pop'],cutoffs=[0.05,0.05])
            CSA_Data_oneyear['inc_inc_rate'] = winsor2(CSA_Data_oneyear['inc_inc_rate'],cutoffs=[0.05,0.05])
            CSA_Data_oneyear['pop_inc_rate'] = winsor2(CSA_Data_oneyear['pop_inc_rate'],cutoffs=[0.05,0.05])
            cov = CSA_Data_oneyear[['inc','pop','inc_inc_rate','pop_inc_rate']].cov()
            invcov = np.linalg.inv(cov)
            CSA_Data_oneyear['dist'] = CSA_Data_oneyear.apply(calculate_distance, axis=1,weightingmat=invcov)
            # Nearest candidates first; the filters below keep this order
            CSA_Data_oneyear = CSA_Data_oneyear.sort_values('dist').reset_index(drop=True)
            # Remove oneself from potential matches
            CSA_Data_oneyear = CSA_Data_oneyear[CSA_Data_oneyear['CSA Code']!=row['CSA Code']]
            # Remove other CSAs in the same state from potential matches
            Same_State_CSAs = list(Same_State_CSA_pairs[Same_State_CSA_pairs['CSA_1']==row['CSA Code']]['CSA_2'])
            CSA_Data_oneyear = CSA_Data_oneyear[~CSA_Data_oneyear['CSA Code'].isin(Same_State_CSAs)]
        
            match_counter = 0
            control = []
            for subidx,subrow in CSA_Data_oneyear.iterrows():
                # Years for which potential control is treated itself
                CSA_affected_frag = CSA_affected[CSA_affected['CSA Code']==subrow['CSA Code']]
                CSA_affected_frag = CSA_affected_frag[(CSA_affected_frag['acquiror_market_share_N_avg']>0.01)&
                    (CSA_affected_frag['target_market_share_N_avg']+CSA_affected_frag['other_targets_market_share_N_avg']>0.01)]
                CSA_affected_frag_affected_years = list(CSA_affected_frag['sale_year'].unique())
                # Exclude those that are treated themselves as controls
                # The window is start-4 .. start+4 and uses the uncapped start year
                # from `row`, not the capped episode_start_year used for the data lookup
                if len(set(list(range(row['episode_start_year']-4,row['episode_start_year']+5))).\
                    intersection(set(CSA_affected_frag_affected_years)))>0:
                    # This potential control is treated
                    continue
                else:
                    # This potential control is not treated => Good control
                    control = control+[subrow['CSA Code']]
                    match_counter = match_counter+1
                    if match_counter==N_matches:
                        break
        
            # Stored as a list; it is empty when every candidate was rejected
            episodes.at[idx,'control'] = control

    #-------------------------------#
    # Matching on outcome variables #
    #-------------------------------#

    # Runs only for the specification whose label contains the phrase below.
    # It resets episodes['control'] and recomputes it, so any matches stored
    # earlier in this pass of the outer loop are discarded.
    # NOTE(review): unlike the baseline matching, no CB-confound or never-treated
    # screening of candidates is applied here - confirm this is intended.
    if "match on outcome variables" in criteria:

        # Adds the columns of CSACharsForMatch to the demographics (its 'sale_year'
        # is aligned to 'year'). The outer join keeps CSA-years present in only one
        # of the two frames. CSA_Data is rebound here for the rest of this pass.
        CSA_Data = CSA_Data.merge(CSACharsForMatch.rename(columns={'sale_year':'year'}),on=['CSA Code','year'],how='outer')

        # Rebinds calculate_distance for the rest of this pass of the outer loop
        def calculate_distance(row,weightingmat):
            """Mahalanobis distance between a candidate and the treated CSA.

            Uses four variables: 'inc', 'pop', 'gross_spread', 'avg_yield'
            against their 'treated_' counterparts in the same row.
            weightingmat is the inverse covariance matrix in that variable order.
            """
            return sp.spatial.distance.mahalanobis((row['inc'],row['pop'],row['gross_spread'],row['avg_yield']),\
                (row['treated_inc'],row['treated_pop'],row['treated_gross_spread'],row['treated_avg_yield']),weightingmat)
        
        episodes['control'] = None
        for idx,row in episodes.iterrows():

            # Income data is unavailable in recent years and is imputed
            # Episodes starting in 2020 or later use the 2020 data
            if row['episode_start_year']>=2020:
                episode_start_year = 2020
            else:
                episode_start_year = row['episode_start_year']
            # Data of all CSAs in that (capped) year: the candidate pool
            CSA_Data_oneyear = CSA_Data[CSA_Data['year']==episode_start_year].copy()
        
            # Demographic data of the treated CSA
            CSA_Data_oneyear_frag = CSA_Data_oneyear[CSA_Data_oneyear['CSA Code']==row['CSA Code']].copy()
            if len(CSA_Data_oneyear_frag)==0:
                # Treated CSA has no row in that year: 'control' stays None
                continue
            episode_pop = CSA_Data_oneyear_frag.reset_index()['pop'][0]
            episode_inc = CSA_Data_oneyear_frag.reset_index()['inc'][0]
            episode_gross_spread = CSA_Data_oneyear_frag.reset_index()['gross_spread'][0]
            episode_avg_yield = CSA_Data_oneyear_frag.reset_index()['avg_yield'][0]

            # Find a match
            # The treated values were read above, before winsorisation, so they are raw values
            CSA_Data_oneyear['treated_pop'] = episode_pop
            CSA_Data_oneyear['treated_inc'] = episode_inc
            CSA_Data_oneyear['treated_gross_spread'] = episode_gross_spread
            CSA_Data_oneyear['treated_avg_yield'] = episode_avg_yield
            # Get weighting matrix
            # If data is missing which hinders construction of weighting matrix, skip
            # (a variable with no non-missing value in this year leaves 'control' as None)
            if np.sum(~pd.isnull(CSA_Data_oneyear['inc']))==0:
                continue
            if np.sum(~pd.isnull(CSA_Data_oneyear['pop']))==0:
                continue
            if np.sum(~pd.isnull(CSA_Data_oneyear['gross_spread']))==0:
                continue
            if np.sum(~pd.isnull(CSA_Data_oneyear['avg_yield']))==0:
                continue
            # Candidate values are winsorised with 0.05 cutoffs on each side before
            # the covariance is computed
            CSA_Data_oneyear['inc'] = winsor2(CSA_Data_oneyear['inc'],cutoffs=[0.05,0.05])
            CSA_Data_oneyear['pop'] = winsor2(CSA_Data_oneyear['pop'],cutoffs=[0.05,0.05])
            CSA_Data_oneyear['gross_spread'] = winsor2(CSA_Data_oneyear['gross_spread'],cutoffs=[0.05,0.05])
            CSA_Data_oneyear['avg_yield'] = winsor2(CSA_Data_oneyear['avg_yield'],cutoffs=[0.05,0.05])
            cov = CSA_Data_oneyear[['inc','pop','gross_spread','avg_yield']].cov()
            invcov = np.linalg.inv(cov)
            CSA_Data_oneyear['dist'] = CSA_Data_oneyear.apply(calculate_distance, axis=1,weightingmat=invcov)
            # Nearest candidates first; the filters below keep this order
            CSA_Data_oneyear = CSA_Data_oneyear.sort_values('dist').reset_index(drop=True)
            # Remove oneself from potential matches
            CSA_Data_oneyear = CSA_Data_oneyear[CSA_Data_oneyear['CSA Code']!=row['CSA Code']]
            # Remove other CSAs in the same state from potential matches
            Same_State_CSAs = list(Same_State_CSA_pairs[Same_State_CSA_pairs['CSA_1']==row['CSA Code']]['CSA_2'])
            CSA_Data_oneyear = CSA_Data_oneyear[~CSA_Data_oneyear['CSA Code'].isin(Same_State_CSAs)]
        
            match_counter = 0
            control = []
            for subidx,subrow in CSA_Data_oneyear.iterrows():
                # Years for which potential control is treated itself
                CSA_affected_frag = CSA_affected[CSA_affected['CSA Code']==subrow['CSA Code']]
                CSA_affected_frag = CSA_affected_frag[(CSA_affected_frag['acquiror_market_share_N_avg']>0.01)&
                    (CSA_affected_frag['target_market_share_N_avg']+CSA_affected_frag['other_targets_market_share_N_avg']>0.01)]
                CSA_affected_frag_affected_years = list(CSA_affected_frag['sale_year'].unique())
                # Exclude those that are treated themselves as controls
                # The window is start-4 .. start+4 and uses the uncapped start year
                # from `row`, not the capped episode_start_year used for the data lookup
                if len(set(list(range(row['episode_start_year']-4,row['episode_start_year']+5))).\
                    intersection(set(CSA_affected_frag_affected_years)))>0:
                    # This potential control is treated
                    continue
                else:
                    # This potential control is not treated => Good control
                    control = control+[subrow['CSA Code']]
                    match_counter = match_counter+1
                    if match_counter==N_matches:
                        break
        
            # Stored as a list; it is empty when every candidate was rejected
            episodes.at[idx,'control'] = control

    #--------------------------------------------------------------#
    # A version of sample that excludes cases confounded by CB M&A #
    #--------------------------------------------------------------#

    # For the variant named in `criteria`, attach the CB HHI changes of the
    # episode's start year (inner join on CSA code and year) and keep an episode
    # only if every CB HHI change column is below that variant's cutoff or missing.
    _cb_dif_columns = ['CB_hhi_dif']+['CB_hhi_dif_'+_side+str(_k) for _side in ('m','p') for _k in range(1,5)]
    for _cb_label,_cb_cutoff in [
        ("exclude cases confounded by CB M&A based on 0.01 CB HHI",0.01),
        ("exclude cases confounded by CB M&A based on 0.005 CB HHI",0.005),
        ]:
        if _cb_label not in criteria:
            continue

        Delta_CB_HHI_renamed = Delta_CB_HHI.rename(columns={'year':'episode_start_year'})
        episodes = episodes.merge(Delta_CB_HHI_renamed,on=['CSA Code','episode_start_year'])
        _cb_dif = episodes[_cb_dif_columns]
        episodes = episodes[((_cb_dif<_cb_cutoff)|pd.isnull(_cb_dif)).all(axis=1)]

    # Report and then drop the episodes whose 'control' entry is null, i.e. no match was recorded
    control_is_missing = pd.isnull(episodes['control'])
    print('A control cannot be found for '+str(np.sum(control_is_missing))+' episodes.')
    episodes = episodes[~control_is_missing]

    # Keep a copy of the matched episodes under the name that corresponds to the episode
    # definition currently being processed
    episodes_definition = episodes_file[0]
    if episodes_definition=="By Market Share in terms of N deals":
        CSA_episodes_marketshare_N = episodes.copy()
    if episodes_definition=="By Implied HHI Increase in terms of N deals, >= 0.01":
        CSA_episodes_impliedHHI_N = episodes.copy()



    #############################################
    # Expand to include an event time dimension #
    #############################################
    
    # Every episode gets one row per event year from -4 to +10. Note that "episodes_Exploded" starts
    # out as the same object as "episodes", so the list-valued 'year_to_merger' column is added to
    # "episodes" as well; only the explode step below produces a separate frame.
    episodes_Exploded = episodes
    event_window = list(range(-4,11))
    episodes_Exploded['year_to_merger'] = [event_window for _ in range(len(episodes_Exploded))]
    episodes_Exploded = episodes_Exploded.explode('year_to_merger')
    # Calendar year that corresponds to each event year
    episodes_Exploded['calendar_year'] = \
        episodes_Exploded['episode_start_year']+episodes_Exploded['year_to_merger']

    

    ################################
    # Assemble a regression sample #
    ################################

    #------------------------#
    # Issue level, using GPF #
    #------------------------#

    # List of issue-level frames (treated and control segments), filled one episode X event year at a time
    reg_sample = []
    for idx,row in episodes_Exploded.iterrows():

        # Each characteristic below is read from the current row when the episodes file carries the
        # column, and is None otherwise. "row" is indexed by the columns of "episodes_Exploded", so
        # row.get(...) falls back to None exactly when the column is absent.

        # Event characteristics - strength
        acquiror_market_share_avg = row.get('acquiror_market_share_N_avg')
        target_market_share_avg = row.get('target_market_share_N_avg')
        other_targets_market_share_avg = row.get('other_targets_market_share_N_avg')
        hhi_dif = row.get('hhi_dif')
        top5share_dif = row.get('top5share_dif')
        max_sum_share = row.get('max_sum_share')
        max_min_share = row.get('max_min_share')
        mean_sum_share = row.get('mean_sum_share')

        # Event characteristics - importance for merging firms
        max_acquiror_weight = row.get('max_acquiror_weight')
        max_target_weight = row.get('max_target_weight')

        # Event characteristics - importance of neighbouring CSAs for merging firms
        max_acquiror_weight_in_neighbour = row.get('max_acquiror_weight_in_neighbour')
        max_target_weight_in_neighbour = row.get('max_target_weight_in_neighbour')

        # Event characteristics - driving reasons
        reasonMA_endo_possible = row.get('reasonMA_endo_possible')
        reasonMA_local_dom = row.get('reasonMA_local_dom')
        reasonMA_expand_geo = row.get('reasonMA_expand_geo')
        reasonMA_ind_dom = row.get('reasonMA_ind_dom')
        reasonMA_syn_comb_lines = row.get('reasonMA_syn_comb_lines')
        reasonMA_fin_stress = row.get('reasonMA_fin_stress')
        reasonMA_syn_cost = row.get('reasonMA_syn_cost')
        reasonMA_diversify = row.get('reasonMA_diversify')

        # Frequency of treatment throughout the sample period
        frequency = row.get('frequency')

        # Treated observations: issues sold in the treated CSA during the calendar year of this row,
        # restricted to the outcome, issue-characteristic and underwriter-name columns
        sold_in_calendar_year = GPF['sale_year']==row['calendar_year']
        located_in_treated_CSA = GPF['CSA Code']==row['CSA Code']
        treated_issue_columns = [
            'CSA Code','sale_year','State','County',
            'issuer_type','Issuer',
            'avg_maturity','amount',
            'avg_yield','treasury_avg_spread','MMA_avg_spread',
            'gross_spread','gross_spread_tic_based','gross_spread_nic_based',
            'mod_tic','mod_tic_spread_treasury','mod_tic_spread_MMA',
            'mod_tic_timeFE','mod_tic_spread_treasury_timeFE','mod_tic_spread_MMA_timeFE',
            'underpricing_15to60','underpricing_15to30',
            'Bid','taxable_code','security_type','if_advisor','if_dual_advisor','if_refunding',
            'amount_bracket','mat_bracket','use_short','has_ratings',
            'use_of_proceeds_BB','use_of_proceeds_main','use_of_proceeds_general',
            'has_Moodys','has_Fitch','rating_Moodys','rating_Fitch','insured_amount',
            'AdvisorFeeRatio_hat','CRFeeRatio_hat','InsureFeeRatio_hat',
            'AdvisorFeeRatio_hat_model_timeFE','CRFeeRatio_hat_model_timeFE','InsureFeeRatio_hat_model_timeFE',
            'if_callable','CB_Eligible',
            'num_relationship','TBB_n_bidders',
            ]
        treated_issue_columns = treated_issue_columns+name_GPF_colnames+parent_name_GPF_colnames
        GPF_Seg = GPF[sold_in_calendar_year&located_in_treated_CSA].copy()
        GPF_Seg = GPF_Seg[treated_issue_columns]

        #------------------------------------#
        # Some cross-sectional heterogeneity #
        #------------------------------------#

        # Note that the check is whether a bank is involved in any merger affecting this CSA within
        # [-4,+4] years of the episode start, rather than only in the mergers of the episode itself.
        # Only mergers where the acquiror and the target side both have positive market share are kept.
        in_treated_CSA = CSA_affected['CSA Code']==row['CSA Code']
        in_merger_window = \
            (CSA_affected['sale_year']>=row['episode_start_year']-4)&\
            (CSA_affected['sale_year']<=row['episode_start_year']+4)
        mergers = CSA_affected[in_treated_CSA&in_merger_window][[
            'acquiror','target','acquiror_parent','target_parent',
            'acquiror_market_share_N_avg','target_market_share_N_avg','other_targets_market_share_N_avg']]
        acquiror_is_active = mergers['acquiror_market_share_N_avg']>0
        target_side_is_active = \
            mergers['target_market_share_N_avg']+mergers['other_targets_market_share_N_avg']>0
        mergers = mergers[acquiror_is_active&target_side_is_active]

        # Names (own and parent) on each side of the retained mergers.
        # NOTE(review): if a parent name can be missing, the NaN ends up in these lists and isin()
        # would then also flag issues whose underwriter slot is empty - confirm parents are always filled.
        target_side_names = list(mergers['target'])+list(mergers['target_parent'])
        acquiror_side_names = list(mergers['acquiror'])+list(mergers['acquiror_parent'])
        underwriter_columns = name_GPF_colnames+parent_name_GPF_colnames

        # Whether the underwriter is the target bank in M&A
        GPF_Seg['bank_is_target'] = False
        for column in underwriter_columns:
            GPF_Seg['bank_is_target'] = GPF_Seg[column].isin(target_side_names)|GPF_Seg['bank_is_target']

        # Whether the underwriter is the acquiror bank in M&A
        GPF_Seg['bank_is_acquiror'] = False
        for column in underwriter_columns:
            GPF_Seg['bank_is_acquiror'] = GPF_Seg[column].isin(acquiror_side_names)|GPF_Seg['bank_is_acquiror']

        #---------------------------------------------------------------------#
        # Whether any of the merging banks is an expert in a certain subfield #
        #---------------------------------------------------------------------#

        # Expertise flags are not constructed for the cross-market, withdrawn and all-non-treated samples.
        # NOTE(review): the last label here is "all non-treated CSAs as matches", while other places
        # test "use all non-treated CSAs as matches". The two only agree if "criteria" is a single
        # string (substring test); if it is a list of labels this one never matches - please confirm.
        if not (("Cross-market" in criteria) or ("Withdrawn" in criteria) or ("all non-treated CSAs as matches" in criteria)):

            # Ways of dividing issues into subfields, and the rank cutoffs used to call a bank an expert
            expertise_categories = [
                ('Bid',['N','C','P']),
                ('amount_bracket',['small','med','large','mega']),
                ('mat_bracket',['short','med','long']),
                ('use_short',['gp','edu','util','house','health','ed','tsp','pollute']),
                ('has_ratings',['False','True']),
                ]
            expertise_thresholds = ['5','10','25','50']
            expertise_flags = [
                'BankAttribute_top'+threshold+'_'+sorting_var+'_'+category
                for sorting_var,categories in expertise_categories
                for category in categories
                for threshold in expertise_thresholds]

            # Initialize the dictionaries: no subfield has a pair of expert merging banks yet
            if_merging_banks_expert_US = {flag:False for flag in expertise_flags}
            if_merging_banks_expert_CSA = {flag:False for flag in expertise_flags}

            # As an episode involves many banks, if in any merger both sides are experts, set indicator to 1
            mergers = row['mergers']
            for sub_idx,sub_row in mergers.iterrows():

                # Rank tables of the two merging banks. A bank without any rank records is represented
                # by an empty frame. Only the missing-group KeyError is caught: a bare "except" would
                # also hide every other failure, including warnings that this notebook turns into errors.
                try:
                    RankBankWithinCategoryUS_acquiror = RankBankWithinCategoryUS_gb.get_group(sub_row['acquiror'])
                except KeyError:
                    RankBankWithinCategoryUS_acquiror = pd.DataFrame()

                try:
                    RankBankWithinCategoryUS_target = RankBankWithinCategoryUS_gb.get_group(sub_row['target'])
                except KeyError:
                    RankBankWithinCategoryUS_target = pd.DataFrame()

                try:
                    RankBankWithinCategoryCSA_acquiror = RankBankWithinCategoryCSA_gb.get_group(sub_row['acquiror'])
                except KeyError:
                    RankBankWithinCategoryCSA_acquiror = pd.DataFrame()

                try:
                    RankBankWithinCategoryCSA_target = RankBankWithinCategoryCSA_gb.get_group(sub_row['target'])
                except KeyError:
                    RankBankWithinCategoryCSA_target = pd.DataFrame()

                # Nationwide expertise: both banks need rank records; ranks are taken in the merger year
                if len(RankBankWithinCategoryUS_acquiror)>0 and len(RankBankWithinCategoryUS_target)>0:
                    acquiror_ranks = RankBankWithinCategoryUS_acquiror[
                        RankBankWithinCategoryUS_acquiror['sale_year']==sub_row['sale_year']]
                    target_ranks = RankBankWithinCategoryUS_target[
                        RankBankWithinCategoryUS_target['sale_year']==sub_row['sale_year']]
                    for flag in expertise_flags:
                        if if_merging_banks_expert_US[flag]:
                            # Already established by an earlier merger of this episode
                            continue
                        if_acquiror_bank_top = np.any(acquiror_ranks[flag])
                        if_target_bank_top = np.any(target_ranks[flag])
                        if_merging_banks_expert_US[flag] = if_acquiror_bank_top and if_target_bank_top

                # Within-CSA expertise: same logic, with ranks taken in the treated CSA and the merger year
                if len(RankBankWithinCategoryCSA_acquiror)>0 and len(RankBankWithinCategoryCSA_target)>0:
                    acquiror_ranks = RankBankWithinCategoryCSA_acquiror[
                        (RankBankWithinCategoryCSA_acquiror['CSA Code']==row['CSA Code'])&
                        (RankBankWithinCategoryCSA_acquiror['sale_year']==sub_row['sale_year'])]
                    target_ranks = RankBankWithinCategoryCSA_target[
                        (RankBankWithinCategoryCSA_target['CSA Code']==row['CSA Code'])&
                        (RankBankWithinCategoryCSA_target['sale_year']==sub_row['sale_year'])]
                    for flag in expertise_flags:
                        if if_merging_banks_expert_CSA[flag]:
                            continue
                        if_acquiror_bank_top = np.any(acquiror_ranks[flag])
                        if_target_bank_top = np.any(target_ranks[flag])
                        if_merging_banks_expert_CSA[flag] = if_acquiror_bank_top and if_target_bank_top

            # Whether the merging underwriter is an expert in underwriting for the specific issue, using multiple ways of
            # dividing expertise: an issue is flagged when it belongs to a category in which both sides of
            # some merger of the episode are experts
            for sorting_var,categories in expertise_categories:
                for threshold in expertise_thresholds:
                    for scope,if_merging_banks_expert in [
                        ('US',if_merging_banks_expert_US),
                        ('CSA',if_merging_banks_expert_CSA)]:
                        expert_column = 'if_'+scope+'_expert_'+sorting_var+'_'+threshold
                        GPF_Seg[expert_column] = False
                        for category in categories:
                            if if_merging_banks_expert['BankAttribute_top'+threshold+'_'+sorting_var+'_'+category]:
                                GPF_Seg.loc[GPF_Seg[sorting_var]==category,expert_column] = True

        # Stamp every treated issue with the identifiers of the episode and its characteristics.
        # The values are constant within the segment; the order below fixes the column order.
        treated_annotations = [
            ('treated',1),
            ('episode_start_year',row['episode_start_year']),
            ('year_to_merger',row['year_to_merger']),
            ('calendar_year',row['calendar_year']),
            ('treated_csa',row['CSA Code']), # Used for constructing cohort X issuer FEs
            # Event characteristics - strength
            ('acquiror_market_share_avg',acquiror_market_share_avg),
            ('target_market_share_avg',target_market_share_avg),
            ('other_targets_market_share_avg',other_targets_market_share_avg),
            ('hhi_dif',hhi_dif),
            ('max_sum_share',max_sum_share),
            ('max_min_share',max_min_share),
            ('mean_sum_share',mean_sum_share),
            ('top5share_dif',top5share_dif),
            # Event characteristics - importance for merging firms
            ('max_acquiror_weight',max_acquiror_weight),
            ('max_target_weight',max_target_weight),
            ('max_acquiror_weight_in_neighbour',max_acquiror_weight_in_neighbour),
            ('max_target_weight_in_neighbour',max_target_weight_in_neighbour),
            # Event characteristics - driving reasons
            ('reasonMA_endo_possible',reasonMA_endo_possible),
            ('reasonMA_local_dom',reasonMA_local_dom),
            ('reasonMA_expand_geo',reasonMA_expand_geo),
            ('reasonMA_ind_dom',reasonMA_ind_dom),
            ('reasonMA_syn_comb_lines',reasonMA_syn_comb_lines),
            ('reasonMA_fin_stress',reasonMA_fin_stress),
            ('reasonMA_syn_cost',reasonMA_syn_cost),
            ('reasonMA_diversify',reasonMA_diversify),
            ('frequency',frequency),
            ]
        for annotation_name,annotation_value in treated_annotations:
            GPF_Seg[annotation_name] = annotation_value
        GPF_Seg_Treated = GPF_Seg

        # For the sample with all non-treated as control, restrict variables to limit file size
        if "use all non-treated CSAs as matches" in criteria:
            columns_kept_in_large_sample = [
                'CSA Code','sale_year','State','County',
                'issuer_type','Issuer',
                'treated','episode_start_year','year_to_merger','calendar_year','treated_csa',
                'avg_maturity','amount',
                'avg_yield','treasury_avg_spread','MMA_avg_spread',
                'gross_spread','gross_spread_tic_based','gross_spread_nic_based',
                'mod_tic','mod_tic_spread_treasury','mod_tic_spread_MMA',
                'mod_tic_timeFE','mod_tic_spread_treasury_timeFE','mod_tic_spread_MMA_timeFE',
                ]
            GPF_Seg_Treated = GPF_Seg_Treated[columns_kept_in_large_sample]

        # Control observations
        if row['control']==None:
            continue
        GPF_Seg_Control = pd.DataFrame()
        for item in row['control']:
            GPF_Seg = GPF[(GPF['sale_year']==row['calendar_year'])&(GPF['CSA Code']==item)]
            GPF_Seg = GPF_Seg[[
                'CSA Code','sale_year','State','County',
                'issuer_type','Issuer',
                'avg_maturity','amount',
                'avg_yield','treasury_avg_spread','MMA_avg_spread',
                'gross_spread','gross_spread_tic_based','gross_spread_nic_based',
                'mod_tic','mod_tic_spread_treasury','mod_tic_spread_MMA',
                'mod_tic_timeFE','mod_tic_spread_treasury_timeFE','mod_tic_spread_MMA_timeFE',
                'underpricing_15to60','underpricing_15to30',
                'Bid','taxable_code','security_type','if_advisor','if_dual_advisor','if_refunding',
                'amount_bracket','mat_bracket','use_short','has_ratings',
                'use_of_proceeds_BB','use_of_proceeds_main','use_of_proceeds_general',
                'has_Moodys','has_Fitch','rating_Moodys','rating_Fitch','insured_amount',
                'AdvisorFeeRatio_hat','CRFeeRatio_hat','InsureFeeRatio_hat',
                'AdvisorFeeRatio_hat_model_timeFE','CRFeeRatio_hat_model_timeFE','InsureFeeRatio_hat_model_timeFE',
                'if_callable','CB_Eligible',
                'num_relationship','TBB_n_bidders',
                ]+name_GPF_colnames+parent_name_GPF_colnames]

            if ("Cross-market" in criteria) or ("Withdrawn" in criteria)  or ("all non-treated CSAs as matches" in criteria):
                x = 1 # Do nothing
            else:
                # Note that for control banks, "bank_is_target" and "bank_is_acquiror" use M&A in the control areas
                mergers = CSA_affected[
                    (CSA_affected['CSA Code']==item)&
                    (CSA_affected['sale_year']>=row['episode_start_year']-4)&
                    (CSA_affected['sale_year']<=row['episode_start_year']+4)
                    ][['acquiror','target','acquiror_parent','target_parent',
                    'acquiror_market_share_N_avg','target_market_share_N_avg','other_targets_market_share_N_avg']]
                mergers = mergers[(mergers['acquiror_market_share_N_avg']>0)&(mergers['target_market_share_N_avg']+mergers['other_targets_market_share_N_avg']>0)]
    
            # Whether the merging underwriter is an expert in underwriting for the specific issue, using multiple ways of
            # dividing expertise. Note that I set it all to 0 for control areas
            if ("Cross-market" in criteria) or ("Withdrawn" in criteria)  or ("all non-treated CSAs as matches" in criteria):
                x = 1 # Do nothing
            else:
                for sorting_var in ['Bid','amount_bracket','mat_bracket','use_short','has_ratings']:
                    if sorting_var=='Bid':
                        categories = ['N','C','P']
                    if sorting_var=='amount_bracket':
                        categories = ['small','med','large','mega']
                    if sorting_var=='mat_bracket':
                        categories = ['short','med','long']
                    if sorting_var=='use_short':
                        categories = ['gp','edu','util','house','health','ed','tsp','pollute']
                    if sorting_var=='has_ratings':
                        categories = ['False','True']
                    for threshold in ['5','10','25','50']:
                        GPF_Seg['if_US_expert_'+sorting_var+'_'+threshold] = False
                        GPF_Seg['if_CSA_expert_'+sorting_var+'_'+threshold] = False

            if ("Cross-market" in criteria) or ("Withdrawn" in criteria)  or ("all non-treated CSAs as matches" in criteria):
                x = 1 # Do nothing
            else:
                # Whether the underwriter is the target bank in M&A
                GPF_Seg['bank_is_target'] = False
                for column in name_GPF_colnames:
                    GPF_Seg['bank_is_target'] = \
                    (GPF_Seg[column].isin(list(mergers['target'])+list(mergers['target_parent']))) \
                    |(GPF_Seg['bank_is_target'])
                for column in parent_name_GPF_colnames:
                    GPF_Seg['bank_is_target'] = \
                    (GPF_Seg[column].isin(list(mergers['target'])+list(mergers['target_parent']))) \
                    |(GPF_Seg['bank_is_target'])
                # Whether the underwriter is the acquiror bank in M&A
                GPF_Seg['bank_is_acquiror'] = False
                for column in name_GPF_colnames:
                    GPF_Seg['bank_is_acquiror'] = \
                    (GPF_Seg[column].isin(list(mergers['acquiror'])+list(mergers['acquiror_parent'])))\
                    |(GPF_Seg['bank_is_acquiror'])
                for column in parent_name_GPF_colnames:
                    GPF_Seg['bank_is_acquiror'] = \
                    (GPF_Seg[column].isin(list(mergers['acquiror'])+list(mergers['acquiror_parent'])))\
                    |(GPF_Seg['bank_is_acquiror'])
                
            GPF_Seg['treated'] = 0
            GPF_Seg['episode_start_year'] = row['episode_start_year']
            GPF_Seg['year_to_merger'] = row['year_to_merger']
            GPF_Seg['calendar_year'] = row['calendar_year']
            GPF_Seg['treated_csa'] = row['CSA Code'] # The treated CSA corresponding to this control unit
            # Event characteristics - strength
            GPF_Seg['acquiror_market_share_avg'] = acquiror_market_share_avg
            GPF_Seg['target_market_share_avg'] = target_market_share_avg
            GPF_Seg['other_targets_market_share_avg'] = other_targets_market_share_avg
            GPF_Seg['hhi_dif'] = hhi_dif
            GPF_Seg['top5share_dif'] = top5share_dif
            # Event characteristics - importance for merging firms
            GPF_Seg['max_acquiror_weight'] = max_acquiror_weight
            GPF_Seg['max_target_weight'] = max_target_weight
            GPF_Seg['max_acquiror_weight_in_neighbour'] = max_acquiror_weight_in_neighbour
            GPF_Seg['max_target_weight_in_neighbour'] = max_target_weight_in_neighbour
            # Event characteristics - driving reasons
            GPF_Seg['reasonMA_endo_possible'] = reasonMA_endo_possible
            GPF_Seg['reasonMA_local_dom'] = reasonMA_local_dom
            GPF_Seg['reasonMA_expand_geo'] = reasonMA_expand_geo
            GPF_Seg['reasonMA_ind_dom'] = reasonMA_ind_dom
            GPF_Seg['reasonMA_syn_comb_lines'] = reasonMA_syn_comb_lines
            GPF_Seg['reasonMA_fin_stress'] = reasonMA_fin_stress
            GPF_Seg['reasonMA_syn_cost'] = reasonMA_syn_cost
            GPF_Seg['reasonMA_diversify'] = reasonMA_diversify
            GPF_Seg['frequency'] = frequency

            # For the sample with all non-treated as control, restrict variables to limit file size
            if "use all non-treated CSAs as matches" in criteria:
                GPF_Seg = GPF_Seg[[
                    'CSA Code','sale_year','State','County',
                    'issuer_type','Issuer',
                    'treated','episode_start_year','year_to_merger','calendar_year','treated_csa',
                    'avg_maturity','amount',
                    'avg_yield','treasury_avg_spread','MMA_avg_spread',
                    'gross_spread','gross_spread_tic_based','gross_spread_nic_based',
                    'mod_tic','mod_tic_spread_treasury','mod_tic_spread_MMA',
                    'mod_tic_timeFE','mod_tic_spread_treasury_timeFE','mod_tic_spread_MMA_timeFE',
                    ]]

            GPF_Seg_Control = pd.concat([GPF_Seg_Control,GPF_Seg])

        if len(GPF_Seg_Treated)>0 and len(GPF_Seg_Control)>0:
            reg_sample = reg_sample+[GPF_Seg_Treated,GPF_Seg_Control]
    
    # Stack all treated/control segments and attach market concentration by CSA and calendar year
    # (default inner merge: issues without a matching row in HHI_byCSA are dropped)
    reg_sample = pd.concat(reg_sample).merge(HHI_byCSA,on=['CSA Code','calendar_year'])

    # County demographics. The outer merge followed by removing 'right_only' rows keeps every issue
    # while discarding county-years in which the sample has no issue.
    County_Composite = pd.read_csv("../CleanData/Demographics/0C_County_Composite.csv")
    County_Composite = County_Composite[['year','State','County','black_ratio','pop']]
    County_Composite = County_Composite.rename(columns={'year':'calendar_year'})
    reg_sample = (
        reg_sample
        .merge(County_Composite,on=['State','County','calendar_year'],how='outer',indicator=True)
        .loc[lambda merged: merged['_merge']!='right_only']
        .drop(columns=['_merge'])
        )

    # CSA-level data from CSA_INC, attached in the same way
    CSA_INC_renamed = CSA_INC.rename(columns={'year':'calendar_year'})
    reg_sample = (
        reg_sample
        .merge(CSA_INC_renamed,on=['CSA Code','calendar_year'],how='outer',indicator=True)
        .loc[lambda merged: merged['_merge']!='right_only']
        .drop(columns=['_merge'])
        )

    reg_sample.to_csv(file_path)


    # #-----------------------#
    # # Bond level, using GPF #
    # #-----------------------#

    # if 'mergers' in reg_sample.columns:
    #     reg_sample = reg_sample.drop(columns=['mergers'])
    # if 'n_deals_prior' in reg_sample.columns:
    #     reg_sample = reg_sample.drop(columns=['n_deals_prior'])
    # if 'n_deals_post' in reg_sample.columns:
    #     reg_sample = reg_sample.drop(columns=['n_deals_post'])
    
    # def proc_list(reg_sample):
    #     reg_sample_bond_level = []
    #     for idx,row in reg_sample.iterrows():
    #         row_dict = reg_sample.loc[idx].to_dict()
    #         if str(row['yield_by_maturity_list'])!='nan':
    #             yield_by_maturity_list = eval(row['yield_by_maturity_list'])
    #             if str(row['spread_by_maturity_list'])!='nan':
    #                 spread_by_maturity_list = eval(row['spread_by_maturity_list'])
    #             else:
    #                 spread_by_maturity_list = [None for item in yield_by_maturity_list]
    #             maturity_by_maturity_list = eval(row['maturity_by_maturity_list'])
    #             amount_by_maturity_list = eval(row['amount_by_maturity_list'])
    #             for bond_idx in range(0,len(yield_by_maturity)):
    #                 row_dict['yield_one_bond'] = yield_by_maturity_list[bond_idx]
    #                 row_dict['spread_one_bond'] = spread_by_maturity_list[bond_idx]
    #                 row_dict['maturity_one_bond'] = maturity_by_maturity_list[bond_idx]
    #                 row_dict['amount_one_bond'] = amount_by_maturity_list[bond_idx]
    #                 reg_sample_bond_level = reg_sample_bond_level+[row_dict]
    #     reg_sample_bond_level = pd.DataFrame(reg_sample_bond_level)
    #     return reg_sample_bond_level

    # meta_columns = list(proc_list(reg_sample.sample(10)).columns)
    # reg_sample_dd = dd.from_pandas(reg_sample, npartitions=20)
    # with dask.config.set(scheduler='processes',num_workers=20):
    #     reg_sample_bond_level = reg_sample_dd.map_partitions(proc_list,meta=pd.DataFrame(columns=meta_columns)).compute()
    # dropped_columns =\
    #     [item for item in list(reg_sample_bond_level.columns) if item[:11]=='parent_name']+\
    #     [item for item in list(reg_sample_bond_level.columns) if item[:8]=='name_GPF']+\
    #     ['avg_yield','avg_spread','avg_maturity','maturity_by_maturity_list','amount_by_maturity_list','yield_by_maturity_list','spread_by_maturity_list']
    # reg_sample_bond_level = reg_sample_bond_level.drop(columns=dropped_columns)
    # reg_sample_bond_level.to_csv(file_path_bondlevel)
    
    #--------------------#
    # Sample of quantity #
    #--------------------#

    # Mark out the treated CSA for further inclusion of cohort X issuer fixed effects
    episodes_Exploded['treated_csa'] = episodes_Exploded['CSA Code']

    # Remove these columns from the indexing file whenever they are present
    for unneeded_column in ['mergers','n_deals_prior','n_deals_post']:
        if unneeded_column in episodes_Exploded.columns:
            episodes_Exploded = episodes_Exploded.drop(columns=[unneeded_column])
    
    # Generate "episodes_Exploded_QSample", an indexing table that has a time-to-event dimension and separate observations for
    # treated and control CSAs
    
    episodes_Exploded_QSample = episodes_Exploded.reset_index(drop=True)
    for idx,row in episodes_Exploded_QSample.iterrows():
        if str(episodes_Exploded_QSample.at[idx,'control'])!='None' and str(episodes_Exploded_QSample.at[idx,'control'])!='nan':
            episodes_Exploded_QSample.at[idx,'num_control'] = len(row['control'])
    num_control = int(np.max(episodes_Exploded_QSample['num_control']))
    for ctrl_ind in range(0,num_control):
        episodes_Exploded_QSample['control_'+str(ctrl_ind)] = None
        for idx,row in episodes_Exploded_QSample.iterrows():
            if str(episodes_Exploded_QSample.at[idx,'control'])!='None' and \
                str(episodes_Exploded_QSample.at[idx,'control'])!='nan':
                # Add variables "control_0", "control_1", etc, for each control CSA
                if ctrl_ind<len(row['control']):
                    episodes_Exploded_QSample.at[idx,'control_'+str(ctrl_ind)] = row['control'][ctrl_ind]

    # Treated observations
    COLs_control = [item for item in episodes_Exploded_QSample.columns if item[:8]=='control_']
    episodes_Exploded_QSample_Treated = episodes_Exploded_QSample.drop(columns=COLs_control+['num_control'])
    episodes_Exploded_QSample_Treated['Treated'] = 1

    # Control observations
    episodes_Exploded_QSample_Control = pd.DataFrame()
    for ctrl_ind in range(0,num_control):
        episodes_Exploded_QSample_OneControl = episodes_Exploded_QSample.drop(columns={'CSA Code'}).\
            rename(columns={'control_'+str(ctrl_ind):'CSA Code'})
        COLs_control = [item for item in episodes_Exploded_QSample_OneControl.columns if item[:8]=='control_']
        episodes_Exploded_QSample_OneControl = episodes_Exploded_QSample_OneControl.drop(columns=COLs_control+['num_control'])
        episodes_Exploded_QSample_Control = pd.concat([episodes_Exploded_QSample_Control,episodes_Exploded_QSample_OneControl])
    episodes_Exploded_QSample_Control['Treated'] = 0
    episodes_Exploded_QSample_Control = episodes_Exploded_QSample_Control[~pd.isnull(episodes_Exploded_QSample_Control['CSA Code'])]

    # A cohort X firm x year to treatment level dataset
    episodes_Exploded_QSample = pd.concat([episodes_Exploded_QSample_Treated,episodes_Exploded_QSample_Control])

    #''''''''''''''''''#
    # Overall quantity #
    #..................#
    
    # Add the county dimension into the data, along with amount of issue
    CountyQuant = StateXCounty.merge(CBSAData[['CSA Code','State','County']],on=['State','County']).\
        rename(columns={'sale_year':'calendar_year'})
    episodes_Exploded_QSample_Overall = episodes_Exploded_QSample.merge(CountyQuant,on=['CSA Code','calendar_year'])
    
    County_Composite = pd.read_csv("../CleanData/Demographics/0C_County_Composite.csv")
    County_Composite = County_Composite[['year','State','County','black_ratio','pop']].rename(columns={'year':'calendar_year'})
    episodes_Exploded_QSample_Overall['calendar_year'] = episodes_Exploded_QSample_Overall['calendar_year'].astype(int)
    episodes_Exploded_QSample_Overall = episodes_Exploded_QSample_Overall.merge(County_Composite,on=['State','County','calendar_year'],how='outer',indicator=True)
    episodes_Exploded_QSample_Overall = episodes_Exploded_QSample_Overall[episodes_Exploded_QSample_Overall['_merge']!='right_only']
    episodes_Exploded_QSample_Overall = episodes_Exploded_QSample_Overall.merge(HHI_byCSA,on=['CSA Code','calendar_year'])
    
    episodes_Exploded_QSample_Overall.to_csv(file_path_Quant)
    
    #''''''''''''''''''''''#
    # By main use quantity #
    #......................#
    
    # Add the county dimension into the data, along with amount of issue
    CountyQuant = StateXCountyXUsageGeneral.merge(CBSAData[['CSA Code','State','County']],on=['State','County']).\
        rename(columns={'sale_year':'calendar_year'})
    episodes_Exploded_QSample_UsageGeneral = episodes_Exploded_QSample.merge(CountyQuant,on=['CSA Code','calendar_year'])
    
    County_Composite = pd.read_csv("../CleanData/Demographics/0C_County_Composite.csv")
    County_Composite = County_Composite[['year','State','County','black_ratio','pop']].rename(columns={'year':'calendar_year'})
    episodes_Exploded_QSample_UsageGeneral['calendar_year'] = episodes_Exploded_QSample_UsageGeneral['calendar_year'].astype(int)
    episodes_Exploded_QSample_UsageGeneral = episodes_Exploded_QSample_UsageGeneral.merge(County_Composite,on=['State','County','calendar_year'],how='outer',indicator=True)
    episodes_Exploded_QSample_UsageGeneral = episodes_Exploded_QSample_UsageGeneral[episodes_Exploded_QSample_UsageGeneral['_merge']!='right_only']
    episodes_Exploded_QSample_UsageGeneral = episodes_Exploded_QSample_UsageGeneral.merge(HHI_byCSA,on=['CSA Code','calendar_year'])
    
    episodes_Exploded_QSample_UsageGeneral.to_csv(file_path_Quant_GeneralUse)
    
    #'''''''''''''''''''''''''#
    # By issuer type quantity #
    #.........................#
    
    # Add the county dimension into the data, along with amount of issue
    CountyQuant = StateXCountyXIssuerType.merge(CBSAData[['CSA Code','State','County']],on=['State','County']).\
        rename(columns={'sale_year':'calendar_year'})
    episodes_Exploded_QSample_IssuerType = episodes_Exploded_QSample.merge(CountyQuant,on=['CSA Code','calendar_year'])
    
    County_Composite = pd.read_csv("../CleanData/Demographics/0C_County_Composite.csv")
    County_Composite = County_Composite[['year','State','County','black_ratio','pop']].rename(columns={'year':'calendar_year'})
    episodes_Exploded_QSample_IssuerType['calendar_year'] = episodes_Exploded_QSample_IssuerType['calendar_year'].astype(int)
    episodes_Exploded_QSample_IssuerType = episodes_Exploded_QSample_IssuerType.merge(County_Composite,on=['State','County','calendar_year'],how='outer',indicator=True)
    episodes_Exploded_QSample_IssuerType = episodes_Exploded_QSample_IssuerType[episodes_Exploded_QSample_IssuerType['_merge']!='right_only']
    episodes_Exploded_QSample_IssuerType = episodes_Exploded_QSample_IssuerType.merge(HHI_byCSA,on=['CSA Code','calendar_year'])
    
    episodes_Exploded_QSample_IssuerType.to_csv(file_path_Quant_IssuerType)

    #''''''''''''''''''''''#
    # By bid type quantity #
    #......................#
    
    # Add the county dimension into the data, along with amount of issue
    CountyQuant = StateXCountyXBid.merge(CBSAData[['CSA Code','State','County']],on=['State','County']).\
        rename(columns={'sale_year':'calendar_year'})
    episodes_Exploded_QSample_Bid = episodes_Exploded_QSample.merge(CountyQuant,on=['CSA Code','calendar_year'])
    
    County_Composite = pd.read_csv("../CleanData/Demographics/0C_County_Composite.csv")
    County_Composite = County_Composite[['year','State','County','black_ratio','pop']].rename(columns={'year':'calendar_year'})
    episodes_Exploded_QSample_Bid['calendar_year'] = episodes_Exploded_QSample_Bid['calendar_year'].astype(int)
    episodes_Exploded_QSample_Bid = episodes_Exploded_QSample_Bid.merge(County_Composite,on=['State','County','calendar_year'],how='outer',indicator=True)
    episodes_Exploded_QSample_Bid = episodes_Exploded_QSample_Bid[episodes_Exploded_QSample_Bid['_merge']!='right_only']
    episodes_Exploded_QSample_Bid = episodes_Exploded_QSample_Bid.merge(HHI_byCSA,on=['CSA Code','calendar_year'])
    
    episodes_Exploded_QSample_Bid.to_csv(file_path_Quant_Bid)

    #-------------------------------#
    # Sample of government finances #
    #-------------------------------#
    
    GovFinData = pd.read_csv('../CleanData/GovFinSurvey/0G_GovFinData.csv',low_memory=False)
    GovFinData = GovFinData.rename(columns={'Year4':'calendar_year'})
    episodes_Exploded_GovFinSample = episodes_Exploded_QSample.merge(GovFinData,on=['CSA Code','calendar_year'])
    episodes_Exploded_GovFinSample = episodes_Exploded_GovFinSample.merge(HHI_byCSA,on=['CSA Code','calendar_year'])
    episodes_Exploded_GovFinSample.to_csv(file_path_GovFin)

    #---------------------------------#
    # Sample of population and income #
    #---------------------------------#
    
    episodes_Exploded_QSample['calendar_year'] = episodes_Exploded_QSample['calendar_year'].astype(int)
    episodes_Exploded_IncPop = episodes_Exploded_QSample.merge(CSA_INC.rename(columns={'year':'calendar_year'}),
        on=['CSA Code','calendar_year'])
    episodes_Exploded_IncPop = episodes_Exploded_IncPop.merge(CSA_POP.rename(columns={'year':'calendar_year'}),
        on=['CSA Code','calendar_year'])
    episodes_Exploded_IncPop.to_csv(file_path_IncPop)

    episodes_Exploded_QSample = episodes_Exploded_QSample[['episode_start_year','CSA Code','year_to_merger','calendar_year','Treated']]
    inc_by_county_long = pd.read_csv("../CleanData/Demographics/0C_County_Inc.csv")
    inc_by_county_long = inc_by_county_long.rename(columns={'year':'calendar_year'})
    episodes_Exploded_CountyInc = episodes_Exploded_QSample.merge(inc_by_county_long,on=['CSA Code','calendar_year'])
    episodes_Exploded_CountyInc.to_csv(file_path_CountyInc)

    print('Exported regression sample for '+episodes_file[0])


A control cannot be found for 1 episodes.
Exported regression sample for By Implied HHI Increase in terms of N deals, >= 0.01


### 5.1.4 Plot treated-control matched sample

In [None]:
%%script false --no-raise-error

# Columns to display for the entry labelled 20 of the "mergers" column of "episodes"
merger_preview_columns = [
    'CSA Code','sale_year','acquiror','target','other_targets',
    'acquiror_parent','target_parent',
    'acquiror_market_share_N_avg','target_market_share_N_avg',
    ]
episodes['mergers'][20][merger_preview_columns]

In [None]:
%%script false --no-raise-error

# Lookup from "CSA Code" to "CSA Title"; when a code appears on several rows, the title of its first row is used
CSADict = (
    CBSAData[['CSA Code','CSA Title']]
    .drop_duplicates(subset=['CSA Code'])
    .set_index('CSA Code')['CSA Title']
    .to_dict()
)

In [None]:
%%script false --no-raise-error

# Plot how control-treated are assembled
#
# The counties of a few example treated CSAs and of the first control CSA matched to each of them are
# shaded on a county map. Every (treated, control) pair gets a dark/light colour pair.

episodes = CSA_episodes_impliedHHI_N

# Read the US counties GeoJSON file and drop STATE codes '02', '15' and '72'
# (presumably Alaska, Hawaii and Puerto Rico, so that only the contiguous US is drawn -- TODO confirm)
gdf = gpd.read_file('../RawData/MSA/US-counties.geojson')
gdf = gdf[~gdf['STATE'].isin(['02','15','72'])]
gdf = gdf.rename(columns={'STATE':'FIPS State Code','COUNTY':'FIPS County Code'})

# Sometimes treated and control are adjacent: Improved by requiring treated and control to be not in the same state. Note that further
# improvement can be made

# One entry per plotted example: (label of the episode in "episodes", colour of the treated CSA, colour of its control CSA).
# Everything below (map values, colour map, legend) is derived from this single table, so that they cannot drift apart.
example_episodes = [
    (30,'darkgreen','lime'),
    (20,'darkblue','cornflowerblue'),
    (110,'darkred','lightcoral'),
    ]

# Map value -> colour. Value 0 is the background given to counties that belong to none of the examples.
color_dict = {0:'azure'}
counties_list = []
legend_treated = []
legend_control = []
for example_num,(episode_ind,color_treated,color_control) in enumerate(example_episodes,start=1):
    CSA_treated = episodes['CSA Code'][episode_ind]
    # Only the first matched control CSA is shown
    CSA_control = episodes['control'][episode_ind][0]
    for CSA,color in ((CSA_treated,color_treated),(CSA_control,color_control)):
        # Values are handed out in order: treated 1, control 1, treated 2, control 2, ...
        value = len(color_dict)
        color_dict[value] = color
        counties_oneCSA = CBSAData[CBSAData['CSA Code']==CSA][['FIPS State Code','FIPS County Code']].copy()
        counties_oneCSA['value'] = value
        counties_list.append(counties_oneCSA)
    legend_treated.append(('Treated CSA '+str(example_num)+': '+CSADict[CSA_treated],color_treated))
    legend_control.append(('Control CSA '+str(example_num)+': '+CSADict[CSA_control],color_control))

# Concatenate once from a list rather than growing from an empty DataFrame, which can raise a
# FutureWarning on recent versions of pandas (warnings are turned into errors in this notebook)
counties = pd.concat(counties_list)

# Bring the FIPS codes of both tables to integers so that they can be merged on
counties['FIPS State Code'] = counties['FIPS State Code'].astype(int)
counties['FIPS County Code'] = counties['FIPS County Code'].astype(int)
gdf['FIPS State Code'] = gdf['FIPS State Code'].astype(int)
gdf['FIPS County Code'] = gdf['FIPS County Code'].astype(int)

fig, ax = plt.subplots(1, 1, figsize=(12, 8))

# Merge the GeoDataFrame with data
merged = gdf.merge(counties,on=['FIPS State Code','FIPS County Code'],how='outer')
merged.loc[pd.isnull(merged['value']),'value'] = 0
merged.plot(ax=ax, column='value', cmap=colors.ListedColormap(list(color_dict.values())), edgecolor='0.9', legend=False)

# Legend: all treated CSAs first, then all control CSAs, so that with ncol=2 each group fills one column
legend_entries = legend_treated+legend_control
legend_labels = [label for label,color in legend_entries]
legend_handles = [
    plt.Line2D([0], [0], marker='o', color='w', markersize=10, markerfacecolor=color)
    for label,color in legend_entries
    ]
ax.legend(legend_handles, legend_labels,loc='lower center',bbox_to_anchor=(0.5, -0.15),ncol=2,fontsize='11.5')
ax.axis("off")
# NOTE(review): this changes the warning filters globally (DeprecationWarning is ignored from here on, also in
# later cells). It is placed right before savefig, presumably because exporting raises one -- TODO confirm.
warnings.filterwarnings("ignore", category=DeprecationWarning)
fig.savefig('../Draft/figs/TreatedControlSample.eps', format='eps', bbox_inches='tight')

# 6. Construct a Dataset for What Predicts Consolidation

In [None]:
# Identify, for each CSA X year, what would be the implied increase in HHI based on M&As in the next three years
#
# For every year, the "prior" market of a CSA is made of the deals in GPF sold during the three preceding years
# (year-3 to year-1). Each deal is split equally among its parent underwriters and the HHI of the resulting shares
# is computed. Targets of the subsequent mergers in MA are then relabelled as their acquirors and the HHI is
# recomputed. One record per CSA X year is stored, with the difference between the two HHIs, the prior HHI and
# the prior issuance amount.

def _hhi_from_n_deals(n_deals_series):
    """Return the HHI (sum of squared shares) of a Series of deal counts, or NaN when the counts sum to zero."""
    total = np.sum(n_deals_series)
    if total==0:
        return np.nan
    return np.sum((n_deals_series/total)**2)

# Columns holding the parent firm of each underwriter of a deal. This is defined before the loops because it is
# needed from the very first CSA onwards.
parent_name_colnames = ['parent_name_'+str(i) for i in range(0,len(raw_name_GPF_colnames))]

ImpliedDeltaHHI = []

for year in range(1970,2020):

    GPF_priorMA = GPF[(GPF['sale_year']<=year-1)&(GPF['sale_year']>=year-3)]
    CSAs = [item for item in GPF_priorMA['CSA Code'].unique() if str(item)!='nan']

    # Mergers used to predict the market structure after consolidation. They do not depend on the CSA, so they are
    # selected once per year.
    # NOTE(review): the window runs from year to year+3 inclusive, i.e. four calendar years, whereas the comment
    # at the top says "next three years" -- confirm which one is intended.
    MA_post = MA[(MA['sale_year']>=year)&(MA['sale_year']<=year+3)]
    merger_pairs = list(zip(MA_post['target'],MA_post['acquiror']))

    for CSA in CSAs:

        GPF_oneCSA_priorMA = GPF_priorMA[GPF_priorMA['CSA Code']==CSA]

        # Calculate (1) HHI (by parent firm) in the three years prior (2) Predicted HHI after the mergers complete

        # Record market shares before merger episode: each deal counts as one, split equally among its underwriters
        n_deals_by_underwriter = {}
        for idx,row in GPF_oneCSA_priorMA.iterrows():
            underwriters_onedeal = [row[item] for item in parent_name_colnames
                if row[item] is not None and str(row[item])!='nan']
            n_underwriters = len(underwriters_onedeal)
            for item in underwriters_onedeal:
                n_deals_by_underwriter[item] = n_deals_by_underwriter.get(item,0)+1/n_underwriters
        # Built column by column so that a CSA without any identified underwriter gives an empty table (and a
        # NaN HHI below) instead of a table that lacks the "n_deals" column
        n_deals = pd.DataFrame({
            'underwriter':list(n_deals_by_underwriter.keys()),
            'n_deals':pd.Series(list(n_deals_by_underwriter.values()),dtype=float),
            })

        # HHI prior to merger
        hhi_prior = _hhi_from_n_deals(n_deals['n_deals'])

        # Implied HHI post merger: relabel every target as its acquiror, in the order of MA, then pool their deals
        for target,acquiror in merger_pairs:
            n_deals.loc[n_deals['underwriter']==target,'underwriter'] = acquiror
        # The aggregation is named by the string 'sum': passing the builtin "sum" raises a FutureWarning on recent
        # versions of pandas, and warnings are turned into errors in this notebook
        n_deals = n_deals.groupby('underwriter').agg({'n_deals':'sum'}).reset_index()
        hhi_predicted = _hhi_from_n_deals(n_deals['n_deals'])

        hhi_dif = hhi_predicted-hhi_prior

        # Also record prior issuance amount, which will be an explanatory variable
        amount = np.sum(GPF_oneCSA_priorMA['amount'].dropna())

        # The key 'hhi_piror' (sic) is kept as is, since it becomes the name of the exported column
        ImpliedDeltaHHI.append({'CSA Code':CSA,'year':year,'hhi_dif':hhi_dif,'hhi_piror':hhi_prior,'amount':amount})

ImpliedDeltaHHI = pd.DataFrame(ImpliedDeltaHHI)

In [None]:
# Add in explanatory variables:
# Income, income growth
# Population, population growth
# Population age
# Minority ratio
# Current degree of concentration
# Past issuance per-capita
#
# The variables are taken from CSA_POP and CSA_INC, both matched on CSA X year. The merges use the default
# (inner) join, so only CSA X years present in all three tables are exported.

merge_keys = ['CSA Code','year']
ImpliedDeltaHHI = (
    ImpliedDeltaHHI
    .merge(CSA_POP,on=merge_keys)
    .merge(CSA_INC,on=merge_keys)
)
ImpliedDeltaHHI.to_csv('../CleanData/MAEvent/ImpliedDeltaHHI.csv')