In [16]:
import pandas as pd
import numpy as np
import scipy as sp
import os
import dask
import dask.dataframe as dd
import itertools
from itertools import chain
from math import sqrt, floor, ceil, isnan
import multiprocess
import multiprocessing
import importlib
from importlib import reload
from collections import Counter
from fuzzywuzzy import process, fuzz
import time
import seaborn as sns
import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import warnings
warnings.filterwarnings("error")

# Widen pandas' display limits so wide/long frames are shown in full in cell output
pd.set_option("display.max_columns", 500)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", 400)

# A customized winsorisation function that handles missing (NaN) values correctly:
# percentiles are taken, and winsorisation is done, on the non-missing values only
def winsor2(series,cutoffs):
    """Winsorise the non-NaN entries of `series`, leaving NaN entries in place.

    Parameters
    ----------
    series : numeric pandas Series or numpy array
        Values to winsorise. The input itself is not modified.
    cutoffs : sequence of two floats
        (lower, upper) fractions passed as `limits` to
        `scipy.stats.mstats.winsorize`.

    Returns
    -------
    A copy of `series` in which the non-NaN values are winsorised and NaN
    values are unchanged. If there is no non-NaN value the copy is returned
    as is.
    """

    import numpy as np
    # Import the submodule explicitly: a bare "import scipy" does not guarantee
    # that "scipy.stats" is loaded on every SciPy version
    from scipy.stats import mstats

    is_missing = np.isnan(series)
    has_value = np.logical_not(is_missing)
    series_new = series.copy()

    # winsorize() raises IndexError on an empty array, so skip it when every entry is NaN
    if has_value.any():
        winsorized = mstats.winsorize(np.asarray(series[has_value]),limits=(cutoffs[0],cutoffs[1]))
        # Hand back a plain ndarray (the masked array carries no mask here, NaNs were removed)
        series_new[has_value] = np.asarray(winsorized)

    return series_new


# 1. Import data

In [17]:
# GPF
GPF = pd.read_csv("../CleanData/SDC/0A_GPF.csv",low_memory=False)
# Column groups of GPF, selected by their name pattern
raw_name_GPF_colnames = [column for column in GPF.columns if 'raw_name_GPF_' in column]
name_GPF_colnames = [column for column in GPF.columns if column.startswith('name_GPF_')]
parent_name_GPF_colnames = [column for column in GPF.columns if 'parent_name_' in column]

# Parent relationship
GPF_names = pd.read_parquet('../CleanData/SDC/0H_GPF_Parent.parquet')

# HHI and market share of each underwriter
HHI_byCSA = pd.read_csv('../CleanData/SDC/1A_HHI_byCSA.csv')
market_share_all_markets_byCSA = pd.read_csv('../CleanData/SDC/1A_market_share_all_markets_byCSA.csv')
HHI_byCBSA = pd.read_csv('../CleanData/SDC/1A_HHI_byCBSA.csv')
market_share_all_markets_byCBSA = pd.read_csv('../CleanData/SDC/1A_market_share_all_markets_byCBSA.csv')
HHI_byState = pd.read_csv('../CleanData/SDC/1A_HHI_byState.csv')
market_share_all_markets_byState = pd.read_csv('../CleanData/SDC/1A_market_share_all_markets_byState.csv')

# Portfolio weights of CSAs within underwriter
csa_share_withinbank = pd.read_csv('../CleanData/SDC/1A_csa_share_withinbank.csv')

# All M&As
MA = pd.read_parquet('../CleanData/SDC/0B_M&A.parquet').reset_index(drop=True)

# Withdrawn M&As
MA_withdrawn = pd.read_csv("../CleanData/SDC/0I_MA_withdrawn.csv")

# Quantity of issuance
StateXCountyXBid = pd.read_parquet("../CleanData/SDC/0A_StateXCountyXBid.parquet")
StateXCountyXUsageBB = pd.read_parquet("../CleanData/SDC/0A_StateXCountyXUsageBB.parquet")
StateXCountyXUsageGeneral = pd.read_parquet("../CleanData/SDC/0A_StateXCountyXUsageGeneral.parquet")
StateXCountyXUsageMain = pd.read_parquet("../CleanData/SDC/0A_StateXCountyXUsageMain.parquet")
StateXCountyXIssuerType = pd.read_parquet("../CleanData/SDC/0A_StateXCountyXIssuerType.parquet")

# Total amount per State x County x year.
# The aggregation is named by the string 'sum' rather than the builtin sum: recent pandas emits a
# FutureWarning for builtin callables, and this notebook turns every warning into an error.
StateXCounty = (
    StateXCountyXBid
    .groupby(['State','County','sale_year'])
    .agg({'amount':'sum'})
    .reset_index()
)

# Demographics
CSA_POP = pd.read_csv("../CleanData/Demographics/0C_CSA_Pop.csv")
CSA_INC = pd.read_csv("../CleanData/Demographics/0C_CSA_Inc.csv")
CBSA_POP = pd.read_csv("../CleanData/Demographics/0C_CBSA_Pop.csv")
CBSA_INC = pd.read_csv("../CleanData/Demographics/0C_CBSA_Inc.csv")
State_POP = pd.read_csv("../CleanData/Demographics/0C_State_Pop.csv")
State_INC = pd.read_csv("../CleanData/Demographics/0C_State_Inc.csv")

#-------------#
# Import CBSA #
#-------------#

# Lookup from full state / territory name to its two-letter postal abbreviation.
# Covers the 50 states, the District of Columbia and the listed U.S. territories.
us_state_to_abbrev = {
    "Alabama": "AL",
    "Alaska": "AK",
    "Arizona": "AZ",
    "Arkansas": "AR",
    "California": "CA",
    "Colorado": "CO",
    "Connecticut": "CT",
    "Delaware": "DE",
    "Florida": "FL",
    "Georgia": "GA",
    "Hawaii": "HI",
    "Idaho": "ID",
    "Illinois": "IL",
    "Indiana": "IN",
    "Iowa": "IA",
    "Kansas": "KS",
    "Kentucky": "KY",
    "Louisiana": "LA",
    "Maine": "ME",
    "Maryland": "MD",
    "Massachusetts": "MA",
    "Michigan": "MI",
    "Minnesota": "MN",
    "Mississippi": "MS",
    "Missouri": "MO",
    "Montana": "MT",
    "Nebraska": "NE",
    "Nevada": "NV",
    "New Hampshire": "NH",
    "New Jersey": "NJ",
    "New Mexico": "NM",
    "New York": "NY",
    "North Carolina": "NC",
    "North Dakota": "ND",
    "Ohio": "OH",
    "Oklahoma": "OK",
    "Oregon": "OR",
    "Pennsylvania": "PA",
    "Rhode Island": "RI",
    "South Carolina": "SC",
    "South Dakota": "SD",
    "Tennessee": "TN",
    "Texas": "TX",
    "Utah": "UT",
    "Vermont": "VT",
    "Virginia": "VA",
    "Washington": "WA",
    "West Virginia": "WV",
    "Wisconsin": "WI",
    "Wyoming": "WY",
    "District of Columbia": "DC",
    "American Samoa": "AS",
    "Guam": "GU",
    "Northern Mariana Islands": "MP",
    "Puerto Rico": "PR",
    "United States Minor Outlying Islands": "UM",
    "U.S. Virgin Islands": "VI",
}

# "CSA" is for metropolitan and "CBSA" includes also those micropolitan
# Read the county-to-CBSA/CSA table; the first two spreadsheet rows are skipped,
# and rows without a county entry are dropped
CBSAData = pd.read_excel("../RawData/MSA/CBSA.xlsx",skiprows=[0,1])
CBSAData = CBSAData[CBSAData['County/County Equivalent'].notna()]

# Add state abbreviations
# The name-to-abbreviation dict is turned into a two-column frame for merging. The guard keeps
# these lines re-runnable when "us_state_to_abbrev" has already been converted.
if isinstance(us_state_to_abbrev,dict):
    us_state_to_abbrev = pd.DataFrame.from_dict(us_state_to_abbrev,orient='index').reset_index()
    us_state_to_abbrev.columns = ['State Name','State']
CBSAData = CBSAData.rename(columns={'County/County Equivalent':'County'})
# Outer merge with indicator, then keep only rows matched on both sides
CBSAData = CBSAData.merge(us_state_to_abbrev,on='State Name',how='outer',indicator=True)
CBSAData = CBSAData[CBSAData['_merge']=='both'].drop(columns=['_merge'])
# Merge is perfect

# Normalise county names. Every replacement is a literal (non-regex) substitution, stated
# explicitly so the result does not depend on the pandas version's default for "regex".
CBSAData['County'] = (
    CBSAData['County']
    .str.upper()
    .str.replace(' COUNTY','',regex=False)
    .str.replace(' AND ',' & ',regex=False)
    .str.replace('.','',regex=False)
)
CBSAData['CSA Code'] = CBSAData['CSA Code'].astype(float)
CBSAData['CBSA Code'] = CBSAData['CBSA Code'].astype(float)


# 2. Construct Events of M&As, Using State

## 2.1 Find states affected

In [None]:
%%time

# %%script false --no-raise-error

def _distinct_names(frame, columns):
    """Return the set of non-missing values found in `columns` of `frame` (None and NaN dropped)."""
    return {name for name in frame[columns].to_numpy().ravel() if pd.notna(name)}


def _first_value(frame, column, default):
    """Return the value of `column` in the first row of `frame`, or `default` if `frame` has no rows."""
    if len(frame)==0:
        return default
    return frame[column].iloc[0]


def proc_list(MA_frag):
    """Find, for every merger in `MA_frag`, the states it affects and the parties' prior market shares.

    Reads the notebook-level frames `GPF`, `GPF_names`, `MA` and
    `market_share_all_markets_byState`.

    A state is recorded for a merger when the acquiror, the target, or another target of
    the same acquiror in the same year appears among the underwriters or parents (and, for
    acquiror/target, the subsidiaries of those parents) seen in that state in the three
    years before the merger.

    Parameters
    ----------
    MA_frag : pandas.DataFrame
        A slice of `MA`; the columns 'acquiror', 'target' and 'sale_year' are read.

    Returns
    -------
    pandas.DataFrame
        One row per (merger, affected state) with the market shares (by number of deals)
        of acquiror, target and other targets in each of the three prior years. Always
        carries the full set of columns, also when no row is produced.
    """

    raw_name_GPF_colnames = [column for column in GPF.columns if 'raw_name_GPF_' in column]
    name_GPF_colnames = ['name_GPF_'+str(i) for i in range(0,len(raw_name_GPF_colnames))]
    parent_name_colnames = ['parent_name_'+str(i) for i in range(0,len(raw_name_GPF_colnames))]

    output_columns = [
        'State','sale_year','acquiror','target',
        'other_targets','acquiror_parent','target_parent',
        'acquiror_market_share_N_m1','acquiror_market_share_N_m2','acquiror_market_share_N_m3',
        'target_market_share_N_m1','target_market_share_N_m2','target_market_share_N_m3',
        'other_targets_market_share_N_m1','other_targets_market_share_N_m2','other_targets_market_share_N_m3',
        ]

    State_affected = []

    for _,row in MA_frag.reset_index(drop=True).iterrows():

        sale_year = row['sale_year']

        # Determine if an underwriter is active in a State based on activity of PRIOR years
        GPF_prioryears = GPF[(GPF['sale_year']>=sale_year-3)&(GPF['sale_year']<=sale_year-1)]
        GPF_names_prioryears = GPF_names[
            (GPF_names['sale_year']>=sale_year-3)&
            (GPF_names['sale_year']<=sale_year-1)]
        # The parent-subsidiary mapping must be taken from the year prior to the M&A
        GPF_names_lastyear = GPF_names[GPF_names['sale_year']==sale_year-1]

        # Also check other targets of the acquiror in that year. This accounts for cases where post merger the new formed
        # entity is new and appears under a name that was not in the sample before. Note that here "MA_frag" cannot be
        # used or the other firm involved in the merger will be missed. Instead, use the whole sample "MA"
        other_targets = list(MA[
            (MA['acquiror']==row['acquiror'])&
            (MA['sale_year']==sale_year)&
            (MA['target']!=row['target'])]['target'])

        # Parents of the parties, as "market_share_all_markets_byState" is at parent level. A firm without a mapping
        # entry in the prior year is treated as its own parent. None of this depends on the State, so it is done once
        # per merger.
        acquiror_parent = _first_value(
            GPF_names_lastyear[GPF_names_lastyear['name_GPF']==row['acquiror']],'parent_name',row['acquiror'])
        target_parent = _first_value(
            GPF_names_lastyear[GPF_names_lastyear['name_GPF']==row['target']],'parent_name',row['target'])
        # Other targets can be either a parent or a standalone firm, so keep both their parents and themselves.
        # Missing parents are dropped so that they cannot match missing names in the market share table.
        other_targets_parents = [
            name for name in set(
                list(GPF_names_lastyear[GPF_names_lastyear['name_GPF'].isin(other_targets)]['parent_name'])+
                list(other_targets))
            if pd.notna(name)]

        for State in GPF_prioryears['State'].unique():

            GPF_prioryears_oneState = GPF_prioryears[GPF_prioryears['State']==State]

            # Underwriters and their parents in this state. Missing entries (None as well as NaN) are dropped;
            # a NaN left in "parents_priorMA" would make isin() below match every firm whose parent is missing.
            underwriters_priorMA = _distinct_names(GPF_prioryears_oneState,name_GPF_colnames)
            parents_priorMA = _distinct_names(GPF_prioryears_oneState,parent_name_colnames)
            # Subsidiaries of parents in this state (using data of PRIOR years)
            subsidiaries_priorMA = set(GPF_names_prioryears[
                GPF_names_prioryears['parent_name'].isin(list(parents_priorMA))]['name_GPF'])

            # Determine if merger affects the State, and if both sides have business
            firms_priorMA = (parents_priorMA,underwriters_priorMA,subsidiaries_priorMA)
            IF_acquiror_active = any(row['acquiror'] in firms for firms in firms_priorMA)
            IF_target_active = any(row['target'] in firms for firms in firms_priorMA)
            # NOTE(review): unlike acquiror and target, other targets are not checked against the subsidiaries -
            # kept as is, confirm this is intended
            IF_other_target_active = any(
                (other_target in parents_priorMA) or (other_target in underwriters_priorMA)
                for other_target in other_targets)

            # Market shares are only needed for states that get recorded
            if not (IF_acquiror_active or IF_target_active or IF_other_target_active):
                continue

            # Get market share of merged banks. Note that this is the market share in the years prior to M&A. Also note
            # that "market_share_all_markets_byState" is calculated at the parent level. There are many cases where
            # market share of a firm in an area is unavailable, which is because of no presence; those are set to 0.
            market_share_oneState = market_share_all_markets_byState[
                market_share_all_markets_byState['State']==State]

            record = {
                'State':State,
                'sale_year':sale_year,
                'acquiror':row['acquiror'],
                'target':row['target'],
                'other_targets':other_targets,
                'acquiror_parent':acquiror_parent,
                'target_parent':target_parent,
                }
            for lag in (1,2,3):
                market_share_lag = market_share_oneState[market_share_oneState['calendar_year']==sale_year-lag]
                # (1) Market share of acquiror
                record['acquiror_market_share_N_m'+str(lag)] = _first_value(
                    market_share_lag[market_share_lag['parent_name']==acquiror_parent],'market_share_N',0)
                # (2) Market share of target
                record['target_market_share_N_m'+str(lag)] = _first_value(
                    market_share_lag[market_share_lag['parent_name']==target_parent],'market_share_N',0)
                # (3) Market share of other targets in the same transaction, summed
                other_targets_market_share_N = market_share_lag[
                    market_share_lag['parent_name'].isin(other_targets_parents)]['market_share_N']
                if len(other_targets_market_share_N)>0:
                    record['other_targets_market_share_N_m'+str(lag)] = other_targets_market_share_N.sum()
                else:
                    record['other_targets_market_share_N_m'+str(lag)] = 0

            # Record data
            State_affected.append(record)

    # Passing the columns keeps the layout fixed even when no state was recorded
    return pd.DataFrame(State_affected,columns=output_columns)

# Run proc_list over MA in 10 partitions on 10 worker processes.
# "meta" tells dask the column layout of the frames proc_list returns.
State_affected_meta = pd.DataFrame(columns=[
    'State','sale_year','acquiror','target',
    'other_targets','acquiror_parent','target_parent',
    'acquiror_market_share_N_m1','acquiror_market_share_N_m2','acquiror_market_share_N_m3',
    'target_market_share_N_m1','target_market_share_N_m2','target_market_share_N_m3',
    'other_targets_market_share_N_m1','other_targets_market_share_N_m2','other_targets_market_share_N_m3',
    ])
MA_dd = dd.from_pandas(MA, npartitions=10)
with dask.config.set(scheduler='processes',num_workers=10):
    State_affected = MA_dd.map_partitions(proc_list, meta=State_affected_meta).compute()

# Average market share over past three years, for each party of the merger
columns_to_average = [
    ('acquiror_market_share_N_avg',
        'acquiror_market_share_N_m1','acquiror_market_share_N_m2','acquiror_market_share_N_m3'),
    ('target_market_share_N_avg',
        'target_market_share_N_m1','target_market_share_N_m2','target_market_share_N_m3'),
    ('other_targets_market_share_N_avg',
        'other_targets_market_share_N_m1','other_targets_market_share_N_m2','other_targets_market_share_N_m3'),
    ]
for avg_column,m1_column,m2_column,m3_column in columns_to_average:
    State_affected[avg_column] = (
        State_affected[m1_column]+State_affected[m2_column]+State_affected[m3_column])/3

# As this step takes significant time, export output
State_affected.to_parquet('../CleanData/MAEvent/1B_State_affected.parquet')

In [None]:
# Load the exported result of section 2.1, so work can resume here without re-running that step
State_affected = pd.read_parquet('../CleanData/MAEvent/1B_State_affected.parquet')

## 2.2 Identify merger episodes

### 2.2.1 Method 1: By market share

In [None]:
#----------------------------#
# Market share by N of deals #
#----------------------------#

# Identify episodes of mergers at the State level

# Go over each year with merger event, and check the M&As on this year and three years afterwards. If enough consolidation, an episode is 
# identified. Whether there is enough consolidation can be judged by average market share in the past three years, or market share just in 
# the year minus one. For each identified merger episode, check if there is reasonable control in the sample. Note that there can be duplicates
# as in cases where two firms merge into a new one, both will get recorded in "State_affected"

parent_name_colnames = ['parent_name_'+str(i) for i in range(0,len(raw_name_GPF_colnames))]

State_episodes_marketshare_N = []

for State in list(State_affected['State'].unique()):

    # Mergers in this state where the acquiror and at least one target side had a positive prior market share
    State_affected_part = State_affected[State_affected['State']==State]
    acquiror_present = State_affected_part['acquiror_market_share_N_avg']>0
    any_target_present = (
        (State_affected_part['target_market_share_N_avg']>0)|
        (State_affected_part['other_targets_market_share_N_avg']>0))
    State_affected_part = State_affected_part[acquiror_present&any_target_present].sort_values('sale_year')

    episode_start_year = 1900
    years = sorted(State_affected_part['sale_year'].unique())
    for sale_year in years:

        # Skip years that are still within the last merger episode
        # NOTE(review): the episode window below spans sale_year..sale_year+3 while this skips up to start+4 - confirm
        if sale_year<=episode_start_year+4:
            continue

        # Mergers of that year and the three years following
        in_window = State_affected_part['sale_year'].between(sale_year,sale_year+3)
        # When a firm acquires multiple firms, the share of the other firms is in "other_targets_market_share_N_avg",
        # so one record per acquiror and year is sufficient
        State_affected_episode = State_affected_part[in_window].drop_duplicates(['acquiror','sale_year'])

        # Intensity of M&A activity: summed prior market shares of each side over the window.
        # Alternative aggregation methods might be more reasonable. Also, this does not account for targets tending to
        # be smaller, so their threshold should be smaller too. Even better would be a threshold on the implied-HHI
        # change (based on historical data) of the mergers.
        acquiror_market_share_N_avg = State_affected_episode['acquiror_market_share_N_avg'].sum()
        target_market_share_N_avg = State_affected_episode['target_market_share_N_avg'].sum()
        other_targets_market_share_N_avg = State_affected_episode['other_targets_market_share_N_avg'].sum()

        # Both sides must clear the 5% threshold for an episode to be identified
        acquiror_side_large = acquiror_market_share_N_avg>0.05
        target_side_large = target_market_share_N_avg+other_targets_market_share_N_avg>0.05
        if acquiror_side_large and target_side_large:
            State_episodes_marketshare_N.append({
                'episode_start_year':sale_year,
                'State':State,
                'mergers':State_affected_episode,
                'acquiror_market_share_N_avg':acquiror_market_share_N_avg,
                'target_market_share_N_avg':target_market_share_N_avg,
                'other_targets_market_share_N_avg':other_targets_market_share_N_avg,
                })
            episode_start_year = sale_year

State_episodes_marketshare_N = pd.DataFrame(State_episodes_marketshare_N)


### 2.2.2 Method 2: By implied rise in HHI due to merger

In [None]:
#-----------------------------#
# Change in HHI by N of deals #
#-----------------------------#

# Identify episodes of mergers at the State level

# Go over each year with merger event, and check the M&As on this year and three years afterwards. If enough consolidation, an episode is 
# identified. Whether there is enough consolidation can be judged by average market share in the past three years, or market share just in 
# the year minus one. For each identified merger episode, check if there is reasonable control in the sample. Note that there can be duplicates
# as in cases where two firms merge into a new one, both will get recorded in "State_affected"

State_episodes_impliedHHI_N = []

# Parent-name columns of GPF. Built once, before the loop, so it is defined before its first use
parent_name_colnames = ['parent_name_'+str(i) for i in range(0,len(raw_name_GPF_colnames))]

for State in list(State_affected['State'].unique()):

    # Mergers in this state where the acquiror and at least one target side had a positive prior market share
    State_affected_part = State_affected[State_affected['State']==State]
    State_affected_part = State_affected_part[
        (State_affected_part['acquiror_market_share_N_avg']>0)&
        ((State_affected_part['target_market_share_N_avg']>0)|
        (State_affected_part['other_targets_market_share_N_avg']>0))].sort_values('sale_year')

    episode_start_year = 1900
    for sale_year in State_affected_part['sale_year'].unique():

        # If this year is still within the last merger episode
        if sale_year<=episode_start_year+4:
            continue

        # Check intensity of M&A activities in that year and three years following
        State_affected_episode = State_affected_part[
            (State_affected_part['sale_year']>=sale_year)&
            (State_affected_part['sale_year']<=sale_year+3)].reset_index(drop=True)
        # NOTE(review): this window runs up to and including sale_year itself - confirm that is intended
        GPF_oneState_priorMA = GPF[(GPF['sale_year']>=sale_year-3)&(GPF['sale_year']<=sale_year)&(GPF['State']==State)]

        # Calculate (1) HHI (by parent firm) before the episode (2) Predicted HHI after the mergers complete

        # Number of deals per parent before the merger episode; a deal is split equally among its underwriters
        n_deals = {}
        for deal in GPF_oneState_priorMA[parent_name_colnames].to_numpy():
            underwriters_onedeal = [item for item in deal if pd.notna(item)]
            n_underwriters = len(underwriters_onedeal)
            for item in underwriters_onedeal:
                n_deals[item] = n_deals.get(item,0)+1/n_underwriters
        n_deals = pd.DataFrame({
            'underwriter':list(n_deals.keys()),
            'n_deals':pd.Series(list(n_deals.values()),dtype=float),
            })
        # Keep an independent snapshot: "n_deals" is edited in place below, and a plain alias
        # would have its underwriter names overwritten as well
        n_deals_prior = n_deals.copy()

        # HHI prior to merger
        hhi_prior = np.sum((n_deals['n_deals']/np.sum(n_deals['n_deals']))**2)

        # Implied HHI post merger: hand each target's deals to the parent of its acquiror.
        # The mergers are applied one after another, so that chained acquisitions carry through.
        # NOTE(review): "n_deals" is keyed by parent names while the match is on row['target'] - confirm
        for idx,row in State_affected_episode.iterrows():
            n_deals.loc[n_deals['underwriter']==row['target'],'underwriter'] = row['acquiror_parent']
        # 'sum' is given as a string: the builtin sum triggers a FutureWarning in recent pandas,
        # which this notebook escalates to an error
        n_deals = n_deals.groupby('underwriter').agg({'n_deals':'sum'}).reset_index()
        hhi_predicted = np.sum((n_deals['n_deals']/np.sum(n_deals['n_deals']))**2)
        n_deals_post = n_deals

        hhi_dif = hhi_predicted-hhi_prior

        # Check if the implied rise in HHI is high enough
        if hhi_dif>0.01:
            # An episode is identified
            State_episodes_impliedHHI_N.append({
                'episode_start_year':sale_year,
                'State':State,
                'mergers':State_affected_episode,
                'hhi_dif':hhi_dif,
                'n_deals_prior':n_deals_prior,
                'n_deals_post':n_deals_post,
                })
            episode_start_year = sale_year

State_episodes_impliedHHI_N = pd.DataFrame(State_episodes_impliedHHI_N)


In [None]:
# Number of merger episodes identified by the implied-HHI criterion
len(State_episodes_impliedHHI_N)

### 2.2.3 Method 3: By implied rise in top 5 share due to merger

In [None]:
#-------------------------------------#
# Change in top 5 share by N of deals #
#-------------------------------------#

# Identify episodes of mergers at the State level

# Go over each year with merger event, and check the M&As on this year and three years afterwards. If enough consolidation, an episode is 
# identified. Whether there is enough consolidation can be judged by average market share in the past three years, or market share just in 
# the year minus one. For each identified merger episode, check if there is reasonable control in the sample. Note that there can be duplicates
# as in cases where two firms merge into a new one, both will get recorded in "State_affected"

State_episodes_top5share_N = []

# Parent-name columns of GPF. Built once, before the loop, so it is defined before its first use
parent_name_colnames = ['parent_name_'+str(i) for i in range(0,len(raw_name_GPF_colnames))]


def _top5_share(n_deals_sorted):
    """Combined 'marketshare' of the first five rows; 1 when there are five rows or fewer."""
    if len(n_deals_sorted)<=5:
        return 1
    return n_deals_sorted['marketshare'].iloc[:5].sum()


for State in list(State_affected['State'].unique()):

    # Mergers in this state where the acquiror and at least one target side had a positive prior market share
    State_affected_part = State_affected[State_affected['State']==State]
    State_affected_part = State_affected_part[
        (State_affected_part['acquiror_market_share_N_avg']>0)&
        ((State_affected_part['target_market_share_N_avg']>0)|
        (State_affected_part['other_targets_market_share_N_avg']>0))].sort_values('sale_year')

    episode_start_year = 1900
    for sale_year in State_affected_part['sale_year'].unique():

        # If this year is still within the last merger episode
        if sale_year<=episode_start_year+4:
            continue

        # Check intensity of M&A activities in that year and three years following
        State_affected_episode = State_affected_part[
            (State_affected_part['sale_year']>=sale_year)&
            (State_affected_part['sale_year']<=sale_year+3)].reset_index(drop=True)
        # NOTE(review): this window runs up to and including sale_year itself - confirm that is intended
        GPF_oneState_priorMA = GPF[(GPF['sale_year']>=sale_year-3)&(GPF['sale_year']<=sale_year)&(GPF['State']==State)]

        # Calculate (1) Top 5 share (by parent firm) before the episode (2) Predicted top 5 share after the mergers

        # Number of deals per parent before the merger episode; a deal is split equally among its underwriters
        n_deals = {}
        for deal in GPF_oneState_priorMA[parent_name_colnames].to_numpy():
            underwriters_onedeal = [item for item in deal if pd.notna(item)]
            n_underwriters = len(underwriters_onedeal)
            for item in underwriters_onedeal:
                n_deals[item] = n_deals.get(item,0)+1/n_underwriters
        n_deals = pd.DataFrame({
            'underwriter':list(n_deals.keys()),
            'n_deals':pd.Series(list(n_deals.values()),dtype=float),
            })

        # Top 5 share prior to merger
        n_deals['marketshare'] = n_deals['n_deals']/np.sum(n_deals['n_deals'])
        # Snapshot of the pre-merger table (with its market shares), independent of later edits
        n_deals_prior = n_deals.copy()
        n_deals = n_deals.sort_values(by=['n_deals'],ascending=False).reset_index(drop=True)
        top5share_prior = _top5_share(n_deals)

        # Implied top 5 share post merger: hand each target's deals to the parent of its acquiror.
        # The mergers are applied one after another, so that chained acquisitions carry through.
        # NOTE(review): "n_deals" is keyed by parent names while the match is on row['target'] - confirm
        for idx,row in State_affected_episode.iterrows():
            n_deals.loc[n_deals['underwriter']==row['target'],'underwriter'] = row['acquiror_parent']
        # 'sum' is given as a string: the builtin sum triggers a FutureWarning in recent pandas,
        # which this notebook escalates to an error
        n_deals = n_deals.groupby('underwriter').agg({'n_deals':'sum'}).reset_index()
        n_deals['marketshare'] = n_deals['n_deals']/np.sum(n_deals['n_deals'])
        n_deals = n_deals.sort_values(by=['n_deals'],ascending=False).reset_index(drop=True)
        top5share_post = _top5_share(n_deals)
        n_deals_post = n_deals

        top5share_dif = top5share_post-top5share_prior

        # Market shares used in summary statistics: taken from the merger whose smaller side is the largest
        State_affected_episode['min_share'] = np.minimum(
            State_affected_episode['acquiror_market_share_N_avg'],
            State_affected_episode['target_market_share_N_avg']+
            State_affected_episode['other_targets_market_share_N_avg'])
        State_affected_episode = State_affected_episode.sort_values('min_share')
        State_affected_episode_topshare = State_affected_episode.iloc[-1:]
        acquiror_market_share_N_max = np.max(State_affected_episode_topshare['acquiror_market_share_N_avg'])
        target_market_share_N_max = np.max(State_affected_episode_topshare['target_market_share_N_avg'])
        other_targets_market_share_N_max = np.max(State_affected_episode_topshare['other_targets_market_share_N_avg'])

        # Check if the implied rise in top 5 share is high enough
        if top5share_dif>0.05:
            # An episode is identified
            State_episodes_top5share_N.append({
                'episode_start_year':sale_year,
                'State':State,
                'mergers':State_affected_episode,
                'top5share_dif':top5share_dif,
                'n_deals_prior':n_deals_prior,
                'n_deals_post':n_deals_post,
                'acquiror_market_share_N_max':acquiror_market_share_N_max,
                'target_market_share_N_max':target_market_share_N_max,
                'other_targets_market_share_N_max':other_targets_market_share_N_max,
                })
            episode_start_year = sale_year

State_episodes_top5share_N = pd.DataFrame(State_episodes_top5share_N)


In [None]:
# Number of merger episodes identified by the implied top-5-share criterion
len(State_episodes_top5share_N)

## 2.3 Assemble a Treatment-Control Matched Sample

In [None]:
# Each entry: [label printed after export, episodes DataFrame, number of control states
# matched to each episode, path of the exported regression sample]
episodes_files = [
    ["By Market Share in terms of N deals",State_episodes_marketshare_N,1,
        '../CleanData/MAEvent/State_episodes_marketshareByN.csv',
    ],
    ["By Implied HHI Increase in terms of N deals, >= 0.01",State_episodes_impliedHHI_N,1,
        '../CleanData/MAEvent/State_episodes_impliedHHIByN.csv',
    ],
    # The top-5-share episodes are identified with "top5share_dif>0.05", so the label says so
    ["By Implied Top 5 Share Increase in terms of N deals, > 0.05",State_episodes_top5share_N,1,
        '../CleanData/MAEvent/State_episodes_top5shareByN.csv',
    ]
    ]

# State demographics to be used in matching. They do not depend on the episode definition,
# so they are loaded once instead of once per entry of "episodes_files"
State_POP = pd.read_csv("../CleanData/Demographics/0C_State_Pop.csv")
State_INC = pd.read_csv("../CleanData/Demographics/0C_State_Inc.csv")
State_Data = State_POP.merge(State_INC,on=['State','year'])
State_Data = State_Data[['State','year','inc','pop']]

def calculate_distance(row,weightingmat):
    """Mahalanobis distance between a candidate state's (inc, pop) and the treated state's
    (treated_inc, treated_pop), using "weightingmat" as the inverse covariance matrix."""
    return sp.spatial.distance.mahalanobis((row['inc'],row['pop']),
        (row['treated_inc'],row['treated_pop']),weightingmat)

# Issue-level GPF columns kept in the regression sample (same for treated and control)
GPF_Seg_colnames = [
    'CBSA Code','sale_year','State','County',
    'issuer_type','Issuer',
    'avg_maturity','amount',
    'avg_yield','treasury_avg_spread','MMA_avg_spread',
    'gross_spread','gross_spread_tic_based','gross_spread_nic_based',
    'mod_tic','mod_tic_spread_treasury','mod_tic_spread_MMA',
    'mod_tic_timeFE','mod_tic_spread_treasury_timeFE','mod_tic_spread_MMA_timeFE',
    'underpricing_15to60','underpricing_15to30',
    'Bid','taxable_code','security_type','if_advisor','if_dual_advisor','if_refunding',
    'amount_bracket','mat_bracket','use_short','has_ratings',
    'use_of_proceeds_BB','use_of_proceeds_main','use_of_proceeds_general',
    'has_Moodys','has_Fitch','rating_Moodys','rating_Fitch','insured_amount',
    'AdvisorFeeRatio_hat','CRFeeRatio_hat','InsureFeeRatio_hat',
    'AdvisorFeeRatio_hat_model_timeFE','CRFeeRatio_hat_model_timeFE','InsureFeeRatio_hat_model_timeFE',
    'if_callable','CB_Eligible',
    'num_relationship',
    ]+name_GPF_colnames+parent_name_GPF_colnames

# Event-strength measures: column written to the regression sample -> column read from the
# episodes table. A measure absent from the episodes table is recorded as None.
event_strength_colnames = {
    'acquiror_market_share_avg':'acquiror_market_share_N_avg',
    'target_market_share_avg':'target_market_share_N_avg',
    'other_targets_market_share_avg':'other_targets_market_share_N_avg',
    'hhi_dif':'hhi_dif',
    'max_sum_share':'max_sum_share',
    'max_min_share':'max_min_share',
    'mean_sum_share':'mean_sum_share',
    }

for episodes_file in episodes_files:

    criteria = episodes_file[0]
    episodes = episodes_file[1]
    N_matches = episodes_file[2]
    file_path = episodes_file[3]

    episodes = episodes.copy()

    ########################################
    # Find control for each merger episode #
    ########################################

    episodes['control'] = None
    for idx,row in episodes.iterrows():

        # Demographics of all states in the year the episode starts
        State_Data_oneyear = State_Data[State_Data['year']==row['episode_start_year']].copy()

        # Demographic data of the treated state; without it no match can be made
        State_Data_oneyear_frag = State_Data_oneyear[State_Data_oneyear['State']==row['State']]
        if len(State_Data_oneyear_frag)==0:
            continue
        episode_pop = State_Data_oneyear_frag['pop'].iloc[0]
        episode_inc = State_Data_oneyear_frag['inc'].iloc[0]

        # Find a match
        State_Data_oneyear['treated_pop'] = episode_pop
        State_Data_oneyear['treated_inc'] = episode_inc
        # Get weighting matrix (inverse covariance of the winsorised income and population)
        State_Data_oneyear['inc'] = winsor2(State_Data_oneyear['inc'],cutoffs=[0.05,0.05])
        State_Data_oneyear['pop'] = winsor2(State_Data_oneyear['pop'],cutoffs=[0.05,0.05])
        cov = State_Data_oneyear[['inc','pop']].cov()
        invcov = np.linalg.inv(cov)
        State_Data_oneyear['dist'] = State_Data_oneyear.apply(calculate_distance, axis=1,weightingmat=invcov)
        State_Data_oneyear = State_Data_oneyear.sort_values('dist').reset_index(drop=True)
        # Remove oneself from potential matches
        State_Data_oneyear = State_Data_oneyear[State_Data_oneyear['State']!=row['State']]

        # A candidate is disqualified if it is itself treated in any of these years
        window_years = set(range(row['episode_start_year']-4,row['episode_start_year']+5))

        control = []
        for subidx,subrow in State_Data_oneyear.iterrows():
            # Years for which potential control is treated itself
            State_affected_frag = State_affected[State_affected['State']==subrow['State']]
            State_affected_frag = State_affected_frag[(State_affected_frag['acquiror_market_share_N_avg']>0.01)&
                (State_affected_frag['target_market_share_N_avg']+State_affected_frag['other_targets_market_share_N_avg']>0.01)]
            treated_years = set(State_affected_frag['sale_year'].unique())
            if window_years & treated_years:
                # This potential control is treated
                continue
            # This potential control is not treated => Good control
            control.append(subrow['State'])
            if len(control)==N_matches:
                break

        # Record the match only if at least one eligible control exists. An empty list is not
        # null, so storing it would hide the episode from the count and the filter below.
        if len(control)>0:
            episodes.at[idx,'control'] = control

    # Exclude cases where a match cannot be found
    print('A control cannot be found for '+str(np.sum(pd.isnull(episodes['control'])))+' episodes.')
    episodes = episodes[~pd.isnull(episodes['control'])]


    #############################################
    # Expand to include an event time dimension #
    #############################################

    # One row per episode and event year, from 4 years before to 10 years after the start
    episodes_Exploded = episodes
    episodes_Exploded['year_to_merger'] = [list(range(-4,11))]*len(episodes_Exploded)
    episodes_Exploded = episodes_Exploded.explode('year_to_merger')
    episodes_Exploded['calendar_year'] = episodes_Exploded['episode_start_year']+episodes_Exploded['year_to_merger']


    ################################
    # Assemble a regression sample #
    ################################

    #------------------------#
    # Issue level, using GPF #
    #------------------------#

    reg_sample = []
    for idx,row in episodes_Exploded.iterrows():

        # Episodes without a control contribute nothing; skip before doing any work
        if row['control'] is None:
            continue

        # Event characteristics - strength
        event_strength = {out_col:row.get(in_col) for out_col,in_col in event_strength_colnames.items()}

        in_calendar_year = GPF['sale_year']==row['calendar_year']

        # Treated observations
        GPF_Seg_Treated = GPF.loc[in_calendar_year&(GPF['State']==row['State']),GPF_Seg_colnames].copy()

        #------------------------------------#
        # Some cross-sectional heterogeneity #
        #------------------------------------#

        GPF_Seg_Treated['treated'] = 1
        GPF_Seg_Treated['episode_start_year'] = row['episode_start_year']
        GPF_Seg_Treated['year_to_merger'] = row['year_to_merger']
        GPF_Seg_Treated['calendar_year'] = row['calendar_year']
        GPF_Seg_Treated['treated_state'] = row['State'] # Used for constructing cohort X issuer FEs
        for out_col,value in event_strength.items():
            GPF_Seg_Treated[out_col] = value

        # Control observations: issues in the same calendar year from each matched control state
        control_segs = []
        for item in row['control']:
            GPF_Seg = GPF.loc[in_calendar_year&(GPF['State']==item),GPF_Seg_colnames].copy()
            GPF_Seg['treated'] = 0
            GPF_Seg['episode_start_year'] = row['episode_start_year']
            GPF_Seg['year_to_merger'] = row['year_to_merger']
            GPF_Seg['calendar_year'] = row['calendar_year']
            GPF_Seg['treated_state'] = row['State'] # Used for constructing cohort X issuer FEs
            GPF_Seg['hhi_dif'] = event_strength['hhi_dif']
            control_segs.append(GPF_Seg)
        GPF_Seg_Control = pd.concat(control_segs) if len(control_segs)>0 else pd.DataFrame()

        # Keep an event year only if both treated and control states have issues in it
        if len(GPF_Seg_Treated)>0 and len(GPF_Seg_Control)>0:
            reg_sample.extend([GPF_Seg_Treated,GPF_Seg_Control])

    reg_sample = pd.concat(reg_sample)
    reg_sample = reg_sample.merge(HHI_byState,on=['State','calendar_year'])
    County_Composite = pd.read_csv("../CleanData/Demographics/0C_County_Composite.csv")
    County_Composite = County_Composite[['year','State','County','black_ratio','pop']].rename(columns={'year':'calendar_year'})
    # Attach county demographics; county-years with no issue in the sample are dropped again
    reg_sample = reg_sample.merge(County_Composite,on=['State','County','calendar_year'],how='outer',indicator=True)
    reg_sample = reg_sample[reg_sample['_merge']!='right_only'].drop(columns=['_merge'])
    reg_sample.to_csv(file_path)

    print('Exported regression sample for '+criteria)



Exported regression sample for Withdrawn M&A, >= 0.002


# 3. Construct Events of M&As, Using CSA and Divide Based on Issue Chars

In [18]:
# Bond characteristics (GPF columns) used, one at a time, to split each CSA into finer markets
BondChars = ['amount_bracket','mat_bracket','use_short','has_ratings','Bid']

## 3.1. Find CSA X Year X Bond Characteristic Group affected by merger

Notes:
- Go over each merger. Check the CSA X bond-characteristic markets affected by the merger (i.e., either side has business in the market in the three years prior to the merger). Check whether the merger affects just one underwriter or multiple underwriters in this market.
- Note that for the column "market share of other targets", the ideal quantity would be the market share of the other targets alone. Here I instead use the market share of the other targets' parents. This should make a minimal difference.

In [19]:
# %%time

# %%script false --no-raise-error

# Affected finer markets by each bond characteristics
CSAXBondChar_affected_AllBondChars = {}

raw_name_GPF_colnames = [column for column in GPF.columns if 'raw_name_GPF_' in column]
name_GPF_colnames = ['name_GPF_'+str(i) for i in range(0,len(raw_name_GPF_colnames))]
parent_name_colnames = ['parent_name_'+str(i) for i in range(0,len(raw_name_GPF_colnames))]

for BondChar in BondChars:

    start_time = time.time()

    #-----------------#
    # Define function #
    #-----------------#

    def proc_list(MA_frag):
        """For each merger in "MA_frag", list the CSA X bond-characteristic markets it affects.

        A market is recorded when the acquiror, the target, or another target of the same
        acquiror in the same year is active in it during the three years before the merger.
        Each record carries the market shares (by number of deals) of the acquiror's parent,
        the target's parent and the other targets in each of those three years.

        Returns a DataFrame with one row per (merger, affected market).
        """

        CSAXBondChar_affected = []
        MA_frag = MA_frag.reset_index(drop=True)

        market_share_all_markets_byCSAXBondChar = pd.read_csv('../CleanData/SDC/1A_market_share_all_markets_byCSA'+'X'+BondChar+'.csv')
        market_share_all_markets_byCSAXBondChar_gb = \
            market_share_all_markets_byCSAXBondChar.groupby(['parent_name','CSA Code',BondChar,'calendar_year'])

        def lookup_parent(name,year):
            # Parent of "name" according to the parent-subsidiary mapping of "year". A firm that
            # is not listed as a subsidiary in that year is treated as its own parent.
            match = GPF_names[(GPF_names['name_GPF']==name)&(GPF_names['sale_year']==year)]
            if len(match)>0:
                return match['parent_name'].iloc[0]
            return name

        def parent_share_N(parent,market,year):
            # Market share of one parent in one market and year. A missing entry means no
            # presence, which is recorded as a share of 0.
            try:
                group = market_share_all_markets_byCSAXBondChar_gb.get_group((parent,market[0],market[1],year))
            except KeyError:
                return 0
            return group['market_share_N'].iloc[0]

        def other_targets_share_N(parents,market,year):
            # Combined market share of the other targets (and their parents) in one market and year
            if len(parents)==0:
                return 0
            shares = market_share_all_markets_byCSAXBondChar
            shares = shares[
                (shares['parent_name'].isin(parents))
                &(shares['CSA Code']==market[0])
                &(shares[BondChar]==market[1])
                &(shares['calendar_year']==year)]
            if len(shares)>0:
                return np.sum(shares['market_share_N'])
            return 0

        for idx,row in MA_frag.iterrows():

            # Find CSAs that this merger affects
            # Determine if an underwriter is active in a CSA based on activity of PRIOR years
            GPF_prioryears = GPF[(GPF['sale_year']>=row['sale_year']-3)&(GPF['sale_year']<=row['sale_year']-1)]

            # All markets, defined using both geographic unit and bond characteristics
            markets = list(itertools.product(
                list(GPF_prioryears['CSA Code'].unique()),
                list(GPF_prioryears[BondChar].unique())))

            # Also check other targets of the acquiror in that year. This accounts for cases where post merger the new formed entity
            # is new and appear as a name that was not in the sample before. Note that here "MA_frag" cannot be used or the other firm
            # involved in the merger will be missed. Instead, use the whole sample "MA"
            other_targets = \
                list(MA[(MA['acquiror']==row['acquiror'])&
                (MA['sale_year']==row['sale_year'])&
                (MA['target']!=row['target'])]['target'])

            # Parents are needed because the market share table is at the parent level. They depend
            # on the merger only, not on the market, so they are resolved once per merger. Note that
            # the parent-subsidiary mapping of the year prior to the M&A must be used.
            acquiror_parent = lookup_parent(row['acquiror'],row['sale_year']-1)
            target_parent = lookup_parent(row['target'],row['sale_year']-1)
            # Account for possibility that other targets can be either a parent or a standalone firm
            other_targets_parents = \
                list(GPF_names[(GPF_names['name_GPF'].isin(other_targets))
                &(GPF_names['sale_year']==row['sale_year']-1)]['parent_name'])+\
                list(other_targets)
            other_targets_parents = list(set(other_targets_parents))

            for market in markets:

                GPF_prioryears_oneCSAXBondChar = GPF_prioryears[(GPF_prioryears['CSA Code']==market[0])&(GPF_prioryears[BondChar]==market[1])]

                # Underwriters in this market
                underwriters_priorMA = chain.from_iterable(GPF_prioryears_oneCSAXBondChar[name_GPF_colnames].to_numpy())
                underwriters_priorMA = list(set(item for item in underwriters_priorMA if item is not None))
                # Parents of underwriters in this market
                parents_priorMA = chain.from_iterable(GPF_prioryears_oneCSAXBondChar[parent_name_colnames].to_numpy())
                parents_priorMA = list(set(item for item in parents_priorMA if item is not None))
                # Subsidiaries of parents in this market (using data of PRIOR years)
                subsidiaries_priorMA = list(GPF_names[
                    (GPF_names['parent_name'].isin(parents_priorMA))&
                    (GPF_names['sale_year']>=row['sale_year']-3)&
                    (GPF_names['sale_year']<=row['sale_year']-1)]['name_GPF'])

                # Determine if merger affects the market, and which sides have business in it
                IF_acquiror_active = (row['acquiror'] in parents_priorMA) or (row['acquiror'] in underwriters_priorMA) \
                    or (row['acquiror'] in subsidiaries_priorMA)
                IF_target_active = (row['target'] in parents_priorMA) or (row['target'] in underwriters_priorMA) \
                    or (row['target'] in subsidiaries_priorMA)
                IF_other_target_active = any(
                    (other_target in parents_priorMA) or (other_target in underwriters_priorMA)
                    for other_target in other_targets)

                # Only affected markets are recorded, so skip the share lookups for the rest
                if not (IF_acquiror_active or IF_target_active or IF_other_target_active):
                    continue

                #-------------------------#
                # Market share by N deals #
                #-------------------------#

                # Get market share of merged banks. Note that this is the market share in the years prior to M&A, and that it
                # is calculated at the parent level. There are many cases where market share of a firm in an area is
                # unavailable, which is because of no presence.
                record = {
                    'CSA Code':market[0],
                    BondChar:market[1],
                    'sale_year':row['sale_year'],
                    'acquiror':row['acquiror'],
                    'target':row['target'],
                    'other_targets':other_targets,
                    'acquiror_parent':acquiror_parent,
                    'target_parent':target_parent,
                    }
                # (1) Market share of acquiror, one to three years before the merger
                for lag in [1,2,3]:
                    record['acquiror_market_share_N_m'+str(lag)] = parent_share_N(acquiror_parent,market,row['sale_year']-lag)
                # (2) Market share of target
                for lag in [1,2,3]:
                    record['target_market_share_N_m'+str(lag)] = parent_share_N(target_parent,market,row['sale_year']-lag)
                # (3) Market share of other targets in the same transaction
                for lag in [1,2,3]:
                    record['other_targets_market_share_N_m'+str(lag)] = \
                        other_targets_share_N(other_targets_parents,market,row['sale_year']-lag)

                # Record data
                CSAXBondChar_affected.append(record)

        CSAXBondChar_affected = pd.DataFrame(CSAXBondChar_affected)
        return CSAXBondChar_affected

    #--------------#
    # Process data #
    #--------------#

    # Columns of the frame returned by "proc_list", in the order the records are built
    CSAXBondChar_affected_colnames = \
        ['CSA Code',BondChar,'sale_year','acquiror','target',
        'other_targets','acquiror_parent','target_parent',
        'acquiror_market_share_N_m1','acquiror_market_share_N_m2','acquiror_market_share_N_m3',
        'target_market_share_N_m1','target_market_share_N_m2','target_market_share_N_m3',
        'other_targets_market_share_N_m1','other_targets_market_share_N_m2','other_targets_market_share_N_m3',
        ]

    MA_dd = dd.from_pandas(MA, npartitions=20)
    with dask.config.set(scheduler='processes',num_workers=20):
        CSAXBondChar_affected = MA_dd.map_partitions(proc_list,
            meta=pd.DataFrame(columns=CSAXBondChar_affected_colnames)).compute()

    # Average market share over past three years
    for party in ['acquiror','target','other_targets']:
        CSAXBondChar_affected[party+'_market_share_N_avg'] = \
            (CSAXBondChar_affected[party+'_market_share_N_m1']+\
            CSAXBondChar_affected[party+'_market_share_N_m2']+\
            CSAXBondChar_affected[party+'_market_share_N_m3'])/3

    # As this step takes significant time, export output
    CSAXBondChar_affected.to_parquet('../CleanData/MAEvent/1B_CSA_affected_'+BondChar+'.parquet')

    CSAXBondChar_affected_AllBondChars[BondChar] = CSAXBondChar_affected

    print('Process and export data by both CSA and Bond Characteristic -- '+BondChar+'.')
    end_time = time.time()
    elapsed_time = end_time - start_time
    print(f"Time spent on the block: {elapsed_time:.4f} seconds")


Process and export data by both CSA and Bond Characteristic -- amount_bracket.
Time spent on the block: 1260.8029 seconds
Process and export data by both CSA and Bond Characteristic -- mat_bracket.
Time spent on the block: 1068.5463 seconds
Process and export data by both CSA and Bond Characteristic -- use_short.
Time spent on the block: 2385.6474 seconds
Process and export data by both CSA and Bond Characteristic -- has_ratings.
Time spent on the block: 424.0019 seconds
Process and export data by both CSA and Bond Characteristic -- Bid.
Time spent on the block: 889.9771 seconds


## 3.2 Identify merger episodes

### 3.2.1 Method: By implied rise in HHI due to merger

In [20]:
CSAXBondChar_episodes_impliedHHI_N_AllBondChars = {}

# Parent-name columns of GPF. Defined once here so that the cell does not depend on a
# definition left behind by an earlier cell.
parent_name_colnames = ['parent_name_'+str(i) for i in range(0,len(raw_name_GPF_colnames))]

for BondChar in BondChars:

    CSAXBondChar_affected = CSAXBondChar_affected_AllBondChars[BondChar]

    #-----------------------------#
    # Change in HHI by N of deals #
    #-----------------------------#

    # Identify episodes of mergers at the CSA X bond characteristic level

    # Go over each year with a merger event, and check the M&As in this year and the three years afterwards. If there is enough
    # consolidation (implied HHI rises by more than 0.01), an episode is identified, and merger years within four years of its
    # start are not considered as the start of another episode. Note that there can be duplicates, as in cases where two firms
    # merge into a new one, both will get recorded in "CSAXBondChar_affected"

    CSAXBondChar_episodes_impliedHHI_N = []

    CSAXBondChar_affected_unique = CSAXBondChar_affected[['CSA Code',BondChar]].drop_duplicates()

    # Iterate over the two columns directly. The market keys get their own names so that the
    # inner loops over deals and mergers cannot overwrite them.
    for CSA_code,BondChar_value in zip(CSAXBondChar_affected_unique['CSA Code'],CSAXBondChar_affected_unique[BondChar]):

        # Mergers in this market where the acquiror and at least one target have market share
        CSAXBondChar_affected_part = \
            CSAXBondChar_affected[(CSAXBondChar_affected['CSA Code']==CSA_code)&(CSAXBondChar_affected[BondChar]==BondChar_value)]
        CSAXBondChar_affected_part = CSAXBondChar_affected_part[
            (CSAXBondChar_affected_part['acquiror_market_share_N_avg']>0)&
            ((CSAXBondChar_affected_part['target_market_share_N_avg']>0)|
            (CSAXBondChar_affected_part['other_targets_market_share_N_avg']>0))].sort_values('sale_year')

        episode_start_year = 1900
        for sale_year in CSAXBondChar_affected_part['sale_year'].unique():

            # If this year is still within the last merger episode
            if sale_year<=episode_start_year+4:
                continue

            # Check intensity of M&A activities in that year and three years following
            CSAXBondChar_affected_episode = CSAXBondChar_affected_part[
                (CSAXBondChar_affected_part['sale_year']>=sale_year)&
                (CSAXBondChar_affected_part['sale_year']<=sale_year+3)]
            # Deals in this market from sale_year-3 through sale_year
            GPF_oneCSAXBondChar_priorMA = GPF[
                (GPF['sale_year']>=sale_year-3)&(GPF['sale_year']<=sale_year)
                &(GPF['CSA Code']==CSA_code)
                &(GPF[BondChar]==BondChar_value)]

            # Calculate (1) HHI (by parent firm) before the mergers (2) Predicted HHI after the mergers complete

            # Record market shares before merger episode. Each deal counts as one, split equally
            # among the parents of its underwriters.
            n_deals = {}
            for _,deal in GPF_oneCSAXBondChar_priorMA.iterrows():
                underwriters_onedeal = [deal[item] for item in parent_name_colnames
                    if deal[item] is not None and str(deal[item])!='nan']
                n_underwriters = len(underwriters_onedeal)
                for item in underwriters_onedeal:
                    n_deals[item] = n_deals.get(item,0)+1/n_underwriters
            # Built with explicit columns so that a market without deals gives an empty table
            n_deals = pd.DataFrame({'underwriter':list(n_deals.keys()),'n_deals':list(n_deals.values())})
            # Copy, because the underwriter names of "n_deals" are overwritten in place below
            n_deals_prior = n_deals.copy()

            # HHI prior to merger
            hhi_prior = np.sum((n_deals['n_deals']/np.sum(n_deals['n_deals']))**2)

            # Implied HHI post merger: relabel each target as its acquiror's parent and re-aggregate
            CSAXBondChar_affected_episode = CSAXBondChar_affected_episode.reset_index(drop=True)
            for _,merger in CSAXBondChar_affected_episode.iterrows():
                n_deals.loc[n_deals['underwriter']==merger['target'],'underwriter'] = merger['acquiror_parent']
            # The string 'sum' selects the groupby sum without the warning raised for the builtin
            n_deals = n_deals.groupby('underwriter').agg({'n_deals':'sum'}).reset_index()
            hhi_predicted = np.sum((n_deals['n_deals']/np.sum(n_deals['n_deals']))**2)
            n_deals_post = n_deals

            hhi_dif = hhi_predicted-hhi_prior

            # Check if the implied increase in HHI is high enough
            if hhi_dif>0.01:
                # An episode is identified
                CSAXBondChar_episodes_impliedHHI_N.append({
                    'episode_start_year':sale_year,
                    'CSA Code':CSA_code,
                    BondChar:BondChar_value,
                    'mergers':CSAXBondChar_affected_episode,
                    'hhi_dif':hhi_dif,
                    'n_deals_prior':n_deals_prior,
                    'n_deals_post':n_deals_post,
                    })
                episode_start_year = sale_year

    CSAXBondChar_episodes_impliedHHI_N = pd.DataFrame(CSAXBondChar_episodes_impliedHHI_N)

    CSAXBondChar_episodes_impliedHHI_N_AllBondChars[BondChar] = CSAXBondChar_episodes_impliedHHI_N


## 3.3 Assemble a Treatment-Control Matched Sample

In [21]:
# %%script false --no-raise-error

# CSA-level demographics used to pick control CSAs. They depend on neither BondChar nor the
# episode definition, so they are read once here rather than once per loop iteration.
CSA_POP = pd.read_csv("../CleanData/Demographics/0C_CSA_Pop.csv")
CSA_INC = pd.read_csv("../CleanData/Demographics/0C_CSA_Inc.csv")
CSA_Data = CSA_POP.merge(CSA_INC,on=['CSA Code','year'])
CSA_Data = CSA_Data[['CSA Code','year','inc','pop']]
# CSA pairs (columns CSA_1, CSA_2); used below to rule out same-state CSAs as controls
Same_State_CSA_pairs = pd.read_csv("../CleanData/Demographics/0C_Same_State_CSA_pairs.csv")

for BondChar in BondChars:

    # Merger exposure and identified merger episodes for this bond characteristic
    CSAXBondChar_affected = CSAXBondChar_affected_AllBondChars[BondChar]
    CSAXBondChar_episodes_impliedHHI_N = CSAXBondChar_episodes_impliedHHI_N_AllBondChars[BondChar]

    # One entry per episode definition:
    # [description, episodes table, number of controls per episode, output path]
    episodes_files = [
        ["By Implied HHI Increase in terms of N deals, >= 0.01",CSAXBondChar_episodes_impliedHHI_N,1,
            '../CleanData/MAEvent/CSAXBondChar_'+BondChar+'_episodes_impliedHHIByN.csv',
        ],
        ]

    for episodes_file in episodes_files:

        criteria,episodes,N_matches,file_path = episodes_file

        # Work on a copy so the stored episodes table is not modified by the matching below
        episodes = episodes.copy()

        ########################################
        # Find control for each merger episode #
        ########################################

        def calculate_distance(row,weightingmat):
            """
            Mahalanobis distance between a candidate CSA and the treated CSA.

            row          : mapping with keys 'inc', 'pop' (candidate) and
                           'treated_inc', 'treated_pop' (treated CSA)
            weightingmat : matrix handed to scipy's mahalanobis as the inverse covariance
            """
            candidate_point = (row['inc'],row['pop'])
            treated_point = (row['treated_inc'],row['treated_pop'])
            return sp.spatial.distance.mahalanobis(candidate_point,treated_point,weightingmat)
        
        # 'control' holds the list of matched control CSA codes; it stays None when no match exists
        episodes['control'] = None
        for idx,row in episodes.iterrows():

            # Demographics of all CSAs in the year the episode starts
            CSA_Data_oneyear = CSA_Data[CSA_Data['year']==row['episode_start_year']].copy()

            # Demographic data of the treated CSA; without it no distance can be computed
            CSA_Data_oneyear_frag = CSA_Data_oneyear[CSA_Data_oneyear['CSA Code']==row['CSA Code']]
            if len(CSA_Data_oneyear_frag)==0:
                continue
            episode_pop = CSA_Data_oneyear_frag['pop'].iloc[0]
            episode_inc = CSA_Data_oneyear_frag['inc'].iloc[0]

            # Find a match
            CSA_Data_oneyear['treated_pop'] = episode_pop
            CSA_Data_oneyear['treated_inc'] = episode_inc
            # Get weighting matrix: inverse covariance of winsorised (5% each tail) income and population
            CSA_Data_oneyear['inc'] = winsor2(CSA_Data_oneyear['inc'],cutoffs=[0.05,0.05])
            CSA_Data_oneyear['pop'] = winsor2(CSA_Data_oneyear['pop'],cutoffs=[0.05,0.05])
            cov = CSA_Data_oneyear[['inc','pop']].cov()
            invcov = np.linalg.inv(cov)
            CSA_Data_oneyear['dist'] = CSA_Data_oneyear.apply(calculate_distance, axis=1,weightingmat=invcov)
            # Closest candidates first
            CSA_Data_oneyear = CSA_Data_oneyear.sort_values('dist').reset_index(drop=True)
            # Remove oneself from potential matches
            CSA_Data_oneyear = CSA_Data_oneyear[CSA_Data_oneyear['CSA Code']!=row['CSA Code']]
            # Remove other CSAs in the same state from potential matches
            Same_State_CSAs = list(Same_State_CSA_pairs[Same_State_CSA_pairs['CSA_1']==row['CSA Code']]['CSA_2'])
            CSA_Data_oneyear = CSA_Data_oneyear[~CSA_Data_oneyear['CSA Code'].isin(Same_State_CSAs)]

            # A candidate is disqualified if it is itself treated in any year of
            # [episode_start_year-4, episode_start_year+4]
            episode_window = set(range(row['episode_start_year']-4,row['episode_start_year']+5))

            match_counter = 0
            control = []
            for subidx,subrow in CSA_Data_oneyear.iterrows():
                # Years for which potential control is treated itself
                # (acquiror share and combined target share both above 0.01)
                CSAXBondChar_affected_frag = CSAXBondChar_affected[CSAXBondChar_affected['CSA Code']==subrow['CSA Code']]
                CSAXBondChar_affected_frag = CSAXBondChar_affected_frag[(CSAXBondChar_affected_frag['acquiror_market_share_N_avg']>0.01)&
                    (CSAXBondChar_affected_frag['target_market_share_N_avg']+CSAXBondChar_affected_frag['other_targets_market_share_N_avg']>0.01)]
                CSAXBondChar_affected_frag_affected_years = set(CSAXBondChar_affected_frag['sale_year'].unique())

                if len(episode_window.intersection(CSAXBondChar_affected_frag_affected_years))>0:
                    # This potential control is treated
                    continue

                # This potential control is not treated => Good control
                control = control+[subrow['CSA Code']]
                match_counter = match_counter+1
                if match_counter==N_matches:
                    break

            # Record the match only if at least one control was found. An empty list is not null,
            # so storing it would hide the episode from the "cannot be found" count below even
            # though it can never contribute control observations.
            if len(control)>0:
                episodes.at[idx,'control'] = control

        # Exclude cases where a match cannot be found
        print('A control cannot be found for '+str(np.sum(pd.isnull(episodes['control'])))+' episodes.')
        episodes = episodes[~pd.isnull(episodes['control'])]
    
        
        #############################################
        # Expand to include an event time dimension #
        #############################################
        
        # One row per episode and event year (year_to_merger runs from -4 to 10).
        # assign() builds a new frame, so the column is not written into the filtered `episodes`
        # frame through an alias (the chained-assignment pattern pandas warns about, and this
        # notebook turns warnings into errors).
        episodes_Exploded = episodes.assign(year_to_merger=[list(range(-4,11))]*len(episodes))
        episodes_Exploded = episodes_Exploded.explode('year_to_merger')
        episodes_Exploded['calendar_year'] = episodes_Exploded['episode_start_year']+episodes_Exploded['year_to_merger']
    
        
        ################################
        # Assemble a regression sample #
        ################################
    
        #------------------------#
        # Issue level, using GPF #
        #------------------------#
    
        # Columns of GPF carried into the regression sample (the same for treated and control issues)
        GPF_keep_colnames = [
            'CSA Code','sale_year','State','County',
            'issuer_type','Issuer',
            'avg_maturity','amount',
            'avg_yield','treasury_avg_spread','MMA_avg_spread',
            'gross_spread','gross_spread_tic_based','gross_spread_nic_based',
            'mod_tic','mod_tic_spread_treasury','mod_tic_spread_MMA',
            'mod_tic_timeFE','mod_tic_spread_treasury_timeFE','mod_tic_spread_MMA_timeFE',
            'underpricing_15to60','underpricing_15to30',
            'Bid','taxable_code','security_type','if_advisor','if_dual_advisor','if_refunding',
            'amount_bracket','mat_bracket','use_short','has_ratings',
            'use_of_proceeds_BB','use_of_proceeds_main','use_of_proceeds_general',
            'has_Moodys','has_Fitch','rating_Moodys','rating_Fitch','insured_amount',
            'AdvisorFeeRatio_hat','CRFeeRatio_hat','InsureFeeRatio_hat',
            'AdvisorFeeRatio_hat_model_timeFE','CRFeeRatio_hat_model_timeFE','InsureFeeRatio_hat_model_timeFE',
            'if_callable','CB_Eligible',
            'num_relationship',
            ]+name_GPF_colnames+parent_name_GPF_colnames
        # Every column that can hold the name of an underwriter or of an underwriter's parent
        underwriter_colnames = name_GPF_colnames+parent_name_GPF_colnames

        def _flag_underwriters(GPF_Seg,bank_names):
            """Row-wise flag: True if any underwriter/parent column of the issue holds one of bank_names."""
            # Missing names are dropped first: isin() matches missing values with each other, so a
            # missing merger party would otherwise flag every issue that has an empty underwriter slot
            bank_names = [name for name in bank_names if pd.notnull(name)]
            return GPF_Seg[underwriter_colnames].isin(bank_names).any(axis=1)

        def _build_GPF_segment(csa_code,row,treated):
            """
            Issues sold in CSA `csa_code` in the calendar year of event row `row`, with the
            episode's BondChar value, tagged with merger-involvement flags and episode identifiers.

            csa_code : CSA whose issues are selected (the treated CSA or one of its controls)
            row      : one row of episodes_Exploded
            treated  : value written to the 'treated' column (1 for treated, 0 for control)
            """
            GPF_Seg = GPF.loc[
                (GPF['sale_year']==row['calendar_year'])&
                (GPF[BondChar]==row[BondChar])&
                (GPF['CSA Code']==csa_code),
                GPF_keep_colnames].copy()

            #------------------------------------#
            # Some cross-sectional heterogeneity #
            #------------------------------------#

            # Note: this checks whether the bank is involved in any merger recorded for `csa_code`
            # itself in [-4,+4] around the episode start (positive acquiror and target shares),
            # not only in the mergers that define the episode. For control issues the mergers
            # are therefore those of the control CSA.
            mergers = CSAXBondChar_affected[
                (CSAXBondChar_affected['CSA Code']==csa_code)&
                (CSAXBondChar_affected['sale_year']>=row['episode_start_year']-4)&
                (CSAXBondChar_affected['sale_year']<=row['episode_start_year']+4)
                ][['acquiror','target','acquiror_parent','target_parent',
                'acquiror_market_share_N_avg','target_market_share_N_avg','other_targets_market_share_N_avg']]
            mergers = mergers[(mergers['acquiror_market_share_N_avg']>0)&(mergers['target_market_share_N_avg']+mergers['other_targets_market_share_N_avg']>0)]
            # Whether the underwriter is the target bank in M&A
            GPF_Seg['bank_is_target'] = _flag_underwriters(
                GPF_Seg,list(mergers['target'])+list(mergers['target_parent']))
            # Whether the underwriter is the acquiror bank in M&A
            GPF_Seg['bank_is_acquiror'] = _flag_underwriters(
                GPF_Seg,list(mergers['acquiror'])+list(mergers['acquiror_parent']))

            GPF_Seg['treated'] = treated
            GPF_Seg['episode_start_year'] = row['episode_start_year']
            GPF_Seg['year_to_merger'] = row['year_to_merger']
            GPF_Seg['calendar_year'] = row['calendar_year']
            GPF_Seg['treated_csa'] = row['CSA Code'] # Used for constructing cohort X issuer FEs
            GPF_Seg['treated_BondChar'] = row[BondChar] # Used for constructing cohort X issuer FEs
            return GPF_Seg

        reg_sample = []
        for idx,row in episodes_Exploded.iterrows():

            # Episodes without a control contribute nothing
            if row['control'] is None:
                continue

            # Treated observations
            GPF_Seg_Treated = _build_GPF_segment(row['CSA Code'],row,treated=1)
            # Event characteristics - strength (None when the episode table lacks the measure)
            GPF_Seg_Treated['acquiror_market_share_avg'] = row.get('acquiror_market_share_N_avg')
            GPF_Seg_Treated['target_market_share_avg'] = row.get('target_market_share_N_avg')
            GPF_Seg_Treated['other_targets_market_share_avg'] = row.get('other_targets_market_share_N_avg')
            GPF_Seg_Treated['hhi_dif'] = row.get('hhi_dif')

            # Control observations: collected in a list and concatenated once, instead of growing
            # a frame from an empty placeholder inside the loop
            control_segments = []
            for item in row['control']:
                GPF_Seg = _build_GPF_segment(item,row,treated=0)
                GPF_Seg['hhi_dif'] = row.get('hhi_dif')
                control_segments.append(GPF_Seg)
            if len(control_segments)==0:
                continue
            GPF_Seg_Control = pd.concat(control_segments)

            # Keep the event year only if both sides have at least one issue
            if len(GPF_Seg_Treated)>0 and len(GPF_Seg_Control)>0:
                reg_sample.extend([GPF_Seg_Treated,GPF_Seg_Control])

        reg_sample = pd.concat(reg_sample)
        # Attach HHI by CSA and calendar year (default inner join)
        reg_sample = reg_sample.merge(HHI_byCSA,on=['CSA Code','calendar_year'])
        County_Composite = pd.read_csv("../CleanData/Demographics/0C_County_Composite.csv")
        County_Composite = County_Composite[['year','State','County','black_ratio','pop']].rename(columns={'year':'calendar_year'})
        # Attach county demographics; county-years that match no issue are dropped again
        reg_sample = reg_sample.merge(County_Composite,on=['State','County','calendar_year'],how='outer',indicator=True)
        reg_sample = reg_sample[reg_sample['_merge']!='right_only'].drop(columns=['_merge'])
        reg_sample.to_csv(file_path)

        print('Exported regression sample for '+episodes_file[0])


A control cannot be found for 0 episodes.
Exported regression sample for By Implied HHI Increase in terms of N deals, >= 0.01
A control cannot be found for 0 episodes.
Exported regression sample for By Implied HHI Increase in terms of N deals, >= 0.01
A control cannot be found for 0 episodes.
Exported regression sample for By Implied HHI Increase in terms of N deals, >= 0.01
A control cannot be found for 1 episodes.
Exported regression sample for By Implied HHI Increase in terms of N deals, >= 0.01
A control cannot be found for 1 episodes.
Exported regression sample for By Implied HHI Increase in terms of N deals, >= 0.01
