In [1]:
import pandas as pd
import numpy as np
import scipy as sp
import os
import dask
import dask.dataframe as dd
import itertools
from itertools import chain
from math import sqrt, floor, ceil, isnan
import multiprocess
import importlib
from importlib import reload
from collections import Counter
from fuzzywuzzy import process, fuzz
import time
import seaborn as sns
import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import warnings
# Turn every warning into an exception for the rest of the notebook
warnings.filterwarnings("error")

# Display limits used when frames are rendered in the notebook
pd.set_option("display.max_columns", 500)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", 400)

# A customized winsorisation function that handles missing (NaN) values correctly.
# The percentiles are taken, and the winsorisation is done, on the non-missing values only.
def winsor2(series,cutoffs):
    """Winsorise the non-NaN entries of `series`, leaving NaN entries in place.

    Parameters
    ----------
    series : numeric pandas Series or numpy array
        Values to winsorise. The input is not modified.
    cutoffs : sequence of two floats
        `(lower, upper)` fractions passed to `scipy.stats.mstats.winsorize` as `limits`.

    Returns
    -------
    A copy of `series` (same container type) in which the non-NaN values have been
    winsorised among themselves. NaN positions are unchanged. When every entry is
    NaN (or the input is empty) the copy is returned as is.
    """

    # Imported inside the function, as in the original, so the function carries its own dependencies.
    # The submodule is imported explicitly: a bare "import scipy" does not load scipy.stats on every version.
    import numpy as np
    from scipy.stats import mstats

    is_missing = np.asarray(np.isnan(series),dtype=bool)
    is_present = ~is_missing

    series_new = series.copy()

    # winsorize() indexes into its input and fails on an empty array, so there is nothing to do here
    if not is_present.any():
        return series_new

    winsorised = mstats.winsorize(np.asarray(series)[is_present],limits=(cutoffs[0],cutoffs[1]))
    # winsorize() returns a masked array; write the plain values back at the non-NaN positions
    series_new[is_present] = np.ma.getdata(winsorised)

    return series_new


# 1. Import data

In [2]:
# GPF
GPF = pd.read_csv("../CleanData/SDC/0A_GPF.csv",low_memory=False)
# Groups of GPF columns, identified by name pattern
raw_name_GPF_colnames = [column for column in GPF.columns if 'raw_name_GPF_' in column]
name_GPF_colnames = [column for column in GPF.columns if column.startswith('name_GPF_')]
parent_name_GPF_colnames = [column for column in GPF.columns if 'parent_name_' in column]

# Parent relationship
GPF_names = pd.read_parquet('../CleanData/SDC/0H_GPF_Parent.parquet')

# HHI and market share of each underwriter
HHI_byCSA = pd.read_csv('../CleanData/SDC/1A_HHI_byCSA.csv')
market_share_all_markets_byCSA = pd.read_csv('../CleanData/SDC/1A_market_share_all_markets_byCSA.csv')
HHI_byCBSA = pd.read_csv('../CleanData/SDC/1A_HHI_byCBSA.csv')
market_share_all_markets_byCBSA = pd.read_csv('../CleanData/SDC/1A_market_share_all_markets_byCBSA.csv')

# All M&As
MA = pd.read_parquet('../CleanData/SDC/0B_M&A.parquet')
MA = MA.reset_index(drop=True)

# Withdrawn M&As
MA_withdrawn = pd.read_csv("../CleanData/SDC/0I_MA_withdrawn.csv")

# Quantity of issuance
StateXCountyXBid = pd.read_parquet("../CleanData/SDC/0A_StateXCountyXBid.parquet")
StateXCountyXUsageBB = pd.read_parquet("../CleanData/SDC/0A_StateXCountyXUsageBB.parquet")
StateXCountyXUsageGeneral = pd.read_parquet("../CleanData/SDC/0A_StateXCountyXUsageGeneral.parquet")
StateXCountyXUsageMain = pd.read_parquet("../CleanData/SDC/0A_StateXCountyXUsageMain.parquet")
StateXCountyXIssuerType = pd.read_parquet("../CleanData/SDC/0A_StateXCountyXIssuerType.parquet")

# Total amount per State X County X sale_year.
# The aggregation is named by string: passing the builtin `sum` makes recent pandas emit a FutureWarning,
# and this notebook turns warnings into exceptions.
StateXCounty = StateXCountyXBid.groupby(['State','County','sale_year']).agg({'amount':'sum'})
StateXCounty = StateXCounty.reset_index()

# Demographics
CSA_POP = pd.read_csv("../CleanData/Demographics/0C_CSA_Pop.csv")
CSA_INC = pd.read_csv("../CleanData/Demographics/0C_CSA_Inc.csv")
CBSA_POP = pd.read_csv("../CleanData/Demographics/0C_CBSA_Pop.csv")
CBSA_INC = pd.read_csv("../CleanData/Demographics/0C_CBSA_Inc.csv")

#-------------#
# Import CBSA #
#-------------#

# Full state / territory name -> two-letter abbreviation.
# Further down this dict is converted into a two-column frame ('State Name','State') and merged onto the CBSA table.
us_state_to_abbrev = {
    "Alabama": "AL",
    "Alaska": "AK",
    "Arizona": "AZ",
    "Arkansas": "AR",
    "California": "CA",
    "Colorado": "CO",
    "Connecticut": "CT",
    "Delaware": "DE",
    "Florida": "FL",
    "Georgia": "GA",
    "Hawaii": "HI",
    "Idaho": "ID",
    "Illinois": "IL",
    "Indiana": "IN",
    "Iowa": "IA",
    "Kansas": "KS",
    "Kentucky": "KY",
    "Louisiana": "LA",
    "Maine": "ME",
    "Maryland": "MD",
    "Massachusetts": "MA",
    "Michigan": "MI",
    "Minnesota": "MN",
    "Mississippi": "MS",
    "Missouri": "MO",
    "Montana": "MT",
    "Nebraska": "NE",
    "Nevada": "NV",
    "New Hampshire": "NH",
    "New Jersey": "NJ",
    "New Mexico": "NM",
    "New York": "NY",
    "North Carolina": "NC",
    "North Dakota": "ND",
    "Ohio": "OH",
    "Oklahoma": "OK",
    "Oregon": "OR",
    "Pennsylvania": "PA",
    "Rhode Island": "RI",
    "South Carolina": "SC",
    "South Dakota": "SD",
    "Tennessee": "TN",
    "Texas": "TX",
    "Utah": "UT",
    "Vermont": "VT",
    "Virginia": "VA",
    "Washington": "WA",
    "West Virginia": "WV",
    "Wisconsin": "WI",
    "Wyoming": "WY",
    "District of Columbia": "DC",
    "American Samoa": "AS",
    "Guam": "GU",
    "Northern Mariana Islands": "MP",
    "Puerto Rico": "PR",
    "United States Minor Outlying Islands": "UM",
    "U.S. Virgin Islands": "VI",
}

# Author's note: "CSA" is for metropolitan and "CBSA" also includes the micropolitan ones.
# Read the CBSA sheet (its first two rows are skipped) and keep only the rows that name a county.
CBSAData = pd.read_excel("../RawData/MSA/CBSA.xlsx",skiprows=[0,1])
CBSAData = CBSAData[CBSAData['County/County Equivalent'].notna()]

# Add state abbreviations: turn the name -> abbreviation dict into a two-column frame and merge it on
us_state_to_abbrev = pd.DataFrame(list(us_state_to_abbrev.items()),columns=['State Name','State'])
CBSAData = (
    CBSAData
    .rename(columns={'County/County Equivalent':'County'})
    .merge(us_state_to_abbrev,on='State Name',how='outer',indicator=True)
)
# Keep rows matched on both sides (the author notes the merge is perfect)
CBSAData = CBSAData[CBSAData['_merge']=='both'].drop(columns=['_merge'])

# Normalise county names: upper case, no " COUNTY" suffix, "&" for " AND ", no periods
CBSAData['County'] = (
    CBSAData['County']
    .str.upper()
    .str.replace(' COUNTY','')
    .str.replace(' AND ',' & ')
    .str.replace('.','',regex=False)
)

# Area codes as floats
for code_column in ['CSA Code','CBSA Code']:
    CBSAData[code_column] = CBSAData[code_column].astype(float)


# 2. Construct Events of M&As, Using CBSAs

In [3]:
%%time

# %%script false --no-raise-error

def _unique_names(frame, columns):
    """Return the set of distinct non-missing values found in `columns` of `frame`."""
    # Missing names may be None or NaN (GPF is read from CSV); both are dropped so that they can never
    # match anything in a later membership test or .isin() lookup
    return {item for item in frame[columns].to_numpy().ravel() if pd.notna(item)}


def _parent_or_self(GPF_names_m1, firm):
    """Return the first parent recorded for `firm` in `GPF_names_m1`, or `firm` itself when it has no record."""
    parents = GPF_names_m1.loc[GPF_names_m1['name_GPF']==firm,'parent_name']
    if len(parents)==0:
        # No record for this firm: it is treated as being its own parent
        return firm
    return parents.iloc[0]


def _first_market_share_N(market_share_oneCBSA, parent, calendar_year):
    """Return `market_share_N` of `parent` in `calendar_year` (first matching row), or 0 when there is no row."""
    shares = market_share_oneCBSA.loc[
        (market_share_oneCBSA['parent_name']==parent)
        &(market_share_oneCBSA['calendar_year']==calendar_year),'market_share_N']
    if len(shares)==0:
        # No row is read as no presence in the area in that year
        return 0
    return shares.iloc[0]


def _total_market_share_N(market_share_oneCBSA, parents, calendar_year):
    """Return the summed `market_share_N` of all `parents` in `calendar_year`, or 0 when there is no row."""
    shares = market_share_oneCBSA.loc[
        (market_share_oneCBSA['parent_name'].isin(parents))
        &(market_share_oneCBSA['calendar_year']==calendar_year),'market_share_N']
    if len(shares)==0:
        return 0
    return np.sum(shares)


def proc_list(MA_frag):
    """Find, for every merger in `MA_frag`, the CBSAs it affects and the prior market shares of the parties.

    Reads the notebook-level frames `GPF`, `GPF_names`, `MA` and `market_share_all_markets_byCBSA`.

    A CBSA counts as affected by a merger when the acquiror, the target, or another target of the same
    acquiror in the same year was active there in the three years before the merger. The acquiror and the
    target are matched against underwriter names, parent names and subsidiaries of those parents; other
    targets are matched against underwriter and parent names only.

    Parameters
    ----------
    MA_frag : pandas.DataFrame
        A slice of `MA`; the columns 'acquiror', 'target' and 'sale_year' are read.

    Returns
    -------
    pandas.DataFrame
        One row per (merger, affected CBSA) with the merger identifiers, the other targets, the parents of
        acquiror and target, and `market_share_N` of the acquiror, the target and the other targets in each
        of the three years before the merger (suffixes _m1, _m2, _m3). A missing market share is recorded
        as 0. The frame always carries the full set of columns, also when no CBSA is affected.
    """

    raw_name_GPF_colnames = [column for column in GPF.columns if 'raw_name_GPF_' in column]
    name_GPF_colnames = ['name_GPF_'+str(i) for i in range(0,len(raw_name_GPF_colnames))]
    parent_name_colnames = ['parent_name_'+str(i) for i in range(0,len(raw_name_GPF_colnames))]

    lags = (1,2,3)
    output_columns = \
        ['CBSA Code','sale_year','acquiror','target','other_targets','acquiror_parent','target_parent']+\
        [side+'_market_share_N_m'+str(lag) for side in ('acquiror','target','other_targets') for lag in lags]

    CBSA_affected = []
    MA_frag = MA_frag.reset_index(drop=True)

    for idx,row in MA_frag.iterrows():

        sale_year = row['sale_year']

        # Everything below is judged on activity in the three years PRIOR to the merger
        GPF_prioryears = GPF[(GPF['sale_year']>=sale_year-3)&(GPF['sale_year']<=sale_year-1)]
        GPF_names_prioryears = GPF_names[(GPF_names['sale_year']>=sale_year-3)&(GPF_names['sale_year']<=sale_year-1)]
        # The parent-subsidiary mapping of the year just before the merger is used to resolve parents
        GPF_names_m1 = GPF_names_prioryears[GPF_names_prioryears['sale_year']==sale_year-1]
        market_share_prioryears = market_share_all_markets_byCBSA[
            (market_share_all_markets_byCBSA['calendar_year']>=sale_year-3)&
            (market_share_all_markets_byCBSA['calendar_year']<=sale_year-1)]

        # Also check other targets of the acquiror in that year. This accounts for cases where post merger the newly formed
        # entity appears under a name that was not in the sample before. "MA_frag" cannot be used here or the other firm
        # involved in the merger would be missed; the whole sample "MA" is used instead
        other_targets = \
            list(MA[(MA['acquiror']==row['acquiror'])&
            (MA['sale_year']==sale_year)&
            (MA['target']!=row['target'])]['target'])

        # Parents depend on the merger only, not on the CBSA, so they are resolved once per merger.
        # "market_share_all_markets_byCBSA" is at parent level, hence the need for parents
        acquiror_parent = _parent_or_self(GPF_names_m1,row['acquiror'])
        target_parent = _parent_or_self(GPF_names_m1,row['target'])
        # Other targets can be either a parent or a standalone firm, so both their parents and their own names are kept
        other_targets_parents = list(set(
            list(GPF_names_m1[GPF_names_m1['name_GPF'].isin(other_targets)]['parent_name'])+
            list(other_targets)))

        for CBSA in list(GPF_prioryears['CBSA Code'].unique()):

            GPF_prioryears_oneCBSA = GPF_prioryears[GPF_prioryears['CBSA Code']==CBSA]

            # Underwriters in this CBSA, their parents, and the subsidiaries of those parents
            underwriters_priorMA = _unique_names(GPF_prioryears_oneCBSA,name_GPF_colnames)
            parents_priorMA = _unique_names(GPF_prioryears_oneCBSA,parent_name_colnames)
            subsidiaries_priorMA = set(GPF_names_prioryears[
                GPF_names_prioryears['parent_name'].isin(list(parents_priorMA))]['name_GPF'])

            # Determine if the merger affects the CBSA
            directly_active = parents_priorMA|underwriters_priorMA
            IF_acquiror_active = (row['acquiror'] in directly_active) or (row['acquiror'] in subsidiaries_priorMA)
            IF_target_active = (row['target'] in directly_active) or (row['target'] in subsidiaries_priorMA)
            IF_other_target_active = any(other_target in directly_active for other_target in other_targets)
            if not (IF_acquiror_active or IF_target_active or IF_other_target_active):
                # Nothing is recorded for an unaffected CBSA, so the market share lookups are skipped
                continue

            # Market shares in the years prior to the M&A, at parent level. A firm often has no market share
            # record in an area because it has no presence there; that is recorded as 0
            market_share_oneCBSA = market_share_prioryears[market_share_prioryears['CBSA Code']==CBSA]

            record = {
                'CBSA Code':CBSA,
                'sale_year':sale_year,
                'acquiror':row['acquiror'],
                'target':row['target'],
                'other_targets':other_targets,
                'acquiror_parent':acquiror_parent,
                'target_parent':target_parent,
            }
            # (1) Market share of acquiror
            for lag in lags:
                record['acquiror_market_share_N_m'+str(lag)] = \
                    _first_market_share_N(market_share_oneCBSA,acquiror_parent,sale_year-lag)
            # (2) Market share of target
            for lag in lags:
                record['target_market_share_N_m'+str(lag)] = \
                    _first_market_share_N(market_share_oneCBSA,target_parent,sale_year-lag)
            # (3) Market share of other targets in the same transaction
            for lag in lags:
                record['other_targets_market_share_N_m'+str(lag)] = \
                    _total_market_share_N(market_share_oneCBSA,other_targets_parents,sale_year-lag)

            CBSA_affected.append(record)

    return pd.DataFrame(CBSA_affected,columns=output_columns)

# Split the mergers into partitions and run proc_list on each of them in separate processes.
# `meta` gives dask the column layout of the frame that proc_list returns.
MA_dd = dd.from_pandas(MA, npartitions=40)
with dask.config.set(scheduler='processes',num_workers=40):
    CBSA_affected = MA_dd.map_partitions(
        proc_list,
        meta=pd.DataFrame(columns=[
            'CBSA Code','sale_year','acquiror','target',
            'other_targets','acquiror_parent','target_parent',
            'acquiror_market_share_N_m1','acquiror_market_share_N_m2','acquiror_market_share_N_m3',
            'target_market_share_N_m1','target_market_share_N_m2','target_market_share_N_m3',
            'other_targets_market_share_N_m1','other_targets_market_share_N_m2','other_targets_market_share_N_m3',
        ]),
    ).compute()

# Average market share over past three years, for each side of the merger
for side in ['acquiror','target','other_targets']:
    CBSA_affected[side+'_market_share_N_avg'] = (
        CBSA_affected[side+'_market_share_N_m1']
        +CBSA_affected[side+'_market_share_N_m2']
        +CBSA_affected[side+'_market_share_N_m3']
    )/3

# As this step takes significant time, export output
CBSA_affected.to_parquet('../CleanData/MAEvent/1B_CBSA_affected.parquet')

CPU times: user 1min 32s, sys: 1min, total: 2min 33s
Wall time: 1h 4min 44s


In [4]:
# Reload the table exported at the end of the previous cell, so the cells below can start from the saved file
CBSA_affected = pd.read_parquet('../CleanData/MAEvent/1B_CBSA_affected.parquet')

## 2.2 Identify merger episodes

### 2.2.1 Method 1: By market share

In [5]:
#----------------------------#
# Market share by N of deals #
#----------------------------#

# Identify episodes of mergers at the CBSA level.
#
# Walk through the years that have a merger event and look at the M&As of that year and of the three years after it. When
# the merging parties hold enough market share, the window is recorded as an episode. Consolidation is judged here by the
# market share averaged over the three years before the merger (the share in year minus one would be an alternative). For
# each identified episode, whether the sample has a reasonable control still has to be checked. "CBSA_affected" can contain
# duplicates: when two firms merge into a new one, both get recorded.

parent_name_colnames = ['parent_name_'+str(i) for i in range(0,len(raw_name_GPF_colnames))]

CBSA_episodes_marketshare_N = []

for CBSA in list(CBSA_affected['CBSA Code'].unique()):

    # Mergers in this CBSA where the acquiror and at least one target side had a positive prior market share
    CBSA_affected_part = CBSA_affected[CBSA_affected['CBSA Code']==CBSA]
    acquiror_present = CBSA_affected_part['acquiror_market_share_N_avg']>0
    target_present = \
        (CBSA_affected_part['target_market_share_N_avg']>0)|\
        (CBSA_affected_part['other_targets_market_share_N_avg']>0)
    CBSA_affected_part = CBSA_affected_part[acquiror_present&target_present].sort_values('sale_year')

    episode_start_year = 1900
    years = sorted(CBSA_affected_part['sale_year'].unique())
    for sale_year in years:

        # Skip years up to four years after the start of the last recorded episode
        if sale_year<=episode_start_year+4:
            continue

        # M&A activity in that year and the three years following
        in_window = CBSA_affected_part['sale_year'].between(sale_year,sale_year+3)
        # When a firm acquires multiple firms, the market share of the other firms is in "other_targets_market_share_N_avg",
        # so one record per acquiror and year is sufficient
        CBSA_affected_episode = CBSA_affected_part[in_window].drop_duplicates(['acquiror','sale_year'])

        acquiror_shares = CBSA_affected_episode['acquiror_market_share_N_avg']
        target_shares = CBSA_affected_episode['target_market_share_N_avg']
        other_targets_shares = CBSA_affected_episode['other_targets_market_share_N_avg']

        # Episode totals. Other aggregation methods might be more reasonable: targets tend to be smaller, so their
        # threshold could be smaller too, and a threshold on the implied HHI change of the merger would be more powerful
        acquiror_market_share_N_avg = acquiror_shares.sum()
        target_market_share_N_avg = target_shares.sum()
        other_targets_market_share_N_avg = other_targets_shares.sum()

        # Out of all mergers in this episode:
        combined_share = acquiror_shares+target_shares+other_targets_shares
        # (1) the max of sum of market shares of merging entities
        max_sum_share = combined_share.max()
        # (2) the max of min of market shares of merging entities
        max_min_share = np.minimum(acquiror_shares,target_shares+other_targets_shares).max()
        # (3) the mean of sum of market shares of merging entities
        mean_sum_share = combined_share.mean()

        # An episode needs enough market share on both the acquiror side and the target side
        if not (acquiror_market_share_N_avg>0.05 and target_market_share_N_avg+other_targets_market_share_N_avg>0.05):
            continue

        # An episode is identified
        CBSA_episodes_marketshare_N.append({
            'episode_start_year':sale_year,
            'CBSA Code':CBSA,
            'mergers':CBSA_affected_episode,
            'acquiror_market_share_N_avg':acquiror_market_share_N_avg,
            'target_market_share_N_avg':target_market_share_N_avg,
            'other_targets_market_share_N_avg':other_targets_market_share_N_avg,
            'max_sum_share':max_sum_share,
            'max_min_share':max_min_share,
            'mean_sum_share':mean_sum_share,
            })
        episode_start_year = sale_year

CBSA_episodes_marketshare_N = pd.DataFrame(CBSA_episodes_marketshare_N)


### 2.2.2 Method 2: By implied rise in HHI due to merger

In [6]:
#-----------------------------#
# Change in HHI by N of deals #
#-----------------------------#

# Identify episodes of mergers at the CBSA level.
#
# Go over each year with a merger event and look at the M&As of that year and of the three years after it. The number of
# deals of each parent firm in the CBSA is counted, the HHI is computed from these counts, and it is computed again after
# relabelling each target as its acquiror's parent. When the implied rise in HHI is large enough, an episode is identified.
# For each identified episode, whether the sample has a reasonable control still has to be checked. "CBSA_affected" can
# contain duplicates: when two firms merge into a new one, both get recorded.

# Defined before the loop: the loop body reads it, so this cell no longer depends on another cell having set it
parent_name_colnames = ['parent_name_'+str(i) for i in range(0,len(raw_name_GPF_colnames))]

CBSA_episodes_impliedHHI_N = []

for CBSA in list(CBSA_affected['CBSA Code'].unique()):

    CBSA_affected_part = CBSA_affected[CBSA_affected['CBSA Code']==CBSA]
    CBSA_affected_part = CBSA_affected_part[
        (CBSA_affected_part['acquiror_market_share_N_avg']>0)&
        ((CBSA_affected_part['target_market_share_N_avg']>0)|
        (CBSA_affected_part['other_targets_market_share_N_avg']>0))].sort_values('sale_year')

    episode_start_year = 1900
    for sale_year in CBSA_affected_part['sale_year'].unique():

        # Skip years up to four years after the start of the last recorded episode
        if sale_year<=episode_start_year+4:
            continue

        # M&As in that year and the three years following
        CBSA_affected_episode = CBSA_affected_part[(CBSA_affected_part['sale_year']>=sale_year)&(CBSA_affected_part['sale_year']<=sale_year+3)]
        # Deals in this CBSA used to measure the market before the episode.
        # NOTE(review): this window includes `sale_year` itself, not only the three prior years - confirm this is intended
        GPF_oneCBSA_priorMA = GPF[(GPF['sale_year']>=sale_year-3)&(GPF['sale_year']<=sale_year)&(GPF['CBSA Code']==CBSA)]

        # Calculate (1) HHI (by parent firm) before the episode (2) Predicted HHI after the mergers complete

        # Parent firms in the market, each starting with zero deals
        name_GPFs = list(chain.from_iterable(list(np.array(GPF_oneCBSA_priorMA[parent_name_colnames]))))
        name_GPFs = [item for item in name_GPFs if item!=None]
        name_GPFs = [item for item in name_GPFs if str(item)!='nan']
        name_GPFs = list(set(name_GPFs))
        n_deals = {item:0 for item in name_GPFs}

        # Count deals before the merger episode; a deal is split equally among its underwriters' parents
        for idx,row in GPF_oneCBSA_priorMA.iterrows():
            underwriters_onedeal = [row[item] for item in parent_name_colnames if row[item]!=None and str(row[item])!='nan']
            n_underwriters = len(underwriters_onedeal)
            for item in underwriters_onedeal:
                n_deals[item] = n_deals[item]+1/n_underwriters
        n_deals = pd.DataFrame.from_dict(n_deals,orient='index').reset_index()
        n_deals = n_deals.rename(columns={'index':'underwriter',0:'n_deals'})
        # Keep an independent copy: "n_deals" is relabelled in place below, and a plain alias would be relabelled with it
        n_deals_prior = n_deals.copy()

        # HHI prior to merger
        hhi_piror = np.sum((n_deals['n_deals']/np.sum(n_deals['n_deals']))**2)

        # Implied HHI post merger: every target is relabelled as its acquiror's parent and the counts are re-aggregated.
        # NOTE(review): "underwriter" holds parent names while the match is on row['target'] - confirm that
        # 'target_parent' is not what should be matched here
        CBSA_affected_episode = CBSA_affected_episode.reset_index(drop=True)
        for idx,row in CBSA_affected_episode.iterrows():
            n_deals.loc[n_deals['underwriter']==row['target'],'underwriter'] = row['acquiror_parent']
        # The aggregation is named by string: the builtin `sum` makes recent pandas emit a FutureWarning,
        # and this notebook turns warnings into exceptions
        n_deals = n_deals.groupby('underwriter').agg({'n_deals':'sum'}).reset_index()
        hhi_predicted = np.sum((n_deals['n_deals']/np.sum(n_deals['n_deals']))**2)
        n_deals_post = n_deals

        hhi_dif = hhi_predicted-hhi_piror

        # Check if the implied rise in HHI is high enough
        if hhi_dif>0.01:
            # An episode is identified
            CBSA_episodes_impliedHHI_N.append({
                'episode_start_year':sale_year,
                'CBSA Code':CBSA,
                'mergers':CBSA_affected_episode,
                'hhi_dif':hhi_dif,
                'n_deals_prior':n_deals_prior,
                'n_deals_post':n_deals_post,
                })
            episode_start_year = sale_year

CBSA_episodes_impliedHHI_N = pd.DataFrame(CBSA_episodes_impliedHHI_N)


### 2.2.3 Method 3: By implied rise in top 5 share due to merger

In [7]:
#-------------------------------------#
# Change in top 5 share by N of deals #
#-------------------------------------#

# Identify episodes of mergers at the CBSA level.
#
# Go over each year with a merger event and look at the M&As of that year and of the three years after it. The number of
# deals of each parent firm in the CBSA is counted, the combined share of the five largest firms is computed, and it is
# computed again after relabelling each target as its acquiror's parent. When the implied rise in the top 5 share is large
# enough, an episode is identified. For each identified episode, whether the sample has a reasonable control still has to
# be checked. "CBSA_affected" can contain duplicates: when two firms merge into a new one, both get recorded.

# Defined before the loop: the loop body reads it, so this cell no longer depends on another cell having set it
parent_name_colnames = ['parent_name_'+str(i) for i in range(0,len(raw_name_GPF_colnames))]

CBSA_episodes_top5share_N = []

for CBSA in list(CBSA_affected['CBSA Code'].unique()):

    CBSA_affected_part = CBSA_affected[CBSA_affected['CBSA Code']==CBSA]
    CBSA_affected_part = CBSA_affected_part[
        (CBSA_affected_part['acquiror_market_share_N_avg']>0)&
        ((CBSA_affected_part['target_market_share_N_avg']>0)|
        (CBSA_affected_part['other_targets_market_share_N_avg']>0))].sort_values('sale_year')

    episode_start_year = 1900
    for sale_year in CBSA_affected_part['sale_year'].unique():

        # Skip years up to four years after the start of the last recorded episode
        if sale_year<=episode_start_year+4:
            continue

        # M&As in that year and the three years following
        CBSA_affected_episode = CBSA_affected_part[(CBSA_affected_part['sale_year']>=sale_year)&(CBSA_affected_part['sale_year']<=sale_year+3)]
        # Deals in this CBSA used to measure the market before the episode.
        # NOTE(review): this window includes `sale_year` itself, not only the three prior years - confirm this is intended
        GPF_oneCBSA_priorMA = GPF[(GPF['sale_year']>=sale_year-3)&(GPF['sale_year']<=sale_year)&(GPF['CBSA Code']==CBSA)]

        # Calculate (1) Top 5 share (by parent firm) before the episode (2) Predicted top 5 share after the mergers complete

        # Parent firms in the market, each starting with zero deals
        name_GPFs = list(chain.from_iterable(list(np.array(GPF_oneCBSA_priorMA[parent_name_colnames]))))
        name_GPFs = [item for item in name_GPFs if item!=None]
        name_GPFs = [item for item in name_GPFs if str(item)!='nan']
        name_GPFs = list(set(name_GPFs))
        n_deals = {item:0 for item in name_GPFs}

        # Count deals before the merger episode; a deal is split equally among its underwriters' parents
        for idx,row in GPF_oneCBSA_priorMA.iterrows():
            underwriters_onedeal = [row[item] for item in parent_name_colnames if row[item]!=None and str(row[item])!='nan']
            n_underwriters = len(underwriters_onedeal)
            for item in underwriters_onedeal:
                n_deals[item] = n_deals[item]+1/n_underwriters
        n_deals = pd.DataFrame.from_dict(n_deals,orient='index').reset_index()
        n_deals = n_deals.rename(columns={'index':'underwriter',0:'n_deals'})

        # Top 5 share prior to merger
        n_deals['marketshare'] = n_deals['n_deals']/np.sum(n_deals['n_deals'])
        # Snapshot of the pre-merger table (with market shares), independent of the relabelling done below
        n_deals_prior = n_deals.copy()
        n_deals = n_deals.sort_values(by=['n_deals'],ascending=False).reset_index(drop=True)
        # With five firms or fewer, the top five hold the whole market
        top5share_prior = 1 if len(n_deals)<=5 else np.sum(n_deals['marketshare'][:5])

        # Implied top 5 share post merger: every target is relabelled as its acquiror's parent and the counts are re-aggregated.
        # NOTE(review): "underwriter" holds parent names while the match is on row['target'] - confirm that
        # 'target_parent' is not what should be matched here
        CBSA_affected_episode = CBSA_affected_episode.reset_index(drop=True)
        for idx,row in CBSA_affected_episode.iterrows():
            n_deals.loc[n_deals['underwriter']==row['target'],'underwriter'] = row['acquiror_parent']
        # The aggregation is named by string: the builtin `sum` makes recent pandas emit a FutureWarning,
        # and this notebook turns warnings into exceptions
        n_deals = n_deals.groupby('underwriter').agg({'n_deals':'sum'}).reset_index()
        n_deals['marketshare'] = n_deals['n_deals']/np.sum(n_deals['n_deals'])
        n_deals = n_deals.sort_values(by=['n_deals'],ascending=False).reset_index(drop=True)
        top5share_post = 1 if len(n_deals)<=5 else np.sum(n_deals['marketshare'][:5])
        n_deals_post = n_deals

        top5share_dif = top5share_post-top5share_prior

        # Market shares used in summary statistics: taken from the merger whose smaller side is the largest
        CBSA_affected_episode['min_share'] = np.minimum(CBSA_affected_episode['acquiror_market_share_N_avg'],
            CBSA_affected_episode['target_market_share_N_avg']+\
            CBSA_affected_episode['other_targets_market_share_N_avg'])
        CBSA_affected_episode = CBSA_affected_episode.sort_values('min_share')
        CBSA_affected_episode_topshare = CBSA_affected_episode[-1:]
        acquiror_market_share_N_max = np.max(CBSA_affected_episode_topshare['acquiror_market_share_N_avg'])
        target_market_share_N_max = np.max(CBSA_affected_episode_topshare['target_market_share_N_avg'])
        other_targets_market_share_N_max = np.max(CBSA_affected_episode_topshare['other_targets_market_share_N_avg'])

        # Check if the implied rise in top 5 share is high enough
        if top5share_dif>0.05:
            # An episode is identified
            CBSA_episodes_top5share_N.append({
                'episode_start_year':sale_year,
                'CBSA Code':CBSA,
                'mergers':CBSA_affected_episode,
                'top5share_dif':top5share_dif,
                'n_deals_prior':n_deals_prior,
                'n_deals_post':n_deals_post,
                'acquiror_market_share_N_max':acquiror_market_share_N_max,
                'target_market_share_N_max':target_market_share_N_max,
                'other_targets_market_share_N_max':other_targets_market_share_N_max,
                })
            episode_start_year = sale_year

CBSA_episodes_top5share_N = pd.DataFrame(CBSA_episodes_top5share_N)


# 3. Placebo Tests, Using CSA

## 3.1 Withdrawn M&A

In [8]:
# Withdrawn M&As: the announcement year is renamed to 'sale_year', the name used for the year of completed mergers in `MA`,
# and only the three identifying columns are kept
MA_withdrawn = (
    pd.read_csv("../CleanData/SDC/0I_MA_withdrawn.csv")
    .rename(columns={'announce_year':'sale_year'})
)
MA_withdrawn = MA_withdrawn[['target','acquiror','sale_year']]

### 3.2.1 Find CBSA X Year that could be affected by withdrawn merger

In [9]:
# Find CBSA X Year that could be affected by withdrawn merger

# Go over each merger. Check the CBSAs affected by the merger (i.e., either side has business in the CBSA in the year prior 
# to the merger). Check if the merger affects just one underwriter or affects multiple underwriters in this CBSA.

# Note that for the column "market share of other targets", the optimal object to put there is the market share of the other target
# alone. Here I am instead putting in market share of the other target's parent. This should make a minimal difference.

# One underwriter-name column and one parent-name column per 'raw_name_GPF_' column in GPF
name_GPF_colnames = [f'name_GPF_{i}' for i in range(len(raw_name_GPF_colnames))]
parent_name_colnames = [f'parent_name_{i}' for i in range(len(raw_name_GPF_colnames))]

def proc_list(MA_withdrawn_frag):
    """Find the CBSA X year cells that each withdrawn merger in a fragment could have affected.

    For every merger (row) in the fragment, go over each CBSA that has deals in GPF in the three
    years before the merger's sale_year. A CBSA is recorded when the acquiror, the target, or another
    target of the same acquiror in the same year is active there in those years.

    Parameters
    ----------
    MA_withdrawn_frag : pd.DataFrame
        Subset of the withdrawn mergers; the columns 'acquiror', 'target' and 'sale_year' are read.

    Returns
    -------
    pd.DataFrame
        One row per (merger, affected CBSA) with the columns 'CBSA Code', 'sale_year', 'acquiror',
        'target', 'other_targets', 'acquiror_parent', 'target_parent' and, for each of acquiror /
        target / other_targets, the market share by number of deals in years -1, -2 and -3
        ('<side>_market_share_N_m1' ... '_m3'). A missing market share is recorded as 0. The frame
        is empty (no columns) when no CBSA is affected.

    Notes
    -----
    Reads the notebook-level objects GPF, GPF_names, MA_withdrawn, market_share_all_markets_byCBSA,
    name_GPF_colnames and parent_name_colnames. The helpers are nested so that the function stays
    self-contained when it is shipped to worker processes.
    """

    def first_or_default(values, default):
        # First element of a (possibly empty) Series, or "default" when nothing matched
        return values.iloc[0] if len(values)>0 else default

    def parent_of(name, year):
        # Parent of "name" in the parent-subsidiary mapping of "year". A firm without an entry in that
        # year (e.g. because it is a parent itself) is treated as its own parent
        matched = GPF_names[(GPF_names['name_GPF']==name)&(GPF_names['sale_year']==year)]['parent_name']
        return first_or_default(matched, name)

    def single_share(parent, CBSA, year):
        # Market share (by N deals) of one parent in one CBSA and year. No record means no presence => 0
        matched = market_share_all_markets_byCBSA[
            (market_share_all_markets_byCBSA['parent_name']==parent)
            &(market_share_all_markets_byCBSA['CBSA Code']==CBSA)
            &(market_share_all_markets_byCBSA['calendar_year']==year)]['market_share_N']
        return first_or_default(matched, 0)

    def summed_share(parents, CBSA, year):
        # Total market share (by N deals) of several parents in one CBSA and year; 0 if none is present
        matched = market_share_all_markets_byCBSA[
            (market_share_all_markets_byCBSA['parent_name'].isin(parents))
            &(market_share_all_markets_byCBSA['CBSA Code']==CBSA)
            &(market_share_all_markets_byCBSA['calendar_year']==year)]['market_share_N']
        return np.sum(matched) if len(matched)>0 else 0

    def names_in(frame, columns):
        # Distinct non-missing names found in the given columns. Empty underwriter slots can be NaN
        # (not only None), and a NaN that survives here would make ".isin" below match rows of
        # GPF_names whose parent is missing
        values = chain.from_iterable(list(np.array(frame[columns])))
        return set(item for item in values if pd.notna(item))

    records = []
    MA_withdrawn_frag = MA_withdrawn_frag.reset_index(drop=True)

    for _,row in MA_withdrawn_frag.iterrows():

        sale_year = row['sale_year']

        # Find CBSAs that this merger affects
        # Determine if an underwriter is active in a CBSA based on activity of PRIOR years
        GPF_prioryears = GPF[(GPF['sale_year']>=sale_year-3)&(GPF['sale_year']<=sale_year-1)]

        # Also check other targets of the acquiror in that year. This accounts for cases where post merger the new formed entity
        # is new and appears as a name that was not in the sample before. Note that the fragment cannot be used here or the
        # other firm involved in the merger will be missed. Instead, use the whole sample "MA_withdrawn"
        other_targets = \
            list(MA_withdrawn[(MA_withdrawn['acquiror']==row['acquiror'])&
            (MA_withdrawn['sale_year']==sale_year)&
            (MA_withdrawn['target']!=row['target'])]['target'])

        # Parents are looked up in the mapping of the year prior to the merger, as
        # "market_share_all_markets_byCBSA" is at the parent level. None of this depends on the CBSA,
        # so it is done once per merger
        acquiror_parent = parent_of(row['acquiror'], sale_year-1)
        target_parent = parent_of(row['target'], sale_year-1)
        # Account for possibility that other targets can be either a parent or a standalone firm
        other_targets_parents = \
            list(GPF_names[(GPF_names['name_GPF'].isin(other_targets))
            &(GPF_names['sale_year']==sale_year-1)]['parent_name'])+\
            list(other_targets)
        other_targets_parents = list(set(other_targets_parents))

        for CBSA in list(GPF_prioryears['CBSA Code'].unique()):

            GPF_prioryears_oneCBSA = GPF_prioryears[GPF_prioryears['CBSA Code']==CBSA]

            # Underwriters in this CBSA
            underwriters_priorMA = names_in(GPF_prioryears_oneCBSA, name_GPF_colnames)
            # Parents of underwriters in this CBSA
            parents_priorMA = names_in(GPF_prioryears_oneCBSA, parent_name_colnames)
            # Subsidiaries of parents in this CBSA (using data of PRIOR years)
            subsidiaries_priorMA = set(GPF_names[
                (GPF_names['parent_name'].isin(list(parents_priorMA)))&
                (GPF_names['sale_year']>=sale_year-3)&
                (GPF_names['sale_year']<=sale_year-1)]['name_GPF'])

            # Determine if merger affects the CBSA, and which sides have business
            IF_acquiror_active = \
                (row['acquiror'] in parents_priorMA) or (row['acquiror'] in underwriters_priorMA) \
                or (row['acquiror'] in subsidiaries_priorMA)
            IF_target_active = \
                (row['target'] in parents_priorMA) or (row['target'] in underwriters_priorMA) \
                or (row['target'] in subsidiaries_priorMA)
            # NOTE(review): unlike acquiror and target, other targets are not checked against the
            # subsidiaries. Kept as in the original; confirm that this is intended
            IF_other_target_active = any(
                (other_target in parents_priorMA) or (other_target in underwriters_priorMA)
                for other_target in other_targets)

            # Nothing is recorded for an unaffected CBSA, so skip the market share lookups for it
            if not (IF_acquiror_active or IF_target_active or IF_other_target_active):
                continue

            # Get market share of merged banks. Note that this is the market share in the years prior to M&A. Also note that market
            # share "market_share_all_markets_byCBSA" is calculated at the parent level. There are many cases where market share of a
            # firm in an area is unavailable, which is because of no presence.
            records.append({
                'CBSA Code':CBSA,
                'sale_year':sale_year,
                'acquiror':row['acquiror'],
                'target':row['target'],
                'other_targets':other_targets,
                'acquiror_parent':acquiror_parent,
                'target_parent':target_parent,
                'acquiror_market_share_N_m1':single_share(acquiror_parent, CBSA, sale_year-1),
                'acquiror_market_share_N_m2':single_share(acquiror_parent, CBSA, sale_year-2),
                'acquiror_market_share_N_m3':single_share(acquiror_parent, CBSA, sale_year-3),
                'target_market_share_N_m1':single_share(target_parent, CBSA, sale_year-1),
                'target_market_share_N_m2':single_share(target_parent, CBSA, sale_year-2),
                'target_market_share_N_m3':single_share(target_parent, CBSA, sale_year-3),
                'other_targets_market_share_N_m1':summed_share(other_targets_parents, CBSA, sale_year-1),
                'other_targets_market_share_N_m2':summed_share(other_targets_parents, CBSA, sale_year-2),
                'other_targets_market_share_N_m3':summed_share(other_targets_parents, CBSA, sale_year-3),
            })

    CBSA_affected_withdrawn = pd.DataFrame(records)

    return CBSA_affected_withdrawn

# Spread the withdrawn mergers over 40 partitions and run proc_list on each of them in a separate
# process. "meta" lists the columns of the frame that proc_list returns
MA_withdrawn_dd = dd.from_pandas(MA_withdrawn, npartitions=40)
with dask.config.set(scheduler='processes',num_workers=40):
    CBSA_affected_withdrawn = MA_withdrawn_dd.map_partitions(
        proc_list,
        meta=pd.DataFrame(columns=
            ['CBSA Code','sale_year','acquiror','target',
            'other_targets','acquiror_parent','target_parent']
            +[side+'_market_share_N_m'+str(lag)
                for side in ('acquiror','target','other_targets')
                for lag in (1,2,3)]),
        ).compute()

# Average market share over past three years, separately for acquiror, target and other targets
CBSA_affected_withdrawn = CBSA_affected_withdrawn.assign(**{
    side+'_market_share_N_avg':
        (CBSA_affected_withdrawn[side+'_market_share_N_m1']
        +CBSA_affected_withdrawn[side+'_market_share_N_m2']
        +CBSA_affected_withdrawn[side+'_market_share_N_m3'])/3
    for side in ('acquiror','target','other_targets')
    })


### 3.1.2 Construct events

In [14]:
#-----------------------------------------------------#
# Withdrawn episodes with Delta HHI of each threshold #
#-----------------------------------------------------#

# Columns that hold the parent of each underwriter on a deal. They are the same in every iteration,
# so build them once before the loops instead of after their first use inside the innermost loop
parent_name_colnames = ['parent_name_'+str(i) for i in range(0,len(raw_name_GPF_colnames))]

for threshold in [0.01,0.005,0.003,0.002,0.001]:

    # Identify episodes of withdrawn mergers at the CBSA level

    # Go over each year with merger event, and check the M&As on this year and three years afterwards. If enough consolidation, an episode is
    # identified. Here "enough consolidation" means that the implied increase in HHI (by number of deals, at the parent level) exceeds
    # "threshold". Note that there can be duplicates as in cases where two firms merge into a new one, both will get recorded in
    # "CBSA_affected_withdrawn"

    CBSA_episodes_Withdrawn = []

    for CBSA in list(CBSA_affected_withdrawn['CBSA Code'].unique()):

        # Withdrawn mergers in this CBSA where the acquiror and at least one target side have presence
        CBSA_affected_part = CBSA_affected_withdrawn[CBSA_affected_withdrawn['CBSA Code']==CBSA]
        CBSA_affected_part = CBSA_affected_part[
            (CBSA_affected_part['acquiror_market_share_N_avg']>0)&
            ((CBSA_affected_part['target_market_share_N_avg']>0)|
            (CBSA_affected_part['other_targets_market_share_N_avg']>0))].sort_values('sale_year')

        episode_start_year = 1900
        for sale_year in CBSA_affected_part['sale_year'].unique():

            # If this year is still within the last merger episode
            if sale_year<=episode_start_year+4:
                continue

            # Check intensity of M&A activities in that year and three years following
            CBSA_affected_episode = CBSA_affected_part[(CBSA_affected_part['sale_year']>=sale_year)&(CBSA_affected_part['sale_year']<=sale_year+3)]
            # NOTE(review): this window is [sale_year-3, sale_year], i.e. it includes the episode's first year - confirm
            GPF_oneCBSA_priorMA = GPF[(GPF['sale_year']>=sale_year-3)&(GPF['sale_year']<=sale_year)&(GPF['CBSA Code']==CBSA)]

            # Calculate (1) HHI (by parent firm) before the episode (2) Predicted HHI after the mergers complete

            # Underwriters (parents) in the market, dropping empty slots
            name_GPFs = list(chain.from_iterable(list(np.array(GPF_oneCBSA_priorMA[parent_name_colnames]))))
            name_GPFs = [item for item in name_GPFs if item is not None and str(item)!='nan']
            name_GPFs = list(set(name_GPFs))
            n_deals = {item:0 for item in name_GPFs}

            # Record market shares before merger episode. Each deal is split equally among its underwriters
            for idx,row in GPF_oneCBSA_priorMA.iterrows():
                underwriters_onedeal = [row[item] for item in parent_name_colnames if row[item] is not None and str(row[item])!='nan']
                n_underwriters = len(underwriters_onedeal)
                for item in underwriters_onedeal:
                    n_deals[item] = n_deals[item]+1/n_underwriters
            n_deals = pd.DataFrame.from_dict(n_deals,orient='index').reset_index()
            n_deals = n_deals.rename(columns={'index':'underwriter',0:'n_deals'})
            # Take a copy: "n_deals" is relabelled in place below, and without the copy the "prior"
            # snapshot stored with the episode would carry the post-merger names
            n_deals_prior = n_deals.copy()

            # HHI prior to merger
            hhi_prior = np.sum((n_deals['n_deals']/np.sum(n_deals['n_deals']))**2)

            # Implied HHI post merger: hand the deals of each target to the parent of its acquiror
            # NOTE(review): "n_deals" is keyed by parent names but is matched on row['target'] rather than
            # row['target_parent'], and other targets are not relabelled - confirm that this is intended
            CBSA_affected_episode = CBSA_affected_episode.reset_index(drop=True)
            for idx,row in CBSA_affected_episode.iterrows():
                n_deals.loc[n_deals['underwriter']==row['target'],'underwriter'] = row['acquiror_parent']
            # Use the string 'sum': passing the builtin is deprecated in recent pandas, and this notebook
            # turns warnings into errors
            n_deals = n_deals.groupby('underwriter').agg({'n_deals':'sum'}).reset_index()
            hhi_predicted = np.sum((n_deals['n_deals']/np.sum(n_deals['n_deals']))**2)
            n_deals_post = n_deals

            hhi_dif = hhi_predicted-hhi_prior

            # Largest average market shares among the mergers of this episode. These are computed here
            # because the cell otherwise records whatever values an earlier cell left in these names
            acquiror_market_share_N_max = np.max(CBSA_affected_episode['acquiror_market_share_N_avg'])
            target_market_share_N_max = np.max(CBSA_affected_episode['target_market_share_N_avg'])
            other_targets_market_share_N_max = np.max(CBSA_affected_episode['other_targets_market_share_N_avg'])

            # Check if the implied increase in HHI in the episode is high enough
            if hhi_dif>threshold:
                # An episode is identified
                CBSA_episodes_Withdrawn.append({
                    'episode_start_year':sale_year,
                    'CBSA Code':CBSA,
                    'mergers':CBSA_affected_episode,
                    'hhi_dif':hhi_dif,
                    'n_deals_prior':n_deals_prior,
                    'n_deals_post':n_deals_post,
                    'acquiror_market_share_N_max':acquiror_market_share_N_max,
                    'target_market_share_N_max':target_market_share_N_max,
                    'other_targets_market_share_N_max':other_targets_market_share_N_max,
                    })
                episode_start_year = sale_year

    CBSA_episodes_Withdrawn = pd.DataFrame(CBSA_episodes_Withdrawn)


    #----------------------------------------------#
    # Check and rule out if affected by actual M&A #
    #----------------------------------------------#

    # Check if there is any actual M&A with sale_year in [episode_start_year-1, episode_start_year+5] that has both sides'
    # market share above 0.01. NOTE(review): this comment used to say [-4,+4]; the window below is what the code applies
    CBSA_episodes_Withdrawn['if_also_withinMA'] = False
    for sub_idx,sub_row in CBSA_episodes_Withdrawn.iterrows():

        # Actual M&As in a candidate placebo CBSA
        CBSA_affected_part = CBSA_affected[CBSA_affected['CBSA Code']==sub_row['CBSA Code']]
        CBSA_affected_part = CBSA_affected_part[
            (CBSA_affected_part['acquiror_market_share_N_avg']>0)&
            ((CBSA_affected_part['target_market_share_N_avg']>0)|
            (CBSA_affected_part['other_targets_market_share_N_avg']>0))].sort_values('sale_year')
        # Note that market share is on a rolling basis of every three years, so I only need to start from year -1
        CBSA_affected_part = CBSA_affected_part[
            (CBSA_affected_part['sale_year']>=sub_row['episode_start_year']-1)&
            (CBSA_affected_part['sale_year']<=sub_row['episode_start_year']+5)
            ]
        CBSA_affected_episode = CBSA_affected_part.copy()
        CBSA_affected_episode = CBSA_affected_episode[(CBSA_affected_episode['acquiror_market_share_N_avg']>0.01)&
            (CBSA_affected_episode['target_market_share_N_avg']+CBSA_affected_episode['other_targets_market_share_N_avg']>0.01)]
        if len(CBSA_affected_episode)>0:
            CBSA_episodes_Withdrawn.at[sub_idx,'if_also_withinMA'] = True

    # Keep only placebo episodes that do not coincide with an actual M&A
    CBSA_episodes_Withdrawn = CBSA_episodes_Withdrawn[~CBSA_episodes_Withdrawn['if_also_withinMA']]

    # Store the result under the name that later cells use for this threshold
    if threshold==0.01:
        CBSA_episodes_Withdrawn_DeltaHHI100 = CBSA_episodes_Withdrawn
    if threshold==0.005:
        CBSA_episodes_Withdrawn_DeltaHHI50 = CBSA_episodes_Withdrawn
    if threshold==0.003:
        CBSA_episodes_Withdrawn_DeltaHHI30 = CBSA_episodes_Withdrawn
    if threshold==0.002:
        CBSA_episodes_Withdrawn_DeltaHHI20 = CBSA_episodes_Withdrawn
    if threshold==0.001:
        CBSA_episodes_Withdrawn_DeltaHHI10 = CBSA_episodes_Withdrawn


# 4. Assemble a Treatment-Control Matched Sample

In [18]:
def _episode_spec(criteria, episodes, stem, N_matches=1):
    # One entry of "episodes_files": [description, episodes frame, number of controls per episode,
    # then the six output paths], all of which share the stem under ../CleanData/MAEvent/
    base = '../CleanData/MAEvent/'+stem
    return [criteria,episodes,N_matches,
        base+'.csv',
        base+'_bondlevel.csv',
        base+'_Quant.csv',
        base+'_Quant_GeneralUse.csv',
        base+'_Quant_IssuerType.csv',
        base+'_GovFin.csv',
    ]

# Every set of episodes to be matched with controls, together with where its outputs are written
episodes_files = [
    _episode_spec("By Market Share in terms of N deals",
        CBSA_episodes_marketshare_N,'CBSA_episodes_marketshareByN'),
    _episode_spec("By Implied HHI Increase in terms of N deals, >= 0.01",
        CBSA_episodes_impliedHHI_N,'CBSA_episodes_impliedHHIByN'),
    _episode_spec("By Implied Top 5 Share Increase in terms of N deals, >= 0.01",
        CBSA_episodes_top5share_N,'CBSA_episodes_top5shareByN'),
    _episode_spec("Withdrawn M&A, >= 0.01",
        CBSA_episodes_Withdrawn_DeltaHHI100,'CBSA_Withdrawn_DeltaHHI100'),
    _episode_spec("Withdrawn M&A, >= 0.005",
        CBSA_episodes_Withdrawn_DeltaHHI50,'CBSA_Withdrawn_DeltaHHI50'),
    _episode_spec("Withdrawn M&A, >= 0.003",
        CBSA_episodes_Withdrawn_DeltaHHI30,'CBSA_Withdrawn_DeltaHHI30'),
    _episode_spec("Withdrawn M&A, >= 0.002",
        CBSA_episodes_Withdrawn_DeltaHHI20,'CBSA_Withdrawn_DeltaHHI20'),
    _episode_spec("Withdrawn M&A, >= 0.001",
        CBSA_episodes_Withdrawn_DeltaHHI10,'CBSA_Withdrawn_DeltaHHI10'),
    ]

for episodes_file in episodes_files:

    criteria = episodes_file[0]
    episodes = episodes_file[1]
    N_matches = episodes_file[2]
    file_path = episodes_file[3]
    file_path_bondlevel = episodes_file[4]
    file_path_Quant = episodes_file[5]
    file_path_Quant_GeneralUse = episodes_file[6]
    file_path_Quant_IssuerType = episodes_file[7]
    file_path_GovFin = episodes_file[8]

    episodes = episodes.copy()
    
    ########################################
    # Find control for each merger episode #
    ########################################
    
    # State demographics to be used in merger
    CBSA_POP = pd.read_csv("../CleanData/Demographics/0C_CBSA_Pop.csv")
    CBSA_INC = pd.read_csv("../CleanData/Demographics/0C_CBSA_Inc.csv")
    CBSA_Data = CBSA_POP.merge(CBSA_INC,on=['CBSA Code','year'])
    CBSA_Data = CBSA_Data[['CBSA Code','year','inc','pop']]
    Same_State_CBSA_pairs = pd.read_csv("../CleanData/Demographics/0C_Same_State_CBSA_pairs.csv")
    
    def calculate_distance(row,weightingmat):
        return sp.spatial.distance.mahalanobis((row['inc'],row['pop']),\
            (row['treated_inc'],row['treated_pop']),weightingmat)
    
    episodes['control'] = None
    for idx,row in episodes.iterrows():
    
        # Find population of this CBSA
        CBSA_Data_oneyear = CBSA_Data[CBSA_Data['year']==row['episode_start_year']].copy()
    
        # Demographic data of the treated CBSA
        CBSA_Data_oneyear_frag = CBSA_Data_oneyear[CBSA_Data_oneyear['CBSA Code']==row['CBSA Code']].copy()
        if len(CBSA_Data_oneyear_frag)==0:
            continue
        episode_pop = CBSA_Data_oneyear_frag.reset_index()['pop'][0]
        episode_inc = CBSA_Data_oneyear_frag.reset_index()['inc'][0]
        
        # Find a match
        CBSA_Data_oneyear['treated_pop'] = episode_pop
        CBSA_Data_oneyear['treated_inc'] = episode_inc
        # Get weighting matrix
        CBSA_Data_oneyear['inc'] = winsor2(CBSA_Data_oneyear['inc'],cutoffs=[0.05,0.05])
        CBSA_Data_oneyear['pop'] = winsor2(CBSA_Data_oneyear['pop'],cutoffs=[0.05,0.05])
        cov = CBSA_Data_oneyear[['inc','pop']].cov()
        invcov = np.linalg.inv(cov)
        CBSA_Data_oneyear['dist'] = CBSA_Data_oneyear.apply(calculate_distance, axis=1,weightingmat=invcov)
        CBSA_Data_oneyear = CBSA_Data_oneyear.sort_values('dist').reset_index(drop=True)
        # Remove oneself from potential matches
        CBSA_Data_oneyear = CBSA_Data_oneyear[CBSA_Data_oneyear['CBSA Code']!=row['CBSA Code']]
        # Remove other CBSAs in the same state from potential matches
        Same_State_CBSAs = list(Same_State_CBSA_pairs[Same_State_CBSA_pairs['CBSA_1']==row['CBSA Code']]['CBSA_2'])
        CBSA_Data_oneyear = CBSA_Data_oneyear[~CBSA_Data_oneyear['CBSA Code'].isin(Same_State_CBSAs)]
    
        match_counter = 0
        control = []
        for subidx,subrow in CBSA_Data_oneyear.iterrows():
            # Years for which potential control is treated itself
            CBSA_affected_frag = CBSA_affected[CBSA_affected['CBSA Code']==subrow['CBSA Code']]
            CBSA_affected_frag = CBSA_affected_frag[(CBSA_affected_frag['acquiror_market_share_N_avg']>0.01)&
                (CBSA_affected_frag['target_market_share_N_avg']+CBSA_affected_frag['other_targets_market_share_N_avg']>0.01)]
            CBSA_affected_frag_affected_years = list(CBSA_affected_frag['sale_year'].unique())
            # 
            if len(set(list(range(row['episode_start_year']-4,row['episode_start_year']+5))).\
                intersection(set(CBSA_affected_frag_affected_years)))>0:
                # This potential control is treated
                continue
            else:
                # This potential control is not treated => Good control
                control = control+[subrow['CBSA Code']]
                match_counter = match_counter+1
                if match_counter==N_matches:
                    break
    
        episodes.at[idx,'control'] = control
    
    # Exclude cases where a match cannot be found
    print('A control cannot be found for '+str(np.sum(pd.isnull(episodes['control'])))+' episodes.')
    episodes = episodes[~pd.isnull(episodes['control'])]

    
    #############################################
    # Expand to include an event time dimension #
    #############################################
    
    episodes_Exploded = episodes
    episodes_Exploded['year_to_merger'] = [list(range(-4,11))]*len(episodes_Exploded)
    episodes_Exploded = episodes_Exploded.explode('year_to_merger')
    episodes_Exploded['calendar_year'] = episodes_Exploded['episode_start_year']+episodes_Exploded['year_to_merger']    

    
    ################################
    # Assemble a regression sample #
    ################################

    #------------------------#
    # Issue level, using GPF #
    #------------------------#

    reg_sample = []
    for idx,row in episodes_Exploded.iterrows():

        # Event characteristics - strength
        if 'acquiror_market_share_N_avg' in episodes_Exploded.columns:
            acquiror_market_share_avg = row['acquiror_market_share_N_avg']
        else:
            acquiror_market_share_avg = None

        if 'target_market_share_N_avg' in episodes_Exploded.columns:
            target_market_share_avg = row['target_market_share_N_avg']
        else:
            target_market_share_avg = None

        if 'other_targets_market_share_N_avg' in episodes_Exploded.columns:
            other_targets_market_share_avg = row['other_targets_market_share_N_avg']
        else:
            other_targets_market_share_avg = None

        if 'hhi_dif' in episodes_Exploded.columns:
            hhi_dif = row['hhi_dif']
        else:
            hhi_dif = None

        if 'max_sum_share' in episodes_Exploded.columns:
            max_sum_share = row['max_sum_share']
        else:
            max_sum_share = None

        if 'max_min_share' in episodes_Exploded.columns:
            max_min_share = row['max_min_share']
        else:
            max_min_share = None

        if 'mean_sum_share' in episodes_Exploded.columns:
            mean_sum_share = row['mean_sum_share']
        else:
            mean_sum_share = None
    
        # Treated observations: GPF issues sold in this row's CBSA during this row's calendar year
        treated_columns = [
            'CBSA Code','sale_year','State','County',
            'issuer_type','Issuer',
            'avg_maturity','amount',
            'avg_yield','treasury_avg_spread','MMA_avg_spread',
            'gross_spread','gross_spread_tic_based','gross_spread_nic_based',
            'mod_tic','mod_tic_spread_treasury','mod_tic_spread_MMA',
            'mod_tic_timeFE','mod_tic_spread_treasury_timeFE','mod_tic_spread_MMA_timeFE',
            'underpricing_15to60','underpricing_15to30',
            'Bid','taxable_code','security_type','if_advisor','if_dual_advisor','if_refunding',
            'amount_bracket','mat_bracket','use_short','has_ratings',
            'use_of_proceeds_BB','use_of_proceeds_main','use_of_proceeds_general',
            'has_Moodys','has_Fitch','rating_Moodys','rating_Fitch','insured_amount',
            'AdvisorFeeRatio_hat','CRFeeRatio_hat','InsureFeeRatio_hat',
            'AdvisorFeeRatio_hat_model_timeFE','CRFeeRatio_hat_model_timeFE','InsureFeeRatio_hat_model_timeFE',
            'if_callable','CB_Eligible',
            'num_relationship',
            ]+name_GPF_colnames+parent_name_GPF_colnames
        in_treated_cbsa = (GPF['sale_year']==row['calendar_year'])&(GPF['CBSA Code']==row['CBSA Code'])
        GPF_Seg = GPF.loc[in_treated_cbsa].copy()[treated_columns]

        #------------------------------------#
        # Some cross-sectional heterogeneity #
        #------------------------------------#

        # Note that this checks whether the bank is involved in any merger affecting this CBSA within
        # [-4,+4] years of the episode start, rather than only the mergers of the episode itself
        # (the earlier code block)
        in_merger_window = CBSA_affected['sale_year'].between(row['episode_start_year']-4,row['episode_start_year']+4)
        mergers = CBSA_affected.loc[
            (CBSA_affected['CBSA Code']==row['CBSA Code'])&in_merger_window,
            ['acquiror','target','acquiror_parent','target_parent',
            'acquiror_market_share_N_avg','target_market_share_N_avg','other_targets_market_share_N_avg']]
        # Keep mergers where the acquiror and the target side (target plus other targets) both have positive share
        both_sides_present = \
            (mergers['acquiror_market_share_N_avg']>0)\
            &(mergers['target_market_share_N_avg']+mergers['other_targets_market_share_N_avg']>0)
        mergers = mergers[both_sides_present]

        # Names that count as "target" / "acquiror": the bank itself or its parent
        target_side_names = list(mergers['target'])+list(mergers['target_parent'])
        acquiror_side_names = list(mergers['acquiror'])+list(mergers['acquiror_parent'])
        bank_name_columns = name_GPF_colnames+parent_name_GPF_colnames

        # Whether the underwriter is the target bank in M&A (any underwriter or parent column matches)
        GPF_Seg['bank_is_target'] = False
        for column in bank_name_columns:
            GPF_Seg['bank_is_target'] = GPF_Seg[column].isin(target_side_names)|GPF_Seg['bank_is_target']
        # Whether the underwriter is the acquiror bank in M&A (any underwriter or parent column matches)
        GPF_Seg['bank_is_acquiror'] = False
        for column in bank_name_columns:
            GPF_Seg['bank_is_acquiror'] = GPF_Seg[column].isin(acquiror_side_names)|GPF_Seg['bank_is_acquiror']

        # Episode-level values broadcast to every treated issue, added in this column order
        treated_constants = {
            'treated': 1,
            'episode_start_year': row['episode_start_year'],
            'year_to_merger': row['year_to_merger'],
            'calendar_year': row['calendar_year'],
            'treated_cbsa': row['CBSA Code'], # Used for constructing cohort X issuer FEs
            'acquiror_market_share_avg': acquiror_market_share_avg,
            'target_market_share_avg': target_market_share_avg,
            'other_targets_market_share_avg': other_targets_market_share_avg,
            'hhi_dif': hhi_dif,
            'max_sum_share': max_sum_share,
            'max_min_share': max_min_share,
            'mean_sum_share': mean_sum_share,
        }
        for const_col,const_val in treated_constants.items():
            GPF_Seg[const_col] = const_val
        GPF_Seg_Treated = GPF_Seg

        # Control observations
        if row['control']==None:
            continue
        GPF_Seg_Control = pd.DataFrame()
        for item in row['control']:
            GPF_Seg = GPF[(GPF['sale_year']==row['calendar_year'])&(GPF['CBSA Code']==item)]
            GPF_Seg = GPF_Seg[[
                'CBSA Code','sale_year','State','County',
                'issuer_type','Issuer',
                'avg_maturity','amount',
                'avg_yield','treasury_avg_spread','MMA_avg_spread',
                'gross_spread','gross_spread_tic_based','gross_spread_nic_based',
                'mod_tic','mod_tic_spread_treasury','mod_tic_spread_MMA',
                'mod_tic_timeFE','mod_tic_spread_treasury_timeFE','mod_tic_spread_MMA_timeFE',
                'underpricing_15to60','underpricing_15to30',
                'Bid','taxable_code','security_type','if_advisor','if_dual_advisor','if_refunding',
                'amount_bracket','mat_bracket','use_short','has_ratings',
                'use_of_proceeds_BB','use_of_proceeds_main','use_of_proceeds_general',
                'has_Moodys','has_Fitch','rating_Moodys','rating_Fitch','insured_amount',
                'AdvisorFeeRatio_hat','CRFeeRatio_hat','InsureFeeRatio_hat',
                'AdvisorFeeRatio_hat_model_timeFE','CRFeeRatio_hat_model_timeFE','InsureFeeRatio_hat_model_timeFE',
                'if_callable','CB_Eligible',
                'num_relationship',
                ]+name_GPF_colnames+parent_name_GPF_colnames]

            # Note that for control banks, "bank_is_target" and "bank_is_acquiror" use M&A in the specific areas
            mergers = CBSA_affected[
                (CBSA_affected['CBSA Code']==item)&
                (CBSA_affected['sale_year']>=row['episode_start_year']-4)&
                (CBSA_affected['sale_year']<=row['episode_start_year']+4)
                ][['acquiror','target','acquiror_parent','target_parent',
                'acquiror_market_share_N_avg','target_market_share_N_avg','other_targets_market_share_N_avg']]
            mergers = mergers[(mergers['acquiror_market_share_N_avg']>0)&(mergers['target_market_share_N_avg']+mergers['other_targets_market_share_N_avg']>0)]
            # Whether the underwriter is the target bank in M&A
            GPF_Seg['bank_is_target'] = False
            for column in name_GPF_colnames:
                GPF_Seg['bank_is_target'] = \
                (GPF_Seg[column].isin(list(mergers['target'])+list(mergers['target_parent']))) \
                |(GPF_Seg['bank_is_target'])
            for column in parent_name_GPF_colnames:
                GPF_Seg['bank_is_target'] = \
                (GPF_Seg[column].isin(list(mergers['target'])+list(mergers['target_parent']))) \
                |(GPF_Seg['bank_is_target'])
            # Whether the underwriter is the acquiror bank in M&A
            GPF_Seg['bank_is_acquiror'] = False
            for column in name_GPF_colnames:
                GPF_Seg['bank_is_acquiror'] = \
                (GPF_Seg[column].isin(list(mergers['acquiror'])+list(mergers['acquiror_parent'])))\
                |(GPF_Seg['bank_is_acquiror'])
            for column in parent_name_GPF_colnames:
                GPF_Seg['bank_is_acquiror'] = \
                (GPF_Seg[column].isin(list(mergers['acquiror'])+list(mergers['acquiror_parent'])))\
                |(GPF_Seg['bank_is_acquiror'])
            
            GPF_Seg['treated'] = 0
            GPF_Seg['episode_start_year'] = row['episode_start_year']
            GPF_Seg['year_to_merger'] = row['year_to_merger']
            GPF_Seg['calendar_year'] = row['calendar_year']
            GPF_Seg['treated_cbsa'] = row['CBSA Code'] # Used for constructing cohort X issuer FEs
            GPF_Seg['hhi_dif'] = hhi_dif
            GPF_Seg_Control = pd.concat([GPF_Seg_Control,GPF_Seg])
    
        if len(GPF_Seg_Treated)>0 and len(GPF_Seg_Control)>0:
            reg_sample = reg_sample+[GPF_Seg_Treated,GPF_Seg_Control]
    
    # Stack all treated/control segments and attach the CBSA-level HHI table by CBSA and calendar year
    reg_sample = pd.concat(reg_sample).merge(HHI_byCBSA,on=['CBSA Code','calendar_year'])
    # County demographics keyed by state, county and calendar year
    County_Composite = (
        pd.read_csv("../CleanData/Demographics/0C_County_Composite.csv")
        [['year','State','County','black_ratio','pop']]
        .rename(columns={'year':'calendar_year'})
    )
    # Outer merge followed by the indicator filter: every sample row is kept, and county-years
    # that match no sample row are discarded
    reg_sample = reg_sample.merge(County_Composite,on=['State','County','calendar_year'],how='outer',indicator=True)
    is_sample_row = reg_sample['_merge']!='right_only'
    reg_sample = reg_sample[is_sample_row].drop(columns=['_merge'])
    reg_sample.to_csv(file_path)

    # #-----------------------#
    # # Bond level, using GPF #
    # #-----------------------#

    # if 'mergers' in reg_sample.columns:
    #     reg_sample = reg_sample.drop(columns=['mergers'])
    # if 'n_deals_prior' in reg_sample.columns:
    #     reg_sample = reg_sample.drop(columns=['n_deals_prior'])
    # if 'n_deals_post' in reg_sample.columns:
    #     reg_sample = reg_sample.drop(columns=['n_deals_post'])
    
    # def proc_list(reg_sample):
    #     reg_sample_bond_level = []
    #     for idx,row in reg_sample.iterrows():
    #         row_dict = reg_sample.loc[idx].to_dict()
    #         if str(row['yield_by_maturity_list'])!='nan':
    #             yield_by_maturity_list = eval(row['yield_by_maturity_list'])
    #             if str(row['spread_by_maturity_list'])!='nan':
    #                 spread_by_maturity_list = eval(row['spread_by_maturity_list'])
    #             else:
    #                 spread_by_maturity_list = [None for item in yield_by_maturity_list]
    #             maturity_by_maturity_list = eval(row['maturity_by_maturity_list'])
    #             amount_by_maturity_list = eval(row['amount_by_maturity_list'])
    #             for bond_idx in range(0,len(yield_by_maturity)):
    #                 row_dict['yield_one_bond'] = yield_by_maturity_list[bond_idx]
    #                 row_dict['spread_one_bond'] = spread_by_maturity_list[bond_idx]
    #                 row_dict['maturity_one_bond'] = maturity_by_maturity_list[bond_idx]
    #                 row_dict['amount_one_bond'] = amount_by_maturity_list[bond_idx]
    #                 reg_sample_bond_level = reg_sample_bond_level+[row_dict]
    #     reg_sample_bond_level = pd.DataFrame(reg_sample_bond_level)
    #     return reg_sample_bond_level

    # meta_columns = list(proc_list(reg_sample.sample(10)).columns)
    # reg_sample_dd = dd.from_pandas(reg_sample, npartitions=20)
    # with dask.config.set(scheduler='processes',num_workers=20):
    #     reg_sample_bond_level = reg_sample_dd.map_partitions(proc_list,meta=pd.DataFrame(columns=meta_columns)).compute()
    # dropped_columns =\
    #     [item for item in list(reg_sample_bond_level.columns) if item[:11]=='parent_name']+\
    #     [item for item in list(reg_sample_bond_level.columns) if item[:8]=='name_GPF']+\
    #     ['avg_yield','avg_spread','avg_maturity','maturity_by_maturity_list','amount_by_maturity_list','yield_by_maturity_list','spread_by_maturity_list']
    # reg_sample_bond_level = reg_sample_bond_level.drop(columns=dropped_columns)
    # reg_sample_bond_level.to_csv(file_path_bondlevel)
    
    #--------------------#
    # Sample of quantity #
    #--------------------#

    # These columns are not needed in the quantity sample; drop whichever of them is present
    episodes_Exploded = episodes_Exploded.drop(
        columns=['mergers','n_deals_prior','n_deals_post'],errors='ignore')

    # Start from CBSA level sample
    episodes_Exploded_QSample = episodes_Exploded.reset_index(drop=True)

    # One pass over the 'control' column: flag rows that carry a control list and find the longest list.
    # A row has no control when its entry prints as 'None' or 'nan'.
    control_lists = list(episodes_Exploded_QSample['control'])
    has_control = [str(item) not in ('None','nan') for item in control_lists]
    num_control = max((len(item) for item,flag in zip(control_lists,has_control) if flag),default=0)

    # Helper columns from a previous construction (control_<i>, num_control) never reach the output
    helper_columns = [item for item in episodes_Exploded_QSample.columns if item[:8]=='control_' or item=='num_control']
    episodes_Exploded_QSample_Base = episodes_Exploded_QSample.drop(columns=helper_columns)

    # Treated side: the episode rows themselves
    episodes_Exploded_QSample_Treated = episodes_Exploded_QSample_Base.copy()
    episodes_Exploded_QSample_Treated['Treated'] = 1

    # Control side: one copy of the episode rows per control slot, with 'CBSA Code' replaced by
    # the control CBSA in that slot. Frames are collected and concatenated once instead of
    # growing a DataFrame inside the loop.
    # NOTE(review): as before, this raises if a row has no control (None cannot be cast to int)
    # or has fewer than num_control controls (IndexError); presumably such rows are removed
    # upstream - TODO confirm.
    episodes_Exploded_QSample_NoCBSA = episodes_Exploded_QSample_Base.drop(columns=['CBSA Code'])
    control_frames = []
    for ctrl_ind in range(0,num_control):
        episodes_Exploded_QSample_OneControl = episodes_Exploded_QSample_NoCBSA.copy()
        control_cbsa = pd.Series(
            [item[ctrl_ind] if flag else None for item,flag in zip(control_lists,has_control)],
            index=episodes_Exploded_QSample_OneControl.index,dtype=object)
        episodes_Exploded_QSample_OneControl['CBSA Code'] = control_cbsa.astype(int)
        control_frames.append(episodes_Exploded_QSample_OneControl)
    episodes_Exploded_QSample_Control = pd.concat(control_frames) if control_frames else pd.DataFrame()
    episodes_Exploded_QSample_Control['Treated'] = 0

    episodes_Exploded_QSample = pd.concat([episodes_Exploded_QSample_Treated,episodes_Exploded_QSample_Control])
    
    # County demographics are the same for all three quantity samples, so the file is read once
    County_Composite = pd.read_csv("../CleanData/Demographics/0C_County_Composite.csv")
    County_Composite = County_Composite[['year','State','County','black_ratio','pop']].rename(columns={'year':'calendar_year'})

    def _county_quantity_sample(county_quantities,episode_sample,county_composite):
        """Attach county-level issuance quantities, demographics and HHI to the CBSA-level sample.

        county_quantities : county-by-year quantity table with 'State', 'County' and 'sale_year' columns
        episode_sample    : treated/control CBSA-year rows with 'CBSA Code' and 'calendar_year' columns
        county_composite  : county demographics keyed by 'State', 'County' and 'calendar_year'

        Returns the merged frame. The '_merge' indicator column is left in the result.
        NOTE(review): the bond-level sample drops '_merge' before export while the quantity
        samples do not; kept as-is so the exported columns do not change.
        """
        # Add the county dimension into the data, along with amount of issue
        CountyQuant = county_quantities.merge(CBSAData[['CBSA Code','State','County']],on=['State','County']).\
            rename(columns={'sale_year':'calendar_year'})
        sample = episode_sample.merge(CountyQuant,on=['CBSA Code','calendar_year'])
        sample['calendar_year'] = sample['calendar_year'].astype(int)
        # Outer merge plus the indicator filter keeps every sample row and discards unmatched
        # county-years; not rewritten as how='left' so row order and dtypes of the export stay the same
        sample = sample.merge(county_composite,on=['State','County','calendar_year'],how='outer',indicator=True)
        sample = sample[sample['_merge']!='right_only']
        return sample.merge(HHI_byCBSA,on=['CBSA Code','calendar_year'])

    #''''''''''''''''''#
    # Overall quantity #
    #..................#

    episodes_Exploded_QSample_Overall = _county_quantity_sample(
        StateXCounty,episodes_Exploded_QSample,County_Composite)
    episodes_Exploded_QSample_Overall.to_csv(file_path_Quant)

    #''''''''''''''''''''''#
    # By main use quantity #
    #......................#

    episodes_Exploded_QSample_UsageGeneral = _county_quantity_sample(
        StateXCountyXUsageGeneral,episodes_Exploded_QSample,County_Composite)
    episodes_Exploded_QSample_UsageGeneral.to_csv(file_path_Quant_GeneralUse)

    #'''''''''''''''''''''''''#
    # By issuer type quantity #
    #.........................#

    episodes_Exploded_QSample_IssuerType = _county_quantity_sample(
        StateXCountyXIssuerType,episodes_Exploded_QSample,County_Composite)
    episodes_Exploded_QSample_IssuerType.to_csv(file_path_Quant_IssuerType)

    #-------------------------------#
    # Sample of government finances #
    #-------------------------------#

    # Government finance data; its year column 'Year4' is renamed to match the sample's 'calendar_year'
    GovFinData = pd.read_csv('../CleanData/GovFinSurvey/0G_GovFinData.csv',low_memory=False).\
        rename(columns={'Year4':'calendar_year'})
    # Attach government finances, then HHI, to the treated/control CBSA-year rows (both merges use the default join)
    episodes_Exploded_GovFinSample = (
        episodes_Exploded_QSample
        .merge(GovFinData,on=['CBSA Code','calendar_year'])
        .merge(HHI_byCBSA,on=['CBSA Code','calendar_year'])
    )
    episodes_Exploded_GovFinSample.to_csv(file_path_GovFin)

    # Progress message: first element of the current episodes_file entry
    print('Exported regression sample for '+episodes_file[0])


A control cannot be found for 3 episodes.
Exported regression sample for By Market Share in terms of N deals
A control cannot be found for 2 episodes.
Exported regression sample for By Implied HHI Increase in terms of N deals, >= 0.01
A control cannot be found for 1 episodes.
Exported regression sample for By Implied Top 5 Share Increase in terms of N deals, >= 0.01
A control cannot be found for 0 episodes.
Exported regression sample for Withdrawn M&A, >= 0.01
A control cannot be found for 0 episodes.
Exported regression sample for Withdrawn M&A, >= 0.005
A control cannot be found for 0 episodes.
Exported regression sample for Withdrawn M&A, >= 0.003
A control cannot be found for 0 episodes.
Exported regression sample for Withdrawn M&A, >= 0.002
A control cannot be found for 0 episodes.
Exported regression sample for Withdrawn M&A, >= 0.001


A control cannot be found for 0 episodes.


Exported regression sample for Withdrawn M&A, >= 0.005


A control cannot be found for 0 episodes.


Exported regression sample for Withdrawn M&A, >= 0.002
