In [1]:
import pandas as pd
import numpy as np
import os
import dask
import dask.dataframe as dd
import itertools
from itertools import chain
from math import sqrt, floor, ceil, isnan
import multiprocess
import multiprocessing
import importlib
from importlib import reload
from collections import Counter
from fuzzywuzzy import process, fuzz
import time
import geopandas as gpd
import matplotlib.cm as cm
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import warnings
# Turn every warning into an exception so that problems surface immediately
# instead of scrolling past in the cell output.
warnings.filterwarnings("error")

# Let wide / long frames and long strings display without truncation.
pd.set_option("display.max_columns", 500)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", 400)


# 1. Import Data

In [2]:
# GPF: deal-level table read from the cleaned SDC export.
GPF = pd.read_csv("../CleanData/SDC/0A_GPF.csv", low_memory=False)

# Every column whose header contains 'raw_name_GPF_'; their count fixes how many
# numbered 'name_GPF_<i>' / 'parent_name_<i>' column names are generated below.
raw_name_GPF_colnames = list(filter(lambda header: 'raw_name_GPF_' in header, GPF.columns))
name_GPF_colnames = list(map('name_GPF_{}'.format, range(len(raw_name_GPF_colnames))))
parent_name_colnames = list(map('parent_name_{}'.format, range(len(raw_name_GPF_colnames))))


**Top 10 banks by number of deals, 1970–2023**

In [3]:
%%script false --no-raise-error

# Keep deals sold in 1970-2023 (both bounds included) and flatten the name
# columns row by row into one long sequence of bank names.
flat_names = GPF.loc[GPF['sale_year'].between(1970, 2023), name_GPF_colnames].to_numpy().ravel()

# Discard empty slots: both None and NaN (whose string form is 'nan').
name_GPFs = [name for name in flat_names if name is not None and str(name) != 'nan']

# (name, count) pairs, most frequent first.
name_GPFs = Counter(name_GPFs).most_common()
print('10 banks with most sales are:')
print(*name_GPFs[:10],sep='\n')

# Keep only the names, still ordered by frequency.
name_GPFs = [name for name, _ in name_GPFs]

# 2. Plot market shares

## 2.1 Calculate CSA X year level market share

In [4]:
%%script false --no-raise-error

# %%time

###################################################
# Number of deals of each bank in each CSA X year #
###################################################

# Builds `deals_by_CSA_bank_year` with one row per (bank, CSA, year) for every
# CSA X year that has at least one deal. Columns:
#   n_deals          - deals of that bank in that CSA X year (0 if none)
#   name_GPF         - bank name, exactly as stored in GPF
#   CSA Code         - integer CSA code
#   sale_year        - integer year, 1967-2023
#   state_n_deals    - total deals of all banks in that CSA X year
#                      (historical column name; it is a CSA-level total)
#   CSA_market_share - n_deals / state_n_deals
#
# The counts are computed with melt/groupby on the original values, so bank names
# containing commas, apostrophes or parentheses are kept intact (no round trip
# through the string form of a tuple), and no row-by-row Python loop is needed.
#
# NOTE(review): uses `parent_name_colnames` as defined in the data-import cell;
# `parent_name_GPF_colnames` is not defined anywhere in this notebook. Confirm
# that these columns exist in GPF.

sample_years = list(range(1967,2024))

# Long format: one row per (deal, parent-bank slot); empty slots are dropped
deal_banks = GPF[['CSA Code','sale_year']+parent_name_colnames].melt(
    id_vars=['CSA Code','sale_year'],var_name='bank_slot',value_name='name_GPF')
deal_banks = deal_banks[pd.notnull(deal_banks['name_GPF'])]

# Every bank that appears anywhere in GPF, and every CSA code (may include NaN)
name_GPFs = list(deal_banks['name_GPF'].unique())
CSAs = list(GPF['CSA Code'].unique())

# Keep deals with a known CSA and a sale year inside the sample window
deal_banks = deal_banks[
    pd.notnull(deal_banks['CSA Code'])&deal_banks['sale_year'].isin(sample_years)].copy()
deal_banks['CSA Code'] = deal_banks['CSA Code'].astype(float).astype(int)
deal_banks['sale_year'] = deal_banks['sale_year'].astype(int)

# Get the number of deals by bank X CSA X year, and the total by CSA X year
n_deals = deal_banks.groupby(['name_GPF','CSA Code','sale_year']).size()\
    .rename('n_deals').reset_index()
CSA_year_totals = deal_banks.groupby(['CSA Code','sale_year']).size()\
    .rename('state_n_deals').reset_index()

# Balanced panel: every bank in every CSA X year that has at least one deal.
# The cross join has (#banks x #active CSA-years) rows; banks without deals get 0.
deals_by_CSA_bank_year = pd.DataFrame({'name_GPF':name_GPFs}).merge(CSA_year_totals,how='cross')
deals_by_CSA_bank_year = deals_by_CSA_bank_year.merge(
    n_deals,on=['name_GPF','CSA Code','sale_year'],how='left')
deals_by_CSA_bank_year['n_deals'] = deals_by_CSA_bank_year['n_deals'].fillna(0).astype(int)

# Obtain market share (the denominator is always >= 1 by construction)
deals_by_CSA_bank_year['CSA_market_share'] = deals_by_CSA_bank_year['n_deals']/deals_by_CSA_bank_year['state_n_deals']
deals_by_CSA_bank_year = deals_by_CSA_bank_year[
    ['n_deals','name_GPF','CSA Code','sale_year','state_n_deals','CSA_market_share']]

## 2.2 Functions that plot market share

### 2.2.1 A function that plots market share of any bank in any year by CSA

In [5]:
%%script false --no-raise-error

#####################################################################
# A function that plots market share of any bank in any year by CSA #
#####################################################################

def plot_bank(bank,year):
    """Draw a county-level map of one bank's CSA market share in one year.

    Each county is coloured by the market share of `bank` in the CSA the county
    belongs to; counties without a CSA, or whose CSA has no row for this bank
    and year, are shown with a share of 0.

    Parameters
    ----------
    bank : value matched against `deals_by_CSA_bank_year['name_GPF']`.
    year : value matched against `deals_by_CSA_bank_year['sale_year']`.

    Relies on the notebook-level objects `deals_by_CSA_bank_year` and `CBSAData`
    (`CBSAData` is not created in the cells shown here - it must provide the
    columns 'FIPS State Code', 'FIPS County Code' and 'CSA Code').

    NOTE: a later cell defines another `plot_bank` with a different signature;
    once that cell has run, it replaces this function.
    """

    # Read the US counties GeoJSON file, dropping Alaska (02), Hawaii (15) and Puerto Rico (72)
    gdf = gpd.read_file('../RawData/MSA/US-counties.geojson')
    gdf = gdf[~gdf['STATE'].isin(['02','15','72'])]
    gdf = gdf.rename(columns={'STATE':'FIPS State Code','COUNTY':'FIPS County Code'})

    gdf['FIPS State Code'] = gdf['FIPS State Code'].astype(int)
    gdf['FIPS County Code'] = gdf['FIPS County Code'].astype(int)
    # Left join: keep exactly the county polygons loaded above. An outer join would
    # add rows that have no polygon (e.g. counties of the states dropped above).
    gdf = gdf.merge(CBSAData[['FIPS State Code','FIPS County Code','CSA Code']],on=['FIPS State Code','FIPS County Code'],how='left')

    fig, ax = plt.subplots(1, 1, figsize=(12, 8))

    deals_by_CSA_bank_onebank_oneyear = deals_by_CSA_bank_year\
        [(deals_by_CSA_bank_year['name_GPF']==bank)&(deals_by_CSA_bank_year['sale_year']==year)]
    # Merge the GeoDataFrame with data (left join, so only drawable counties remain)
    gdf = gdf.merge(deals_by_CSA_bank_onebank_oneyear,on=['CSA Code'],how='left')
    # Counties without data for this bank and year count as zero market share
    gdf['CSA_market_share'] = gdf['CSA_market_share'].fillna(0)
    gdf = gdf.set_index(['FIPS State Code','FIPS County Code'])
    gdf.plot(column='CSA_market_share', cmap='Reds', edgecolor='0.8', ax=ax, legend=True, legend_kwds={'shrink': 0.5})
    ax.axis("off")
    ax.set_title(bank)

plot_bank('FLEET BANK',1987)

### 2.2.2 A function that plots market share of two banks in any year by CSA

In [6]:
%%script false --no-raise-error

# Hand-formatted names used in panel titles; banks not listed keep their raw name
_BANK_DISPLAY_NAMES = {
    "RAFFENSPERGERHUGHES & COINC": "Raffensperger, Hughes & Co.",
    "NATIONAL CITY BANK": "National City Bank",
    "SUNTRUST BANK": "SunTrust Bank",
    "EQUITABLE SECURITIES": "Equitable Securities",
    "RBC BANK": "RBC Bank",
    "WILLIAM R HOUGH": "William R. Hough",
    "STIFEL NICOLAUS": "Stifel",
    "GEORGE K BAUM": "George K. Baum",
}


def _load_counties_with_CSA():
    """Return the county polygons (without AK, HI, PR) with their 'CSA Code' attached."""
    # Read the US counties GeoJSON file, dropping Alaska (02), Hawaii (15) and Puerto Rico (72)
    gdf = gpd.read_file('../RawData/MSA/US-counties.geojson')
    gdf = gdf[~gdf['STATE'].isin(['02','15','72'])]
    gdf = gdf.rename(columns={'STATE':'FIPS State Code','COUNTY':'FIPS County Code'})

    gdf['FIPS State Code'] = gdf['FIPS State Code'].astype(int)
    gdf['FIPS County Code'] = gdf['FIPS County Code'].astype(int)
    # Left join: keep exactly the county polygons loaded above. An outer join would
    # add rows that have no polygon (e.g. counties of the states dropped above).
    return gdf.merge(CBSAData[['FIPS State Code','FIPS County Code','CSA Code']],on=['FIPS State Code','FIPS County Code'],how='left')


def plot_bank(bank1,bank2,year,path,custom_cmap):
    """Map the CSA market shares of two banks side by side and save the figure as EPS.

    The left panel shows `bank1`, the right panel `bank2`, both for `year`. Each
    county is coloured by the bank's market share in the county's CSA; counties
    without data are shown with a share of 0. Both panels share one colour scale
    and one horizontal colour bar.

    Parameters
    ----------
    bank1, bank2 : values matched against `deals_by_CSA_bank_year['name_GPF']`.
    year : value matched against `deals_by_CSA_bank_year['sale_year']`.
    path : output file passed to `fig.savefig` (written in 'eps' format).
    custom_cmap : colormap passed to the map plots and the colour bar.

    Relies on the notebook-level objects `deals_by_CSA_bank_year` and `CBSAData`
    (`CBSAData` is not created in the cells shown here).
    """

    gdf = _load_counties_with_CSA()

    fig, axes = plt.subplots(1, 2, figsize=(16, 8), sharex=True, sharey=True)

    # Colour limits are taken from the plotted variable itself (CSA market share of
    # the two banks over all years), so the colour bar matches the maps.
    shares = deals_by_CSA_bank_year[deals_by_CSA_bank_year['name_GPF'].isin([bank1,bank2])]['CSA_market_share']
    if len(shares)>0:
        vmin, vmax = np.min(shares), np.max(shares)
    else:
        # Neither bank has any row: fall back to the natural range of a share
        vmin, vmax = 0, 1

    for ax, bank in zip(axes, [bank1,bank2]):
        deals_by_CSA_bank_onebank_oneyear = deals_by_CSA_bank_year\
            [(deals_by_CSA_bank_year['name_GPF']==bank)&(deals_by_CSA_bank_year['sale_year']==year)]
        # Merge the GeoDataFrame with data (left join, so only drawable counties remain)
        gdf_withdata = gdf.merge(deals_by_CSA_bank_onebank_oneyear,on=['CSA Code'],how='left')
        # Counties without data for this bank and year count as zero market share
        gdf_withdata['CSA_market_share'] = gdf_withdata['CSA_market_share'].fillna(0)
        gdf_withdata = gdf_withdata.set_index(['FIPS State Code','FIPS County Code'])
        gdf_withdata.plot(column='CSA_market_share', cmap=custom_cmap, edgecolor='0.9', ax=ax, legend=False,
            vmin=vmin, vmax=vmax)
        ax.axis("off")
        # For formatting, use the manually set name strings where available
        ax.set_title(_BANK_DISPLAY_NAMES.get(bank,bank)+" in "+str(year))

    # Put panels closer
    plt.subplots_adjust(wspace=-0.05)

    # Define a mappable based on which the colorbar will be drawn
    mappable = cm.ScalarMappable(
        norm=mcolors.Normalize(vmin, vmax),
        cmap=custom_cmap)
    # Define position and extent of colorbar
    cb_ax = fig.add_axes([0.2, 0.25, 0.6, 0.01])
    # Draw colorbar
    fig.colorbar(mappable, cax=cb_ax, orientation='horizontal')

    fig.savefig(path, format='eps', bbox_inches='tight')

## 2.3 Export figure

In [7]:
%%script false --no-raise-error

# Deprecation warnings are ignored for the export step (other warnings stay as configured).
warnings.filterwarnings("ignore", category=DeprecationWarning)

# One figure per example: (bank1, bank2, year, output path, colormap)
for merger_example in [
    ('SUNTRUST BANK','EQUITABLE SECURITIES',1995,"../Draft/figs/Merger_Example1.eps",'Greens'),
    ('RBC BANK','WILLIAM R HOUGH',2001,"../Draft/figs/Merger_Example2.eps",'Blues'),
    ('STIFEL NICOLAUS','GEORGE K BAUM',2018,"../Draft/figs/Merger_Example3.eps",'Reds'),
]:
    plot_bank(*merger_example)
