# ETL | Financial Institutions - Historical banks failures 1934 - Present

Source [FDIC - Federal Deposit Insurance Corporation]('https://banks.data.fdic.gov/docs/#/Structure/searchInstitutions')

Data last updated June 2018 



# Dependencies

In [17]:
# Dependencies and Setup
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sqlalchemy import create_engine
import secret #file with database password
import datetime

# Extract

## Load and Read CSV

In [2]:
# Load CSV files

institutions = "../financial_instituttions_data/institutions.csv"
locations = "../financial_instituttions_data/locations.csv"

# Read files and store into Pandas DataFrame
# Due to low memory error read https://stackoverflow.com/questions/24251219/pandas-read-csv-low-memory-and-dtype-options

institutions_df  = pd.read_csv(institutions,sep=',', error_bad_lines=False, index_col=False, dtype='unicode')
locations_df = pd.read_csv(locations,sep=',', error_bad_lines=False, index_col=False, dtype='unicode')


In [3]:
pd.set_option('display.max_columns', 500)
institutions_df.head()
locations_df.head()
locations_df.head()

Unnamed: 0,ADDRESS,BKCLASS,CBSA,CBSA_DIV,CBSA_DIV_FLG,CBSA_DIV_NO,CBSA_METRO,CBSA_METRO_FLG,CBSA_METRO_NAME,CBSA_MICRO_FLG,CBSA_NO,CERT,CITY,COUNTY,CSA,CSA_FLG,CSA_NO,ESTYMD,FI_UNINUM,MAINOFF,NAME,OFFNAME,OFFNUM,RUNDATE,SERVTYPE,STALP,STCNTY,STNAME,UNINUM,ZIP
0,4500 WASHINGTON AVENUE,N,"Evansville, IN-KY",0,0,0,21780,1,"Evansville, IN-KY",0,21780,3832,EVANSVILLE,VANDERBURGH,0,0,0,10/03/1963,2492,0,Old National Bank,HEBRON PLACE BRANCH,8,09/04/2019,11,IN,18163,INDIANA,203604,47714
1,2202 NORTH SIXTH STREET,SM,"Vincennes, IN",0,0,0,0,0,0,1,47180,12368,VINCENNES,KNOX,0,0,0,11/01/1971,7866,0,Regions Bank,NORTH VINCENNES BRANCH,1636,09/04/2019,11,IN,18083,INDIANA,203651,47591
2,628 WEST MAIN STREET,NM,"Bedford, IN",0,0,0,0,0,0,1,13260,27744,MITCHELL,LAWRENCE,"Bloomington-Bedford, IN",1,144,10/01/1995,40578,0,MutualBank,MESSENGER SERVICE BRANCH,144,09/04/2019,27,IN,18093,INDIANA,203628,47446
3,"17 CHERRY TREE PLAZA, US HIGHWAY 50 EAST",NM,"Washington, IN",0,0,0,0,0,0,1,47780,17393,WASHINGTON,DAVIESS,0,0,0,03/15/1989,11392,0,German American Bank,WASHINGTON CHERRY TREE BRANCH,30,09/04/2019,11,IN,18027,INDIANA,203662,47501
4,"5321 COUNCIL STREET, N.E.",N,"Cedar Rapids, IA",0,0,0,16300,1,"Cedar Rapids, IA",0,16300,6548,CEDAR RAPIDS,LINN,"Cedar Rapids-Iowa City, IA",1,168,01/16/1978,4383,0,U.S. Bank National Association,CEDAR RAPIDS COUNCIL STREET BRANCH,568,09/04/2019,11,IA,19113,IOWA,203680,52402


# Transform Locations DF

## Drop & rearrenge

In [4]:
# Print all columns names
print(locations_df.columns.values)

['ADDRESS' 'BKCLASS' 'CBSA' 'CBSA_DIV' 'CBSA_DIV_FLG' 'CBSA_DIV_NO'
 'CBSA_METRO' 'CBSA_METRO_FLG' 'CBSA_METRO_NAME' 'CBSA_MICRO_FLG'
 'CBSA_NO' 'CERT' 'CITY' 'COUNTY' 'CSA' 'CSA_FLG' 'CSA_NO' 'ESTYMD'
 'FI_UNINUM' 'MAINOFF' 'NAME' 'OFFNAME' 'OFFNUM' 'RUNDATE' 'SERVTYPE'
 'STALP' 'STCNTY' 'STNAME' 'UNINUM' 'ZIP']


In [5]:
locations_df = locations_df.drop(columns = ['BKCLASS','CBSA_DIV','CBSA_DIV_FLG','CBSA_DIV_NO',\
 'CBSA_METRO','CBSA_METRO_FLG','CBSA_METRO_NAME','CBSA_MICRO_FLG',\
   'CSA','CSA_FLG','CSA_NO','ESTYMD',\
  'MAINOFF'])

In [6]:
locations_df.head()

Unnamed: 0,ADDRESS,CBSA,CBSA_NO,CERT,CITY,COUNTY,FI_UNINUM,NAME,OFFNAME,OFFNUM,RUNDATE,SERVTYPE,STALP,STCNTY,STNAME,UNINUM,ZIP
0,4500 WASHINGTON AVENUE,"Evansville, IN-KY",21780,3832,EVANSVILLE,VANDERBURGH,2492,Old National Bank,HEBRON PLACE BRANCH,8,09/04/2019,11,IN,18163,INDIANA,203604,47714
1,2202 NORTH SIXTH STREET,"Vincennes, IN",47180,12368,VINCENNES,KNOX,7866,Regions Bank,NORTH VINCENNES BRANCH,1636,09/04/2019,11,IN,18083,INDIANA,203651,47591
2,628 WEST MAIN STREET,"Bedford, IN",13260,27744,MITCHELL,LAWRENCE,40578,MutualBank,MESSENGER SERVICE BRANCH,144,09/04/2019,27,IN,18093,INDIANA,203628,47446
3,"17 CHERRY TREE PLAZA, US HIGHWAY 50 EAST","Washington, IN",47780,17393,WASHINGTON,DAVIESS,11392,German American Bank,WASHINGTON CHERRY TREE BRANCH,30,09/04/2019,11,IN,18027,INDIANA,203662,47501
4,"5321 COUNCIL STREET, N.E.","Cedar Rapids, IA",16300,6548,CEDAR RAPIDS,LINN,4383,U.S. Bank National Association,CEDAR RAPIDS COUNCIL STREET BRANCH,568,09/04/2019,11,IA,19113,IOWA,203680,52402


In [7]:
# Rearrenge columns where I only care the first 3 

new_cols = ['NAME', 'CITY', 'STNAME'] + [c for c in locations_df.columns if c not in ['NAME', 'CITY', 'STNAME']]
                                    

# Re-index after rearrenging
locations_df = locations_df.reindex(columns = new_cols)
locations_df

Unnamed: 0,NAME,CITY,STNAME,ADDRESS,CBSA,CBSA_NO,CERT,COUNTY,FI_UNINUM,OFFNAME,OFFNUM,RUNDATE,SERVTYPE,STALP,STCNTY,UNINUM,ZIP
0,Old National Bank,EVANSVILLE,INDIANA,4500 WASHINGTON AVENUE,"Evansville, IN-KY",21780,3832,VANDERBURGH,2492,HEBRON PLACE BRANCH,8,09/04/2019,11,IN,18163,203604,47714
1,Regions Bank,VINCENNES,INDIANA,2202 NORTH SIXTH STREET,"Vincennes, IN",47180,12368,KNOX,7866,NORTH VINCENNES BRANCH,1636,09/04/2019,11,IN,18083,203651,47591
2,MutualBank,MITCHELL,INDIANA,628 WEST MAIN STREET,"Bedford, IN",13260,27744,LAWRENCE,40578,MESSENGER SERVICE BRANCH,144,09/04/2019,27,IN,18093,203628,47446
3,German American Bank,WASHINGTON,INDIANA,"17 CHERRY TREE PLAZA, US HIGHWAY 50 EAST","Washington, IN",47780,17393,DAVIESS,11392,WASHINGTON CHERRY TREE BRANCH,30,09/04/2019,11,IN,18027,203662,47501
4,U.S. Bank National Association,CEDAR RAPIDS,IOWA,"5321 COUNCIL STREET, N.E.","Cedar Rapids, IA",16300,6548,LINN,4383,CEDAR RAPIDS COUNCIL STREET BRANCH,568,09/04/2019,11,IA,19113,203680,52402
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88815,Piedmont Federal Savings Bank,HIGH POINT,NORTH CAROLINA,3870 JOHN GORDON LANE,"Greensboro-High Point, NC",24660,27619,GUILFORD,40453,Piedmont Federal Savings Bank-Palladium Branch,103,09/04/2019,11,NC,37081,616500,27265
88816,"Home Bank, National Association",JEFFERSON,LOUISIANA,1105 S CLEARVIEW PKWY,"New Orleans-Metairie, LA",35380,28094,JEFFERSON,40928,CLEARVIEW BRANCH,53,09/04/2019,11,LA,22051,616590,70121
88817,"Northwestern Bank, National Association",FERGUS FALLS,MINNESOTA,402 W LINCOLN AVE,"Fergus Falls, MN",22260,9746,OTTER TAIL,6234,Northwestern Bank NA,5,09/04/2019,11,MN,27111,614809,56537
88818,Penn Community Bank,LANGHORNE,PENNSYLVANIA,200 MANOR AVE,"Philadelphia-Camden-Wilmington, PA-NJ-DE-MD",37980,30401,BUCKS,43235,Cairn University,126,09/04/2019,29,PA,42017,614819,19047


# Transform Institutions DF

## Drop, fill, rearrenge

In [8]:
# Display DF
institutions_df

Unnamed: 0,STNAME,CERT,DOCKET,ACTIVE,ADDRESS,ASSET,BKCLASS,CHANGEC1,CHANGEC2,CHANGEC3,CHANGEC4,CHANGEC5,CHANGEC6,CHANGEC7,CHANGEC8,CHANGEC9,CHANGEC10,CHANGEC11,CHANGEC12,CHANGEC13,CHANGEC14,CHANGEC15,CHARTER,CHRTAGNT,CONSERVE,CITY,CLCODE,CMSA_NO,CMSA,COUNTY,DATEUPDT,DENOVO,DEP,EFFDATE,ENDEFYMD,EQ,ESTYMD,FDICDBS,FDICREGN,FDICSUPV,FED,FED_RSSD,FEDCHRTR,FLDOFF,IBA,INACTIVE,INSAGNT1,INSAGNT2,INSDATE,INSTCRCD,INSBIF,INSCOML,INSDIF,INSFDIC,INSSAIF,INSSAVE,MSA_NO,MSA,NAME,NEWCERT,OAKAR,OTSDIST,OTSREGNM,PROCDATE,QBPRCOML,REGAGNT,REPDTE,RISDATE,STCHRTR,ROA,ROAQ,ROE,ROEQ,RUNDATE,SASSER,LAW_SASSER_FLG,STALP,STCNTY,STNUM,ZIP,SUPRV_FD,OCCDIST,UNINUM,ULTCERT,CFPBEFFDTE,CFPBENDDTE,CFPBFLAG,REGAGENT2,TE01N528,TE02N528,TE03N528,TE04N528,TE05N528,TE06N528,TE07N528,TE08N528,TE09N528,TE10N528,TE01N529,TE02N529,TE03N529,TE04N529,TE05N529,TE06N529,WEBADDR,OFFICES,CERTCONS,PARCERT,CITYHCR,DEPDOM,FORM31,HCTMULT,INSTAG,MUTUAL,NAMEHCR,NETINC,NETINCQ,OFFDOM,OFFFOR,OFFOA,RSSDHCR,STALPHCR,STMULT,SUBCHAPS,ROAPTX,ROAPTXQ,TRUST,SPECGRP,SPECGRPN,TRACT,CSA,CSA_NO,CSA_FLG,CBSA,CBSA_NO,CBSA_METRO_NAME,CBSA_METRO,CBSA_METRO_FLG,CBSA_MICRO_FLG,CBSA_DIV,CBSA_DIV_NO,CBSA_DIV_FLG,CB
0,PENNSYLVANIA,15698,0,0,401 WEST LANCASTER AVENUE,,SB,213,,,,,,,,,,,,,,,0,STATE,N,HAVERFORD,42,77,"Philadelphia-Wilmington-Atlantic City, PA-NJ-D...",MONTGOMERY,04/07/1982,,,04/03/1982,04/03/1982,,01/01/1847,2,NEW YORK,NEW YORK,3,897116,0,PHILADELPHIA,0,1,BIF,,11/21/1939,0,1,0,,1,,0,6160,"Philadelphia, PA-NJ PMSA",The Western Saving Fund Society of Philadelphia,15750,0,1,NORTHEAST,04/07/1982,1,FDIC,,,1,,,,,09/04/2019,0,N,PA,42091,42,19041,2,1,9966,7946,31-Dec-9999,31-Dec-9999,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Error in Specialization Group,,"Philadelphia-Reading-Camden, PA-NJ-DE-MD",428,1,"Philadelphia-Camden-Wilmington, PA-NJ-DE-MD",37980,"Philadelphia-Camden-Wilmington, PA-NJ-DE-MD",37980,1,0,"Montgomery County-Bucks County-Chester County, PA",33874,1,
1,SOUTH CAROLINA,15716,0,0,MAIN STREET,5045,NM,223,,,,,,,,,,,,,,,0,STATE,N,SUMMERTON,21,0,,CLARENDON,10/23/1985,0,4412,09/16/1985,09/16/1985,585,09/11/1934,5,ATLANTA,ATLANTA,5,577221,0,COLUMBIA SC,0,1,BIF,,01/15/1940,0,1,1,0,1,,0,0,,Bank of Summerton,2111,0,2,SOUTHEAST,10/23/1985,2,FDIC,06/30/1985,06/30/1985,1,2.08,3.42,19.09,30.52,09/04/2019,0,N,SC,45027,45,29148,5,1,9979,873,31-Dec-9999,31-Dec-9999,0,,,,,,,,,,,,,,,,,,,,0,0,0,4412,0,0,0,0,0,52,43,1,0,0,0,0,0,0,2.45,4.05,1,7,Other Specialized Under 1 Billion,0,,,,,,0,0,,,,,,1
2,TEXAS,15721,0,0,823 CONGRESS AVENUE,345518,N,211,,,,,,,,,,,,,,,14728,OCC,N,AUSTIN,3,0,,TRAVIS,11/03/1992,0,331568,10/30/1992,10/30/1992,6972,06/05/1936,13,DALLAS,DALLAS,11,7353,1,AUSTIN,0,1,BIF,,02/21/1940,0,1,1,0,1,,0,640,"Austin-San Marcos, TX MSA","First City, Texas - Austin, National Association",33714,0,5,WEST,11/03/1992,5,OCC,09/30/1992,09/30/1992,0,-2.07,-0.45,-91.84,-22.36,09/04/2019,0,N,TX,48453,48,78767,13,5,9983,5510,31-Dec-9999,31-Dec-9999,0,,,,,,,,,,,,,,,,,,,,0,0,HOUSTON,331568,0,1,0,0,"FIRST CITY BANCORPORATION OF TEXAS, INC.",-5930,-401,3,0,0,1249338,TX,0,0,-2.05,-0.44,1,4,Commercial Lending Specialization,0,0,0,0,"Austin-Round Rock, TX",12420,"Austin-Round Rock, TX",12420,1,0,0,0,0,0
3,IOWA,15736,0,0,606 WEST MILWAUKEE,206135,SM,223,,,,,,,,,,,,,,,0,STATE,N,NEW HAMPTON,13,0,,CHICKASAW,04/07/2011,0,188024,03/31/2011,03/31/2011,16414,06/16/1937,11,KANSAS CITY,KANSAS CITY,7,668848,0,CEDAR RAPIDS,0,1,DIF,,04/15/1940,0,0,1,1,1,,0,0,,BANK IOWA,14521,0,4,WESTERN,04/07/2011,4,FED,12/31/2010,12/31/2010,1,1.09,0.75,13.58,9.09,09/04/2019,0,N,IA,19037,19,50659,11,4,9992,14521,31-Dec-9999,31-Dec-9999,0,,,,,,,,,,,,,,,,,,http://www.bankiowabanks.com/lawler,,0,0,WEST DES MOINES,188024,0,1,1,0,BANK IOWA CORPORATION,2193,378,4,0,0,1202762,IA,0,1,1.13,0.76,1,2,Agricultural Specialization,0,,,,,,0,0,,,,,,1
4,IOWA,15368,0,0,1306 18TH STREET,38799,SM,223,,,,,,,,,,,,,,,0,STATE,N,SPIRIT LAKE,13,0,,DICKINSON,01/31/2003,0,26764,10/28/2002,10/28/2002,2615,01/01/1935,11,KANSAS CITY,KANSAS CITY,7,810142,0,SIOUX CITY,0,1,BIF,,03/24/1936,0,1,1,0,1,,0,0,,Security State Bank,15555,0,4,WESTERN,01/31/2003,4,FED,09/30/2002,09/30/2002,1,3.27,-0.05,46.44,-0.68,09/04/2019,0,N,IA,19059,19,51360,11,4,9695,15555,31-Dec-9999,31-Dec-9999,0,,,,,,,,,,,,,,,,,,WWW.CENTRALBANKFDIC.COM,,0,0,STORM LAKE,26764,0,1,0,0,COMMERCIAL FINANCIAL CORP,1059,-5,1,0,0,1978674,IA,0,0,5.44,0.71,1,4,Commercial Lending Specialization,0,0,0,0,"Spirit Lake, IA",44020,0,0,0,1,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27610,ALABAMA,5598,9810,1,1000 HIGHWAY 231 SOUTH,912723,NM,810,,,,,,,,,,,,,,,0,STATE,N,TROY,21,0,,PIKE,09/24/2008,0,777333,07/01/2008,12/31/9999,107713,01/01/1906,5,ATLANTA,ATLANTA,6,134437,0,MONTGOMERY,0,0,DIF,,01/01/1934,0,0,1,1,1,,0,0,,Troy Bank & Trust Company,5598,0,2,SOUTHEAST,09/24/2008,2,FDIC,03/31/2019,03/31/2019,1,0.96,0.96,8.07,8.07,09/04/2019,0,N,AL,01109,1,36081,5,5,3811,5598,31-Dec-9999,31-Dec-9999,0,,,,,,,,,,,,,,,,,,http://www.troybankandtrust.com,11,0,0,TROY,777333,0,0,0,0,TRUST NO. 3 UNDER THE WILL OF CHARLES HENDERSON,2146,2146,11,0,0,1080139,AL,0,0,1.19,1.19,1,4,Commercial Lending Specialization,0,0,0,0,"Troy, AL",45980,0,0,0,1,0,0,0,1
27611,OKLAHOMA,18924,0,1,206 S MAIN ST,12885,NM,,,,,,,,,,,,,,,,0,STATE,N,ELMORE CITY,21,0,,GARVIN,03/19/2019,0,8788,12/31/2018,12/31/9999,1812,01/01/1903,13,DALLAS,DALLAS,10,116554,0,OKLAHOMA CITY,0,0,DIF,,10/24/1963,0,0,1,1,1,,0,0,,First State Bank,0,0,5,WEST,03/19/2019,5,FDIC,03/31/2019,03/31/2019,1,2.88,2.88,21.56,21.56,09/04/2019,0,N,OK,40049,40,73433,13,5,12771,18924,31-Dec-9999,31-Dec-9999,0,,,,,,,,,,,,,,,,,,fsbec.com,1,0,0,OKLAHOMA CITY,8788,0,0,0,0,"BLUECHIP BANCSHARES, LLC",95,95,1,0,0,4349837,OK,0,0,2.88,2.88,0,4,Commercial Lending Specialization,0,,,,,,0,0,,,,,,1
27612,OHIO,6540,13123,1,20 SOUTH BROAD STREET,2335841,N,810,,,,,,,,,,,,,,,3654,OCC,N,CANFIELD,3,0,,MAHONING,08/31/2017,0,1959300,08/16/2017,12/31/9999,254487,02/17/1887,9,CHICAGO,CHICAGO,4,680813,1,COLUMBUS,0,0,DIF,,01/01/1934,0,0,1,1,1,,0,9320,"Youngstown-Warren, OH MSA",The Farmers National Bank of Canfield,6540,0,3,CENTRAL,08/31/2017,3,OCC,03/31/2019,03/31/2019,0,1.38,1.38,12.85,12.85,09/04/2019,0,N,OH,39099,39,44406,9,3,4377,6540,31-Dec-9999,31-Dec-9999,0,,,,,,,,,,,,,,,,,,http://www.farmersbankgroup.com,39,0,0,CANFIELD,1959300,0,1,0,0,FARMERS NATIONAL BANC CORP.,8016,8016,39,0,0,1071191,OH,1,0,1.65,1.65,0,4,Commercial Lending Specialization,0,"Youngstown-Warren, OH-PA",566,1,"Youngstown-Warren-Boardman, OH-PA",49660,"Youngstown-Warren-Boardman, OH-PA",49660,1,0,0,0,0,0
27613,CALIFORNIA,57591,0,1,75 RIVER STREET,668098,NM,520,,,,,,,,,,,,,,,0,STATE,N,SANTA CRUZ,21,84,"San Francisco-Oakland-San Jose, CA CMSA",SANTA CRUZ,09/11/2018,0,581734,09/04/2018,12/31/9999,71466,02/03/2004,14,SAN FRANCISCO,SAN FRANCISCO,12,3235410,0,SAN FRANCISCO,0,0,DIF,,02/03/2004,0,0,1,1,1,,0,7485,"Santa Cruz-Watsonville, CA PMSA",Santa Cruz County Bank,0,0,4,WESTERN,09/11/2018,6,FDIC,03/31/2019,03/31/2019,1,1.78,1.78,16.94,16.94,09/04/2019,0,N,CA,06087,6,95060,14,4,366199,57591,31-Dec-9999,31-Dec-9999,0,,,,,,,,,,,,,,,,,,http://www.sccountybank.com,5,0,0,0,581734,0,0,0,0,0,2965,2965,5,0,0,0,0,0,0,2.5,2.5,0,4,Commercial Lending Specialization,0,"San Jose-San Francisco-Oakland, CA",488,1,"Santa Cruz-Watsonville, CA",42100,"Santa Cruz-Watsonville, CA",42100,1,0,0,0,0,1


In [9]:
# Print all columns names
print(institutions_df.columns.values)

['STNAME' 'CERT' 'DOCKET' 'ACTIVE' 'ADDRESS' 'ASSET' 'BKCLASS' 'CHANGEC1'
 'CHANGEC2' 'CHANGEC3' 'CHANGEC4' 'CHANGEC5' 'CHANGEC6' 'CHANGEC7'
 'CHANGEC8' 'CHANGEC9' 'CHANGEC10' 'CHANGEC11' 'CHANGEC12' 'CHANGEC13'
 'CHANGEC14' 'CHANGEC15' 'CHARTER' 'CHRTAGNT' 'CONSERVE' 'CITY' 'CLCODE'
 'CMSA_NO' 'CMSA' 'COUNTY' 'DATEUPDT' 'DENOVO' 'DEP' 'EFFDATE' 'ENDEFYMD'
 'EQ' 'ESTYMD' 'FDICDBS' 'FDICREGN' 'FDICSUPV' 'FED' 'FED_RSSD' 'FEDCHRTR'
 'FLDOFF' 'IBA' 'INACTIVE' 'INSAGNT1' 'INSAGNT2' 'INSDATE' 'INSTCRCD'
 'INSBIF' 'INSCOML' 'INSDIF' 'INSFDIC' 'INSSAIF' 'INSSAVE' 'MSA_NO' 'MSA'
 'NAME' 'NEWCERT' 'OAKAR' 'OTSDIST' 'OTSREGNM' 'PROCDATE' 'QBPRCOML'
 'REGAGNT' 'REPDTE' 'RISDATE' 'STCHRTR' 'ROA' 'ROAQ' 'ROE' 'ROEQ'
 'RUNDATE' 'SASSER' 'LAW_SASSER_FLG' 'STALP' 'STCNTY' 'STNUM' 'ZIP'
 'SUPRV_FD' 'OCCDIST' 'UNINUM' 'ULTCERT' 'CFPBEFFDTE' 'CFPBENDDTE'
 'CFPBFLAG' 'REGAGENT2' 'TE01N528' 'TE02N528' 'TE03N528' 'TE04N528'
 'TE05N528' 'TE06N528' 'TE07N528' 'TE08N528' 'TE09N528' 'TE10N528'
 'TE01N529' 'TE02

In [10]:
# Drop unnessary columns
institutions_df.drop(columns = ['CMSA_NO','CMSA','DOCKET','ACTIVE','CHANGEC2','CHANGEC3','CHANGEC4','CHANGEC5','CHANGEC6',\
                                                  'CHANGEC7','CHANGEC8','CHANGEC9','CHANGEC10','CHANGEC11','CHANGEC12','CHANGEC13',\
                                                  'CHANGEC14','CHANGEC15','CHARTER','CHRTAGNT','CONSERVE','DENOVO','FEDCHRTR',\
                                                  'FLDOFF','IBA','INACTIVE','INSAGNT1','INSAGNT2','INSTCRCD','INSBIF','INSCOML',\
                                                  'INSDIF','INSFDIC','INSSAIF','INSSAVE','MSA_NO','MSA','NEWCERT','OAKAR','OTSDIST',\
                                                  'OTSREGNM','PROCDATE','QBPRCOML','REGAGNT','REPDTE','RISDATE','STCHRTR','SASSER',\
                                                  'LAW_SASSER_FLG','CFPBEFFDTE','CFPBENDDTE','CFPBFLAG','REGAGENT2'], inplace= True)


In [11]:
# Print to verify drops
print(institutions_df.columns.values)

['STNAME' 'CERT' 'ADDRESS' 'ASSET' 'BKCLASS' 'CHANGEC1' 'CITY' 'CLCODE'
 'COUNTY' 'DATEUPDT' 'DEP' 'EFFDATE' 'ENDEFYMD' 'EQ' 'ESTYMD' 'FDICDBS'
 'FDICREGN' 'FDICSUPV' 'FED' 'FED_RSSD' 'INSDATE' 'NAME' 'ROA' 'ROAQ'
 'ROE' 'ROEQ' 'RUNDATE' 'STALP' 'STCNTY' 'STNUM' 'ZIP' 'SUPRV_FD'
 'OCCDIST' 'UNINUM' 'ULTCERT' 'TE01N528' 'TE02N528' 'TE03N528' 'TE04N528'
 'TE05N528' 'TE06N528' 'TE07N528' 'TE08N528' 'TE09N528' 'TE10N528'
 'TE01N529' 'TE02N529' 'TE03N529' 'TE04N529' 'TE05N529' 'TE06N529'
 'WEBADDR' 'OFFICES' 'CERTCONS' 'PARCERT' 'CITYHCR' 'DEPDOM' 'FORM31'
 'HCTMULT' 'INSTAG' 'MUTUAL' 'NAMEHCR' 'NETINC' 'NETINCQ' 'OFFDOM'
 'OFFFOR' 'OFFOA' 'RSSDHCR' 'STALPHCR' 'STMULT' 'SUBCHAPS' 'ROAPTX'
 'ROAPTXQ' 'TRUST' 'SPECGRP' 'SPECGRPN' 'TRACT' 'CSA' 'CSA_NO' 'CSA_FLG'
 'CBSA' 'CBSA_NO' 'CBSA_METRO_NAME' 'CBSA_METRO' 'CBSA_METRO_FLG'
 'CBSA_MICRO_FLG' 'CBSA_DIV' 'CBSA_DIV_NO' 'CBSA_DIV_FLG' 'CB']


In [12]:
# Delete more columns using https://stackoverflow.com/questions/28538536/deleting-multiple-columns-based-on-column-names-in-pandas

institutions_df.drop(institutions_df.columns[36:91], axis = 1, inplace = True)

In [13]:
# Replace NAN values with 0
institutions_df = institutions_df.replace(np.nan,0)
institutions_df.head()

Unnamed: 0,STNAME,CERT,ADDRESS,ASSET,BKCLASS,CHANGEC1,CITY,CLCODE,COUNTY,DATEUPDT,DEP,EFFDATE,ENDEFYMD,EQ,ESTYMD,FDICDBS,FDICREGN,FDICSUPV,FED,FED_RSSD,INSDATE,NAME,ROA,ROAQ,ROE,ROEQ,RUNDATE,STALP,STCNTY,STNUM,ZIP,SUPRV_FD,OCCDIST,UNINUM,ULTCERT,TE01N528
0,PENNSYLVANIA,15698,401 WEST LANCASTER AVENUE,0,SB,213,HAVERFORD,42,MONTGOMERY,04/07/1982,0,04/03/1982,04/03/1982,0,01/01/1847,2,NEW YORK,NEW YORK,3,897116,11/21/1939,The Western Saving Fund Society of Philadelphia,0.0,0.0,0.0,0.0,09/04/2019,PA,42091,42,19041,2,1,9966,7946,0
1,SOUTH CAROLINA,15716,MAIN STREET,5045,NM,223,SUMMERTON,21,CLARENDON,10/23/1985,4412,09/16/1985,09/16/1985,585,09/11/1934,5,ATLANTA,ATLANTA,5,577221,01/15/1940,Bank of Summerton,2.08,3.42,19.09,30.52,09/04/2019,SC,45027,45,29148,5,1,9979,873,0
2,TEXAS,15721,823 CONGRESS AVENUE,345518,N,211,AUSTIN,3,TRAVIS,11/03/1992,331568,10/30/1992,10/30/1992,6972,06/05/1936,13,DALLAS,DALLAS,11,7353,02/21/1940,"First City, Texas - Austin, National Association",-2.07,-0.45,-91.84,-22.36,09/04/2019,TX,48453,48,78767,13,5,9983,5510,0
3,IOWA,15736,606 WEST MILWAUKEE,206135,SM,223,NEW HAMPTON,13,CHICKASAW,04/07/2011,188024,03/31/2011,03/31/2011,16414,06/16/1937,11,KANSAS CITY,KANSAS CITY,7,668848,04/15/1940,BANK IOWA,1.09,0.75,13.58,9.09,09/04/2019,IA,19037,19,50659,11,4,9992,14521,0
4,IOWA,15368,1306 18TH STREET,38799,SM,223,SPIRIT LAKE,13,DICKINSON,01/31/2003,26764,10/28/2002,10/28/2002,2615,01/01/1935,11,KANSAS CITY,KANSAS CITY,7,810142,03/24/1936,Security State Bank,3.27,-0.05,46.44,-0.68,09/04/2019,IA,19059,19,51360,11,4,9695,15555,0


In [14]:
# Print to verify columns name
print(institutions_df.columns.values)

['STNAME' 'CERT' 'ADDRESS' 'ASSET' 'BKCLASS' 'CHANGEC1' 'CITY' 'CLCODE'
 'COUNTY' 'DATEUPDT' 'DEP' 'EFFDATE' 'ENDEFYMD' 'EQ' 'ESTYMD' 'FDICDBS'
 'FDICREGN' 'FDICSUPV' 'FED' 'FED_RSSD' 'INSDATE' 'NAME' 'ROA' 'ROAQ'
 'ROE' 'ROEQ' 'RUNDATE' 'STALP' 'STCNTY' 'STNUM' 'ZIP' 'SUPRV_FD'
 'OCCDIST' 'UNINUM' 'ULTCERT' 'TE01N528']


In [15]:
# Rearrenge columns where I only care the first 3 

new_cols = ['NAME', 'CITY', 'STNAME'] + [c for c in institutions_df.columns if c not in ['NAME', 'CITY', 'STNAME']]
                                 
# Re-index after rearrenging
institutions_df = institutions_df.reindex(columns = new_cols)
institutions_df

Unnamed: 0,NAME,CITY,STNAME,CERT,ADDRESS,ASSET,BKCLASS,CHANGEC1,CLCODE,COUNTY,DATEUPDT,DEP,EFFDATE,ENDEFYMD,EQ,ESTYMD,FDICDBS,FDICREGN,FDICSUPV,FED,FED_RSSD,INSDATE,ROA,ROAQ,ROE,ROEQ,RUNDATE,STALP,STCNTY,STNUM,ZIP,SUPRV_FD,OCCDIST,UNINUM,ULTCERT,TE01N528
0,The Western Saving Fund Society of Philadelphia,HAVERFORD,PENNSYLVANIA,15698,401 WEST LANCASTER AVENUE,0,SB,213,42,MONTGOMERY,04/07/1982,0,04/03/1982,04/03/1982,0,01/01/1847,2,NEW YORK,NEW YORK,3,897116,11/21/1939,0,0,0,0,09/04/2019,PA,42091,42,19041,2,1,9966,7946,0
1,Bank of Summerton,SUMMERTON,SOUTH CAROLINA,15716,MAIN STREET,5045,NM,223,21,CLARENDON,10/23/1985,4412,09/16/1985,09/16/1985,585,09/11/1934,5,ATLANTA,ATLANTA,5,577221,01/15/1940,2.08,3.42,19.09,30.52,09/04/2019,SC,45027,45,29148,5,1,9979,873,0
2,"First City, Texas - Austin, National Association",AUSTIN,TEXAS,15721,823 CONGRESS AVENUE,345518,N,211,3,TRAVIS,11/03/1992,331568,10/30/1992,10/30/1992,6972,06/05/1936,13,DALLAS,DALLAS,11,7353,02/21/1940,-2.07,-0.45,-91.84,-22.36,09/04/2019,TX,48453,48,78767,13,5,9983,5510,0
3,BANK IOWA,NEW HAMPTON,IOWA,15736,606 WEST MILWAUKEE,206135,SM,223,13,CHICKASAW,04/07/2011,188024,03/31/2011,03/31/2011,16414,06/16/1937,11,KANSAS CITY,KANSAS CITY,7,668848,04/15/1940,1.09,0.75,13.58,9.09,09/04/2019,IA,19037,19,50659,11,4,9992,14521,0
4,Security State Bank,SPIRIT LAKE,IOWA,15368,1306 18TH STREET,38799,SM,223,13,DICKINSON,01/31/2003,26764,10/28/2002,10/28/2002,2615,01/01/1935,11,KANSAS CITY,KANSAS CITY,7,810142,03/24/1936,3.27,-0.05,46.44,-0.68,09/04/2019,IA,19059,19,51360,11,4,9695,15555,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27610,Troy Bank & Trust Company,TROY,ALABAMA,5598,1000 HIGHWAY 231 SOUTH,912723,NM,810,21,PIKE,09/24/2008,777333,07/01/2008,12/31/9999,107713,01/01/1906,5,ATLANTA,ATLANTA,6,134437,01/01/1934,0.96,0.96,8.07,8.07,09/04/2019,AL,01109,1,36081,5,5,3811,5598,0
27611,First State Bank,ELMORE CITY,OKLAHOMA,18924,206 S MAIN ST,12885,NM,0,21,GARVIN,03/19/2019,8788,12/31/2018,12/31/9999,1812,01/01/1903,13,DALLAS,DALLAS,10,116554,10/24/1963,2.88,2.88,21.56,21.56,09/04/2019,OK,40049,40,73433,13,5,12771,18924,0
27612,The Farmers National Bank of Canfield,CANFIELD,OHIO,6540,20 SOUTH BROAD STREET,2335841,N,810,3,MAHONING,08/31/2017,1959300,08/16/2017,12/31/9999,254487,02/17/1887,9,CHICAGO,CHICAGO,4,680813,01/01/1934,1.38,1.38,12.85,12.85,09/04/2019,OH,39099,39,44406,9,3,4377,6540,0
27613,Santa Cruz County Bank,SANTA CRUZ,CALIFORNIA,57591,75 RIVER STREET,668098,NM,520,21,SANTA CRUZ,09/11/2018,581734,09/04/2018,12/31/9999,71466,02/03/2004,14,SAN FRANCISCO,SAN FRANCISCO,12,3235410,02/03/2004,1.78,1.78,16.94,16.94,09/04/2019,CA,06087,6,95060,14,4,366199,57591,0


## Save to CSV and JSON

In [28]:
# Save to CSV and JSON

institutions_df.to_csv("../financial_instituttions_data/institutions_clean.csv", index=False, encoding='utf8')
locations_df.to_csv("../financial_instituttions_data/locations_clean.csv", index=False, encoding='utf8')


institutions_df.to_json("../financial_instituttions_data/institutions_clean.json", orient='columns')
locations_df.to_json("../financial_instituttions_data/locations_clean.json", orient='columns')


# Load

## Connect to postgresSQL database

In [29]:
# connect to local database 
engine = create_engine(f"postgresql://{secret.user_pass}@localhost:5432/banksDB")

In [30]:
engine.table_names()

[]

## Load csv to database

In [31]:
#  Use pandas to load csv converted DataFrame into database
institutions_df.to_sql(name='institutions', con=engine, if_exists='append', index=False)
locations_df.to_sql(name='locations', con=engine, if_exists='append', index=False)


## Query Database to confirm upload

In [32]:
# Confirm data has been added by querying the tables

pd.read_sql_query('select * from institutions', con=engine).head()

Unnamed: 0,NAME,CITY,STNAME,CERT,ADDRESS,ASSET,BKCLASS,CHANGEC1,CLCODE,COUNTY,DATEUPDT,DEP,EFFDATE,ENDEFYMD,EQ,ESTYMD,FDICDBS,FDICREGN,FDICSUPV,FED,FED_RSSD,INSDATE,ROA,ROAQ,ROE,ROEQ,RUNDATE,STALP,STCNTY,STNUM,ZIP,SUPRV_FD,OCCDIST,UNINUM,ULTCERT,TE01N528
0,The Western Saving Fund Society of Philadelphia,HAVERFORD,PENNSYLVANIA,15698,401 WEST LANCASTER AVENUE,0,SB,213,42,MONTGOMERY,04/07/1982,0,04/03/1982,04/03/1982,0,01/01/1847,2,NEW YORK,NEW YORK,3,897116,11/21/1939,0.0,0.0,0.0,0.0,09/04/2019,PA,42091,42,19041,2,1,9966,7946,0
1,Bank of Summerton,SUMMERTON,SOUTH CAROLINA,15716,MAIN STREET,5045,NM,223,21,CLARENDON,10/23/1985,4412,09/16/1985,09/16/1985,585,09/11/1934,5,ATLANTA,ATLANTA,5,577221,01/15/1940,2.08,3.42,19.09,30.52,09/04/2019,SC,45027,45,29148,5,1,9979,873,0
2,"First City, Texas - Austin, National Association",AUSTIN,TEXAS,15721,823 CONGRESS AVENUE,345518,N,211,3,TRAVIS,11/03/1992,331568,10/30/1992,10/30/1992,6972,06/05/1936,13,DALLAS,DALLAS,11,7353,02/21/1940,-2.07,-0.45,-91.84,-22.36,09/04/2019,TX,48453,48,78767,13,5,9983,5510,0
3,BANK IOWA,NEW HAMPTON,IOWA,15736,606 WEST MILWAUKEE,206135,SM,223,13,CHICKASAW,04/07/2011,188024,03/31/2011,03/31/2011,16414,06/16/1937,11,KANSAS CITY,KANSAS CITY,7,668848,04/15/1940,1.09,0.75,13.58,9.09,09/04/2019,IA,19037,19,50659,11,4,9992,14521,0
4,Security State Bank,SPIRIT LAKE,IOWA,15368,1306 18TH STREET,38799,SM,223,13,DICKINSON,01/31/2003,26764,10/28/2002,10/28/2002,2615,01/01/1935,11,KANSAS CITY,KANSAS CITY,7,810142,03/24/1936,3.27,-0.05,46.44,-0.68,09/04/2019,IA,19059,19,51360,11,4,9695,15555,0


In [33]:
# Confirm data has been added by querying the tables

pd.read_sql_query('select * from locations', con=engine).head()

Unnamed: 0,NAME,CITY,STNAME,ADDRESS,CBSA,CBSA_NO,CERT,COUNTY,FI_UNINUM,OFFNAME,OFFNUM,RUNDATE,SERVTYPE,STALP,STCNTY,UNINUM,ZIP
0,Old National Bank,EVANSVILLE,INDIANA,4500 WASHINGTON AVENUE,"Evansville, IN-KY",21780,3832,VANDERBURGH,2492,HEBRON PLACE BRANCH,8,09/04/2019,11,IN,18163,203604,47714
1,Regions Bank,VINCENNES,INDIANA,2202 NORTH SIXTH STREET,"Vincennes, IN",47180,12368,KNOX,7866,NORTH VINCENNES BRANCH,1636,09/04/2019,11,IN,18083,203651,47591
2,MutualBank,MITCHELL,INDIANA,628 WEST MAIN STREET,"Bedford, IN",13260,27744,LAWRENCE,40578,MESSENGER SERVICE BRANCH,144,09/04/2019,27,IN,18093,203628,47446
3,German American Bank,WASHINGTON,INDIANA,"17 CHERRY TREE PLAZA, US HIGHWAY 50 EAST","Washington, IN",47780,17393,DAVIESS,11392,WASHINGTON CHERRY TREE BRANCH,30,09/04/2019,11,IN,18027,203662,47501
4,U.S. Bank National Association,CEDAR RAPIDS,IOWA,"5321 COUNCIL STREET, N.E.","Cedar Rapids, IA",16300,6548,LINN,4383,CEDAR RAPIDS COUNCIL STREET BRANCH,568,09/04/2019,11,IA,19113,203680,52402


## Create a table view of institutions

In [34]:
# Create a table view of all bankruptcy data from 2009 to 2011

pd.read_sql_query('create or replace view banks_data as select * from institutions', con = engine)

ResourceClosedError: This result object does not return rows. It has been closed automatically.

## Query Table View

In [35]:
# Confirm data has been added by querying the tables

pd.read_sql_query('select * from banks_data', con=engine).head()

Unnamed: 0,NAME,CITY,STNAME,CERT,ADDRESS,ASSET,BKCLASS,CHANGEC1,CLCODE,COUNTY,DATEUPDT,DEP,EFFDATE,ENDEFYMD,EQ,ESTYMD,FDICDBS,FDICREGN,FDICSUPV,FED,FED_RSSD,INSDATE,ROA,ROAQ,ROE,ROEQ,RUNDATE,STALP,STCNTY,STNUM,ZIP,SUPRV_FD,OCCDIST,UNINUM,ULTCERT,TE01N528
0,The Western Saving Fund Society of Philadelphia,HAVERFORD,PENNSYLVANIA,15698,401 WEST LANCASTER AVENUE,0,SB,213,42,MONTGOMERY,04/07/1982,0,04/03/1982,04/03/1982,0,01/01/1847,2,NEW YORK,NEW YORK,3,897116,11/21/1939,0.0,0.0,0.0,0.0,09/04/2019,PA,42091,42,19041,2,1,9966,7946,0
1,Bank of Summerton,SUMMERTON,SOUTH CAROLINA,15716,MAIN STREET,5045,NM,223,21,CLARENDON,10/23/1985,4412,09/16/1985,09/16/1985,585,09/11/1934,5,ATLANTA,ATLANTA,5,577221,01/15/1940,2.08,3.42,19.09,30.52,09/04/2019,SC,45027,45,29148,5,1,9979,873,0
2,"First City, Texas - Austin, National Association",AUSTIN,TEXAS,15721,823 CONGRESS AVENUE,345518,N,211,3,TRAVIS,11/03/1992,331568,10/30/1992,10/30/1992,6972,06/05/1936,13,DALLAS,DALLAS,11,7353,02/21/1940,-2.07,-0.45,-91.84,-22.36,09/04/2019,TX,48453,48,78767,13,5,9983,5510,0
3,BANK IOWA,NEW HAMPTON,IOWA,15736,606 WEST MILWAUKEE,206135,SM,223,13,CHICKASAW,04/07/2011,188024,03/31/2011,03/31/2011,16414,06/16/1937,11,KANSAS CITY,KANSAS CITY,7,668848,04/15/1940,1.09,0.75,13.58,9.09,09/04/2019,IA,19037,19,50659,11,4,9992,14521,0
4,Security State Bank,SPIRIT LAKE,IOWA,15368,1306 18TH STREET,38799,SM,223,13,DICKINSON,01/31/2003,26764,10/28/2002,10/28/2002,2615,01/01/1935,11,KANSAS CITY,KANSAS CITY,7,810142,03/24/1936,3.27,-0.05,46.44,-0.68,09/04/2019,IA,19059,19,51360,11,4,9695,15555,0


# Questions ??