# Dependencies

In [1]:
# Dependencies and Setup
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sqlalchemy import create_engine
import datetime

# Extract

## Load and Read CSV

In [2]:
# Load CSV files

institutions = "../financial_instituttions_data/institutions.csv"
locations = "../financial_instituttions_data/locations.csv"

# Read files and store into Pandas DataFrames.
# Every column is read as text (dtype=str) so pandas does not have to guess column
# types; background on the low-memory dtype warning:
# https://stackoverflow.com/questions/24251219/pandas-read-csv-low-memory-and-dtype-options
#
# on_bad_lines='warn' replaces error_bad_lines=False, which was deprecated in
# pandas 1.3 and removed in pandas 2.0. Malformed rows are still dropped and a warning
# is emitted for each one, matching the previous behaviour. Requires pandas >= 1.3.
read_options = dict(sep=',', on_bad_lines='warn', index_col=False, dtype=str)

institutions_df = pd.read_csv(institutions, **read_options)
locations_df = pd.read_csv(locations, **read_options)


In [3]:
# Let wide frames show (up to 500) columns instead of being truncated with "..."
pd.set_option('display.max_columns', 500)

# Preview the locations frame. A cell renders only its final expression, so a bare
# `.head()` call placed before the last line would compute a preview and discard it.
locations_df.head()

Unnamed: 0,ADDRESS,BKCLASS,CBSA,CBSA_DIV,CBSA_DIV_FLG,CBSA_DIV_NO,CBSA_METRO,CBSA_METRO_FLG,CBSA_METRO_NAME,CBSA_MICRO_FLG,CBSA_NO,CERT,CITY,COUNTY,CSA,CSA_FLG,CSA_NO,ESTYMD,FI_UNINUM,MAINOFF,NAME,OFFNAME,OFFNUM,RUNDATE,SERVTYPE,STALP,STCNTY,STNAME,UNINUM,ZIP
0,4500 WASHINGTON AVENUE,N,"Evansville, IN-KY",0,0,0,21780,1,"Evansville, IN-KY",0,21780,3832,EVANSVILLE,VANDERBURGH,0,0,0,10/03/1963,2492,0,Old National Bank,HEBRON PLACE BRANCH,8,09/04/2019,11,IN,18163,INDIANA,203604,47714
1,2202 NORTH SIXTH STREET,SM,"Vincennes, IN",0,0,0,0,0,0,1,47180,12368,VINCENNES,KNOX,0,0,0,11/01/1971,7866,0,Regions Bank,NORTH VINCENNES BRANCH,1636,09/04/2019,11,IN,18083,INDIANA,203651,47591
2,628 WEST MAIN STREET,NM,"Bedford, IN",0,0,0,0,0,0,1,13260,27744,MITCHELL,LAWRENCE,"Bloomington-Bedford, IN",1,144,10/01/1995,40578,0,MutualBank,MESSENGER SERVICE BRANCH,144,09/04/2019,27,IN,18093,INDIANA,203628,47446
3,"17 CHERRY TREE PLAZA, US HIGHWAY 50 EAST",NM,"Washington, IN",0,0,0,0,0,0,1,47780,17393,WASHINGTON,DAVIESS,0,0,0,03/15/1989,11392,0,German American Bank,WASHINGTON CHERRY TREE BRANCH,30,09/04/2019,11,IN,18027,INDIANA,203662,47501
4,"5321 COUNCIL STREET, N.E.",N,"Cedar Rapids, IA",0,0,0,16300,1,"Cedar Rapids, IA",0,16300,6548,CEDAR RAPIDS,LINN,"Cedar Rapids-Iowa City, IA",1,168,01/16/1978,4383,0,U.S. Bank National Association,CEDAR RAPIDS COUNCIL STREET BRANCH,568,09/04/2019,11,IA,19113,IOWA,203680,52402


# Transform Locations DF

## Drop & rearrange

In [4]:
# List every column name in the locations frame
print(locations_df.columns.to_numpy())

['ADDRESS' 'BKCLASS' 'CBSA' 'CBSA_DIV' 'CBSA_DIV_FLG' 'CBSA_DIV_NO'
 'CBSA_METRO' 'CBSA_METRO_FLG' 'CBSA_METRO_NAME' 'CBSA_MICRO_FLG'
 'CBSA_NO' 'CERT' 'CITY' 'COUNTY' 'CSA' 'CSA_FLG' 'CSA_NO' 'ESTYMD'
 'FI_UNINUM' 'MAINOFF' 'NAME' 'OFFNAME' 'OFFNUM' 'RUNDATE' 'SERVTYPE'
 'STALP' 'STCNTY' 'STNAME' 'UNINUM' 'ZIP']


In [5]:
# Columns removed from the locations frame before it is reordered and saved
unused_location_cols = [
    'BKCLASS', 'CBSA_DIV', 'CBSA_DIV_FLG', 'CBSA_DIV_NO',
    'CBSA_METRO', 'CBSA_METRO_FLG', 'CBSA_METRO_NAME', 'CBSA_MICRO_FLG',
    'CSA', 'CSA_FLG', 'CSA_NO', 'ESTYMD',
    'MAINOFF',
]
locations_df = locations_df.drop(columns=unused_location_cols)

In [6]:
# Preview the first five rows to confirm the drop
locations_df.head(n=5)

Unnamed: 0,ADDRESS,CBSA,CBSA_NO,CERT,CITY,COUNTY,FI_UNINUM,NAME,OFFNAME,OFFNUM,RUNDATE,SERVTYPE,STALP,STCNTY,STNAME,UNINUM,ZIP
0,4500 WASHINGTON AVENUE,"Evansville, IN-KY",21780,3832,EVANSVILLE,VANDERBURGH,2492,Old National Bank,HEBRON PLACE BRANCH,8,09/04/2019,11,IN,18163,INDIANA,203604,47714
1,2202 NORTH SIXTH STREET,"Vincennes, IN",47180,12368,VINCENNES,KNOX,7866,Regions Bank,NORTH VINCENNES BRANCH,1636,09/04/2019,11,IN,18083,INDIANA,203651,47591
2,628 WEST MAIN STREET,"Bedford, IN",13260,27744,MITCHELL,LAWRENCE,40578,MutualBank,MESSENGER SERVICE BRANCH,144,09/04/2019,27,IN,18093,INDIANA,203628,47446
3,"17 CHERRY TREE PLAZA, US HIGHWAY 50 EAST","Washington, IN",47780,17393,WASHINGTON,DAVIESS,11392,German American Bank,WASHINGTON CHERRY TREE BRANCH,30,09/04/2019,11,IN,18027,INDIANA,203662,47501
4,"5321 COUNCIL STREET, N.E.","Cedar Rapids, IA",16300,6548,CEDAR RAPIDS,LINN,4383,U.S. Bank National Association,CEDAR RAPIDS COUNCIL STREET BRANCH,568,09/04/2019,11,IA,19113,IOWA,203680,52402


In [7]:
# Rearrange columns: NAME, CITY and STNAME go first; all remaining columns
# follow in their existing relative order.
leading_cols = ['NAME', 'CITY', 'STNAME']
trailing_cols = list(filter(lambda col: col not in leading_cols, locations_df.columns))
new_cols = leading_cols + trailing_cols

# Re-index the frame with the new column order
locations_df = locations_df.reindex(columns=new_cols)
locations_df

Unnamed: 0,NAME,CITY,STNAME,ADDRESS,CBSA,CBSA_NO,CERT,COUNTY,FI_UNINUM,OFFNAME,OFFNUM,RUNDATE,SERVTYPE,STALP,STCNTY,UNINUM,ZIP
0,Old National Bank,EVANSVILLE,INDIANA,4500 WASHINGTON AVENUE,"Evansville, IN-KY",21780,3832,VANDERBURGH,2492,HEBRON PLACE BRANCH,8,09/04/2019,11,IN,18163,203604,47714
1,Regions Bank,VINCENNES,INDIANA,2202 NORTH SIXTH STREET,"Vincennes, IN",47180,12368,KNOX,7866,NORTH VINCENNES BRANCH,1636,09/04/2019,11,IN,18083,203651,47591
2,MutualBank,MITCHELL,INDIANA,628 WEST MAIN STREET,"Bedford, IN",13260,27744,LAWRENCE,40578,MESSENGER SERVICE BRANCH,144,09/04/2019,27,IN,18093,203628,47446
3,German American Bank,WASHINGTON,INDIANA,"17 CHERRY TREE PLAZA, US HIGHWAY 50 EAST","Washington, IN",47780,17393,DAVIESS,11392,WASHINGTON CHERRY TREE BRANCH,30,09/04/2019,11,IN,18027,203662,47501
4,U.S. Bank National Association,CEDAR RAPIDS,IOWA,"5321 COUNCIL STREET, N.E.","Cedar Rapids, IA",16300,6548,LINN,4383,CEDAR RAPIDS COUNCIL STREET BRANCH,568,09/04/2019,11,IA,19113,203680,52402
5,"Midwest Heritage Bank, FSB",WINDSOR HEIGHTS,IOWA,7101 UNIVERSITY,"Des Moines-West Des Moines, IA",19780,4432,POLK,2928,WINDSOR HEIGHTS BRANCH,5,09/04/2019,11,IA,19153,203686,50311
6,First Citizens Bank,LATIMER,IOWA,117 NORTH AKIR,0,0,4433,FRANKLIN,2929,LATIMER BRANCH,7,09/04/2019,11,IA,19069,203692,50452
7,Old National Bank,HENDERSON,KENTUCKY,301 SECOND STREET,"Evansville, IN-KY",21780,3832,HENDERSON,2492,301 SECOND STREET BRANCH,81,09/04/2019,11,KY,21101,203616,42420
8,Fifth Third Bank,TELL CITY,INDIANA,45 HIGHWAY 66 EAST,0,0,6672,PERRY,4470,TELL CITY PLAZA BRANCH,953,09/04/2019,11,IN,18123,203645,47586
9,Regions Bank,BICKNELL,INDIANA,104 W. 11TH STREET,"Vincennes, IN",47180,12368,KNOX,7866,BICKNELL BRANCH,1637,09/04/2019,11,IN,18083,203653,47512


# Transform Institutions DF

## Drop, fill, rearrange

In [8]:
# Display the institutions DataFrame as loaded, before any columns are dropped
institutions_df

Unnamed: 0,STNAME,CERT,DOCKET,ACTIVE,ADDRESS,ASSET,BKCLASS,CHANGEC1,CHANGEC2,CHANGEC3,CHANGEC4,CHANGEC5,CHANGEC6,CHANGEC7,CHANGEC8,CHANGEC9,CHANGEC10,CHANGEC11,CHANGEC12,CHANGEC13,CHANGEC14,CHANGEC15,CHARTER,CHRTAGNT,CONSERVE,CITY,CLCODE,CMSA_NO,CMSA,COUNTY,DATEUPDT,DENOVO,DEP,EFFDATE,ENDEFYMD,EQ,ESTYMD,FDICDBS,FDICREGN,FDICSUPV,FED,FED_RSSD,FEDCHRTR,FLDOFF,IBA,INACTIVE,INSAGNT1,INSAGNT2,INSDATE,INSTCRCD,INSBIF,INSCOML,INSDIF,INSFDIC,INSSAIF,INSSAVE,MSA_NO,MSA,NAME,NEWCERT,OAKAR,OTSDIST,OTSREGNM,PROCDATE,QBPRCOML,REGAGNT,REPDTE,RISDATE,STCHRTR,ROA,ROAQ,ROE,ROEQ,RUNDATE,SASSER,LAW_SASSER_FLG,STALP,STCNTY,STNUM,ZIP,SUPRV_FD,OCCDIST,UNINUM,ULTCERT,CFPBEFFDTE,CFPBENDDTE,CFPBFLAG,REGAGENT2,TE01N528,TE02N528,TE03N528,TE04N528,TE05N528,TE06N528,TE07N528,TE08N528,TE09N528,TE10N528,TE01N529,TE02N529,TE03N529,TE04N529,TE05N529,TE06N529,WEBADDR,OFFICES,CERTCONS,PARCERT,CITYHCR,DEPDOM,FORM31,HCTMULT,INSTAG,MUTUAL,NAMEHCR,NETINC,NETINCQ,OFFDOM,OFFFOR,OFFOA,RSSDHCR,STALPHCR,STMULT,SUBCHAPS,ROAPTX,ROAPTXQ,TRUST,SPECGRP,SPECGRPN,TRACT,CSA,CSA_NO,CSA_FLG,CBSA,CBSA_NO,CBSA_METRO_NAME,CBSA_METRO,CBSA_METRO_FLG,CBSA_MICRO_FLG,CBSA_DIV,CBSA_DIV_NO,CBSA_DIV_FLG,CB
0,PENNSYLVANIA,15698,0,0,401 WEST LANCASTER AVENUE,,SB,213,,,,,,,,,,,,,,,0,STATE,N,HAVERFORD,42,77,"Philadelphia-Wilmington-Atlantic City, PA-NJ-D...",MONTGOMERY,04/07/1982,,,04/03/1982,04/03/1982,,01/01/1847,2,NEW YORK,NEW YORK,3,897116,0,PHILADELPHIA,0,1,BIF,,11/21/1939,0,1,0,,1,,0,6160,"Philadelphia, PA-NJ PMSA",The Western Saving Fund Society of Philadelphia,15750,0,1,NORTHEAST,04/07/1982,1,FDIC,,,1,,,,,09/04/2019,0,N,PA,42091,42,19041,2,1,9966,7946,31-Dec-9999,31-Dec-9999,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Error in Specialization Group,,"Philadelphia-Reading-Camden, PA-NJ-DE-MD",428,1,"Philadelphia-Camden-Wilmington, PA-NJ-DE-MD",37980,"Philadelphia-Camden-Wilmington, PA-NJ-DE-MD",37980,1,0,"Montgomery County-Bucks County-Chester County, PA",33874,1,
1,SOUTH CAROLINA,15716,0,0,MAIN STREET,5045,NM,223,,,,,,,,,,,,,,,0,STATE,N,SUMMERTON,21,0,,CLARENDON,10/23/1985,0,4412,09/16/1985,09/16/1985,585,09/11/1934,5,ATLANTA,ATLANTA,5,577221,0,COLUMBIA SC,0,1,BIF,,01/15/1940,0,1,1,0,1,,0,0,,Bank of Summerton,2111,0,2,SOUTHEAST,10/23/1985,2,FDIC,06/30/1985,06/30/1985,1,2.08,3.42,19.09,30.52,09/04/2019,0,N,SC,45027,45,29148,5,1,9979,873,31-Dec-9999,31-Dec-9999,0,,,,,,,,,,,,,,,,,,,,0,0,0,4412,0,0,0,0,0,52,43,1,0,0,0,0,0,0,2.45,4.05,1,7,Other Specialized Under 1 Billion,0,,,,,,0,0,,,,,,1
2,TEXAS,15721,0,0,823 CONGRESS AVENUE,345518,N,211,,,,,,,,,,,,,,,14728,OCC,N,AUSTIN,3,0,,TRAVIS,11/03/1992,0,331568,10/30/1992,10/30/1992,6972,06/05/1936,13,DALLAS,DALLAS,11,7353,1,AUSTIN,0,1,BIF,,02/21/1940,0,1,1,0,1,,0,640,"Austin-San Marcos, TX MSA","First City, Texas - Austin, National Association",33714,0,5,WEST,11/03/1992,5,OCC,09/30/1992,09/30/1992,0,-2.07,-0.45,-91.84,-22.36,09/04/2019,0,N,TX,48453,48,78767,13,5,9983,5510,31-Dec-9999,31-Dec-9999,0,,,,,,,,,,,,,,,,,,,,0,0,HOUSTON,331568,0,1,0,0,"FIRST CITY BANCORPORATION OF TEXAS, INC.",-5930,-401,3,0,0,1249338,TX,0,0,-2.05,-0.44,1,4,Commercial Lending Specialization,0,0,0,0,"Austin-Round Rock, TX",12420,"Austin-Round Rock, TX",12420,1,0,0,0,0,0
3,IOWA,15736,0,0,606 WEST MILWAUKEE,206135,SM,223,,,,,,,,,,,,,,,0,STATE,N,NEW HAMPTON,13,0,,CHICKASAW,04/07/2011,0,188024,03/31/2011,03/31/2011,16414,06/16/1937,11,KANSAS CITY,KANSAS CITY,7,668848,0,CEDAR RAPIDS,0,1,DIF,,04/15/1940,0,0,1,1,1,,0,0,,BANK IOWA,14521,0,4,WESTERN,04/07/2011,4,FED,12/31/2010,12/31/2010,1,1.09,0.75,13.58,9.09,09/04/2019,0,N,IA,19037,19,50659,11,4,9992,14521,31-Dec-9999,31-Dec-9999,0,,,,,,,,,,,,,,,,,,http://www.bankiowabanks.com/lawler,,0,0,WEST DES MOINES,188024,0,1,1,0,BANK IOWA CORPORATION,2193,378,4,0,0,1202762,IA,0,1,1.13,0.76,1,2,Agricultural Specialization,0,,,,,,0,0,,,,,,1
4,IOWA,15368,0,0,1306 18TH STREET,38799,SM,223,,,,,,,,,,,,,,,0,STATE,N,SPIRIT LAKE,13,0,,DICKINSON,01/31/2003,0,26764,10/28/2002,10/28/2002,2615,01/01/1935,11,KANSAS CITY,KANSAS CITY,7,810142,0,SIOUX CITY,0,1,BIF,,03/24/1936,0,1,1,0,1,,0,0,,Security State Bank,15555,0,4,WESTERN,01/31/2003,4,FED,09/30/2002,09/30/2002,1,3.27,-0.05,46.44,-0.68,09/04/2019,0,N,IA,19059,19,51360,11,4,9695,15555,31-Dec-9999,31-Dec-9999,0,,,,,,,,,,,,,,,,,,WWW.CENTRALBANKFDIC.COM,,0,0,STORM LAKE,26764,0,1,0,0,COMMERCIAL FINANCIAL CORP,1059,-5,1,0,0,1978674,IA,0,0,5.44,0.71,1,4,Commercial Lending Specialization,0,0,0,0,"Spirit Lake, IA",44020,0,0,0,1,0,0,0,1
5,KANSAS,15385,0,0,MAIN STREET,23986,NM,223,,,,,,,,,,,,,,,0,STATE,N,EFFINGHAM,21,0,,ATCHISON,09/27/1999,0,21066,09/10/1999,09/10/1999,2469,06/05/1905,11,KANSAS CITY,KANSAS CITY,10,977456,0,KANSAS CITY,0,1,BIF,,05/14/1936,0,1,1,0,1,,0,0,,"The Farmers and Merchants State Bank, Effingha...",4619,0,4,WESTERN,09/27/1999,4,FDIC,06/30/1999,06/30/1999,1,1.37,1.41,13.21,13.57,09/04/2019,0,N,KS,20005,20,66023,11,4,9707,4619,31-Dec-9999,31-Dec-9999,0,,,,,,,,,,,,,,,,,,,,0,0,ATCHISON,21066,0,1,1,0,EXCHANGE BANKSHARES CORPORATION OF KANSAS,165,85,1,0,0,1054000,KS,0,0,1.97,2.02,0,2,Agricultural Specialization,0,"Kansas City-Overland Park-Kansas City, MO-KS",312,1,"Atchison, KS",11860,0,0,0,1,0,0,0,1
6,FLORIDA,15391,0,0,114 NORTH J STREET,119732,N,223,,,,,,,,,,,,,,,14356,OCC,N,LAKE WORTH,3,0,,PALM BEACH,10/12/1979,0,108357,09/29/1979,09/29/1979,10183,06/01/1936,5,ATLANTA,ATLANTA,6,660936,1,SOUTH FLORIDA,0,1,BIF,,06/01/1936,0,1,1,0,1,,0,8960,"West Palm Beach-Boca Raton, FL MSA",First Marine National Bank and Trust Company o...,17018,0,2,SOUTHEAST,10/12/1979,2,OCC,06/30/1979,06/30/1979,0,0,0,0,0,09/04/2019,0,N,FL,12099,12,33460,5,5,9711,3510,31-Dec-9999,31-Dec-9999,0,,,,,,,,,,,,,,,,,,,,0,0,0,108357,0,0,0,0,0,501,0,4,0,0,0,0,0,0,0,0,1,7,Other Specialized Under 1 Billion,0,"Miami-Fort Lauderdale-Port St. Lucie, FL",370,1,"Miami-Fort Lauderdale-West Palm Beach, FL",33100,"Miami-Fort Lauderdale-West Palm Beach, FL",33100,1,0,"West Palm Beach-Boca Raton-Delray Beach, FL",48424,1,0
7,NEBRASKA,15419,0,0,BAILEY STREET,16063,NM,223,,,,,,,,,,,,,,,0,STATE,N,STRATTON,21,0,,HITCHCOCK,10/07/2003,0,14031,09/08/2003,09/08/2003,1670,01/01/1887,11,KANSAS CITY,KANSAS CITY,10,658456,0,GRAND ISLAND,0,1,BIF,,01/01/1934,0,1,1,0,1,,0,0,,Commercial Bank,5434,0,5,WEST,10/07/2003,4,FDIC,06/30/2003,06/30/2003,1,0.82,0.69,8.07,6.78,09/04/2019,0,N,NE,31087,31,69043,11,4,9735,5434,31-Dec-9999,31-Dec-9999,0,,,,,,,,,,,,,,,,,,,,0,0,STRATTON,14031,0,0,1,0,"STRATTON AGENCY, INC.",66,28,1,0,0,1057449,NE,0,0,1.07,0.91,0,2,Agricultural Specialization,0,,,,,,0,0,,,,,,1
8,WASHINGTON,15430,0,0,103 EAST WASHINGTON AVENUE,23870,NM,223,,,,,,,,,,,,,,,0,STATE,N,SEQUIM,21,0,,CLALLAM,08/27/1976,0,21434,08/24/1976,08/24/1976,1749,10/08/1936,14,SAN FRANCISCO,SAN FRANCISCO,12,76573,0,SEATTLE,0,1,BIF,,10/26/1936,0,1,1,0,1,,0,0,,Bank of Sequim,2970,0,5,WEST,08/27/1976,6,FDIC,06/30/1976,06/30/1976,1,0,0,0,0,09/04/2019,0,N,WA,53009,53,98382,14,4,9744,3510,31-Dec-9999,31-Dec-9999,0,,,,,,,,,,,,,,,,,,,,0,0,0,21434,0,0,0,0,0,130,0,3,0,0,0,0,0,0,0,0,0,6,Consumer Lending Specialization,0,0,0,0,"Port Angeles, WA",38820,0,0,0,1,0,0,0,0
9,NORTH DAKOTA,15445,11636,0,"20 FIRST STREET, S.W.",415456,N,223,,,,,,,,,,,,,,,23297,OCC,N,MINOT,3,0,,WARD,09/30/2004,0,326056,08/23/2004,08/23/2004,26666,01/01/1934,11,KANSAS CITY,KANSAS CITY,9,760555,1,FARGO,0,1,BIF,,01/01/1934,0,1,1,0,1,,0,0,,"Bremer Bank, National Association",34380,0,5,WEST,09/30/2004,4,OCC,06/30/2004,06/30/2004,0,1.11,0.99,16.49,14.65,09/04/2019,0,N,ND,38101,38,58701,11,3,9757,12923,31-Dec-9999,31-Dec-9999,0,,,,,,,,,,,,,,,,,,www.bremer.com,,0,0,SAINT PAUL,326056,0,1,1,0,OTTO BREMER FOUNDATION,2301,1014,13,0,0,1121340,MN,1,0,1.65,1.45,1,2,Agricultural Specialization,0,0,0,0,"Minot, ND",33500,0,0,0,1,0,0,0,0


In [9]:
# List every column name in the institutions frame
print(institutions_df.columns.to_numpy())

['STNAME' 'CERT' 'DOCKET' 'ACTIVE' 'ADDRESS' 'ASSET' 'BKCLASS' 'CHANGEC1'
 'CHANGEC2' 'CHANGEC3' 'CHANGEC4' 'CHANGEC5' 'CHANGEC6' 'CHANGEC7'
 'CHANGEC8' 'CHANGEC9' 'CHANGEC10' 'CHANGEC11' 'CHANGEC12' 'CHANGEC13'
 'CHANGEC14' 'CHANGEC15' 'CHARTER' 'CHRTAGNT' 'CONSERVE' 'CITY' 'CLCODE'
 'CMSA_NO' 'CMSA' 'COUNTY' 'DATEUPDT' 'DENOVO' 'DEP' 'EFFDATE' 'ENDEFYMD'
 'EQ' 'ESTYMD' 'FDICDBS' 'FDICREGN' 'FDICSUPV' 'FED' 'FED_RSSD' 'FEDCHRTR'
 'FLDOFF' 'IBA' 'INACTIVE' 'INSAGNT1' 'INSAGNT2' 'INSDATE' 'INSTCRCD'
 'INSBIF' 'INSCOML' 'INSDIF' 'INSFDIC' 'INSSAIF' 'INSSAVE' 'MSA_NO' 'MSA'
 'NAME' 'NEWCERT' 'OAKAR' 'OTSDIST' 'OTSREGNM' 'PROCDATE' 'QBPRCOML'
 'REGAGNT' 'REPDTE' 'RISDATE' 'STCHRTR' 'ROA' 'ROAQ' 'ROE' 'ROEQ'
 'RUNDATE' 'SASSER' 'LAW_SASSER_FLG' 'STALP' 'STCNTY' 'STNUM' 'ZIP'
 'SUPRV_FD' 'OCCDIST' 'UNINUM' 'ULTCERT' 'CFPBEFFDTE' 'CFPBENDDTE'
 'CFPBFLAG' 'REGAGENT2' 'TE01N528' 'TE02N528' 'TE03N528' 'TE04N528'
 'TE05N528' 'TE06N528' 'TE07N528' 'TE08N528' 'TE09N528' 'TE10N528'
 'TE01N529' 'TE02

In [10]:
# Drop unnecessary columns.
# The result is reassigned instead of using inplace=True, matching how the locations
# frame is handled and avoiding in-place mutation of an already displayed frame.
unused_institution_cols = [
    'CMSA_NO', 'CMSA', 'DOCKET', 'ACTIVE',
    'CHANGEC1', 'CHANGEC2', 'CHANGEC3', 'CHANGEC4', 'CHANGEC5', 'CHANGEC6',
    'CHANGEC7', 'CHANGEC8', 'CHANGEC9', 'CHANGEC10', 'CHANGEC11', 'CHANGEC12',
    'CHANGEC13', 'CHANGEC14', 'CHANGEC15',
    'CHARTER', 'CHRTAGNT', 'CONSERVE', 'DENOVO', 'FEDCHRTR',
    'FLDOFF', 'IBA', 'INACTIVE', 'INSAGNT1', 'INSAGNT2', 'INSTCRCD',
    'INSBIF', 'INSCOML', 'INSDIF', 'INSFDIC', 'INSSAIF', 'INSSAVE',
    'MSA_NO', 'MSA', 'NEWCERT', 'OAKAR', 'OTSDIST',
    'OTSREGNM', 'PROCDATE', 'QBPRCOML', 'REGAGNT', 'REPDTE', 'RISDATE',
    'STCHRTR', 'SASSER',
    'LAW_SASSER_FLG', 'CFPBEFFDTE', 'CFPBENDDTE', 'CFPBFLAG', 'REGAGENT2',
]
institutions_df = institutions_df.drop(columns=unused_institution_cols)


In [12]:
# Confirm which columns remain after the drop
print(institutions_df.columns.to_numpy())

['STNAME' 'CERT' 'ADDRESS' 'ASSET' 'BKCLASS' 'CITY' 'CLCODE' 'CMSA_NO'
 'CMSA' 'COUNTY' 'DATEUPDT' 'DEP' 'EFFDATE' 'ENDEFYMD' 'EQ' 'ESTYMD'
 'FDICDBS' 'FDICREGN' 'FDICSUPV' 'FED' 'FED_RSSD' 'INSDATE' 'NAME' 'ROA'
 'ROAQ' 'ROE' 'ROEQ' 'RUNDATE' 'STALP' 'STCNTY' 'STNUM' 'ZIP' 'SUPRV_FD'
 'OCCDIST' 'UNINUM' 'ULTCERT' 'TE01N528' 'TE02N528' 'TE03N528' 'TE04N528'
 'TE05N528' 'TE06N528' 'TE07N528' 'TE08N528' 'TE09N528' 'TE10N528'
 'TE01N529' 'TE02N529' 'TE03N529' 'TE04N529' 'TE05N529' 'TE06N529'
 'WEBADDR' 'OFFICES' 'CERTCONS' 'PARCERT' 'CITYHCR' 'DEPDOM' 'FORM31'
 'HCTMULT' 'INSTAG' 'MUTUAL' 'NAMEHCR' 'NETINC' 'NETINCQ' 'OFFDOM'
 'OFFFOR' 'OFFOA' 'RSSDHCR' 'STALPHCR' 'STMULT' 'SUBCHAPS' 'ROAPTX'
 'ROAPTXQ' 'TRUST' 'SPECGRP' 'SPECGRPN' 'TRACT' 'CSA' 'CSA_NO' 'CSA_FLG'
 'CBSA' 'CBSA_NO' 'CBSA_METRO_NAME' 'CBSA_METRO' 'CBSA_METRO_FLG'
 'CBSA_MICRO_FLG' 'CBSA_DIV' 'CBSA_DIV_NO' 'CBSA_DIV_FLG' 'CB']


In [13]:
# Keep only the leading block of columns, up to and including 'ULTCERT'; every
# column after it ('TE01N528' ... 'CB') is discarded.
#
# The cut is made by label rather than by position (previously columns[36:91]).
# A fixed positional range silently shifts whenever the drop list in the previous
# step changes: with 'CMSA_NO' and 'CMSA' dropped there, position 36 is no longer
# the first unwanted column, so 'TE01N528' and 'TE02N528' would be left behind.
# .loc label slices include the end label. A missing 'ULTCERT' raises KeyError.
institutions_df = institutions_df.loc[:, :'ULTCERT'].copy()

In [14]:
# Substitute 0 for every missing (NaN) entry, then preview the first five rows
institutions_df = institutions_df.replace(to_replace=np.nan, value=0)
institutions_df.head(n=5)

Unnamed: 0,STNAME,CERT,ADDRESS,ASSET,BKCLASS,CITY,CLCODE,CMSA_NO,CMSA,COUNTY,DATEUPDT,DEP,EFFDATE,ENDEFYMD,EQ,ESTYMD,FDICDBS,FDICREGN,FDICSUPV,FED,FED_RSSD,INSDATE,NAME,ROA,ROAQ,ROE,ROEQ,RUNDATE,STALP,STCNTY,STNUM,ZIP,SUPRV_FD,OCCDIST,UNINUM,ULTCERT
0,PENNSYLVANIA,15698,401 WEST LANCASTER AVENUE,0,SB,HAVERFORD,42,77,"Philadelphia-Wilmington-Atlantic City, PA-NJ-D...",MONTGOMERY,04/07/1982,0,04/03/1982,04/03/1982,0,01/01/1847,2,NEW YORK,NEW YORK,3,897116,11/21/1939,The Western Saving Fund Society of Philadelphia,0.0,0.0,0.0,0.0,09/04/2019,PA,42091,42,19041,2,1,9966,7946
1,SOUTH CAROLINA,15716,MAIN STREET,5045,NM,SUMMERTON,21,0,0,CLARENDON,10/23/1985,4412,09/16/1985,09/16/1985,585,09/11/1934,5,ATLANTA,ATLANTA,5,577221,01/15/1940,Bank of Summerton,2.08,3.42,19.09,30.52,09/04/2019,SC,45027,45,29148,5,1,9979,873
2,TEXAS,15721,823 CONGRESS AVENUE,345518,N,AUSTIN,3,0,0,TRAVIS,11/03/1992,331568,10/30/1992,10/30/1992,6972,06/05/1936,13,DALLAS,DALLAS,11,7353,02/21/1940,"First City, Texas - Austin, National Association",-2.07,-0.45,-91.84,-22.36,09/04/2019,TX,48453,48,78767,13,5,9983,5510
3,IOWA,15736,606 WEST MILWAUKEE,206135,SM,NEW HAMPTON,13,0,0,CHICKASAW,04/07/2011,188024,03/31/2011,03/31/2011,16414,06/16/1937,11,KANSAS CITY,KANSAS CITY,7,668848,04/15/1940,BANK IOWA,1.09,0.75,13.58,9.09,09/04/2019,IA,19037,19,50659,11,4,9992,14521
4,IOWA,15368,1306 18TH STREET,38799,SM,SPIRIT LAKE,13,0,0,DICKINSON,01/31/2003,26764,10/28/2002,10/28/2002,2615,01/01/1935,11,KANSAS CITY,KANSAS CITY,7,810142,03/24/1936,Security State Bank,3.27,-0.05,46.44,-0.68,09/04/2019,IA,19059,19,51360,11,4,9695,15555


In [15]:
# Confirm the final set of column names
print(institutions_df.columns.to_numpy())

['STNAME' 'CERT' 'ADDRESS' 'ASSET' 'BKCLASS' 'CITY' 'CLCODE' 'CMSA_NO'
 'CMSA' 'COUNTY' 'DATEUPDT' 'DEP' 'EFFDATE' 'ENDEFYMD' 'EQ' 'ESTYMD'
 'FDICDBS' 'FDICREGN' 'FDICSUPV' 'FED' 'FED_RSSD' 'INSDATE' 'NAME' 'ROA'
 'ROAQ' 'ROE' 'ROEQ' 'RUNDATE' 'STALP' 'STCNTY' 'STNUM' 'ZIP' 'SUPRV_FD'
 'OCCDIST' 'UNINUM' 'ULTCERT']


In [16]:
# Rearrange columns: NAME, CITY and STNAME go first; all remaining columns
# follow in their existing relative order.
leading_cols = ['NAME', 'CITY', 'STNAME']
new_cols = leading_cols + [col for col in institutions_df.columns if col not in leading_cols]

# Re-index the frame with the new column order
institutions_df = institutions_df.reindex(columns=new_cols)
institutions_df

Unnamed: 0,NAME,CITY,STNAME,CERT,ADDRESS,ASSET,BKCLASS,CLCODE,CMSA_NO,CMSA,COUNTY,DATEUPDT,DEP,EFFDATE,ENDEFYMD,EQ,ESTYMD,FDICDBS,FDICREGN,FDICSUPV,FED,FED_RSSD,INSDATE,ROA,ROAQ,ROE,ROEQ,RUNDATE,STALP,STCNTY,STNUM,ZIP,SUPRV_FD,OCCDIST,UNINUM,ULTCERT
0,The Western Saving Fund Society of Philadelphia,HAVERFORD,PENNSYLVANIA,15698,401 WEST LANCASTER AVENUE,0,SB,42,77,"Philadelphia-Wilmington-Atlantic City, PA-NJ-D...",MONTGOMERY,04/07/1982,0,04/03/1982,04/03/1982,0,01/01/1847,2,NEW YORK,NEW YORK,3,897116,11/21/1939,0,0,0,0,09/04/2019,PA,42091,42,19041,2,1,9966,7946
1,Bank of Summerton,SUMMERTON,SOUTH CAROLINA,15716,MAIN STREET,5045,NM,21,0,0,CLARENDON,10/23/1985,4412,09/16/1985,09/16/1985,585,09/11/1934,5,ATLANTA,ATLANTA,5,577221,01/15/1940,2.08,3.42,19.09,30.52,09/04/2019,SC,45027,45,29148,5,1,9979,873
2,"First City, Texas - Austin, National Association",AUSTIN,TEXAS,15721,823 CONGRESS AVENUE,345518,N,3,0,0,TRAVIS,11/03/1992,331568,10/30/1992,10/30/1992,6972,06/05/1936,13,DALLAS,DALLAS,11,7353,02/21/1940,-2.07,-0.45,-91.84,-22.36,09/04/2019,TX,48453,48,78767,13,5,9983,5510
3,BANK IOWA,NEW HAMPTON,IOWA,15736,606 WEST MILWAUKEE,206135,SM,13,0,0,CHICKASAW,04/07/2011,188024,03/31/2011,03/31/2011,16414,06/16/1937,11,KANSAS CITY,KANSAS CITY,7,668848,04/15/1940,1.09,0.75,13.58,9.09,09/04/2019,IA,19037,19,50659,11,4,9992,14521
4,Security State Bank,SPIRIT LAKE,IOWA,15368,1306 18TH STREET,38799,SM,13,0,0,DICKINSON,01/31/2003,26764,10/28/2002,10/28/2002,2615,01/01/1935,11,KANSAS CITY,KANSAS CITY,7,810142,03/24/1936,3.27,-0.05,46.44,-0.68,09/04/2019,IA,19059,19,51360,11,4,9695,15555
5,"The Farmers and Merchants State Bank, Effingha...",EFFINGHAM,KANSAS,15385,MAIN STREET,23986,NM,21,0,0,ATCHISON,09/27/1999,21066,09/10/1999,09/10/1999,2469,06/05/1905,11,KANSAS CITY,KANSAS CITY,10,977456,05/14/1936,1.37,1.41,13.21,13.57,09/04/2019,KS,20005,20,66023,11,4,9707,4619
6,First Marine National Bank and Trust Company o...,LAKE WORTH,FLORIDA,15391,114 NORTH J STREET,119732,N,3,0,0,PALM BEACH,10/12/1979,108357,09/29/1979,09/29/1979,10183,06/01/1936,5,ATLANTA,ATLANTA,6,660936,06/01/1936,0,0,0,0,09/04/2019,FL,12099,12,33460,5,5,9711,3510
7,Commercial Bank,STRATTON,NEBRASKA,15419,BAILEY STREET,16063,NM,21,0,0,HITCHCOCK,10/07/2003,14031,09/08/2003,09/08/2003,1670,01/01/1887,11,KANSAS CITY,KANSAS CITY,10,658456,01/01/1934,0.82,0.69,8.07,6.78,09/04/2019,NE,31087,31,69043,11,4,9735,5434
8,Bank of Sequim,SEQUIM,WASHINGTON,15430,103 EAST WASHINGTON AVENUE,23870,NM,21,0,0,CLALLAM,08/27/1976,21434,08/24/1976,08/24/1976,1749,10/08/1936,14,SAN FRANCISCO,SAN FRANCISCO,12,76573,10/26/1936,0,0,0,0,09/04/2019,WA,53009,53,98382,14,4,9744,3510
9,"Bremer Bank, National Association",MINOT,NORTH DAKOTA,15445,"20 FIRST STREET, S.W.",415456,N,3,0,0,WARD,09/30/2004,326056,08/23/2004,08/23/2004,26666,01/01/1934,11,KANSAS CITY,KANSAS CITY,9,760555,01/01/1934,1.11,0.99,16.49,14.65,09/04/2019,ND,38101,38,58701,11,3,9757,12923


## Save to CSV and JSON

In [18]:
# Save both cleaned frames, first as CSV and then as JSON

csv_targets = [
    (institutions_df, "../financial_instituttions_data/institutions_clean.csv"),
    (locations_df, "../financial_instituttions_data/locations_clean.csv"),
]
json_targets = [
    (institutions_df, "../financial_instituttions_data/institutions_clean.json"),
    (locations_df, "../financial_instituttions_data/locations_clean.json"),
]

# CSV: UTF-8 encoded, without the row index
for frame, csv_path in csv_targets:
    frame.to_csv(csv_path, index=False, encoding='utf8')

# JSON: column-oriented layout
for frame, json_path in json_targets:
    frame.to_json(json_path, orient='columns')
