In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
from datetime import datetime
import itertools
import os

### Read and Combine the txt files

In [4]:
# Path to the folder containing the text files
folder_path = r"/content/drive/MyDrive/CS_Finance_Data"

# List all files in the folder
file_list = [f for f in os.listdir(folder_path) if f.endswith('.txt')] ## List Comprehension

# Initialize an empty list to store DataFrames
dataframes = []

# Read each file and append the DataFrame to the list
for file_name in file_list:
    file_path = os.path.join(folder_path, file_name)
    df = pd.read_csv(file_path, sep='^', header=0, engine='python', encoding='latin1', on_bad_lines='skip')
    dataframes.append(df)

# Concatenate all DataFrames into a single DataFrame
combined_df = pd.concat(dataframes, ignore_index=True)

# Display the first few rows of the combined DataFrame
combined_df.head()

Unnamed: 0,RSSD9001,RSSD9999,RSSD9007,RSSD9008,RSSD9132,RSSD9032,RSSD9146,BHBC3368,BHBC3402,BHBC3516,...,BHCANC99,BHCKFT42,BHCKFT43,BHCKFT44,BHSPFT42,BHSPFT43,BHSPFT44,BHSPMZ36,BHSPNK60,BHCKMG95
0,1020180,20041231,20040823,20051118,551111,9,10.0,0.0,0.0,0.0,...,,,,,,,,,,
1,1020201,20041231,20001028,20051230,551111,2,1.0,0.0,0.0,0.0,...,,,,,,,,,,
2,1020340,20041231,20041230,20050527,551111,7,30.0,,,,...,,,,,,,,,,
3,1020395,20041231,20040211,20081230,551111,6,1.0,,,,...,,,,,,,,,,
4,1020582,20041231,19980101,20060228,551111,7,1.0,,,,...,,,,,,,,,,


### Subset some columns:

In [18]:
smalldf = combined_df[['RSSD9001','RSSD9017', 'RSSD9032', 'RSSD9146', 'RSSD9999', 'BHCK4107', 'BHCK4074', 'BHCK2170']]
smalldf.head()

Unnamed: 0,RSSD9001,RSSD9017,RSSD9032,RSSD9146,RSSD9999,BHCK4107,BHCK4074,BHCK2170
0,1020180,BREMER FINANCIAL CORPORATION,9,10.0,20041231,291916.0,203077.0,6141519.0
1,1020201,HSBC USA INC.,2,1.0,20041231,4078746.0,2741737.0,141049972.0
2,1020340,"HARRIS BANKCORP, INC.",7,30.0,20041231,1179568.0,832167.0,34389852.0
3,1020395,SOUTHERN NATIONAL CORPORATION,6,1.0,20041231,12248.0,7630.0,226549.0
4,1020582,"WCN BANCORP, INC.",7,1.0,20041231,11035.0,8331.0,250744.0


In [19]:
name_dict = {'RSSD9001':'RSSD ID',
             'RSSD9017':'Firm Legal Name',
             'RSSD9032':'FR District Code',
             'RSSD9146':'Bank Count',
             'RSSD9999':'Reporting Date',
             'BHCK4107':'Interest Income',
             'BHCK4074':'Net Interest Income',
             'BHCK2170':'Total Assets',
             }
smalldf = smalldf.rename(columns = name_dict)
print(smalldf.shape)
smalldf.head()

(243757, 8)


Unnamed: 0,RSSD ID,Firm Legal Name,FR District Code,Bank Count,Reporting Date,Interest Income,Net Interest Income,Total Assets
0,1020180,BREMER FINANCIAL CORPORATION,9,10.0,20041231,291916.0,203077.0,6141519.0
1,1020201,HSBC USA INC.,2,1.0,20041231,4078746.0,2741737.0,141049972.0
2,1020340,"HARRIS BANKCORP, INC.",7,30.0,20041231,1179568.0,832167.0,34389852.0
3,1020395,SOUTHERN NATIONAL CORPORATION,6,1.0,20041231,12248.0,7630.0,226549.0
4,1020582,"WCN BANCORP, INC.",7,1.0,20041231,11035.0,8331.0,250744.0


In [20]:
smalldf['Reporting Date'] = pd.to_datetime(smalldf['Reporting Date'], format = "%Y%m%d")

### Define Quarters and Years


In [21]:
# Define Quarter

smalldf['Quarter'] = smalldf['Reporting Date'].dt.quarter
# smalldf.head()

In [22]:
# Define Year

smalldf['Year'] = smalldf['Reporting Date'].dt.year
smalldf.head()

Unnamed: 0,RSSD ID,Firm Legal Name,FR District Code,Bank Count,Reporting Date,Interest Income,Net Interest Income,Total Assets,Quarter,Year
0,1020180,BREMER FINANCIAL CORPORATION,9,10.0,2004-12-31,291916.0,203077.0,6141519.0,4,2004
1,1020201,HSBC USA INC.,2,1.0,2004-12-31,4078746.0,2741737.0,141049972.0,4,2004
2,1020340,"HARRIS BANKCORP, INC.",7,30.0,2004-12-31,1179568.0,832167.0,34389852.0,4,2004
3,1020395,SOUTHERN NATIONAL CORPORATION,6,1.0,2004-12-31,12248.0,7630.0,226549.0,4,2004
4,1020582,"WCN BANCORP, INC.",7,1.0,2004-12-31,11035.0,8331.0,250744.0,4,2004


## Quaterization

In [23]:
def quarterize(df, cols_to_process, dt_column):
  """
  Function to quarterizes the columns of a dataframe.
    df: has to be sorted by year and quarters
    cols_to_process: list of columns to quarterize
    dt_column: string
              column name of the date column

  """
  # indices is considered to be default
  fq_ids = df[(df[dt_column].dt.month == 3) & (df[dt_column].dt.day == 31)].index
  for col in cols_to_process:
    df[f"Q{col}"] = df[col].diff()
    df.loc[fq_ids, f"Q{col}"] = df.loc[fq_ids, col]
  return df

In [24]:
ls_dfs = []
for bank in smalldf['RSSD ID'].unique():
  OneBank = smalldf[smalldf["RSSD ID"] == bank].sort_values(by=['Reporting Date'])
  tmp_df = quarterize(OneBank, ["Interest Income", "Net Interest Income"], dt_column="Reporting Date")
  ls_dfs.append(tmp_df)

Quarterized_dfs = pd.concat(ls_dfs, ignore_index=True)
Quarterized_dfs.head(10)

Unnamed: 0,RSSD ID,Firm Legal Name,FR District Code,Bank Count,Reporting Date,Interest Income,Net Interest Income,Total Assets,Quarter,Year,QInterest Income,QNet Interest Income
0,1020180,BREMER FINANCIAL CORPORATION,9,11.0,2004-03-31,68233.0,46844.0,5721282.0,1,2004,68233.0,46844.0
1,1020180,BREMER FINANCIAL CORPORATION,9,11.0,2004-06-30,138086.0,95452.0,5870480.0,2,2004,69853.0,48608.0
2,1020180,BREMER FINANCIAL CORPORATION,9,10.0,2004-09-30,212489.0,147314.0,5963700.0,3,2004,74403.0,51862.0
3,1020180,BREMER FINANCIAL CORPORATION,9,10.0,2004-12-31,291916.0,203077.0,6141519.0,4,2004,79427.0,55763.0
4,1020180,BREMER FINANCIAL CORPORATION,9,10.0,2005-03-31,81656.0,54637.0,6230236.0,1,2005,81656.0,54637.0
5,1020180,BREMER FINANCIAL CORPORATION,9,10.0,2005-06-30,170175.0,110844.0,6450716.0,2,2005,88519.0,56207.0
6,1020180,BREMER FINANCIAL CORPORATION,9,10.0,2005-09-30,265290.0,169488.0,6336699.0,3,2005,95115.0,58644.0
7,1020180,BREMER FINANCIAL CORPORATION,9,9.0,2005-12-31,362347.0,227516.0,6555895.0,4,2005,97057.0,58028.0
8,1020180,BREMER FINANCIAL CORPORATION,9,9.0,2006-03-31,100170.0,56954.0,6480680.0,1,2006,100170.0,56954.0
9,1020180,BREMER FINANCIAL CORPORATION,9,9.0,2006-06-30,206822.0,115120.0,6638952.0,2,2006,106652.0,58166.0


### Normalization

In [25]:
# Normalize Components
Quarterized_dfs['Normalized Interest Income per Quarter'] = Quarterized_dfs['QInterest Income']/Quarterized_dfs['Total Assets']

Quarterized_dfs['Normalized Net Interest Income per Quarter'] = Quarterized_dfs['QNet Interest Income']/Quarterized_dfs['Total Assets']

Quarterized_dfs.head()

Unnamed: 0,RSSD ID,Firm Legal Name,FR District Code,Bank Count,Reporting Date,Interest Income,Net Interest Income,Total Assets,Quarter,Year,QInterest Income,QNet Interest Income,Normalized Interest Income per Quarter,Normalized Net Interest Income per Quarter
0,1020180,BREMER FINANCIAL CORPORATION,9,11.0,2004-03-31,68233.0,46844.0,5721282.0,1,2004,68233.0,46844.0,0.011926,0.008188
1,1020180,BREMER FINANCIAL CORPORATION,9,11.0,2004-06-30,138086.0,95452.0,5870480.0,2,2004,69853.0,48608.0,0.011899,0.00828
2,1020180,BREMER FINANCIAL CORPORATION,9,10.0,2004-09-30,212489.0,147314.0,5963700.0,3,2004,74403.0,51862.0,0.012476,0.008696
3,1020180,BREMER FINANCIAL CORPORATION,9,10.0,2004-12-31,291916.0,203077.0,6141519.0,4,2004,79427.0,55763.0,0.012933,0.00908
4,1020180,BREMER FINANCIAL CORPORATION,9,10.0,2005-03-31,81656.0,54637.0,6230236.0,1,2005,81656.0,54637.0,0.013106,0.00877


### We bring our Macroeconomic Variables

In [28]:
gdp_df = pd.read_csv(r"/content/drive/MyDrive/CS_Finance_Data/MacroEconomic_Variables/GDP.csv", parse_dates=True)
gdp_df['DATE'] = pd.to_datetime(gdp_df['DATE'])
print(gdp_df.shape)
gdp_df.head()

(81, 2)


Unnamed: 0,DATE,GDP
0,2004-01-01,11923.447
1,2004-04-01,12112.815
2,2004-07-01,12305.307
3,2004-10-01,12527.214
4,2005-01-01,12767.286


In [29]:
uner_df = pd.read_csv(r"/content/drive/MyDrive/CS_Finance_Data/MacroEconomic_Variables/UNRATE.csv")
uner_df['DATE'] = pd.to_datetime(uner_df['DATE'])
print(uner_df.shape)
uner_df.head()

(82, 2)


Unnamed: 0,DATE,UNRATE
0,2004-01-01,5.7
1,2004-04-01,5.6
2,2004-07-01,5.433333
3,2004-10-01,5.433333
4,2005-01-01,5.3


In [30]:
df_gdp_uner = pd.merge(gdp_df,uner_df,on='DATE')
print(df_gdp_uner.shape)
df_gdp_uner.head()

(81, 3)


Unnamed: 0,DATE,GDP,UNRATE
0,2004-01-01,11923.447,5.7
1,2004-04-01,12112.815,5.6
2,2004-07-01,12305.307,5.433333
3,2004-10-01,12527.214,5.433333
4,2005-01-01,12767.286,5.3


In [31]:
df_gdp_uner.rename(columns={'DATE': 'date'}, inplace=True)

In [33]:
def update_quart_enddate_shift(df):
    # Define a dictionary to map the month to the new date
    quarter_end_dates = {1: (12, 31), 4: (3, 31), 7: (6, 30), 10: (9, 30)}

    # Apply the mapping to update the dates
    for month, (new_month, new_day) in quarter_end_dates.items():
        df.loc[df['date'].dt.month == month, 'date'] = df['date'].apply(
            lambda x: x.replace(month=new_month, day=new_day) if x.month == month else x
        )

    # offset the years by 1 for those whose month is december
    tmp_ids = df[df["date"].dt.month == 12].index
    df.loc[tmp_ids, "date"] = df.loc[tmp_ids, "date"] - pd.DateOffset(years=1)

    # drop the first row df_gdp_uner
    df.drop(df.index[0], inplace=True)
    df.reset_index(drop=True, inplace=True)

    return df

In [34]:
df_gdp_uner =  update_quart_enddate_shift(df_gdp_uner)
df_gdp_uner.head()

Unnamed: 0,date,GDP,UNRATE
0,2004-03-31,12112.815,5.6
1,2004-06-30,12305.307,5.433333
2,2004-09-30,12527.214,5.433333
3,2004-12-31,12767.286,5.3
4,2005-03-31,12922.656,5.1


### US Department of Treasury Dataset

In [41]:
rates_df = pd.read_csv(r"/content/drive/MyDrive/CS_Finance_Data/MacroEconomic_Variables/yield-curve-rates-2004-2024.csv", encoding='latin1')
rates_df.head()

Unnamed: 0,Date,1 Mo,2 Mo,3 Mo,4 Mo,6 Mo,1 Yr,2 Yr,3 Yr,5 Yr,7 Yr,10 Yr,20 Yr,30 Yr
0,07/15/2024,5.48,5.51,5.43,5.4,5.23,4.85,4.44,4.23,4.13,4.16,4.23,4.56,4.46
1,07/12/2024,5.47,5.52,5.43,5.41,5.23,4.87,4.45,4.22,4.1,4.13,4.18,4.5,4.39
2,07/11/2024,5.48,5.53,5.44,5.41,5.25,4.91,4.5,4.26,4.13,4.15,4.2,4.51,4.41
3,07/10/2024,5.46,5.5,5.46,5.46,5.33,5.01,4.62,4.38,4.24,4.24,4.28,4.58,4.47
4,07/09/2024,5.45,5.51,5.46,5.46,5.34,5.02,4.62,4.37,4.24,4.25,4.3,4.59,4.49


In [42]:
# Changing Date to Date-time Format
rates_df['Date'] = pd.to_datetime(rates_df['Date'].astype(str), format='mixed', utc=True)
rates_df.head()

Unnamed: 0,Date,1 Mo,2 Mo,3 Mo,4 Mo,6 Mo,1 Yr,2 Yr,3 Yr,5 Yr,7 Yr,10 Yr,20 Yr,30 Yr
0,2024-07-15 00:00:00+00:00,5.48,5.51,5.43,5.4,5.23,4.85,4.44,4.23,4.13,4.16,4.23,4.56,4.46
1,2024-07-12 00:00:00+00:00,5.47,5.52,5.43,5.41,5.23,4.87,4.45,4.22,4.1,4.13,4.18,4.5,4.39
2,2024-07-11 00:00:00+00:00,5.48,5.53,5.44,5.41,5.25,4.91,4.5,4.26,4.13,4.15,4.2,4.51,4.41
3,2024-07-10 00:00:00+00:00,5.46,5.5,5.46,5.46,5.33,5.01,4.62,4.38,4.24,4.24,4.28,4.58,4.47
4,2024-07-09 00:00:00+00:00,5.45,5.51,5.46,5.46,5.34,5.02,4.62,4.37,4.24,4.25,4.3,4.59,4.49


In [43]:
# Giving the Year and Quarter each entry
rates_df['Year'] = rates_df['Date'].dt.year
rates_df['Quarter'] = rates_df['Date'].dt.quarter
rates_df.sort_values('Date').head()

Unnamed: 0,Date,1 Mo,2 Mo,3 Mo,4 Mo,6 Mo,1 Yr,2 Yr,3 Yr,5 Yr,7 Yr,10 Yr,20 Yr,30 Yr,Year,Quarter
5138,2004-01-02 00:00:00+00:00,0.88,,0.93,,1.02,1.31,1.94,2.47,3.36,3.9,4.38,5.21,,2004,1
5137,2004-01-05 00:00:00+00:00,0.88,,0.91,,1.05,1.35,1.95,2.51,3.39,3.92,4.41,5.23,,2004,1
5136,2004-01-06 00:00:00+00:00,0.88,,0.91,,1.03,1.3,1.84,2.38,3.26,3.8,4.29,5.13,,2004,1
5135,2004-01-07 00:00:00+00:00,0.88,,0.91,,1.02,1.29,1.84,2.36,3.25,3.76,4.27,5.11,,2004,1
5134,2004-01-08 00:00:00+00:00,0.87,,0.88,,1.01,1.29,1.85,2.37,3.24,3.76,4.27,5.12,,2004,1


In [44]:
finalrates = rates_df.groupby(['Year', 'Quarter']).mean()
finalrates = finalrates.drop(columns=["Date"])
finalrates.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,1 Mo,2 Mo,3 Mo,4 Mo,6 Mo,1 Yr,2 Yr,3 Yr,5 Yr,7 Yr,10 Yr,20 Yr,30 Yr
Year,Quarter,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2004,1,0.909677,,0.93371,,1.003548,1.221452,1.686613,2.163065,2.980968,3.501452,4.01,4.877903,
2004,2,0.954516,,1.096129,,1.359677,1.775484,2.454677,2.971935,3.720968,4.183871,4.597097,5.355645,
2004,3,1.37,,1.513906,,1.788437,2.075,2.556563,2.919531,3.505625,3.922188,4.301406,5.068281,
2004,4,1.841774,,2.047419,,2.297581,2.472742,2.822419,3.058871,3.49371,3.855,4.175806,4.874839,
2005,1,2.360984,,2.58918,,2.876885,3.072459,3.454918,3.623934,3.89082,4.098852,4.303607,4.764098,
2005,2,2.709687,,2.926719,,3.179688,3.337344,3.645938,3.732344,3.872969,3.984688,4.158594,4.54875,
2005,3,3.228594,,3.438281,,3.707031,3.791094,3.957969,3.989219,4.039375,4.108437,4.215156,4.507656,
2005,4,3.702131,,3.913607,,4.250492,4.288525,4.36459,4.372295,4.391148,4.425738,4.488197,4.767541,
2006,1,4.35871,,4.506613,,4.657258,4.640323,4.604677,4.582581,4.552419,4.555484,4.577097,4.774032,4.663056
2006,2,4.674921,,4.830159,,5.033651,5.021429,4.997937,4.987302,4.993175,5.018095,5.072698,5.290635,5.143016


In [46]:
finalrates_df = finalrates.copy()
finalrates_df.reset_index(inplace=True)
finalrates_df.head()

Unnamed: 0,Year,Quarter,1 Mo,2 Mo,3 Mo,4 Mo,6 Mo,1 Yr,2 Yr,3 Yr,5 Yr,7 Yr,10 Yr,20 Yr,30 Yr
0,2004,1,0.909677,,0.93371,,1.003548,1.221452,1.686613,2.163065,2.980968,3.501452,4.01,4.877903,
1,2004,2,0.954516,,1.096129,,1.359677,1.775484,2.454677,2.971935,3.720968,4.183871,4.597097,5.355645,
2,2004,3,1.37,,1.513906,,1.788437,2.075,2.556563,2.919531,3.505625,3.922188,4.301406,5.068281,
3,2004,4,1.841774,,2.047419,,2.297581,2.472742,2.822419,3.058871,3.49371,3.855,4.175806,4.874839,
4,2005,1,2.360984,,2.58918,,2.876885,3.072459,3.454918,3.623934,3.89082,4.098852,4.303607,4.764098,


In [56]:
cmev = pd.concat([finalrates_df, df_gdp_uner], axis = 1)
cmev.drop(columns=['Year', 'Quarter'], inplace=True)
cmev.head()

Unnamed: 0,1 Mo,2 Mo,3 Mo,4 Mo,6 Mo,1 Yr,2 Yr,3 Yr,5 Yr,7 Yr,10 Yr,20 Yr,30 Yr,date,GDP,UNRATE
0,0.909677,,0.93371,,1.003548,1.221452,1.686613,2.163065,2.980968,3.501452,4.01,4.877903,,2004-03-31,12112.815,5.6
1,0.954516,,1.096129,,1.359677,1.775484,2.454677,2.971935,3.720968,4.183871,4.597097,5.355645,,2004-06-30,12305.307,5.433333
2,1.37,,1.513906,,1.788437,2.075,2.556563,2.919531,3.505625,3.922188,4.301406,5.068281,,2004-09-30,12527.214,5.433333
3,1.841774,,2.047419,,2.297581,2.472742,2.822419,3.058871,3.49371,3.855,4.175806,4.874839,,2004-12-31,12767.286,5.3
4,2.360984,,2.58918,,2.876885,3.072459,3.454918,3.623934,3.89082,4.098852,4.303607,4.764098,,2005-03-31,12922.656,5.1


In [57]:
Quarterized_dfs.head()

Unnamed: 0,RSSD ID,Firm Legal Name,FR District Code,Bank Count,Reporting Date,Interest Income,Net Interest Income,Total Assets,Quarter,Year,QInterest Income,QNet Interest Income,Normalized Interest Income per Quarter,Normalized Net Interest Income per Quarter
0,1020180,BREMER FINANCIAL CORPORATION,9,11.0,2004-03-31,68233.0,46844.0,5721282.0,1,2004,68233.0,46844.0,0.011926,0.008188
1,1020180,BREMER FINANCIAL CORPORATION,9,11.0,2004-06-30,138086.0,95452.0,5870480.0,2,2004,69853.0,48608.0,0.011899,0.00828
2,1020180,BREMER FINANCIAL CORPORATION,9,10.0,2004-09-30,212489.0,147314.0,5963700.0,3,2004,74403.0,51862.0,0.012476,0.008696
3,1020180,BREMER FINANCIAL CORPORATION,9,10.0,2004-12-31,291916.0,203077.0,6141519.0,4,2004,79427.0,55763.0,0.012933,0.00908
4,1020180,BREMER FINANCIAL CORPORATION,9,10.0,2005-03-31,81656.0,54637.0,6230236.0,1,2005,81656.0,54637.0,0.013106,0.00877


In [58]:
# selecting some columns of Quater
small_Quaterized_dfs = Quarterized_dfs[['RSSD ID', 'Firm Legal Name', 'Bank Count', 'Reporting Date', 'Year', 'Quarter',
                                        'Normalized Interest Income per Quarter', 'Normalized Net Interest Income per Quarter']]

small_Quaterized_dfs.head()

Unnamed: 0,RSSD ID,Firm Legal Name,Bank Count,Reporting Date,Year,Quarter,Normalized Interest Income per Quarter,Normalized Net Interest Income per Quarter
0,1020180,BREMER FINANCIAL CORPORATION,11.0,2004-03-31,2004,1,0.011926,0.008188
1,1020180,BREMER FINANCIAL CORPORATION,11.0,2004-06-30,2004,2,0.011899,0.00828
2,1020180,BREMER FINANCIAL CORPORATION,10.0,2004-09-30,2004,3,0.012476,0.008696
3,1020180,BREMER FINANCIAL CORPORATION,10.0,2004-12-31,2004,4,0.012933,0.00908
4,1020180,BREMER FINANCIAL CORPORATION,10.0,2005-03-31,2005,1,0.013106,0.00877


In [65]:
df = small_Quaterized_dfs.merge(cmev, left_on='Reporting Date', right_on='date', how='left')
df.drop(columns=['date'], inplace=True)
df.head()

Unnamed: 0,RSSD ID,Firm Legal Name,Bank Count,Reporting Date,Year,Quarter,Normalized Interest Income per Quarter,Normalized Net Interest Income per Quarter,1 Mo,2 Mo,...,1 Yr,2 Yr,3 Yr,5 Yr,7 Yr,10 Yr,20 Yr,30 Yr,GDP,UNRATE
0,1020180,BREMER FINANCIAL CORPORATION,11.0,2004-03-31,2004,1,0.011926,0.008188,0.909677,,...,1.221452,1.686613,2.163065,2.980968,3.501452,4.01,4.877903,,12112.815,5.6
1,1020180,BREMER FINANCIAL CORPORATION,11.0,2004-06-30,2004,2,0.011899,0.00828,0.954516,,...,1.775484,2.454677,2.971935,3.720968,4.183871,4.597097,5.355645,,12305.307,5.433333
2,1020180,BREMER FINANCIAL CORPORATION,10.0,2004-09-30,2004,3,0.012476,0.008696,1.37,,...,2.075,2.556563,2.919531,3.505625,3.922188,4.301406,5.068281,,12527.214,5.433333
3,1020180,BREMER FINANCIAL CORPORATION,10.0,2004-12-31,2004,4,0.012933,0.00908,1.841774,,...,2.472742,2.822419,3.058871,3.49371,3.855,4.175806,4.874839,,12767.286,5.3
4,1020180,BREMER FINANCIAL CORPORATION,10.0,2005-03-31,2005,1,0.013106,0.00877,2.360984,,...,3.072459,3.454918,3.623934,3.89082,4.098852,4.303607,4.764098,,12922.656,5.1


In [64]:
df.shape

(243757, 23)

In [66]:
df.isnull().sum()

RSSD ID                                            0
Firm Legal Name                                    0
Bank Count                                      9119
Reporting Date                                     0
Year                                               0
Quarter                                            0
Normalized Interest Income per Quarter        170337
Normalized Net Interest Income per Quarter    170340
1 Mo                                             473
2 Mo                                          194565
3 Mo                                             473
4 Mo                                          231244
6 Mo                                             473
1 Yr                                             473
2 Yr                                             473
3 Yr                                             473
5 Yr                                             473
7 Yr                                             473
10 Yr                                         