In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
from datetime import datetime
import itertools
import os

### Read and Combine the txt files

In [None]:
# Path to the folder containing the text files
folder_path = r"/content/drive/MyDrive/CS_Finance_Data"

# List all files in the folder
file_list = [f for f in os.listdir(folder_path) if f.endswith('.txt')] ## List Comprehension

# Initialize an empty list to store DataFrames
dataframes = []

# Read each file and append the DataFrame to the list
for file_name in file_list:
    file_path = os.path.join(folder_path, file_name)
    df = pd.read_csv(file_path, sep='^', header=0, engine='python', encoding='latin1', on_bad_lines='skip')
    dataframes.append(df)

# Concatenate all DataFrames into a single DataFrame
combined_df = pd.concat(dataframes, ignore_index=True)

# Display the first few rows of the combined DataFrame
combined_df.head()

Unnamed: 0,RSSD9001,RSSD9999,RSSD9007,RSSD9008,RSSD9132,RSSD9032,RSSD9146,BHBC3368,BHBC3402,BHBC3516,...,BHCANC99,BHCKFT42,BHCKFT43,BHCKFT44,BHSPFT42,BHSPFT43,BHSPFT44,BHSPMZ36,BHSPNK60,BHCKMG95
0,1020180,20041231,20040823,20051118,551111,9,10.0,0.0,0.0,0.0,...,,,,,,,,,,
1,1020201,20041231,20001028,20051230,551111,2,1.0,0.0,0.0,0.0,...,,,,,,,,,,
2,1020340,20041231,20041230,20050527,551111,7,30.0,,,,...,,,,,,,,,,
3,1020395,20041231,20040211,20081230,551111,6,1.0,,,,...,,,,,,,,,,
4,1020582,20041231,19980101,20060228,551111,7,1.0,,,,...,,,,,,,,,,


### Subset of columns:

In [None]:
smalldf = combined_df[['RSSD9001','RSSD9017', 'RSSD9032', 'RSSD9146', 'RSSD9999', 'BHCK4107', 'BHCK4074', 'BHCK2170']]
smalldf.head()

Unnamed: 0,RSSD9001,RSSD9017,RSSD9032,RSSD9146,RSSD9999,BHCK4107,BHCK4074,BHCK2170
0,1020180,BREMER FINANCIAL CORPORATION,9,10.0,20041231,291916.0,203077.0,6141519.0
1,1020201,HSBC USA INC.,2,1.0,20041231,4078746.0,2741737.0,141049972.0
2,1020340,"HARRIS BANKCORP, INC.",7,30.0,20041231,1179568.0,832167.0,34389852.0
3,1020395,SOUTHERN NATIONAL CORPORATION,6,1.0,20041231,12248.0,7630.0,226549.0
4,1020582,"WCN BANCORP, INC.",7,1.0,20041231,11035.0,8331.0,250744.0


In [None]:
name_dict = {'RSSD9001':'RSSD ID',
             'RSSD9017':'Firm Legal Name',
             'RSSD9032':'FR District Code',
             'RSSD9146':'Bank Count',
             'RSSD9999':'Reporting Date',
             'BHCK4107':'Interest Income',
             'BHCK4074':'Net Interest Income',
             'BHCK2170':'Total Assets',
             }
smalldf = smalldf.rename(columns = name_dict)
print(smalldf.shape)
smalldf.head()

(243757, 8)


Unnamed: 0,RSSD ID,Firm Legal Name,FR District Code,Bank Count,Reporting Date,Interest Income,Net Interest Income,Total Assets
0,1020180,BREMER FINANCIAL CORPORATION,9,10.0,20041231,291916.0,203077.0,6141519.0
1,1020201,HSBC USA INC.,2,1.0,20041231,4078746.0,2741737.0,141049972.0
2,1020340,"HARRIS BANKCORP, INC.",7,30.0,20041231,1179568.0,832167.0,34389852.0
3,1020395,SOUTHERN NATIONAL CORPORATION,6,1.0,20041231,12248.0,7630.0,226549.0
4,1020582,"WCN BANCORP, INC.",7,1.0,20041231,11035.0,8331.0,250744.0


In [None]:
smalldf['Reporting Date'] = pd.to_datetime(smalldf['Reporting Date'], format = "%Y%m%d")

### Define Quarters and Years


In [None]:
# Define Quarter

smalldf['Quarter'] = smalldf['Reporting Date'].dt.quarter
# smalldf.head()

In [None]:
# Define Year

smalldf['Year'] = smalldf['Reporting Date'].dt.year
smalldf.head()

Unnamed: 0,RSSD ID,Firm Legal Name,FR District Code,Bank Count,Reporting Date,Interest Income,Net Interest Income,Total Assets,Quarter,Year
0,1020180,BREMER FINANCIAL CORPORATION,9,10.0,2004-12-31,291916.0,203077.0,6141519.0,4,2004
1,1020201,HSBC USA INC.,2,1.0,2004-12-31,4078746.0,2741737.0,141049972.0,4,2004
2,1020340,"HARRIS BANKCORP, INC.",7,30.0,2004-12-31,1179568.0,832167.0,34389852.0,4,2004
3,1020395,SOUTHERN NATIONAL CORPORATION,6,1.0,2004-12-31,12248.0,7630.0,226549.0,4,2004
4,1020582,"WCN BANCORP, INC.",7,1.0,2004-12-31,11035.0,8331.0,250744.0,4,2004


## Quaterization

In [None]:
def quarterize(df, cols_to_process, dt_column):
  """
  Function to quarterizes the columns of a dataframe.
    df: has to be sorted by year and quarters
    cols_to_process: list of columns to quarterize
    dt_column: string
              column name of the date column

  """
  # indices is considered to be default
  fq_ids = df[(df[dt_column].dt.month == 3) & (df[dt_column].dt.day == 31)].index
  for col in cols_to_process:
    df[f"Q{col}"] = df[col].diff()
    df.loc[fq_ids, f"Q{col}"] = df.loc[fq_ids, col]
  return df

In [None]:
ls_dfs = []
for bank in smalldf['RSSD ID'].unique():
  OneBank = smalldf[smalldf["RSSD ID"] == bank].sort_values(by=['Reporting Date'])
  tmp_df = quarterize(OneBank, ["Interest Income", "Net Interest Income"], dt_column="Reporting Date")
  ls_dfs.append(tmp_df)

Quarterized_dfs = pd.concat(ls_dfs, ignore_index=True)
Quarterized_dfs.head(10)

Unnamed: 0,RSSD ID,Firm Legal Name,FR District Code,Bank Count,Reporting Date,Interest Income,Net Interest Income,Total Assets,Quarter,Year,QInterest Income,QNet Interest Income
0,1020180,BREMER FINANCIAL CORPORATION,9,11.0,2004-03-31,68233.0,46844.0,5721282.0,1,2004,68233.0,46844.0
1,1020180,BREMER FINANCIAL CORPORATION,9,11.0,2004-06-30,138086.0,95452.0,5870480.0,2,2004,69853.0,48608.0
2,1020180,BREMER FINANCIAL CORPORATION,9,10.0,2004-09-30,212489.0,147314.0,5963700.0,3,2004,74403.0,51862.0
3,1020180,BREMER FINANCIAL CORPORATION,9,10.0,2004-12-31,291916.0,203077.0,6141519.0,4,2004,79427.0,55763.0
4,1020180,BREMER FINANCIAL CORPORATION,9,10.0,2005-03-31,81656.0,54637.0,6230236.0,1,2005,81656.0,54637.0
5,1020180,BREMER FINANCIAL CORPORATION,9,10.0,2005-06-30,170175.0,110844.0,6450716.0,2,2005,88519.0,56207.0
6,1020180,BREMER FINANCIAL CORPORATION,9,10.0,2005-09-30,265290.0,169488.0,6336699.0,3,2005,95115.0,58644.0
7,1020180,BREMER FINANCIAL CORPORATION,9,9.0,2005-12-31,362347.0,227516.0,6555895.0,4,2005,97057.0,58028.0
8,1020180,BREMER FINANCIAL CORPORATION,9,9.0,2006-03-31,100170.0,56954.0,6480680.0,1,2006,100170.0,56954.0
9,1020180,BREMER FINANCIAL CORPORATION,9,9.0,2006-06-30,206822.0,115120.0,6638952.0,2,2006,106652.0,58166.0


### Normalization

In [None]:
# Normalize Components
Quarterized_dfs['Normalized Interest Income per Quarter'] = Quarterized_dfs['QInterest Income']/Quarterized_dfs['Total Assets']

Quarterized_dfs['Normalized Net Interest Income per Quarter'] = Quarterized_dfs['QNet Interest Income']/Quarterized_dfs['Total Assets']

Quarterized_dfs.head()

Unnamed: 0,RSSD ID,Firm Legal Name,FR District Code,Bank Count,Reporting Date,Interest Income,Net Interest Income,Total Assets,Quarter,Year,QInterest Income,QNet Interest Income,Normalized Interest Income per Quarter,Normalized Net Interest Income per Quarter
0,1020180,BREMER FINANCIAL CORPORATION,9,11.0,2004-03-31,68233.0,46844.0,5721282.0,1,2004,68233.0,46844.0,0.011926,0.008188
1,1020180,BREMER FINANCIAL CORPORATION,9,11.0,2004-06-30,138086.0,95452.0,5870480.0,2,2004,69853.0,48608.0,0.011899,0.00828
2,1020180,BREMER FINANCIAL CORPORATION,9,10.0,2004-09-30,212489.0,147314.0,5963700.0,3,2004,74403.0,51862.0,0.012476,0.008696
3,1020180,BREMER FINANCIAL CORPORATION,9,10.0,2004-12-31,291916.0,203077.0,6141519.0,4,2004,79427.0,55763.0,0.012933,0.00908
4,1020180,BREMER FINANCIAL CORPORATION,9,10.0,2005-03-31,81656.0,54637.0,6230236.0,1,2005,81656.0,54637.0,0.013106,0.00877


# Considering Macroeconomic Variables

### 1. GDP AND UNEMPLOYMENT RATES

In [None]:
gdp_df = pd.read_csv(r"/content/drive/MyDrive/CS_Finance_Data/MacroEconomic_Variables/GDP.csv", parse_dates=True)
gdp_df['DATE'] = pd.to_datetime(gdp_df['DATE'])
print(gdp_df.shape)
gdp_df.head()

(81, 2)


Unnamed: 0,DATE,GDP
0,2004-01-01,11923.447
1,2004-04-01,12112.815
2,2004-07-01,12305.307
3,2004-10-01,12527.214
4,2005-01-01,12767.286


In [None]:
uner_df = pd.read_csv(r"/content/drive/MyDrive/CS_Finance_Data/MacroEconomic_Variables/UNRATE.csv")
uner_df['DATE'] = pd.to_datetime(uner_df['DATE'])
print(uner_df.shape)
uner_df.head()

(82, 2)


Unnamed: 0,DATE,UNRATE
0,2004-01-01,5.7
1,2004-04-01,5.6
2,2004-07-01,5.433333
3,2004-10-01,5.433333
4,2005-01-01,5.3


In [None]:
df_gdp_uner = pd.merge(gdp_df,uner_df,on='DATE')
print(df_gdp_uner.shape)
df_gdp_uner.head()

(81, 3)


Unnamed: 0,DATE,GDP,UNRATE
0,2004-01-01,11923.447,5.7
1,2004-04-01,12112.815,5.6
2,2004-07-01,12305.307,5.433333
3,2004-10-01,12527.214,5.433333
4,2005-01-01,12767.286,5.3


In [None]:
df_gdp_uner.rename(columns={'DATE': 'date'}, inplace=True)

In [None]:
def update_quart_enddate_shift(df):
    # Define a dictionary to map the month to the new date
    quarter_end_dates = {1: (12, 31), 4: (3, 31), 7: (6, 30), 10: (9, 30)}

    # Apply the mapping to update the dates
    for month, (new_month, new_day) in quarter_end_dates.items():
        df.loc[df['date'].dt.month == month, 'date'] = df['date'].apply(
            lambda x: x.replace(month=new_month, day=new_day) if x.month == month else x
        )

    # offset the years by 1 for those whose month is december
    tmp_ids = df[df["date"].dt.month == 12].index
    df.loc[tmp_ids, "date"] = df.loc[tmp_ids, "date"] - pd.DateOffset(years=1)

    # drop the first row df_gdp_uner
    df.drop(df.index[0], inplace=True)
    df.reset_index(drop=True, inplace=True)

    return df

In [None]:
df_gdp_uner =  update_quart_enddate_shift(df_gdp_uner)
df_gdp_uner.head()

Unnamed: 0,date,GDP,UNRATE
0,2004-03-31,12112.815,5.6
1,2004-06-30,12305.307,5.433333
2,2004-09-30,12527.214,5.433333
3,2004-12-31,12767.286,5.3
4,2005-03-31,12922.656,5.1


### 2. US Department of Treasury Dataset - Yield Curve Rates from 2004 - 2024

In [None]:
rates_df = pd.read_csv(r"/content/drive/MyDrive/CS_Finance_Data/MacroEconomic_Variables/yield-curve-rates-2004-2024.csv", encoding='latin1')
rates_df.head()

Unnamed: 0,Date,1 Mo,2 Mo,3 Mo,4 Mo,6 Mo,1 Yr,2 Yr,3 Yr,5 Yr,7 Yr,10 Yr,20 Yr,30 Yr
0,07/15/2024,5.48,5.51,5.43,5.4,5.23,4.85,4.44,4.23,4.13,4.16,4.23,4.56,4.46
1,07/12/2024,5.47,5.52,5.43,5.41,5.23,4.87,4.45,4.22,4.1,4.13,4.18,4.5,4.39
2,07/11/2024,5.48,5.53,5.44,5.41,5.25,4.91,4.5,4.26,4.13,4.15,4.2,4.51,4.41
3,07/10/2024,5.46,5.5,5.46,5.46,5.33,5.01,4.62,4.38,4.24,4.24,4.28,4.58,4.47
4,07/09/2024,5.45,5.51,5.46,5.46,5.34,5.02,4.62,4.37,4.24,4.25,4.3,4.59,4.49


In [None]:
# Changing Date to Date-time Format
rates_df['Date'] = pd.to_datetime(rates_df['Date'].astype(str), format='mixed', utc=True)
rates_df.head()

Unnamed: 0,Date,1 Mo,2 Mo,3 Mo,4 Mo,6 Mo,1 Yr,2 Yr,3 Yr,5 Yr,7 Yr,10 Yr,20 Yr,30 Yr
0,2024-07-15 00:00:00+00:00,5.48,5.51,5.43,5.4,5.23,4.85,4.44,4.23,4.13,4.16,4.23,4.56,4.46
1,2024-07-12 00:00:00+00:00,5.47,5.52,5.43,5.41,5.23,4.87,4.45,4.22,4.1,4.13,4.18,4.5,4.39
2,2024-07-11 00:00:00+00:00,5.48,5.53,5.44,5.41,5.25,4.91,4.5,4.26,4.13,4.15,4.2,4.51,4.41
3,2024-07-10 00:00:00+00:00,5.46,5.5,5.46,5.46,5.33,5.01,4.62,4.38,4.24,4.24,4.28,4.58,4.47
4,2024-07-09 00:00:00+00:00,5.45,5.51,5.46,5.46,5.34,5.02,4.62,4.37,4.24,4.25,4.3,4.59,4.49


In [None]:
# Giving the Year and Quarter each entry
rates_df['Year'] = rates_df['Date'].dt.year
rates_df['Quarter'] = rates_df['Date'].dt.quarter
rates_df.sort_values('Date').head()

Unnamed: 0,Date,1 Mo,2 Mo,3 Mo,4 Mo,6 Mo,1 Yr,2 Yr,3 Yr,5 Yr,7 Yr,10 Yr,20 Yr,30 Yr,Year,Quarter
5138,2004-01-02 00:00:00+00:00,0.88,,0.93,,1.02,1.31,1.94,2.47,3.36,3.9,4.38,5.21,,2004,1
5137,2004-01-05 00:00:00+00:00,0.88,,0.91,,1.05,1.35,1.95,2.51,3.39,3.92,4.41,5.23,,2004,1
5136,2004-01-06 00:00:00+00:00,0.88,,0.91,,1.03,1.3,1.84,2.38,3.26,3.8,4.29,5.13,,2004,1
5135,2004-01-07 00:00:00+00:00,0.88,,0.91,,1.02,1.29,1.84,2.36,3.25,3.76,4.27,5.11,,2004,1
5134,2004-01-08 00:00:00+00:00,0.87,,0.88,,1.01,1.29,1.85,2.37,3.24,3.76,4.27,5.12,,2004,1


In [None]:
finalrates = rates_df.groupby(['Year', 'Quarter']).mean()
finalrates = finalrates.drop(columns=["Date"])
finalrates.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,1 Mo,2 Mo,3 Mo,4 Mo,6 Mo,1 Yr,2 Yr,3 Yr,5 Yr,7 Yr,10 Yr,20 Yr,30 Yr
Year,Quarter,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2004,1,0.909677,,0.93371,,1.003548,1.221452,1.686613,2.163065,2.980968,3.501452,4.01,4.877903,
2004,2,0.954516,,1.096129,,1.359677,1.775484,2.454677,2.971935,3.720968,4.183871,4.597097,5.355645,
2004,3,1.37,,1.513906,,1.788437,2.075,2.556563,2.919531,3.505625,3.922188,4.301406,5.068281,
2004,4,1.841774,,2.047419,,2.297581,2.472742,2.822419,3.058871,3.49371,3.855,4.175806,4.874839,
2005,1,2.360984,,2.58918,,2.876885,3.072459,3.454918,3.623934,3.89082,4.098852,4.303607,4.764098,
2005,2,2.709687,,2.926719,,3.179688,3.337344,3.645938,3.732344,3.872969,3.984688,4.158594,4.54875,
2005,3,3.228594,,3.438281,,3.707031,3.791094,3.957969,3.989219,4.039375,4.108437,4.215156,4.507656,
2005,4,3.702131,,3.913607,,4.250492,4.288525,4.36459,4.372295,4.391148,4.425738,4.488197,4.767541,
2006,1,4.35871,,4.506613,,4.657258,4.640323,4.604677,4.582581,4.552419,4.555484,4.577097,4.774032,4.663056
2006,2,4.674921,,4.830159,,5.033651,5.021429,4.997937,4.987302,4.993175,5.018095,5.072698,5.290635,5.143016


### 3. Spreads

In [None]:
#Second we modify the format, this is the solution that I found for the two digit year
def chain_date(date_str):
    for fmt in ('%m/%d/%Y', '%m/%d/%y'):
        try:
            return pd.to_datetime(date_str, format=fmt)
        except ValueError:
            pass
    return pd.NaT

# We apply the chain_date function
rates_df['Date'] = rates_df['Date'].apply(chain_date)


copy_df=rates_df.copy()

In [None]:
# Assign quarter
def assign_quarter(date):
    month = date.month
    if month in [1, 2, 3]:
        return 'Q1'
    elif month in [4, 5, 6]:
        return 'Q2'
    elif month in [7, 8, 9]:
        return 'Q3'
    else:
        return 'Q4'

# Asisign the quarters in a new column
copy_df['Quarter'] = copy_df['Date'].apply(assign_quarter)
copy_df['Year'] = copy_df['Date'].dt.year


# Agrupar por cuatrimestre y calcular el promedio de todas las columnas excepto 'Date'
quarterly_averages = copy_df.groupby(['Year', 'Quarter']).mean(numeric_only=True).reset_index()

In [None]:
#calculate the spreads for all the possible spreads (around 78 possible)
columns_to_consider = quarterly_averages.columns[2:]


column_pairs = list(itertools.combinations(columns_to_consider, 2))


#new data frame to store the spreads
spreads_df = pd.DataFrame()

for (col1, col2) in column_pairs:
    spread_col_name = f"{col2}-{col1}_spread"
    spreads_df[spread_col_name] = quarterly_averages[col2] - quarterly_averages[col1]


#Add the year-Quarter columns must have 80 columns in total
result_df = pd.concat([quarterly_averages.iloc[:, :2], spreads_df], axis=1)

result_df.head()

Unnamed: 0,Year,Quarter,2 Mo-1 Mo_spread,3 Mo-1 Mo_spread,4 Mo-1 Mo_spread,6 Mo-1 Mo_spread,1 Yr-1 Mo_spread,2 Yr-1 Mo_spread,3 Yr-1 Mo_spread,5 Yr-1 Mo_spread,...,7 Yr-5 Yr_spread,10 Yr-5 Yr_spread,20 Yr-5 Yr_spread,30 Yr-5 Yr_spread,10 Yr-7 Yr_spread,20 Yr-7 Yr_spread,30 Yr-7 Yr_spread,20 Yr-10 Yr_spread,30 Yr-10 Yr_spread,30 Yr-20 Yr_spread
0,2004,Q1,,0.024032,,0.093871,0.311774,0.776935,1.253387,2.07129,...,0.520484,1.029032,1.896935,,0.508548,1.376452,,0.867903,,
1,2004,Q2,,0.141613,,0.405161,0.820968,1.500161,2.017419,2.766452,...,0.462903,0.876129,1.634677,,0.413226,1.171774,,0.758548,,
2,2004,Q3,,0.143906,,0.418437,0.705,1.186562,1.549531,2.135625,...,0.416562,0.795781,1.562656,,0.379219,1.146094,,0.766875,,
3,2004,Q4,,0.205645,,0.455806,0.630968,0.980645,1.217097,1.651935,...,0.36129,0.682097,1.381129,,0.320806,1.019839,,0.699032,,
4,2005,Q1,,0.228197,,0.515902,0.711475,1.093934,1.262951,1.529836,...,0.208033,0.412787,0.873279,,0.204754,0.665246,,0.460492,,


In [None]:
Spreads_df = result_df.copy()
Spreads_df.drop(columns=['Year', 'Quarter'], inplace=True)

In [None]:
finalrates_df = finalrates.copy()
finalrates_df.reset_index(inplace=True)
finalrates_df.head()

Unnamed: 0,Year,Quarter,1 Mo,2 Mo,3 Mo,4 Mo,6 Mo,1 Yr,2 Yr,3 Yr,5 Yr,7 Yr,10 Yr,20 Yr,30 Yr
0,2004,1,0.909677,,0.93371,,1.003548,1.221452,1.686613,2.163065,2.980968,3.501452,4.01,4.877903,
1,2004,2,0.954516,,1.096129,,1.359677,1.775484,2.454677,2.971935,3.720968,4.183871,4.597097,5.355645,
2,2004,3,1.37,,1.513906,,1.788437,2.075,2.556563,2.919531,3.505625,3.922188,4.301406,5.068281,
3,2004,4,1.841774,,2.047419,,2.297581,2.472742,2.822419,3.058871,3.49371,3.855,4.175806,4.874839,
4,2005,1,2.360984,,2.58918,,2.876885,3.072459,3.454918,3.623934,3.89082,4.098852,4.303607,4.764098,


In [41]:
cmev = pd.concat([finalrates_df, Spreads_df, df_gdp_uner], axis = 1)
cmev.drop(columns=['Year', 'Quarter'], inplace=True)
cmev.head()

Unnamed: 0,1 Mo,2 Mo,3 Mo,4 Mo,6 Mo,1 Yr,2 Yr,3 Yr,5 Yr,7 Yr,...,30 Yr-5 Yr_spread,10 Yr-7 Yr_spread,20 Yr-7 Yr_spread,30 Yr-7 Yr_spread,20 Yr-10 Yr_spread,30 Yr-10 Yr_spread,30 Yr-20 Yr_spread,date,GDP,UNRATE
0,0.909677,,0.93371,,1.003548,1.221452,1.686613,2.163065,2.980968,3.501452,...,,0.508548,1.376452,,0.867903,,,2004-03-31,12112.815,5.6
1,0.954516,,1.096129,,1.359677,1.775484,2.454677,2.971935,3.720968,4.183871,...,,0.413226,1.171774,,0.758548,,,2004-06-30,12305.307,5.433333
2,1.37,,1.513906,,1.788437,2.075,2.556563,2.919531,3.505625,3.922188,...,,0.379219,1.146094,,0.766875,,,2004-09-30,12527.214,5.433333
3,1.841774,,2.047419,,2.297581,2.472742,2.822419,3.058871,3.49371,3.855,...,,0.320806,1.019839,,0.699032,,,2004-12-31,12767.286,5.3
4,2.360984,,2.58918,,2.876885,3.072459,3.454918,3.623934,3.89082,4.098852,...,,0.204754,0.665246,,0.460492,,,2005-03-31,12922.656,5.1


In [42]:
Quarterized_dfs.head()

Unnamed: 0,RSSD ID,Firm Legal Name,FR District Code,Bank Count,Reporting Date,Interest Income,Net Interest Income,Total Assets,Quarter,Year,QInterest Income,QNet Interest Income,Normalized Interest Income per Quarter,Normalized Net Interest Income per Quarter
0,1020180,BREMER FINANCIAL CORPORATION,9,11.0,2004-03-31,68233.0,46844.0,5721282.0,1,2004,68233.0,46844.0,0.011926,0.008188
1,1020180,BREMER FINANCIAL CORPORATION,9,11.0,2004-06-30,138086.0,95452.0,5870480.0,2,2004,69853.0,48608.0,0.011899,0.00828
2,1020180,BREMER FINANCIAL CORPORATION,9,10.0,2004-09-30,212489.0,147314.0,5963700.0,3,2004,74403.0,51862.0,0.012476,0.008696
3,1020180,BREMER FINANCIAL CORPORATION,9,10.0,2004-12-31,291916.0,203077.0,6141519.0,4,2004,79427.0,55763.0,0.012933,0.00908
4,1020180,BREMER FINANCIAL CORPORATION,9,10.0,2005-03-31,81656.0,54637.0,6230236.0,1,2005,81656.0,54637.0,0.013106,0.00877


In [50]:
# selecting some columns of Quater
small_Quaterized_dfs = Quarterized_dfs[['RSSD ID', 'Firm Legal Name', "FR District Code", 'Bank Count', 'Reporting Date', 'Year', 'Quarter',
                                        'Normalized Interest Income per Quarter', 'Normalized Net Interest Income per Quarter']]

small_Quaterized_dfs.head()

Unnamed: 0,RSSD ID,Firm Legal Name,FR District Code,Bank Count,Reporting Date,Year,Quarter,Normalized Interest Income per Quarter,Normalized Net Interest Income per Quarter
0,1020180,BREMER FINANCIAL CORPORATION,9,11.0,2004-03-31,2004,1,0.011926,0.008188
1,1020180,BREMER FINANCIAL CORPORATION,9,11.0,2004-06-30,2004,2,0.011899,0.00828
2,1020180,BREMER FINANCIAL CORPORATION,9,10.0,2004-09-30,2004,3,0.012476,0.008696
3,1020180,BREMER FINANCIAL CORPORATION,9,10.0,2004-12-31,2004,4,0.012933,0.00908
4,1020180,BREMER FINANCIAL CORPORATION,9,10.0,2005-03-31,2005,1,0.013106,0.00877


In [51]:
df = small_Quaterized_dfs.merge(cmev, left_on='Reporting Date', right_on='date', how='left')
df.drop(columns=['date'], inplace=True)
df.head()

Unnamed: 0,RSSD ID,Firm Legal Name,FR District Code,Bank Count,Reporting Date,Year,Quarter,Normalized Interest Income per Quarter,Normalized Net Interest Income per Quarter,1 Mo,2 Mo,3 Mo,4 Mo,6 Mo,1 Yr,2 Yr,3 Yr,5 Yr,7 Yr,10 Yr,20 Yr,30 Yr,2 Mo-1 Mo_spread,3 Mo-1 Mo_spread,4 Mo-1 Mo_spread,6 Mo-1 Mo_spread,1 Yr-1 Mo_spread,2 Yr-1 Mo_spread,3 Yr-1 Mo_spread,5 Yr-1 Mo_spread,7 Yr-1 Mo_spread,10 Yr-1 Mo_spread,20 Yr-1 Mo_spread,30 Yr-1 Mo_spread,3 Mo-2 Mo_spread,4 Mo-2 Mo_spread,6 Mo-2 Mo_spread,1 Yr-2 Mo_spread,2 Yr-2 Mo_spread,3 Yr-2 Mo_spread,5 Yr-2 Mo_spread,7 Yr-2 Mo_spread,10 Yr-2 Mo_spread,20 Yr-2 Mo_spread,30 Yr-2 Mo_spread,4 Mo-3 Mo_spread,6 Mo-3 Mo_spread,1 Yr-3 Mo_spread,2 Yr-3 Mo_spread,3 Yr-3 Mo_spread,5 Yr-3 Mo_spread,7 Yr-3 Mo_spread,10 Yr-3 Mo_spread,20 Yr-3 Mo_spread,30 Yr-3 Mo_spread,6 Mo-4 Mo_spread,1 Yr-4 Mo_spread,2 Yr-4 Mo_spread,3 Yr-4 Mo_spread,5 Yr-4 Mo_spread,7 Yr-4 Mo_spread,10 Yr-4 Mo_spread,20 Yr-4 Mo_spread,30 Yr-4 Mo_spread,1 Yr-6 Mo_spread,2 Yr-6 Mo_spread,3 Yr-6 Mo_spread,5 Yr-6 Mo_spread,7 Yr-6 Mo_spread,10 Yr-6 Mo_spread,20 Yr-6 Mo_spread,30 Yr-6 Mo_spread,2 Yr-1 Yr_spread,3 Yr-1 Yr_spread,5 Yr-1 Yr_spread,7 Yr-1 Yr_spread,10 Yr-1 Yr_spread,20 Yr-1 Yr_spread,30 Yr-1 Yr_spread,3 Yr-2 Yr_spread,5 Yr-2 Yr_spread,7 Yr-2 Yr_spread,10 Yr-2 Yr_spread,20 Yr-2 Yr_spread,30 Yr-2 Yr_spread,5 Yr-3 Yr_spread,7 Yr-3 Yr_spread,10 Yr-3 Yr_spread,20 Yr-3 Yr_spread,30 Yr-3 Yr_spread,7 Yr-5 Yr_spread,10 Yr-5 Yr_spread,20 Yr-5 Yr_spread,30 Yr-5 Yr_spread,10 Yr-7 Yr_spread,20 Yr-7 Yr_spread,30 Yr-7 Yr_spread,20 Yr-10 Yr_spread,30 Yr-10 Yr_spread,30 Yr-20 Yr_spread,GDP,UNRATE
0,1020180,BREMER FINANCIAL CORPORATION,9,11.0,2004-03-31,2004,1,0.011926,0.008188,0.909677,,0.93371,,1.003548,1.221452,1.686613,2.163065,2.980968,3.501452,4.01,4.877903,,,0.024032,,0.093871,0.311774,0.776935,1.253387,2.07129,2.591774,3.100323,3.968226,,,,,,,,,,,,,,0.069839,0.287742,0.752903,1.229355,2.047258,2.567742,3.07629,3.944194,,,,,,,,,,,0.217903,0.683065,1.159516,1.977419,2.497903,3.006452,3.874355,,0.465161,0.941613,1.759516,2.28,2.788548,3.656452,,0.476452,1.294355,1.814839,2.323387,3.19129,,0.817903,1.338387,1.846935,2.714839,,0.520484,1.029032,1.896935,,0.508548,1.376452,,0.867903,,,12112.815,5.6
1,1020180,BREMER FINANCIAL CORPORATION,9,11.0,2004-06-30,2004,2,0.011899,0.00828,0.954516,,1.096129,,1.359677,1.775484,2.454677,2.971935,3.720968,4.183871,4.597097,5.355645,,,0.141613,,0.405161,0.820968,1.500161,2.017419,2.766452,3.229355,3.642581,4.401129,,,,,,,,,,,,,,0.263548,0.679355,1.358548,1.875806,2.624839,3.087742,3.500968,4.259516,,,,,,,,,,,0.415806,1.095,1.612258,2.36129,2.824194,3.237419,3.995968,,0.679194,1.196452,1.945484,2.408387,2.821613,3.580161,,0.517258,1.26629,1.729194,2.142419,2.900968,,0.749032,1.211935,1.625161,2.38371,,0.462903,0.876129,1.634677,,0.413226,1.171774,,0.758548,,,12305.307,5.433333
2,1020180,BREMER FINANCIAL CORPORATION,9,10.0,2004-09-30,2004,3,0.012476,0.008696,1.37,,1.513906,,1.788437,2.075,2.556563,2.919531,3.505625,3.922188,4.301406,5.068281,,,0.143906,,0.418437,0.705,1.186562,1.549531,2.135625,2.552188,2.931406,3.698281,,,,,,,,,,,,,,0.274531,0.561094,1.042656,1.405625,1.991719,2.408281,2.7875,3.554375,,,,,,,,,,,0.286563,0.768125,1.131094,1.717188,2.13375,2.512969,3.279844,,0.481562,0.844531,1.430625,1.847187,2.226406,2.993281,,0.362969,0.949063,1.365625,1.744844,2.511719,,0.586094,1.002656,1.381875,2.14875,,0.416562,0.795781,1.562656,,0.379219,1.146094,,0.766875,,,12527.214,5.433333
3,1020180,BREMER FINANCIAL CORPORATION,9,10.0,2004-12-31,2004,4,0.012933,0.00908,1.841774,,2.047419,,2.297581,2.472742,2.822419,3.058871,3.49371,3.855,4.175806,4.874839,,,0.205645,,0.455806,0.630968,0.980645,1.217097,1.651935,2.013226,2.334032,3.033065,,,,,,,,,,,,,,0.250161,0.425323,0.775,1.011452,1.44629,1.807581,2.128387,2.827419,,,,,,,,,,,0.175161,0.524839,0.76129,1.196129,1.557419,1.878226,2.577258,,0.349677,0.586129,1.020968,1.382258,1.703065,2.402097,,0.236452,0.67129,1.032581,1.353387,2.052419,,0.434839,0.796129,1.116935,1.815968,,0.36129,0.682097,1.381129,,0.320806,1.019839,,0.699032,,,12767.286,5.3
4,1020180,BREMER FINANCIAL CORPORATION,9,10.0,2005-03-31,2005,1,0.013106,0.00877,2.360984,,2.58918,,2.876885,3.072459,3.454918,3.623934,3.89082,4.098852,4.303607,4.764098,,,0.228197,,0.515902,0.711475,1.093934,1.262951,1.529836,1.737869,1.942623,2.403115,,,,,,,,,,,,,,0.287705,0.483279,0.865738,1.034754,1.301639,1.509672,1.714426,2.174918,,,,,,,,,,,0.195574,0.578033,0.747049,1.013934,1.221967,1.426721,1.887213,,0.382459,0.551475,0.818361,1.026393,1.231148,1.691639,,0.169016,0.435902,0.643934,0.848689,1.30918,,0.266885,0.474918,0.679672,1.140164,,0.208033,0.412787,0.873279,,0.204754,0.665246,,0.460492,,,12922.656,5.1


In [45]:
df.shape

(243757, 101)

In [48]:
pd.set_option('display.max_rows', None)
df.isnull().sum()

RSSD ID                                            0
Firm Legal Name                                    0
Bank Count                                      9119
Reporting Date                                     0
Year                                               0
Quarter                                            0
Normalized Interest Income per Quarter        170337
Normalized Net Interest Income per Quarter    170340
1 Mo                                             473
2 Mo                                          194565
3 Mo                                             473
4 Mo                                          231244
6 Mo                                             473
1 Yr                                             473
2 Yr                                             473
3 Yr                                             473
5 Yr                                             473
7 Yr                                             473
10 Yr                                         