In [45]:
import pandas as pd
import glob

In [46]:
"Reload the CSV file after code execution state reset"
financial_files_path = "./data_csv/Financial_*.csv"
all_files = glob.glob(financial_files_path)

In [69]:
df_list = []
for file in all_files:
    temp_df = pd.read_csv(file)
    quarter = file.split("_")[2] + "_" + file.split("_")[3].split(".")[0]
    temp_df["quarter"] = quarter
    df_list.append(temp_df)

In [70]:
merged_df = pd.concat(df_list)

In [None]:
# Define all additional CAMELS ratios from the available columns
camels_ratios = pd.DataFrame({
    "cert" : merged_df["CERT"],
    "bank_name" : merged_df["NAMEFULL"],
    
    # Capital Adequacy (C)
    "equity_to_assets": merged_df["EQV"] / merged_df["NAASSET"],
    "tier1_capital_ratio": merged_df["IDT1CER"],
    "tier1_rwa_ratio": merged_df["IDT1RWAJR"],
    "total_equity_ratio": merged_df["EQTOT"] / merged_df["NAASSET"],
    "dividends_to_equity": merged_df["EQCDIVNTINC"] / merged_df["EQV"],

    # Asset Quality (A)
    "loan_loss_reserve_to_loans": merged_df["LNATRESR"] / merged_df["NTLNLSR"],
    "noncurrent_loans_to_loans": merged_df["NCLNLSR"] / merged_df["NTLNLSR"],
    "net_loans_to_assets": merged_df["LNLSNET"] / merged_df["NAASSET"],
    "loan_depreciation_to_loans": merged_df["LNLSDEPR"] / merged_df["NTLNLSR"],
    "ore_to_assets": merged_df["LSAORE"] / merged_df["NAASSET"],
    "assets_past_due_30_89_to_assets": merged_df["P3ASSET"] / merged_df["NAASSET"],
    "noncurrent_real_estate_to_assets": merged_df["NCRER"] / merged_df["NAASSET"],
    "noncurrent_re_to_loans": merged_df["NCRERESR"] / merged_df["NTLNLSR"],

    # Management (M)
    "efficiency_ratio_proxy": merged_df["EINTEXP"] / merged_df["INTINC"],
    "noninterest_income_to_assets": merged_df["NONII"] / merged_df["NAASSET"],
    "operating_income_to_assets": merged_df["NOIJ"] / merged_df["NAASSET"],
    "assets_per_employee": merged_df["NAASSET"] / merged_df["NUMEMP"],

    # Earnings (E)
    "return_on_assets": merged_df["ROA"],
    "pretax_return_on_assets": merged_df["ROAPTX"],
    "return_on_equity": merged_df["ROE"],
    "net_interest_margin": merged_df["NIM"],
    "net_income_to_assets": merged_df["NETINC"] / merged_df["NAASSET"],
    "pretax_income_to_assets": merged_df["PTAXNETINC"] / merged_df["NAASSET"],

    # Liquidity (L)
    "net_loans_to_total_deposits": merged_df["LNLSNET"] / merged_df["DEP"],
    "core_deposits_to_assets": merged_df["COREDEP"] / merged_df["NAASSET"],
    "uninsured_deposits_to_total_deposits": merged_df["DEPUNINS"] / merged_df["DEP"],
    "nonint_bearing_deposits_to_assets": merged_df["DEPNIDOM"] / merged_df["NAASSET"],
    "insured_deposits_to_total_deposits": merged_df["DEPINS"] / merged_df["DEP"],

    # Sensitivity to Market Risk (S)
    "securities_to_assets": merged_df["IGLSEC"] / merged_df["NAASSET"],
    "asset_sensitivity_proxy": merged_df["ASDRRES"] / merged_df["NAASSET"],
})

In [87]:
camels_ratios["cert"].nunique

<bound method IndexOpsMixin.nunique of 0           9
1          14
2          35
3          39
4          41
        ...  
7743    90582
7744    91005
7745    91280
7746    91325
7747    91385
Name: cert, Length: 168645, dtype: int64>

In [77]:
label_df = pd.read_csv("./data_csv/failed_bank-data.csv")
label_df["LABEL"] = 1
label_df = label_df[["CERT","LABEL"]]

In [79]:
label_df

Unnamed: 0,CERT,LABEL
0,19040,1
1,23306,1
2,34578,1
3,35065,1
4,58052,1
...,...,...
330,29730,1
331,12736,1
332,33901,1
333,1971,1
