In [1]:
import warnings

import joblib
import numpy as np
import pandas as pd

In [3]:
# Read the three inputs this notebook works from: the original loan table,
# the processed test features, and the pickled model.
loans_df = pd.read_csv('../data/data.csv')  # original data; supplies loan amounts later
X_test = pd.read_csv('../data/processed/X_test.csv')
rf_model = joblib.load('../models/random_forest.pkl')

  loans_df = pd.read_csv('../data/data.csv')  # Original data for loan amounts


In [4]:
# Probability of Default (PD)
def calcualte_pd(model, data):
    """Return the model's second-class probability for every row of ``data``.

    Parameters
    ----------
    model : object exposing ``predict_proba(data)`` that returns a 2-D array.
    data : the feature rows handed to ``model.predict_proba`` unchanged.

    Returns
    -------
    Column 1 of the ``predict_proba`` output (one value per row).

    Notes
    -----
    Presumably column 1 is the "default" class -- confirm against the label
    encoding used when the model was trained. The function name is
    misspelled ("calcualte"); it is kept as-is because callers use it.
    """
    class_probabilities = model.predict_proba(data)
    default_class_column = 1
    return class_probabilities[:, default_class_column]

In [5]:
# Score the test features, then attach the scores to a copy of X_test so the
# feature frame itself is left untouched.
default_probabilities = calcualte_pd(rf_model, X_test)
loans_df_test = X_test.copy()
loans_df_test['PD'] = default_probabilities

In [6]:
# Implement Loss Given Default (LGD) estimation
def estimate_lgd(loan_data, base_lgd=0.45, homeowner_factor=0.8):
    """Estimate a per-loan Loss Given Default (LGD) with a simple rule.

    This is a placeholder heuristic (in reality this would be a trained
    model). Every loan starts at ``base_lgd``; rows whose
    ``home_ownership_OWN`` column equals 1 get ``base_lgd * homeowner_factor``.
    No other feature (FICO score included) influences the result.

    Parameters
    ----------
    loan_data : pandas.DataFrame
        Loan rows. The ``home_ownership_OWN`` column is optional.
    base_lgd : float, default 0.45
        LGD applied to every loan before adjustment (the original author
        describes 0.45 as an industry average).
    homeowner_factor : float, default 0.8
        Multiplier applied to ``base_lgd`` for rows flagged as homeowners.

    Returns
    -------
    numpy.ndarray
        One LGD value per row of ``loan_data``, in row order. An array is
        returned even when ``home_ownership_OWN`` is absent, so callers always
        receive the same type and length.
    """
    # Adjust based on collateral: homeowners get the reduced LGD.
    if 'home_ownership_OWN' in loan_data.columns:
        is_homeowner = loan_data['home_ownership_OWN'] == 1
    else:
        # No ownership flag available: nobody qualifies for the reduction.
        is_homeowner = [False] * len(loan_data)

    return np.where(is_homeowner, base_lgd * homeowner_factor, base_lgd)

In [7]:
# Attach the rule-based LGD estimate to every test loan.
loans_df_test['LGD'] = estimate_lgd(loan_data=loans_df_test)

In [8]:
# Calculate Exposure at Default (EAD)
# For simplicity the original loan amount is used as EAD; in practice the
# exposure would account for amortization.
#
# NOTE(review): X_test is loaded with pd.read_csv and no index_col, so
# X_test.index is a fresh 0..n-1 range, not the row labels of loans_df. The
# lookup below therefore picks the FIRST len(X_test) rows of loans_df, which
# are the test loans only if X_test.csv was written in that exact order.
# TODO: persist the original row labels when X_test.csv is created and load
# them with index_col so this lookup is truly label-based.
test_indices = X_test.index
loans_df_test['EAD'] = loans_df.loc[test_indices, 'loan_amnt'].values

# Sanity check for the alignment concern above. Assumes the 'loan_amnt'
# feature in X_test is an order-preserving transform (e.g. standard scaling)
# of the raw loan amount -- confirm against the preprocessing step. Under that
# assumption the ranks of the feature and of EAD must agree almost perfectly.
if 'loan_amnt' in loans_df_test.columns:
    ead_alignment = loans_df_test['loan_amnt'].rank().corr(loans_df_test['EAD'].rank())
    if ead_alignment < 0.99:
        warnings.warn(
            "EAD looks misaligned with the test rows (rank correlation with the "
            f"loan_amnt feature = {ead_alignment:.3f}); EL and RWA derived from it "
            "will be unreliable.",
            RuntimeWarning,
        )

# Calculate Expected Loss (EL) = PD * LGD * EAD
loans_df_test['EL'] = loans_df_test['PD'] * loans_df_test['LGD'] * loans_df_test['EAD']

In [9]:
# Risk weights based on Basel III standards
def assign_basel_risk_weight(pd_value):
    """Map one probability of default to a simplified risk weight.

    Bands (upper bounds are inclusive):
        PD <= 0.05 -> 0.5   (50% risk weight)
        PD <= 0.10 -> 0.75  (75% risk weight)
        PD <= 0.30 -> 1.0   (100% risk weight)
        otherwise  -> 1.5   (150% risk weight)

    Any value that satisfies none of the ``<=`` comparisons (NaN included)
    ends up in the final 150% band.
    """
    # (inclusive upper PD bound, risk weight), checked in ascending order
    pd_bands = (
        (0.05, 0.5),
        (0.10, 0.75),
        (0.30, 1.0),
    )
    for upper_bound, risk_weight in pd_bands:
        if pd_value <= upper_bound:
            return risk_weight
    return 1.5

In [10]:
# Look up the risk weight for each loan from its PD
pd_per_loan = loans_df_test['PD']
loans_df_test['RiskWeight'] = pd_per_loan.apply(assign_basel_risk_weight)

# Risk-Weighted Assets (RWA) = EAD * risk weight
risk_weighted_assets = loans_df_test['EAD'] * loans_df_test['RiskWeight']
loans_df_test['RWA'] = risk_weighted_assets

# Write the full table (features plus risk columns) to disk, without the index
basel_output_path = '../data/processed/basel_risk_calculations.csv'
loans_df_test.to_csv(basel_output_path, index=False)


In [11]:
# Preview the first five rows, including the derived risk columns.
loans_df_test.head(n=5)

Unnamed: 0,loan_amnt,int_rate,installment,annual_inc,dti,delinq_2yrs,fico_range_low,fico_range_high,inq_last_6mths,open_acc,pub_rec,revol_bal,revol_util,total_acc,PD,LGD,EAD,EL,RiskWeight,RWA
0,0.495545,1.013327,0.194029,0.517202,-0.86376,-0.354243,-0.714613,-0.714601,-0.651061,-0.81761,-0.346938,-0.256612,0.439998,-1.348032,0.23,0.45,3600.0,372.6,1.0,3600.0
1,-0.548989,-0.021424,-0.407472,-0.282122,0.186643,-0.354243,-0.259984,-0.259983,-0.651061,0.068563,-0.346938,-0.287779,0.557173,-0.681094,0.2,0.45,24700.0,2223.0,1.0,24700.0
2,-0.875407,0.516647,-0.752593,-0.290414,0.548464,-0.354243,-1.169243,-1.169219,-0.651061,-1.17208,-0.346938,-0.011075,1.122846,-1.431399,0.16,0.45,20000.0,1440.0,1.0,20000.0
3,-0.331378,2.565454,-0.337183,-0.273831,0.878731,-0.354243,0.194646,0.194635,0.477035,-0.108672,-0.346938,0.037028,-0.335782,0.069211,0.21,0.45,35000.0,3307.5,1.0,35000.0
4,2.715181,0.29521,1.855595,0.057839,0.501483,-0.354243,0.194646,0.194635,-0.651061,0.954736,-0.346938,0.076358,-0.715592,-0.014156,0.1,0.45,10400.0,468.0,0.75,7800.0
