# Q3: How Robust Are Credit Risk Models Over Time?

This notebook compares [INSERT MODEL NAMES] (TODO: fill in) on credit risk prediction performance when the training and test data come from different economic periods.

### Imports

In [15]:
import numpy as np
import pandas as pd
import os
import sys
import subprocess
from pathlib import Path
from sklearn.preprocessing import OneHotEncoder
import warnings

# Add project root to path
# Path() is the current working directory, so PROJECT_ROOT is the cwd's parent.
# NOTE(review): assumes the kernel is started from a direct subfolder of the repo — confirm.
PROJECT_ROOT = Path().resolve().parent
sys.path.append(str(PROJECT_ROOT))
# Silences every warning category for the rest of the session, deprecation notices included.
warnings.filterwarnings("ignore")

from data_processings.datasets import LendingClubDataset
from data_processings.feature_engineering import process_q3_features

### Pre-Processing of Data

Loading Dataset for Accepted Loans

In [16]:
# Size argument handed to the loader; presumably the number of rows to read — verify against LendingClubDataset.load.
num_samples = 10000
dataloader = LendingClubDataset()
# NOTE(review): whether load() samples randomly or takes the first rows is not visible here — confirm, and seed it if random.
accepted_df = dataloader.load(num_samples)

In [19]:
accepted_df["revol_util"].unique() # TODO: scratch look at the distinct revol_util values; remove this cell before sharing

array([ 29.7,  19.2,  56.2, ..., 100.8, 134.3,   8.3], shape=(1020,))

Feature Construction

In [17]:
# Rebinds accepted_df to the engineered frame, so the raw load is no longer reachable under this name.
# NOTE(review): re-running this cell feeds already-processed data back into process_q3_features —
# confirm that is safe, or re-run the load cell first.
accepted_df = process_q3_features(accepted_df)
accepted_df

Unnamed: 0,loan_status,revol_util,purpose,home_ownership,emp_length,term,application_type,int_rate,total_acc,open_acc,...,acc_now_delinq,acc_open_past_24mths,verification_status,pub_rec,issue_year,issue_month,dti_mod,annual_inc_mod,loan_income_ratio,fico_mean
0,Fully Paid,29.7,debt_consolidation,MORTGAGE,10+ years,36 months,Individual,13.99,13.0,7.0,...,0.0,4.0,Not Verified,0.0,2015,12,5.91,55000.0,0.065455,677.0
1,Fully Paid,19.2,small_business,MORTGAGE,10+ years,36 months,Individual,11.99,38.0,22.0,...,0.0,4.0,Not Verified,0.0,2015,12,16.06,65000.0,0.380000,717.0
2,Fully Paid,56.2,home_improvement,MORTGAGE,10+ years,60 months,Joint App,10.78,18.0,6.0,...,0.0,6.0,Not Verified,0.0,2015,12,13.85,71000.0,0.317460,697.0
3,Fully Paid,64.5,major_purchase,MORTGAGE,3 years,60 months,Individual,22.45,35.0,12.0,...,0.0,10.0,Source Verified,0.0,2015,12,25.37,104433.0,0.099585,697.0
4,Fully Paid,68.4,debt_consolidation,RENT,4 years,36 months,Individual,13.44,6.0,5.0,...,0.0,0.0,Source Verified,0.0,2015,12,10.20,34000.0,0.351471,692.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8388,Fully Paid,44.2,debt_consolidation,MORTGAGE,6 years,36 months,Individual,11.22,32.0,12.0,...,0.0,8.0,Source Verified,2.0,2015,12,6.53,90000.0,0.033333,662.0
8389,Charged Off,58.9,debt_consolidation,RENT,1 year,36 months,Individual,13.67,49.0,30.0,...,0.0,7.0,Verified,0.0,2015,12,29.63,38000.0,0.131579,662.0
8390,Fully Paid,48.5,credit_card,RENT,10+ years,36 months,Individual,9.76,25.0,6.0,...,0.0,2.0,Source Verified,0.0,2015,12,8.32,100000.0,0.160000,672.0
8391,Fully Paid,51.5,credit_card,MORTGAGE,9 years,36 months,Individual,9.17,22.0,10.0,...,0.0,3.0,Source Verified,0.0,2015,12,3.84,45000.0,0.222222,687.0


Feature Type Conversion

In [None]:
# Binary Categorical Features 
accepted_df['loan_status'] = accepted_df['loan_status'].map({'Fully Paid': 0, 'Charged Off': 1})
accepted_df['application_type'] = accepted_df['application_type'].map({'Individual': 0, 'Joint App': 1})
accepted_df['term'] = accepted_df['term'].map({'36 months': 0, '60 months': 1})
accepted_df

Unnamed: 0,loan_status,revol_util,purpose,home_ownership,emp_length,term,application_type,int_rate,total_acc,open_acc,...,acc_now_delinq,acc_open_past_24mths,verification_status,pub_rec,issue_year,issue_month,dti_mod,annual_inc_mod,loan_income_ratio,fico_mean
0,Fully Paid,29.7,debt_consolidation,MORTGAGE,10+ years,36 months,0,13.99,13.0,7.0,...,0.0,4.0,Not Verified,0.0,2015,12,5.91,55000.0,0.065455,677.0
1,Fully Paid,19.2,small_business,MORTGAGE,10+ years,36 months,0,11.99,38.0,22.0,...,0.0,4.0,Not Verified,0.0,2015,12,16.06,65000.0,0.380000,717.0
2,Fully Paid,56.2,home_improvement,MORTGAGE,10+ years,60 months,1,10.78,18.0,6.0,...,0.0,6.0,Not Verified,0.0,2015,12,13.85,71000.0,0.317460,697.0
3,Fully Paid,64.5,major_purchase,MORTGAGE,3 years,60 months,0,22.45,35.0,12.0,...,0.0,10.0,Source Verified,0.0,2015,12,25.37,104433.0,0.099585,697.0
4,Fully Paid,68.4,debt_consolidation,RENT,4 years,36 months,0,13.44,6.0,5.0,...,0.0,0.0,Source Verified,0.0,2015,12,10.20,34000.0,0.351471,692.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8388,Fully Paid,44.2,debt_consolidation,MORTGAGE,6 years,36 months,0,11.22,32.0,12.0,...,0.0,8.0,Source Verified,2.0,2015,12,6.53,90000.0,0.033333,662.0
8389,Charged Off,58.9,debt_consolidation,RENT,1 year,36 months,0,13.67,49.0,30.0,...,0.0,7.0,Verified,0.0,2015,12,29.63,38000.0,0.131579,662.0
8390,Fully Paid,48.5,credit_card,RENT,10+ years,36 months,0,9.76,25.0,6.0,...,0.0,2.0,Source Verified,0.0,2015,12,8.32,100000.0,0.160000,672.0
8391,Fully Paid,51.5,credit_card,MORTGAGE,9 years,36 months,0,9.17,22.0,10.0,...,0.0,3.0,Source Verified,0.0,2015,12,3.84,45000.0,0.222222,687.0


In [None]:
# One Hot Encoding 
one_hot_enc = OneHotEncoder(sparse=False, drop='first')
categorical_features = ['term', 'grade', 'sub_grade', 'emp_length', 'home_ownership']

In [None]:
# Print every column name on one line (the default frame display elides the middle columns).
print(accepted_df.columns.tolist())

In [None]:
# Sanity check: show the distinct values currently held in the loan_status column.
accepted_df["loan_status"].unique()

In [None]:
# Double brackets select 'id' as a one-column DataFrame rather than a Series.
# NOTE(review): raises KeyError if 'id' is not among the columns — confirm it survives process_q3_features.
accepted_df[["id"]]

In [None]:
# dti_final takes dti_joint where it is non-null and falls back to dti otherwise.
# NOTE(review): the engineered frame already displays a dti_mod column — check whether it
# covers this, and whether dti / dti_joint still exist at this point.
accepted_df['dti_final'] = accepted_df['dti_joint'].fillna(accepted_df['dti'])

In [None]:
# Scratch look at the distinct annual_inc values.
# NOTE(review): the engineered frame displays annual_inc_mod — confirm annual_inc is still present.
accepted_df["annual_inc"].unique()
