In [24]:
from fastai.tabular.all import *
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

In [25]:
df = pd.read_csv('../credit_risk_dataset.csv')
display(df)

Unnamed: 0,person_age,person_income,person_home_ownership,person_emp_length,loan_intent,loan_grade,loan_amnt,loan_int_rate,loan_status,loan_percent_income,cb_person_default_on_file,cb_person_cred_hist_length
0,22,59000,RENT,123.0,PERSONAL,D,35000,16.02,1,0.59,Y,3
1,21,9600,OWN,5.0,EDUCATION,B,1000,11.14,0,0.10,N,2
2,25,9600,MORTGAGE,1.0,MEDICAL,C,5500,12.87,1,0.57,N,3
3,23,65500,RENT,4.0,MEDICAL,C,35000,15.23,1,0.53,N,2
4,24,54400,RENT,8.0,MEDICAL,C,35000,14.27,1,0.55,Y,4
...,...,...,...,...,...,...,...,...,...,...,...,...
32576,57,53000,MORTGAGE,1.0,PERSONAL,C,5800,13.16,0,0.11,N,30
32577,54,120000,MORTGAGE,4.0,PERSONAL,A,17625,7.49,0,0.15,N,19
32578,65,76000,RENT,3.0,HOMEIMPROVEMENT,B,35000,10.99,1,0.46,N,28
32579,56,150000,MORTGAGE,5.0,PERSONAL,B,15000,11.48,0,0.10,N,26


In [26]:
categorical_cols = ['person_home_ownership', 'loan_intent']
continuous_cols = ['person_age', 'person_income', 'person_emp_length', 'loan_amnt', 'loan_int_rate', 'loan_percent_income', 'cb_person_cred_hist_length']
binary_col  = 'cb_person_default_on_file'

ordinal_col = 'loan_grade'
custom_order = ['A', 'B', 'C', 'D', 'E']

target = 'loan_status'


In [27]:
class EncodeBinary(Transform):
    def encodes(self, df: pd.DataFrame):
        df[binary_col] = df[binary_col].map({'Y': 1, 'N': 0})

class EncodeOrdinal(Transform):
    def encodes(self, df: pd.DataFrame):
        df[ordinal_col] = df[ordinal_col].map({val: i for i, val in enumerate(custom_order)})

In [28]:
procs = [Categorify, FillMissing, EncodeBinary(), EncodeOrdinal()] # Normalize


splits = RandomSplitter(valid_pct=0.2)(range_of(df))

to = TabularPandas(df, procs=procs, cat_names=categorical_cols, cont_names=continuous_cols, y_names=target, splits=splits)
dataloader = to.dataloaders(bs=64)
dataloader.show_batch()

Unnamed: 0,person_home_ownership,loan_intent,person_emp_length_na,loan_int_rate_na,person_age,person_income,person_emp_length,loan_amnt,loan_int_rate,loan_percent_income,cb_person_cred_hist_length,loan_status
0,RENT,PERSONAL,False,False,27.0,64000.0,0.0,10000.0,15.27,0.16,10.0,0.0
1,RENT,EDUCATION,False,False,28.0,42000.0,0.0,12000.0,11.11,0.29,9.0,0.0
2,MORTGAGE,MEDICAL,False,False,27.0,110000.0,3.0,4800.0,7.74,0.04,7.0,0.0
3,RENT,VENTURE,False,False,29.0,29000.0,3.0,5000.0,15.23,0.17,7.0,0.0
4,RENT,MEDICAL,False,True,23.0,78000.0,7.0,20000.0,10.99,0.26,4.0,0.0
5,MORTGAGE,DEBTCONSOLIDATION,False,False,25.0,60000.0,9.0,9600.0,6.03,0.16,3.0,0.0
6,MORTGAGE,DEBTCONSOLIDATION,False,False,23.0,39500.0,3.0,10000.0,14.22,0.25,2.0,0.0
7,MORTGAGE,HOMEIMPROVEMENT,False,False,25.0,63000.0,9.0,20000.0,8.49,0.32,2.0,0.0
8,RENT,PERSONAL,False,False,24.0,56000.0,8.0,5000.0,14.27,0.09,4.0,0.0
9,RENT,DEBTCONSOLIDATION,False,False,22.0,27000.0,5.0,2925.0,10.71,0.11,3.0,0.0


In [29]:
learn = tabular_learner(dataloader, layers=[200,100], metrics=accuracy)
learn.fit_one_cycle(5)

epoch,train_loss,valid_loss,accuracy,time
0,0.130274,0.11195,0.784837,00:05
1,0.107941,0.110288,0.784837,00:05
2,0.098812,0.095115,0.784837,00:05
3,0.092986,0.091768,0.784837,00:06
4,0.091899,0.092348,0.784837,00:05
