# Application 3: Loan Eligibility Predictor

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn import datasets, preprocessing 
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import *

import warnings
warnings.filterwarnings('ignore')

In [2]:
import ipywidgets as widgets
from IPython.display import clear_output

In [3]:
# Read the loan-application records from the CSV that sits next to the notebook.
DATA_PATH = 'data.csv'
data = pd.read_csv(DATA_PATH)

In [4]:
# Columns whose missing entries are replaced by the column's most frequent value.
MODE_IMPUTED_COLUMNS = [
    'Gender',
    'Married',
    'Dependents',
    'Self_Employed',
    'Credit_History',
    'Loan_Amount_Term',
]

# Fill every listed column with its own mode in a single pass and rebind `data`.
# The previous form, data[col].fillna(..., inplace=True), mutates a Series that
# was selected out of the frame; pandas deprecates that chained in-place
# pattern and, with copy-on-write enabled, it leaves `data` unchanged.
data = data.fillna(
    {column: data[column].mode()[0] for column in MODE_IMPUTED_COLUMNS}
)

In [5]:
# Replace missing loan amounts with the column median. The filled Series is
# assigned back explicitly: calling fillna(inplace=True) on data['LoanAmount']
# is chained in-place assignment, which pandas deprecates and which does not
# update `data` when copy-on-write is enabled.
data['LoanAmount'] = data['LoanAmount'].fillna(data['LoanAmount'].median())

In [6]:
# Natural log of the loan amount, stored as an additional feature column.
data['LoanAmount_log'] = data['LoanAmount'].pipe(np.log)

In [7]:
# One estimator per algorithm offered in the notebook. The individual names are
# kept at module level because the prediction callback looks them up directly.
lr_classifier = LogisticRegression()
lda_classifier = LinearDiscriminantAnalysis()
svc_classifier = SVC()
kn_classifier = KNeighborsClassifier()
gnb_classifier = GaussianNB()
# The tree-based models take a fixed seed so repeated runs build the same trees.
dt_classifier = DecisionTreeClassifier(random_state=43)
rf_classifier = RandomForestClassifier(random_state=43)

# Same estimators, collected in the order they are trained.
classifiers = [
    lr_classifier,
    lda_classifier,
    svc_classifier,
    kn_classifier,
    gnb_classifier,
    dt_classifier,
    rf_classifier,
]

In [8]:
# Combined income of the applicant and the co-applicant.
data['Total_Income'] = data['ApplicantIncome'].add(data['CoapplicantIncome'])

In [9]:
# Natural log of the combined income.
data = data.assign(Total_Income_log=np.log(data['Total_Income']))

In [10]:
# Loan amount divided by the loan term, stored as 'EMI'
# (presumably the per-period installment -- confirm the term's unit).
data['EMI'] = data['LoanAmount'].div(data['Loan_Amount_Term'])

In [11]:
# Natural log of the EMI feature.
data['EMI_log'] = data['EMI'].pipe(np.log)

In [12]:
# Income left over after the installment. EMI is scaled by 1000 so both terms
# share a unit (assumes LoanAmount is recorded in thousands -- TODO confirm).
data['Balance Income'] = data['Total_Income'].sub(data['EMI'].mul(1000))


In [13]:
# Natural log of the balance income, defined only for strictly positive balances.
# np.log returns NaN for a negative balance but -inf for a zero balance, and
# fillna does not replace infinities. Masking every non-positive balance to NaN
# before taking the log keeps -inf out of the column, so all undefined entries
# are plain NaN that the later median imputation of this column can fill.
data['Balance Income_log'] = np.log(
    data['Balance Income'].where(data['Balance Income'] > 0)
)

In [14]:
# The raw inputs are dropped now that the engineered features derived from them exist.
data = data.drop(
    columns=[
        'ApplicantIncome',
        'CoapplicantIncome',
        'LoanAmount',
        'Loan_Amount_Term',
    ]
)

In [15]:
# Rows whose balance-income log is undefined (NaN) receive the column median.
# The result is assigned back explicitly: fillna(inplace=True) on a Series
# selected from the frame is chained in-place assignment, which pandas
# deprecates and which does not update `data` when copy-on-write is enabled.
data['Balance Income_log'] = data['Balance Income_log'].fillna(
    data['Balance Income_log'].median()
)

In [16]:
# Feature matrix: every column except the target. `columns=` is used because
# pandas 2.0 removed the positional `axis` argument of DataFrame.drop, so the
# old call data.drop('Loan_Status', 1) raises TypeError on current versions.
# NOTE(review): any identifier column still present in `data` would be one-hot
# encoded by the following get_dummies step -- confirm the CSV has none.
X = data.drop(columns='Loan_Status')

# Target vector: the loan decision recorded for each application.
y = data['Loan_Status']

In [17]:
# One-hot encode the non-numeric feature columns; numeric columns pass through unchanged.
X = pd.get_dummies(data=X)

In [18]:
# Hold out 30% of the rows for testing; the fixed random_state makes the
# shuffle, and therefore the split, repeatable between runs.
# train_test_split is imported once in the notebook's import cell, so the
# duplicate import that used to sit here has been removed.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

In [19]:
# Train each candidate estimator on the same training split, printing its
# class first so the output shows which model is being fitted.
for model in classifiers:
    print(type(model))
    model.fit(X_train, Y_train)

<class 'sklearn.linear_model._logistic.LogisticRegression'>
<class 'sklearn.discriminant_analysis.LinearDiscriminantAnalysis'>
<class 'sklearn.svm._classes.SVC'>
<class 'sklearn.neighbors._classification.KNeighborsClassifier'>
<class 'sklearn.naive_bayes.GaussianNB'>
<class 'sklearn.tree._classes.DecisionTreeClassifier'>
<class 'sklearn.ensemble._forest.RandomForestClassifier'>


In [20]:
# --- Categorical applicant details: one dropdown per field -------------------
Gender = widgets.Dropdown(
    description="Gender", options=['Male', 'Female'], disabled=False
)
Married = widgets.Dropdown(
    description="Married", options=['Yes', 'No'], disabled=False
)
Dependents = widgets.Dropdown(
    description="Dependents", options=['0', '1', '2', '3+'], disabled=False
)
Education = widgets.Dropdown(
    description="Education", options=['Graduate', 'Not Graduate'], disabled=False
)
Self_Employed = widgets.Dropdown(
    description="Self_Employed", options=['Yes', 'No'], disabled=False
)

# --- Numeric details: free-text boxes, converted to float when predicting ----
ApplicantIncome = widgets.Text(description="ApplicantIncome")
CoapplicantIncome = widgets.Text(description="CoapplicantIncome")
LoanAmount = widgets.Text(description="LoanAmount")
Loan_Amount_Term = widgets.Text(description="Loan_Amount_Term")

# --- Remaining categorical fields --------------------------------------------
# Credit history is offered as the strings '0' / '1'; the prediction callback
# converts the selection to float.
Credit_History = widgets.Dropdown(
    description="Credit_History", options=['0', '1'], disabled=False
)
Property_Area = widgets.Dropdown(
    description="Property_Area", options=['Semiurban', 'Urban', 'Rural'], disabled=False
)

In [21]:
# Render the application form: a prompt followed by every input widget,
# in the order the fields should be filled in.
print("Please enter the loan application details:")
display(
    Gender,
    Married,
    Dependents,
    Education,
    Self_Employed,
    ApplicantIncome,
    CoapplicantIncome,
    LoanAmount,
    Loan_Amount_Term,
    Credit_History,
    Property_Area,
)

Dropdown(description='Gender', options=('Male', 'Female'), value='Male')

Dropdown(description='Married', options=('Yes', 'No'), value='Yes')

Dropdown(description='Dependents', options=('0', '1', '2', '3+'), value='0')

Dropdown(description='Education', options=('Graduate', 'Not Graduate'), value='Graduate')

Dropdown(description='Self_Employed', options=('Yes', 'No'), value='Yes')

Text(value='', description='ApplicantIncome')

Text(value='', description='CoapplicantIncome')

Text(value='', description='LoanAmount')

Text(value='', description='Loan_Amount_Term')

Dropdown(description='Credit_History', options=('0', '1'), value='0')

Dropdown(description='Property_Area', options=('Semiurban', 'Urban', 'Rural'), value='Semiurban')

In [22]:
# (label shown to the user, short code read by the prediction callback)
ALGORITHM_CHOICES = [
    ('Logistic Regression', 'LR'),
    ('Linear Discriminant Analysis ', 'LDA'),
    ('Support Vector Machines', 'SVM'),
    ('K-Nearest Neighbors', 'KN'),
    ('Naive Bayes', 'NB'),
    ('Decision Trees', 'DT'),
    ('Random Forest', 'RF'),
]

# Dropdown whose .value is the short code of the chosen algorithm.
algorithm = widgets.Dropdown(options=ALGORITHM_CHOICES, disabled=False)

print('Select Algorithm')
display(algorithm)

Select Algorithm


Dropdown(options=(('Logistic Regression', 'LR'), ('Linear Discriminant Analysis ', 'LDA'), ('Support Vector Ma…

In [23]:
# Output area that the prediction callback prints into.
prediction = widgets.Output()

# Button that triggers a prediction for the values currently in the form.
button_predict = widgets.Button(description='Predict')

def _read_application_form():
    """Collect the current widget values as one raw loan-application record.

    Returns:
        dict: the categorical selections as strings plus the numeric fields
        (both incomes, loan amount, loan term, credit history) as floats.

    Raises:
        ValueError: if a numeric field is blank, not a finite number, or would
            leave an engineered feature undefined (total income, loan amount
            or loan term that is not greater than zero).
    """
    application = {
        'Gender': Gender.value,
        'Married': Married.value,
        'Dependents': Dependents.value,
        'Education': Education.value,
        'Self_Employed': Self_Employed.value,
        'Property_Area': Property_Area.value,
    }

    numeric_widgets = {
        'ApplicantIncome': ApplicantIncome,
        'CoapplicantIncome': CoapplicantIncome,
        'LoanAmount': LoanAmount,
        'Loan_Amount_Term': Loan_Amount_Term,
        'Credit_History': Credit_History,
    }
    for field, widget in numeric_widgets.items():
        try:
            number = float(widget.value)
        except (TypeError, ValueError):
            raise ValueError(f'{field} must be a number (got {widget.value!r})') from None
        # float() accepts 'nan' and 'inf'; neither is a usable amount.
        if not np.isfinite(number):
            raise ValueError(f'{field} must be a finite number (got {widget.value!r})')
        application[field] = number

    # The logs of total income and EMI, and the division by the loan term,
    # are only defined for strictly positive values.
    if application['ApplicantIncome'] + application['CoapplicantIncome'] <= 0:
        raise ValueError('ApplicantIncome + CoapplicantIncome must be greater than 0')
    for field in ('LoanAmount', 'Loan_Amount_Term'):
        if application[field] <= 0:
            raise ValueError(f'{field} must be greater than 0')

    return application


def _build_feature_row(application, feature_columns, balance_log_fill):
    """Turn one raw application record into a single-row, model-ready frame.

    Applies the same feature engineering as the training data: total income,
    EMI, balance income, their logs, removal of the raw amount columns and
    one-hot encoding.

    Args:
        application: dict as returned by ``_read_application_form``.
        feature_columns: column labels, in order, the fitted models expect.
        balance_log_fill: value used for 'Balance Income_log' when the balance
            income is not positive and its log is therefore undefined.

    Returns:
        pandas.DataFrame: one row whose columns are exactly ``feature_columns``.
    """
    total_income = application['ApplicantIncome'] + application['CoapplicantIncome']
    emi = application['LoanAmount'] / application['Loan_Amount_Term']
    # EMI is scaled by 1000 exactly as in the training-data feature engineering.
    balance_income = total_income - emi * 1000

    if balance_income > 0:
        balance_income_log = np.log(balance_income)
    else:
        # Training rows with an undefined log were imputed rather than dropped;
        # do the same here instead of handing NaN to the estimator.
        balance_income_log = balance_log_fill

    record = {
        'Gender': application['Gender'],
        'Married': application['Married'],
        'Dependents': application['Dependents'],
        'Education': application['Education'],
        'Self_Employed': application['Self_Employed'],
        'Credit_History': application['Credit_History'],
        'Property_Area': application['Property_Area'],
        'Total_Income': total_income,
        'Total_Income_log': np.log(total_income),
        'EMI': emi,
        'EMI_log': np.log(emi),
        'Balance Income': balance_income,
        'Balance Income_log': balance_income_log,
    }
    encoded = pd.get_dummies(pd.DataFrame([record]))

    # A single row only yields the dummy columns for the categories it contains;
    # reindex adds the missing training columns as 0 and enforces training order.
    return encoded.reindex(columns=feature_columns, fill_value=0)


def on_button_predict_clicked(b):
    """Predict the loan status for the values currently entered in the form.

    Reads the input widgets, rebuilds the engineered features, and prints the
    selected algorithm's prediction into the ``prediction`` output widget.
    Invalid input is reported there as a message instead of raising.

    Args:
        b: the clicked button, supplied by ipywidgets; not used.
    """
    fitted_models = {
        'LR': lr_classifier,
        'LDA': lda_classifier,
        'SVM': svc_classifier,
        'KN': kn_classifier,
        'NB': gnb_classifier,
        'DT': dt_classifier,
        'RF': rf_classifier,
    }
    selected_algorithm = algorithm.value

    with prediction:
        clear_output(True)

        try:
            application = _read_application_form()
        except ValueError as error:
            print(f'Invalid input: {error}')
            return

        user_input = _build_feature_row(
            application,
            X_train.columns,
            # Same statistic the training data used to impute this column.
            data['Balance Income_log'].median(),
        )

        print(f'Selected Algorithm = {selected_algorithm}')
        print(fitted_models[selected_algorithm].predict(user_input)[0])
        
# Run the prediction callback every time the Predict button is clicked.
button_predict.on_click(callback=on_button_predict_clicked)

In [24]:
# Show the Predict button with the output area for its result directly beneath it.
display(button_predict, prediction)

Button(description='Predict', style=ButtonStyle())

Output()