# Project: Automated Loan Approval System using ML Classifier

## Made By: Siddhartha Patra

In [2]:
# Installing Dependencies
#!pip install fancyimpute==0.7.0
#!pip install imblearn==0.0
#!pip install joblib==1.0.1
#!pip install matplotlib==3.3.4
#!pip install numpy==1.20.1
#!pip install pandas==1.3.5
#!pip install scikit_learn==0.24.1
#!pip install scipy==1.6.2
#!pip install seaborn==0.11.1
#!pip install six==1.15.0
#!pip install xgboost==1.5.1

In [3]:
from fancyimpute import KNN, SoftImpute

In [5]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
import statistics
from sklearn.model_selection import train_test_split,GridSearchCV,cross_val_score
from sklearn.preprocessing import LabelBinarizer,StandardScaler,OrdinalEncoder
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
from scipy.stats import boxcox
from sklearn.linear_model import LogisticRegression,RidgeClassifier, PassiveAggressiveClassifier
from sklearn import metrics
from sklearn import preprocessing
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from xgboost import plot_importance
from matplotlib import pyplot
from sklearn.naive_bayes import BernoulliNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
import joblib

%matplotlib inline

In [7]:
#!pip install imblearn==0.0

In [8]:
# Import necessary modules and perform modifications for compatibility
import operator

import six
import sys
sys.modules['sklearn.externals.six'] = six

import sklearn.neighbors._base
sys.modules['sklearn.neighbors.base'] = sklearn.neighbors._base

from sklearn.utils import _safe_indexing
sys.modules['sklearn.utils.safe_indexing'] = sklearn.utils._safe_indexing

In [9]:
# Import SMOTE from imblearn.over_sampling
from imblearn.over_sampling import SMOTE

In [10]:
# Importing Test Data
test = pd.read_csv("https://s3.amazonaws.com/hackerday.datascience/358/test_data.csv")

In [11]:
# List of categorical columns
cat_cols = ['Term','Years in current job','Home Ownership','Purpose']

# Encoding categorical variables using factorize function
for c in cat_cols:
    test[c] = pd.factorize(test[c])[0]

In [12]:
#Imputing missing data with soft impute
updated_test_data=pd.DataFrame(data=SoftImpute().fit_transform(test[test.columns[3:19]],), columns=test[test.columns[3:19]].columns, index=test.index)

[SoftImpute] Max Singular Value of X_init = 220732.319189
[SoftImpute] Iter 1: observed MAE=393.868714 rank=4
[SoftImpute] Iter 2: observed MAE=393.835770 rank=4
[SoftImpute] Iter 3: observed MAE=393.823069 rank=4
[SoftImpute] Iter 4: observed MAE=393.816892 rank=4
[SoftImpute] Iter 5: observed MAE=393.813120 rank=4
[SoftImpute] Iter 6: observed MAE=393.810394 rank=4
[SoftImpute] Iter 7: observed MAE=393.808205 rank=4
[SoftImpute] Iter 8: observed MAE=393.806333 rank=4
[SoftImpute] Iter 9: observed MAE=393.804668 rank=4
[SoftImpute] Iter 10: observed MAE=393.803152 rank=4
[SoftImpute] Iter 11: observed MAE=393.801748 rank=4
[SoftImpute] Iter 12: observed MAE=393.800437 rank=4
[SoftImpute] Iter 13: observed MAE=393.799203 rank=4
[SoftImpute] Iter 14: observed MAE=393.798037 rank=4
[SoftImpute] Iter 15: observed MAE=393.796931 rank=4
[SoftImpute] Iter 16: observed MAE=393.795881 rank=4
[SoftImpute] Iter 17: observed MAE=393.794882 rank=4
[SoftImpute] Iter 18: observed MAE=393.793931 rank

In [13]:
#Getting the dataset ready pd.get dummies function for dropping the dummy variables
test_data = pd.get_dummies(updated_test_data, drop_first=True)

In [20]:
# Loading the trained Gradient Boosting Classifier model
gbm_pickle = joblib.load('C:/Users/Siddhartha/Loan Eligibility Prediction using Gradient Boosting Classifier/Source Code/model/GBM_Model_version1.pkl')

In [22]:
# Making predictions using the loaded Gradient Boosting Classifier model
y_pred = gbm_pickle.predict(test_data)
print(y_pred)

[1 1 1 1 1 1 1 1 1]




In [24]:
# Obtaining class probabilities using the loaded Gradient Boosting Classifier model
y_pred = gbm_pickle.predict_proba(test_data)
print(y_pred)

[[0.44270122 0.55729878]
 [0.29316515 0.70683485]
 [0.42145883 0.57854117]
 [0.45898323 0.54101677]
 [0.24807445 0.75192555]
 [0.30432189 0.69567811]
 [0.45898323 0.54101677]
 [0.3622683  0.6377317 ]
 [0.45898323 0.54101677]]




In [25]:
# Converting class probabilities to predicted loan approval status
y_pred_1=np.where(y_pred ==0, 'Loan Approved', 'Loan Rejected')
print(y_pred_1)

[['Loan Rejected' 'Loan Rejected']
 ['Loan Rejected' 'Loan Rejected']
 ['Loan Rejected' 'Loan Rejected']
 ['Loan Rejected' 'Loan Rejected']
 ['Loan Rejected' 'Loan Rejected']
 ['Loan Rejected' 'Loan Rejected']
 ['Loan Rejected' 'Loan Rejected']
 ['Loan Rejected' 'Loan Rejected']
 ['Loan Rejected' 'Loan Rejected']]


In [26]:
# Adding the predicted loan status to the test dataset
test['Loan Status']=y_pred_1

# Saving the updated test dataset to a CSV file
test.to_csv('Output_Test.csv',index=False)

In [27]:
y_pred_1

array([['Loan Rejected', 'Loan Rejected'],
       ['Loan Rejected', 'Loan Rejected'],
       ['Loan Rejected', 'Loan Rejected'],
       ['Loan Rejected', 'Loan Rejected'],
       ['Loan Rejected', 'Loan Rejected'],
       ['Loan Rejected', 'Loan Rejected'],
       ['Loan Rejected', 'Loan Rejected'],
       ['Loan Rejected', 'Loan Rejected'],
       ['Loan Rejected', 'Loan Rejected']], dtype='<U13')