# Bank Loan Approval: Classification Problem

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read the loan-application spreadsheet into a DataFrame and preview the
# first five rows.
LOAN_DATA_PATH = "loan.xlsx"
loan_data = pd.read_excel(LOAN_DATA_PATH)
loan_data.head(n=5)

Unnamed: 0,Sex,Age,Time_at_address,Res_status,Telephone,Occupation,Job_status,Time_employed,Time_bank,Liab_ref,Acc_ref,Home_Expn,Balance,Decision
0,M,50.75,0.585,owner,given,unemploye,unemploye,0,0,f,given,145,0,reject
1,M,19.67,10.0,rent,not_given,labourer,governmen,0,0,t,given,140,0,reject
2,F,52.830002,15.0,owner,given,creative_,private_s,5,14,f,given,0,2200,accept
3,M,22.67,2.54,rent,not_given,creative_,governmen,2,0,f,given,0,0,accept
4,M,29.25,13.0,owner,given,driver,governmen,0,0,f,given,228,0,reject


Dependent variable: __Decision__ <br>
Independent variables: __Res_status, Occupation, Job_status, Liab_ref, Acc_ref__ (all categorical) <br>
The remaining columns shown above are not used in this notebook. <br>

In [3]:
# Keep only the five categorical predictors plus the target column.
categorical_features = ['Res_status', 'Occupation', 'Job_status', 'Liab_ref', 'Acc_ref']
target_column = 'Decision'
data2 = pd.DataFrame(loan_data, columns=categorical_features + [target_column])
data2.head(n=5)

Unnamed: 0,Res_status,Occupation,Job_status,Liab_ref,Acc_ref,Decision
0,owner,unemploye,unemploye,f,given,reject
1,rent,labourer,governmen,t,given,reject
2,owner,creative_,private_s,f,given,accept
3,rent,creative_,governmen,f,given,accept
4,owner,driver,governmen,f,given,reject


In [4]:
# Report (rows, columns) of the selected subset.
rows_and_columns = data2.shape
print("Data2 shape: ", rows_and_columns)

Data2 shape:  (429, 6)


### Adding custom test data to check our model:

In [5]:
### Sample data:
# Two hand-written applicants used to sanity-check the fitted models. They
# carry no 'Decision' value, so that column is NaN for these rows after the
# concatenation below.
Tdata = {'Res_status':['owner','rent'], 'Occupation':['creative_','creative_'], 'Job_status':['governmen','governmen'],
         'Liab_ref':['f','f'], 'Acc_ref':['given','given']}
df = pd.DataFrame(data=Tdata)

### Appending test samples in the dataset
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0;
# pd.concat is the supported replacement and produces the same frame here.
# Slicing data2 back to the row count of the loaded spreadsheet first keeps
# this cell idempotent: re-running it does not stack extra copies of the
# sample rows onto data2.
data2 = pd.concat([data2.iloc[:len(loan_data)], df], ignore_index=True)

print("Data2 shape after appending test samples: ",data2.shape)

Data2 shape after appending test samples:  (431, 6)


In [6]:
### Splitting dependent & independent variables
# Target: the 'Decision' column. Features: every remaining column.
Y = data2['Decision']  # dependent variable
feature_frame = data2.drop(columns=['Decision'])  # independent variables

### Creating dummy variables (Independent variables contains categorical data)
# One indicator column is produced per (feature, category) pair.
X = pd.get_dummies(feature_frame)

In [7]:
### Splitting train & test samples:
# The rows loaded from the spreadsheet come first; the hand-written sample
# rows were appended after them. Derive the boundary from the loaded data
# instead of hard-coding the row count (429), so the split stays correct if
# the spreadsheet changes.
n_train = len(loan_data)

# .iloc makes the positional (row-number) slicing explicit.
X_train = X.iloc[:n_train]
X_test = X.iloc[n_train:]
Y_train = Y.iloc[:n_train]
Y_test = Y.iloc[n_train:]  # no Decision was supplied for the sample rows

### Note: Because every feature is now a 0/1 dummy variable, all features are already on the same scale, so no further normalization is needed.

### Approach:
1. Naive Bayes classifier
2. Logistic Regression

## Naive Bayes classifier:

In [12]:
from sklearn.naive_bayes import GaussianNB

# Build the Gaussian Naive Bayes classifier and train it on the training rows
# (fit returns the estimator itself, so the two steps can be chained).
# NOTE(review): the features are 0/1 dummy columns; a Bernoulli or categorical
# Naive Bayes variant may suit them better than a Gaussian one - worth checking.
gnb = GaussianNB().fit(X_train, Y_train)

# Classify the hand-written sample applicants.
y_pred = gnb.predict(X_test)

print("For Input:")
sample_descriptions = (
    "1. owner, creative_, governmen, f, given:  ",
    "2. rent, creative_, governmen, f, given:  ",
)
for position, description in enumerate(sample_descriptions):
    print(description, y_pred[position])


For Input:
1. owner, creative_, governmen, f, given:   accept
2. rent, creative_, governmen, f, given:   accept


In [9]:
### Metric:
# Mean accuracy on the same rows the model was fitted on.
# NOTE(review): this is a training-set score only; no held-out labelled data
# is evaluated in this notebook.
nb_train_accuracy = gnb.score(X_train, Y_train)
print('Training set score of this model: {:.4f}'.format(nb_train_accuracy))

Training set score of this model: 0.5524


## Logistic Regression:

In [13]:
from sklearn.linear_model import LogisticRegression

# Build the logistic-regression classifier with a fixed random_state and
# train it on the training rows (fit returns the estimator itself).
lrdg = LogisticRegression(random_state=0).fit(X_train, Y_train)

# Classify the hand-written sample applicants.
Y_pred = lrdg.predict(X_test)

print("For Input:")
sample_descriptions = (
    "1. owner, creative_, governmen, f, given:  ",
    "2. rent, creative_, governmen, f, given:  ",
)
for position, description in enumerate(sample_descriptions):
    print(description, Y_pred[position])


For Input:
1. owner, creative_, governmen, f, given:   accept
2. rent, creative_, governmen, f, given:   reject


In [11]:
# Mean accuracy on the same rows the model was fitted on.
# NOTE(review): this is a training-set score only; no held-out labelled data
# is evaluated in this notebook.
lr_train_accuracy = lrdg.score(X_train, Y_train)
print('Training set score of this model: {:.4f}'.format(lr_train_accuracy))

Training set score of this model: 0.6807
