# Loan Approval Prediction Model

### Importing Necessary Libraries

In [1]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

### Reading the files

In [2]:
# Read the loan records from the CSV file (path is relative to the working directory).
loanData = pd.read_csv('LoanData.csv')

# Pre-process a deep copy so that `loanData` keeps the data exactly as it was read.
LoanPrep = loanData.copy(deep=True)

# Preview the first five rows.
LoanPrep.head(n=5)


Unnamed: 0,gender,married,ch,income,loanamt,status
0,Male,No,1.0,5849,,Y
1,Male,Yes,1.0,4583,128.0,N
2,Male,Yes,1.0,3000,66.0,Y
3,Male,Yes,1.0,2583,120.0,Y
4,Male,No,1.0,6000,141.0,Y


### Checking the number of missing values in each column

In [3]:
# Count the missing (NaN) entries of every column; summing a boolean mask
# column-wise gives the number of True values per column.
LoanPrep.isna().sum()

gender     13
married     3
ch         50
income      0
loanamt    22
status      0
dtype: int64

### Dropping all rows with incomplete data

In [4]:
# Keep only complete rows: a row is removed as soon as any of its values is missing.
LoanPrep = LoanPrep.dropna(axis=0, how='any')

# Sanity check: every per-column count of missing values should now be 0.
LoanPrep.isna().sum()

gender     0
married    0
ch         0
income     0
loanamt    0
status     0
dtype: int64

### One-hot encoding the categorical columns

In [5]:
# Replace the categorical columns with indicator (dummy) columns.
# drop_first=True drops the first level of each encoded column, so a
# two-valued column such as gender becomes the single column gender_Male.
LoanPrep = pd.get_dummies(data=LoanPrep, drop_first=True)

# Preview the encoded frame.
LoanPrep.head(n=5)

Unnamed: 0,ch,income,loanamt,gender_Male,married_Yes,status_Y
1,1.0,4583,128.0,True,True,False
2,1.0,3000,66.0,True,True,True
3,1.0,2583,120.0,True,True,True
4,1.0,6000,141.0,True,False,True
5,1.0,5417,267.0,True,True,True


### Standardizing the numeric columns (zero mean, unit variance)

In [6]:
# Numeric columns to standardize (zero mean, unit variance per column).
columns_to_scale = ['income', 'loanamt']

scaler = StandardScaler()

# Fit the scaler ONCE on both columns. StandardScaler standardizes every
# column independently, so the resulting values match fitting column by
# column (up to floating-point rounding), but the fitted `scaler` now keeps
# the mean/scale of BOTH columns and can be reused on new data. Re-fitting
# the same object per column would keep only the statistics of the last one.
scaled_values = scaler.fit_transform(LoanPrep[columns_to_scale])
for position, column in enumerate(columns_to_scale):
    # Assign a 1-D float array per column; this replaces the original values.
    LoanPrep[column] = scaled_values[:, position]

# NOTE(review): the scaler is fitted on every row, i.e. before the train/test
# split done later in this notebook, so the held-out rows contribute to the
# scaling statistics. Fitting on the training rows only would avoid this leak.

LoanPrep.head()

Unnamed: 0,ch,income,loanamt,gender_Male,married_Yes,status_Y
1,1.0,-0.128073,-0.19425,True,True,False
2,1.0,-0.392077,-0.971015,True,True,True
3,1.0,-0.461621,-0.294478,True,True,True
4,1.0,0.108246,-0.03138,True,False,True
5,1.0,0.011017,1.547205,True,True,True


### Splitting the data into Training and Testing Data

In [None]:
# Features: every column except the encoded target `status_Y`.
X = LoanPrep.drop(columns=['status_Y'])
# Target: the encoded `status_Y` column.
Y = LoanPrep['status_Y']

# Hold out 30% of the rows for testing. The fixed random_state makes the
# split repeatable; stratify=Y keeps the class proportions of Y in both parts.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=1235,
    stratify=Y,
)


The split is stratified on the target so that the training and test sets keep the same proportion of approved and rejected loans, instead of one of them being dominated by a single outcome.

### Initializing a Logistic Regressor and training it

In [8]:
# Logistic-regression classifier created with scikit-learn's default settings.
regressor = LogisticRegression()

# Train on the training portion only; the fitted estimator is the cell output.
regressor.fit(X=X_train, y=Y_train)


### Predicting the result of Test Data

In [9]:
# Predicted class (True/False) for every row of the held-out test features.
Y_predict = regressor.predict(X=X_test)
print(Y_predict)

[False  True  True  True  True  True False False  True False  True  True
  True  True  True  True  True  True  True  True  True False  True False
  True  True  True  True  True  True  True  True  True  True False  True
 False  True  True  True  True  True  True  True False  True  True  True
 False  True  True  True  True  True  True  True  True  True False  True
  True False False  True  True False  True  True  True  True  True  True
 False  True  True  True  True  True  True False  True False  True  True
 False  True  True  True  True  True  True  True  True  True  True  True
  True  True  True False  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True False
  True False  True  True  True  True False  True False  True False False
  True  True False  True  True  True  True  True  True  True  True  True
  True False False False  True False False  True  True  True  True  True
  True  True  True]


## Confusion Matrix
to analyze the accuracy of our predictions 

                
             Predicted Negatives  Predicted Positives

Actual Negatives:    True Negatives        False Positives

Actual Positives:    False Negatives       True Positives

In [10]:
# `confusion_matrix` is imported in the import cell at the top of the notebook.
# Rows are the actual classes and columns the predicted classes, in sorted
# label order (False, True):
#     [[true negatives, false positives],
#      [false negatives, true positives]]
cmf = confusion_matrix(Y_test, Y_predict)
print(cmf)

[[ 29  20]
 [  2 108]]


### Scoring the Predictions

In [11]:
# Score the fitted model on the held-out test rows and their true labels.
score = regressor.score(X=X_test, y=Y_test)
print(score)

0.8616352201257862


An accuracy of 86.16% is obtained on the test set with the logistic regression model.

Let's now save the test features and the predictions to CSV files.

In [12]:
# Test features, written without the index column.
test = pd.DataFrame(X_test)
test.to_csv('test.csv', index=False)

# Predictions as a single 'result' column, in the same row order as X_test,
# written with a header row and without the index column.
prediction = pd.DataFrame({'result': Y_predict})
prediction.to_csv('Predictions.csv', index=False, header=True)