# **Beginners Loan prediction for Machine Learning**
Loan prediction Dataset is used to predict Loan Eligibility for "Dream Housing Finance company" 

## Step 1 - Importing Library Files

In [1]:
import pandas as pd                                   #working with our data
from sklearn.linear_model import LogisticRegression   #Model Algorithm
from sklearn.model_selection import train_test_split  #Dividing the dataset
from sklearn.preprocessing import LabelEncoder        #Encoding categorical variables
from sklearn.metrics import accuracy_score            #Accuracy Score
from sklearn.metrics import classification_report     #Confusion matrix

## Step 2 - Importing Dataset

In [2]:
data=pd.read_csv("train_ctrUa4K.csv")
data.head()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,Male,No,0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y
1,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y


In [3]:
data.columns 

Index(['Loan_ID', 'Gender', 'Married', 'Dependents', 'Education',
       'Self_Employed', 'ApplicantIncome', 'CoapplicantIncome', 'LoanAmount',
       'Loan_Amount_Term', 'Credit_History', 'Property_Area', 'Loan_Status'],
      dtype='object')

### Encoding the Categorical Target Variable using LabelEncoder

In [4]:
encode=LabelEncoder()

In [5]:
data['Loan_Status']=encode.fit_transform(data['Loan_Status'])

## Step 3 - Data Preprocessing

In [6]:
#Checking for missing values
data.isnull().sum()

Loan_ID               0
Gender               13
Married               3
Dependents           15
Education             0
Self_Employed        32
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount           22
Loan_Amount_Term     14
Credit_History       50
Property_Area         0
Loan_Status           0
dtype: int64

In [7]:
# drop the null values
data.dropna(how='any',inplace=True)

In [8]:
data.head()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
1,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,0
2,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,1
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,1
4,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,1
5,LP001011,Male,Yes,2,Graduate,Yes,5417,4196.0,267.0,360.0,1.0,Urban,1


## Step 4 - Dividing the dataset into Train & Test Sets

In [9]:
train,test=train_test_split(data, test_size=.2,random_state=0)

In [10]:
print('Shape of Training set: ', train.shape)
print('Shape of Testing set: ', test.shape)

Shape of Training set:  (384, 13)
Shape of Testing set:  (96, 13)


### Seperating the Features & Target Variables for Training set

In [11]:
train_x=train.drop(columns=['Loan_ID','Loan_Status'],axis=1)
train_y=train['Loan_Status']

### Seperating the Features & Target Variables for Test set

In [12]:
test_x=test.drop(columns=['Loan_ID','Loan_Status'],axis=1)
test_y=test['Loan_Status']

### Encoding the dataset

In [13]:
train_x = pd.get_dummies(train_x)
test_x  = pd.get_dummies(test_x)

In [14]:
print('shape of training data : ',train_x.shape)
print('shape of testing data : ',test_x.shape)

shape of training data :  (384, 20)
shape of testing data :  (96, 20)


## Step 5 - Training our Model

In [15]:
model=LogisticRegression()

In [16]:
model.fit(train_x,train_y)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


LogisticRegression()

## Step 6 - Using our model to predict data

In [17]:
predict=model.predict(test_x)

In [18]:
print('Predicted the species on test data: ',encode.inverse_transform(predict))

Predicted the species on test data:  ['Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'N' 'Y' 'Y' 'Y' 'Y' 'Y'
 'N' 'Y' 'Y' 'Y' 'Y' 'Y' 'N' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y'
 'Y' 'Y' 'Y' 'N' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y'
 'Y' 'Y' 'N' 'N' 'N' 'N' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y'
 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'Y' 'N' 'Y' 'Y' 'N' 'Y' 'Y' 'N'
 'Y' 'N' 'N' 'Y' 'Y' 'Y']


## Step 7 - Evaluation Metrics to Determine the quality of our Model

In [19]:
accuracy_score(test_y,predict)

0.7291666666666666

In [20]:
print(classification_report(test_y,predict))

              precision    recall  f1-score   support

           0       0.85      0.31      0.46        35
           1       0.71      0.97      0.82        61

    accuracy                           0.73        96
   macro avg       0.78      0.64      0.64        96
weighted avg       0.76      0.73      0.69        96

