# Importing All The Model Building Libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MaxAbsScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.model_selection import cross_val_score
from sklearn.metrics import f1_score
import pickle


# Reading Test and Train

In [2]:
train = pd.read_csv('train.csv')

In [3]:
test = pd.read_csv('test.csv')

In [4]:
train.head()

Unnamed: 0,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,0,0,0,0,0,5000,2541.0,151.0,480.0,1,0,0
1,0,0,0,1,0,2165,0.0,70.0,360.0,1,1,1
2,1,1,2,0,0,16666,0.0,275.0,360.0,1,2,1
3,0,0,0,0,0,5417,0.0,143.0,480.0,0,2,0
4,1,1,0,0,0,4583,5625.0,255.0,360.0,1,1,1


# Scaling the Data

In [5]:
scaler = MaxAbsScaler()

In [6]:
train_y = train.iloc[:,-1]

In [7]:
train_x = train.drop('Loan_Status',axis=1)

In [8]:
test_y = test.iloc[:,-1]

In [9]:
test_x = test.drop('Loan_Status',axis=1)

In [10]:
x = pd.concat([train_x,test_x],axis=0)
y = pd.concat([train_y,test_y],axis=0)

In [11]:
train_x = scaler.fit_transform(train_x)

In [12]:
test_x = scaler.transform(test_x)

## Decision Tree Model

In [13]:
def decisionTree(train_x,test_x,train_y,test_y):
    dt = DecisionTreeClassifier()
    dt.fit(train_x,train_y)
    y_pred = dt.predict(test_x)
    print("**** Decision Tree Classifier ****")
    print('Confusion Matrix')
    print(confusion_matrix(test_y,y_pred))
    print('Classification Report')
    print(classification_report(test_y,y_pred))

In [14]:
decisionTree(train_x,test_x,train_y,test_y)

**** Decision Tree Classifier ****
Confusion Matrix
[[23 15]
 [23 62]]
Classification Report
              precision    recall  f1-score   support

           0       0.50      0.61      0.55        38
           1       0.81      0.73      0.77        85

    accuracy                           0.69       123
   macro avg       0.65      0.67      0.66       123
weighted avg       0.71      0.69      0.70       123



## Random Forest Model

In [15]:
def randomForest(train_x,test_x,train_y,test_y):
    rf = RandomForestClassifier()
    rf.fit(train_x,train_y)
    y_pred = rf.predict(test_x)
    print("**** Random Forest Classifier ****")
    print('Confusion Matrix')
    print(confusion_matrix(test_y,y_pred))
    print('Classification Report')
    print(classification_report(test_y,y_pred))

In [16]:
randomForest(train_x,test_x,train_y,test_y)

**** Random Forest Classifier ****
Confusion Matrix
[[16 22]
 [ 4 81]]
Classification Report
              precision    recall  f1-score   support

           0       0.80      0.42      0.55        38
           1       0.79      0.95      0.86        85

    accuracy                           0.79       123
   macro avg       0.79      0.69      0.71       123
weighted avg       0.79      0.79      0.77       123



## KNN Model

In [17]:
def knn(train_x,test_x,train_y,test_y):
    knn = KNeighborsClassifier()
    knn.fit(train_x,train_y)
    y_pred = knn.predict(test_x)
    print("**** KNeighbour Classifier ****")
    print('Confusion Matrix')
    print(confusion_matrix(test_y,y_pred))
    print('Classification Report')
    print(classification_report(test_y,y_pred))

In [18]:
knn(train_x,test_x,train_y,test_y)

**** KNeighbour Classifier ****
Confusion Matrix
[[16 22]
 [ 2 83]]
Classification Report
              precision    recall  f1-score   support

           0       0.89      0.42      0.57        38
           1       0.79      0.98      0.87        85

    accuracy                           0.80       123
   macro avg       0.84      0.70      0.72       123
weighted avg       0.82      0.80      0.78       123



## Xgboost Model

In [19]:
def xgboost(train_x,test_x,train_y,test_y):
    xg = GradientBoostingClassifier()
    xg.fit(train_x,train_y)
    y_pred = xg.predict(test_x)
    print("**** Gradient Boosting Classifier ****")
    print('Confusion Matrix')
    print(confusion_matrix(test_y,y_pred))
    print('Classification Report')
    print(classification_report(test_y,y_pred))

In [20]:
xgboost(train_x,test_x,train_y,test_y)

**** Gradient Boosting Classifier ****
Confusion Matrix
[[16 22]
 [ 3 82]]
Classification Report
              precision    recall  f1-score   support

           0       0.84      0.42      0.56        38
           1       0.79      0.96      0.87        85

    accuracy                           0.80       123
   macro avg       0.82      0.69      0.71       123
weighted avg       0.81      0.80      0.77       123



## Evaluating Performance Of The Model

In [21]:
rf = RandomForestClassifier()
rf.fit(train_x,train_y)
ypred = rf.predict(test_x)


In [22]:
f1_score(ypred,test_y,average='weighted')

0.8372934697088907

In [23]:
cv = cross_val_score(rf,x,y,cv=5)

In [24]:
np.mean(cv)

0.7964014394242304

## Saving The Model

In [25]:
pickle.dump(rf,open('rdf.pkl','wb'))
pickle.dump(scaler,open('scale.pkl','wb'))