In [1]:
# Import Required Libraries
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Load The Train and Test Dataset
train_df = pd.read_csv("Dataset/train_data.csv")
test_df = pd.read_csv("Dataset/test_data.csv")

In [3]:
train_df.drop(["Loan_ID","Dependents"],axis=1,inplace=True)

In [4]:
test_df.drop(["Loan_ID","Dependents"],axis=1,inplace=True)

In [5]:
train_df

Unnamed: 0,Gender,Married,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,Male,No,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y
1,Male,Yes,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,Male,Yes,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,Male,Yes,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,Male,No,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y
...,...,...,...,...,...,...,...,...,...,...,...
609,Female,No,Graduate,No,2900,0.0,71.0,360.0,1.0,Rural,Y
610,Male,Yes,Graduate,No,4106,0.0,40.0,180.0,1.0,Rural,Y
611,Male,Yes,Graduate,No,8072,240.0,253.0,360.0,1.0,Urban,Y
612,Male,Yes,Graduate,No,7583,0.0,187.0,360.0,1.0,Urban,Y


In [6]:
# Data Exploration

In [7]:
train_df.shape

(614, 11)

In [8]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 614 entries, 0 to 613
Data columns (total 11 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Gender             601 non-null    object 
 1   Married            611 non-null    object 
 2   Education          614 non-null    object 
 3   Self_Employed      582 non-null    object 
 4   ApplicantIncome    614 non-null    int64  
 5   CoapplicantIncome  614 non-null    float64
 6   LoanAmount         592 non-null    float64
 7   Loan_Amount_Term   600 non-null    float64
 8   Credit_History     564 non-null    float64
 9   Property_Area      614 non-null    object 
 10  Loan_Status        614 non-null    object 
dtypes: float64(4), int64(1), object(6)
memory usage: 52.9+ KB


In [9]:
train_df.isnull().sum()

Gender               13
Married               3
Education             0
Self_Employed        32
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount           22
Loan_Amount_Term     14
Credit_History       50
Property_Area         0
Loan_Status           0
dtype: int64

In [10]:
test_df.isnull().sum()

Gender               11
Married               0
Education             0
Self_Employed        23
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount            5
Loan_Amount_Term      6
Credit_History       29
Property_Area         0
dtype: int64

In [11]:
# Handling Categorical Missing values of train_df
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(strategy="most_frequent")
train_df[["Gender","Married","Self_Employed"]]=imputer.fit_transform(train_df[["Gender","Married","Self_Employed"]])

In [12]:
train_df.isnull().sum()

Gender                0
Married               0
Education             0
Self_Employed         0
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount           22
Loan_Amount_Term     14
Credit_History       50
Property_Area         0
Loan_Status           0
dtype: int64

In [13]:
# Handling Numerical Missing values of train_df
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(strategy="mean")
train_df[["LoanAmount","Loan_Amount_Term","Credit_History"]]=imputer.fit_transform(train_df[["LoanAmount","Loan_Amount_Term","Credit_History"]])

In [14]:
train_df.isnull().sum().sum()

0

In [15]:
# Encoding the train dataset
from sklearn.preprocessing import OrdinalEncoder
ordinal_enc = OrdinalEncoder()
train_df[["Gender","Married","Education","Self_Employed","Property_Area","Loan_Status"]]=ordinal_enc.fit_transform(train_df[["Gender","Married","Education","Self_Employed","Property_Area","Loan_Status"]])

In [16]:
train_df

Unnamed: 0,Gender,Married,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,1.0,0.0,0.0,0.0,5849,0.0,146.412162,360.0,1.0,2.0,1.0
1,1.0,1.0,0.0,0.0,4583,1508.0,128.000000,360.0,1.0,0.0,0.0
2,1.0,1.0,0.0,1.0,3000,0.0,66.000000,360.0,1.0,2.0,1.0
3,1.0,1.0,1.0,0.0,2583,2358.0,120.000000,360.0,1.0,2.0,1.0
4,1.0,0.0,0.0,0.0,6000,0.0,141.000000,360.0,1.0,2.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...
609,0.0,0.0,0.0,0.0,2900,0.0,71.000000,360.0,1.0,0.0,1.0
610,1.0,1.0,0.0,0.0,4106,0.0,40.000000,180.0,1.0,0.0,1.0
611,1.0,1.0,0.0,0.0,8072,240.0,253.000000,360.0,1.0,2.0,1.0
612,1.0,1.0,0.0,0.0,7583,0.0,187.000000,360.0,1.0,2.0,1.0


In [17]:
test_df.isnull().sum()

Gender               11
Married               0
Education             0
Self_Employed        23
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount            5
Loan_Amount_Term      6
Credit_History       29
Property_Area         0
dtype: int64

In [18]:
# Handling Categorical Missing values of test_df
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(strategy="most_frequent")
test_df[["Gender","Self_Employed"]]=imputer.fit_transform(test_df[["Gender","Self_Employed"]])

In [19]:
# Handling Numerical Missing values of test_df
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(strategy="mean")
test_df[["LoanAmount","Loan_Amount_Term","Credit_History"]]=imputer.fit_transform(test_df[["LoanAmount","Loan_Amount_Term","Credit_History"]])

In [20]:
test_df.isnull().sum().sum()

0

In [22]:
# Split the data into train and test data
X=train_df.drop("Loan_Status",axis = 1)
y=train_df["Loan_Status"]
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

In [23]:
# Import required Algorithms
from sklearn.tree import DecisionTreeClassifier,ExtraTreeClassifier
from sklearn.ensemble import RandomForestClassifier,AdaBoostClassifier,GradientBoostingClassifier,BaggingClassifier,VotingClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier

In [24]:
models = [DecisionTreeClassifier(),
          ExtraTreeClassifier(),
          RandomForestClassifier(),
          AdaBoostClassifier(),
          GradientBoostingClassifier(),
          BaggingClassifier(),
          XGBClassifier(),
          CatBoostClassifier()]

In [25]:
from sklearn.metrics import accuracy_score

In [30]:
for model in models:
    model.fit(X_train,y_train)
    y_pred = model.predict(X_test)
    print(model)
    print("Accuracy_Score :",accuracy_score(y_test,y_pred))
    print('\n')

DecisionTreeClassifier()
Accuracy_Score : 0.7096774193548387


ExtraTreeClassifier()
Accuracy_Score : 0.7258064516129032


RandomForestClassifier()
Accuracy_Score : 0.7419354838709677


AdaBoostClassifier()
Accuracy_Score : 0.7419354838709677


GradientBoostingClassifier()
Accuracy_Score : 0.7258064516129032


BaggingClassifier()
Accuracy_Score : 0.6774193548387096


XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, gpu_id=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
 

220:	learn: 0.4110778	total: 468ms	remaining: 1.65s
221:	learn: 0.4108288	total: 470ms	remaining: 1.65s
222:	learn: 0.4103684	total: 472ms	remaining: 1.65s
223:	learn: 0.4099432	total: 475ms	remaining: 1.64s
224:	learn: 0.4096501	total: 477ms	remaining: 1.64s
225:	learn: 0.4093443	total: 479ms	remaining: 1.64s
226:	learn: 0.4090308	total: 481ms	remaining: 1.64s
227:	learn: 0.4086766	total: 484ms	remaining: 1.64s
228:	learn: 0.4081836	total: 486ms	remaining: 1.64s
229:	learn: 0.4077962	total: 489ms	remaining: 1.64s
230:	learn: 0.4073740	total: 491ms	remaining: 1.63s
231:	learn: 0.4070664	total: 494ms	remaining: 1.63s
232:	learn: 0.4067233	total: 497ms	remaining: 1.64s
233:	learn: 0.4063790	total: 500ms	remaining: 1.64s
234:	learn: 0.4060343	total: 503ms	remaining: 1.64s
235:	learn: 0.4056897	total: 505ms	remaining: 1.63s
236:	learn: 0.4053367	total: 507ms	remaining: 1.63s
237:	learn: 0.4049202	total: 509ms	remaining: 1.63s
238:	learn: 0.4044484	total: 512ms	remaining: 1.63s
239:	learn: 

388:	learn: 0.3609073	total: 851ms	remaining: 1.34s
389:	learn: 0.3606003	total: 854ms	remaining: 1.34s
390:	learn: 0.3603847	total: 857ms	remaining: 1.33s
391:	learn: 0.3601432	total: 859ms	remaining: 1.33s
392:	learn: 0.3599151	total: 862ms	remaining: 1.33s
393:	learn: 0.3596889	total: 864ms	remaining: 1.33s
394:	learn: 0.3594323	total: 867ms	remaining: 1.33s
395:	learn: 0.3589017	total: 870ms	remaining: 1.33s
396:	learn: 0.3587968	total: 872ms	remaining: 1.32s
397:	learn: 0.3584773	total: 874ms	remaining: 1.32s
398:	learn: 0.3581854	total: 877ms	remaining: 1.32s
399:	learn: 0.3578813	total: 879ms	remaining: 1.32s
400:	learn: 0.3575808	total: 881ms	remaining: 1.31s
401:	learn: 0.3572653	total: 883ms	remaining: 1.31s
402:	learn: 0.3570035	total: 885ms	remaining: 1.31s
403:	learn: 0.3567041	total: 887ms	remaining: 1.31s
404:	learn: 0.3565371	total: 889ms	remaining: 1.31s
405:	learn: 0.3559754	total: 892ms	remaining: 1.3s
406:	learn: 0.3556621	total: 894ms	remaining: 1.3s
407:	learn: 0.

569:	learn: 0.3214352	total: 1.24s	remaining: 933ms
570:	learn: 0.3213507	total: 1.24s	remaining: 931ms
571:	learn: 0.3210747	total: 1.24s	remaining: 929ms
572:	learn: 0.3206617	total: 1.24s	remaining: 927ms
573:	learn: 0.3205124	total: 1.25s	remaining: 925ms
574:	learn: 0.3202753	total: 1.25s	remaining: 923ms
575:	learn: 0.3200880	total: 1.25s	remaining: 921ms
576:	learn: 0.3198676	total: 1.25s	remaining: 919ms
577:	learn: 0.3196553	total: 1.25s	remaining: 917ms
578:	learn: 0.3195372	total: 1.26s	remaining: 916ms
579:	learn: 0.3193344	total: 1.26s	remaining: 914ms
580:	learn: 0.3190209	total: 1.26s	remaining: 911ms
581:	learn: 0.3188452	total: 1.26s	remaining: 909ms
582:	learn: 0.3185310	total: 1.27s	remaining: 907ms
583:	learn: 0.3183060	total: 1.27s	remaining: 905ms
584:	learn: 0.3181500	total: 1.27s	remaining: 903ms
585:	learn: 0.3178231	total: 1.27s	remaining: 900ms
586:	learn: 0.3176641	total: 1.28s	remaining: 898ms
587:	learn: 0.3173232	total: 1.28s	remaining: 896ms
588:	learn: 

745:	learn: 0.2873937	total: 1.62s	remaining: 553ms
746:	learn: 0.2871864	total: 1.63s	remaining: 551ms
747:	learn: 0.2871301	total: 1.63s	remaining: 549ms
748:	learn: 0.2870160	total: 1.63s	remaining: 547ms
749:	learn: 0.2866883	total: 1.63s	remaining: 545ms
750:	learn: 0.2865668	total: 1.64s	remaining: 542ms
751:	learn: 0.2864225	total: 1.64s	remaining: 540ms
752:	learn: 0.2863627	total: 1.64s	remaining: 538ms
753:	learn: 0.2861857	total: 1.64s	remaining: 536ms
754:	learn: 0.2860820	total: 1.65s	remaining: 534ms
755:	learn: 0.2858102	total: 1.65s	remaining: 532ms
756:	learn: 0.2856287	total: 1.65s	remaining: 530ms
757:	learn: 0.2855409	total: 1.66s	remaining: 528ms
758:	learn: 0.2854636	total: 1.66s	remaining: 526ms
759:	learn: 0.2853831	total: 1.66s	remaining: 524ms
760:	learn: 0.2851595	total: 1.66s	remaining: 522ms
761:	learn: 0.2850702	total: 1.66s	remaining: 520ms
762:	learn: 0.2849275	total: 1.67s	remaining: 517ms
763:	learn: 0.2847664	total: 1.67s	remaining: 515ms
764:	learn: 

917:	learn: 0.2610886	total: 2.01s	remaining: 180ms
918:	learn: 0.2609875	total: 2.02s	remaining: 178ms
919:	learn: 0.2609128	total: 2.02s	remaining: 176ms
920:	learn: 0.2608230	total: 2.02s	remaining: 173ms
921:	learn: 0.2607631	total: 2.02s	remaining: 171ms
922:	learn: 0.2606285	total: 2.02s	remaining: 169ms
923:	learn: 0.2605568	total: 2.03s	remaining: 167ms
924:	learn: 0.2605225	total: 2.03s	remaining: 165ms
925:	learn: 0.2604128	total: 2.03s	remaining: 162ms
926:	learn: 0.2603281	total: 2.03s	remaining: 160ms
927:	learn: 0.2601985	total: 2.04s	remaining: 158ms
928:	learn: 0.2601291	total: 2.04s	remaining: 156ms
929:	learn: 0.2600030	total: 2.04s	remaining: 154ms
930:	learn: 0.2599384	total: 2.04s	remaining: 152ms
931:	learn: 0.2598863	total: 2.05s	remaining: 149ms
932:	learn: 0.2597536	total: 2.05s	remaining: 147ms
933:	learn: 0.2596890	total: 2.05s	remaining: 145ms
934:	learn: 0.2593873	total: 2.05s	remaining: 143ms
935:	learn: 0.2592766	total: 2.06s	remaining: 141ms
936:	learn: 

In [27]:
best_score = model.best_score_

In [28]:
best_score

{'learn': {'Logloss': 0.2506514839504076}}