In [27]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")

In [28]:
df = pd.read_csv('clean_data.csv',header=0)
df.head()

Unnamed: 0,Gender,Married,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Loan_Status
0,1,0,1,0,5849,0.0,128.0,360.0,1.0,1
1,1,1,1,0,4583,1508.0,128.0,360.0,1.0,0
2,1,1,1,1,3000,0.0,66.0,360.0,1.0,1
3,1,1,0,0,2583,2358.0,120.0,360.0,1.0,1
4,1,0,1,0,6000,0.0,141.0,360.0,1.0,1


In [29]:
X = df.drop('Loan_Status',axis=1)
y = df['Loan_Status']
y.value_counts()

Loan_Status
1    422
0    192
Name: count, dtype: int64

In [30]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.25,random_state=42)

In [31]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier,AdaBoostClassifier,GradientBoostingClassifier



In [32]:
models = {
    'LogisticRegression' : LogisticRegression(),
    'SVC':SVC(),
    'KNN':LogisticRegression(),
    'DecisionTreeClassifier':DecisionTreeClassifier(),
    'RandomForestClassifier':RandomForestClassifier(),
    'AdaBoostClassifier':AdaBoostClassifier(),
    'KNeighborsClassifier':KNeighborsClassifier(),
    'GradientBoostingRegressor':GradientBoostingClassifier()
}

In [33]:
from sklearn.metrics import accuracy_score,precision_score,recall_score

In [34]:
for i in range(len(list(models))):
    model = list(models.values())[i]
    model.fit(X_train,y_train)
    y_pred_train = model.predict(X_train)
    y_pred_test = model.predict(X_test)
    print('train data of ',list(models.keys())[i])
    print('accuracy score : ',accuracy_score(y_train,y_pred_train))
    print('precision score : ',precision_score(y_train,y_pred_train))
    print('recall score : ',recall_score(y_train,y_pred_train))
    print('------------------------------------------------------')
    print('test data of ',list(models.keys())[i])
    print('accuracy score : ',accuracy_score(y_test,y_pred_test))
    print('precision score : ',precision_score(y_test,y_pred_test))
    print('recall score : ',recall_score(y_test,y_pred_test))
    print('='*35)
    

train data of  LogisticRegression
accuracy score :  0.8195652173913044
precision score :  0.8040712468193384
recall score :  0.9813664596273292
------------------------------------------------------
test data of  LogisticRegression
accuracy score :  0.7727272727272727
precision score :  0.7480916030534351
recall score :  0.98
train data of  SVC
accuracy score :  0.7043478260869566
precision score :  0.7030567685589519
recall score :  1.0
------------------------------------------------------
test data of  SVC
accuracy score :  0.6493506493506493
precision score :  0.6493506493506493
recall score :  1.0
train data of  KNN
accuracy score :  0.8195652173913044
precision score :  0.8040712468193384
recall score :  0.9813664596273292
------------------------------------------------------
test data of  KNN
accuracy score :  0.7727272727272727
precision score :  0.7480916030534351
recall score :  0.98
train data of  DecisionTreeClassifier
accuracy score :  1.0
precision score :  1.0
recall sc

In [35]:
from sklearn.model_selection import RandomizedSearchCV

In [36]:
model_ran = AdaBoostClassifier()

param = {
    "n_estimators":[60,100,90],
    "algorithm":['SAMME','SAMME.R'],
}



In [37]:
random_model = RandomizedSearchCV(estimator=model_ran,param_distributions=param,cv=5,n_jobs=-1)
random_model.fit(X_train,y_train)
y_pred_ran = random_model.predict(X_test)

In [38]:
recall_score(y_test,y_pred_ran)

0.98

In [39]:
random_model.best_params_

{'n_estimators': 60, 'algorithm': 'SAMME'}

In [40]:
final_model = AdaBoostClassifier(n_estimators=60,algorithm='SAMME')
final_model.fit(X_train,y_train)

0,1,2
,estimator,
,n_estimators,60
,learning_rate,1.0
,algorithm,'SAMME'
,random_state,


In [41]:
y_pred_true = final_model.predict(X_test)

In [42]:
recall_score(y_test,y_pred_true)

0.98

In [43]:
X.head()

Unnamed: 0,Gender,Married,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History
0,1,0,1,0,5849,0.0,128.0,360.0,1.0
1,1,1,1,0,4583,1508.0,128.0,360.0,1.0
2,1,1,1,1,3000,0.0,66.0,360.0,1.0
3,1,1,0,0,2583,2358.0,120.0,360.0,1.0
4,1,0,1,0,6000,0.0,141.0,360.0,1.0


In [44]:
Gender = 1
Married = 0
Education = 1
Self_Employed = 0
ApplicantIncome = 20000
CoapplicantIncome = 0.0
LoanAmount = 11111.0
Loan_Amount_Term = 360.0
Credit_History = 1.0



In [45]:
final_model.predict(np.array([Gender, Married, Education, Self_Employed, ApplicantIncome, CoapplicantIncome, LoanAmount, Loan_Amount_Term, Credit_History]).reshape(1,-1))

array([1])

In [None]:
final_model.predict_proba(np.array([Gender, Married, Education, Self_Employed, ApplicantIncome, CoapplicantIncome, LoanAmount, Loan_Amount_Term, Credit_History]).reshape(1,-1))

array([[0.40383837, 0.59616163]])