In [56]:
import requests
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [57]:
api_url = "https://debt-api-4301881a2ff8.herokuapp.com/loan/get_all"
res = requests.get(api_url)
df = pd.DataFrame(res.json()["data"])

In [58]:
df.head()

Unnamed: 0,loan_id,gender,married,dependents,education,self_employed,applicantIncome,coapplicant_income,loan_amount,loan_amount_term,credit_history,property_area,loan_status
0,LP001002,Male,No,0,Graduate,No,5849,0,12000,12.0,1.0,Urban,Y
1,LP001003,Male,Yes,1,Graduate,No,4583,1508,12800,12.0,1.0,Rural,N
2,LP001005,Male,Yes,0,Graduate,Yes,3000,0,6600,12.0,1.0,Urban,Y
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358,12000,12.0,1.0,Urban,Y
4,LP001008,Male,No,0,Graduate,No,6000,0,14100,12.0,1.0,Urban,Y


In [59]:
df = df.drop("loan_id", axis=1)

In [60]:
df.head()

Unnamed: 0,gender,married,dependents,education,self_employed,applicantIncome,coapplicant_income,loan_amount,loan_amount_term,credit_history,property_area,loan_status
0,Male,No,0,Graduate,No,5849,0,12000,12.0,1.0,Urban,Y
1,Male,Yes,1,Graduate,No,4583,1508,12800,12.0,1.0,Rural,N
2,Male,Yes,0,Graduate,Yes,3000,0,6600,12.0,1.0,Urban,Y
3,Male,Yes,0,Not Graduate,No,2583,2358,12000,12.0,1.0,Urban,Y
4,Male,No,0,Graduate,No,6000,0,14100,12.0,1.0,Urban,Y


In [61]:
df.isnull().sum()

gender                13
married                3
dependents            15
education              0
self_employed          2
applicantIncome        0
coapplicant_income     0
loan_amount            0
loan_amount_term       0
credit_history        50
property_area          0
loan_status            0
dtype: int64

In [62]:
#handle numerical missing data
df['loan_amount'] = df['loan_amount'].fillna(df['loan_amount'].mean())
df['loan_amount_term'] = df['loan_amount_term'].fillna(df['loan_amount_term'].mean())
df['credit_history'] = df['credit_history'].fillna(df['credit_history'].mean())


In [63]:
df.isnull().sum()

gender                13
married                3
dependents            15
education              0
self_employed          2
applicantIncome        0
coapplicant_income     0
loan_amount            0
loan_amount_term       0
credit_history         0
property_area          0
loan_status            0
dtype: int64

In [64]:
#handle categorial missing value

df['gender']=df['gender'].fillna(df['gender'].mode()[0])
df['married']=df['married'].fillna(df['married'].mode()[0])
df['dependents']=df['dependents'].fillna(df['dependents'].mode()[0])
df['self_employed']=df['self_employed'].fillna(df['self_employed'].mode()[0])


In [65]:
df.isnull().sum()

gender                0
married               0
dependents            0
education             0
self_employed         0
applicantIncome       0
coapplicant_income    0
loan_amount           0
loan_amount_term      0
credit_history        0
property_area         0
loan_status           0
dtype: int64

In [66]:
from sklearn.preprocessing import LabelEncoder

In [67]:
le_gender = LabelEncoder()
df['gender'] = le_gender.fit_transform(df['gender'])

le_married = LabelEncoder()
df['married'] = le_married.fit_transform(df['married'])

le_education = LabelEncoder()
df['education'] = le_education.fit_transform(df['education'])

le_self_employed = LabelEncoder()
df['self_employed'] = le_self_employed.fit_transform(df['self_employed'])

le_property_area = LabelEncoder()
df['property_area'] = le_property_area.fit_transform(df['property_area'])

le_loan_status = LabelEncoder()
df['loan_status'] = le_loan_status.fit_transform(df['loan_status'])


In [68]:
df.head()

Unnamed: 0,gender,married,dependents,education,self_employed,applicantIncome,coapplicant_income,loan_amount,loan_amount_term,credit_history,property_area,loan_status
0,1,0,0,0,0,5849,0,12000,12.0,1.0,2,1
1,1,1,1,0,0,4583,1508,12800,12.0,1.0,0,0
2,1,1,0,0,1,3000,0,6600,12.0,1.0,2,1
3,1,1,0,1,0,2583,2358,12000,12.0,1.0,2,1
4,1,0,0,0,0,6000,0,14100,12.0,1.0,2,1


In [69]:
x = df.drop('loan_status',axis=1)
y = df['loan_status']

In [70]:
x

Unnamed: 0,gender,married,dependents,education,self_employed,applicantIncome,coapplicant_income,loan_amount,loan_amount_term,credit_history,property_area
0,1,0,0,0,0,5849,0,12000,12.0,1.0,2
1,1,1,1,0,0,4583,1508,12800,12.0,1.0,0
2,1,1,0,0,1,3000,0,6600,12.0,1.0,2
3,1,1,0,1,0,2583,2358,12000,12.0,1.0,2
4,1,0,0,0,0,6000,0,14100,12.0,1.0,2
...,...,...,...,...,...,...,...,...,...,...,...
609,0,0,0,0,0,2900,0,7100,12.0,1.0,0
610,1,1,3,0,0,4106,0,4000,6.0,1.0,0
611,1,1,1,0,0,8072,240,25300,12.0,1.0,2
612,1,1,2,0,0,7583,0,18700,12.0,1.0,2


In [71]:
y

0      1
1      0
2      1
3      1
4      1
      ..
609    1
610    1
611    1
612    1
613    0
Name: loan_status, Length: 614, dtype: int64

In [72]:
cols = ['gender','married','dependents','education','self_employed','applicantIncome','coapplicant_income','loan_amount','loan_amount_term']

In [73]:
from sklearn.preprocessing import StandardScaler
st = StandardScaler()
x[cols]=st.fit_transform(x[cols])

In [74]:
x

Unnamed: 0,gender,married,dependents,education,self_employed,applicantIncome,coapplicant_income,loan_amount,loan_amount_term,credit_history,property_area
0,0.472343,-1.372089,-0.735646,-0.528362,-0.403582,0.072991,-0.554487,-0.282557,0.279683,1.0,2
1,0.472343,0.728816,0.259258,-0.528362,-0.403582,-0.134412,-0.038732,-0.188562,0.279683,1.0,0
2,0.472343,0.728816,-0.735646,-0.528362,2.477808,-0.393747,-0.554487,-0.917017,0.279683,1.0,2
3,0.472343,0.728816,-0.735646,1.892641,-0.403582,-0.462062,0.251980,-0.282557,0.279683,1.0,2
4,0.472343,-1.372089,-0.735646,-0.528362,-0.403582,0.097728,-0.554487,-0.035822,0.279683,1.0,2
...,...,...,...,...,...,...,...,...,...,...,...
609,-2.117107,-1.372089,-0.735646,-0.528362,-0.403582,-0.410130,-0.554487,-0.858271,0.279683,1.0,0
610,0.472343,0.728816,2.249066,-0.528362,-0.403582,-0.212557,-0.554487,-1.222498,-2.499049,1.0,0
611,0.472343,0.728816,0.259258,-0.528362,-0.403582,0.437174,-0.472404,1.280097,0.279683,1.0,2
612,0.472343,0.728816,1.254162,-0.528362,-0.403582,0.357064,-0.554487,0.504645,0.279683,1.0,2


In [75]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.10, random_state=8)

In [76]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn import metrics
from sklearn.feature_selection import SelectFromModel
model = GradientBoostingClassifier(n_estimators=21,random_state=0)
model.fit(x_train,y_train)
y_predict_GradientBoosting = model.predict(x_test)
# print(y_predict_DecisionTree)
accuracy_Score_GradientBoosting = metrics.accuracy_score(y_test, y_predict_GradientBoosting)
print("*****************")
print("GradientBoostingClassifier accuracy :", accuracy_Score_GradientBoosting)
# classification_report
print()
print("classification_report")
print(classification_report(y_test, y_predict_GradientBoosting))

sel = SelectFromModel(model)
sel.fit(x, y)
selected_features = sel.transform(x)
print(sel.get_support())
print(df.shape)
print(selected_features.shape)

*****************
GradientBoostingClassifier accuracy : 0.9193548387096774

classification_report
              precision    recall  f1-score   support

           0       1.00      0.67      0.80        15
           1       0.90      1.00      0.95        47

    accuracy                           0.92        62
   macro avg       0.95      0.83      0.87        62
weighted avg       0.93      0.92      0.91        62

[False False False False False False False False False  True False]
(614, 12)
(614, 1)


In [84]:
test_input = pd.DataFrame({
    'gender':'Male',
    'married':'Yes',
    'dependents':1,
    'education':'Graduate',
    'self_employed':'No',
    'applicantIncome':4583,
    'coapplicant_income':1508,
    'loan_amount':12800,
    'loan_amount_term':12,
    'credit_history':1,
    'property_area': 'Urban'
},index=[0])

test_input['gender']= le_gender.fit_transform(test_input['gender'])
test_input['married']= le_married.fit_transform(test_input['married'])
test_input['education']= le_education.fit_transform(test_input['education'])
test_input['self_employed']= le_self_employed.fit_transform(test_input['self_employed'])
test_input['property_area']= le_property_area.fit_transform(test_input['property_area'])

In [85]:
Prediction = model.predict(test_input)

In [86]:
le_loan_status.inverse_transform(Prediction)

array(['N'], dtype=object)

In [87]:
import pickle

pickle.dump(model, open('GradientBoostingClassifier.pkl' , 'wb'))

In [88]:
loaded_model = pickle.load(open('GradientBoostingClassifier.pkl' , 'rb'))

In [89]:
result = loaded_model.predict(test_input)
result

array([0])

In [90]:
le_loan_status.inverse_transform([result])

array(['N'], dtype=object)