# Building Immo Classifier Model with PyCaret

In [2]:
import pandas as pd
import numpy as np

In [3]:
immo_df = pd.read_csv('./last_load.csv')

In [4]:
immo_df.head()

Unnamed: 0.1,Unnamed: 0,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,0,1,0,0,0,0,5849,0.0,146.412162,360.0,1.0,2,1
1,1,1,1,1,0,0,4583,1508.0,128.0,360.0,1.0,0,0
2,2,1,1,0,0,1,3000,0.0,66.0,360.0,1.0,2,1
3,3,1,1,0,1,0,2583,2358.0,120.0,360.0,1.0,2,1
4,4,1,0,0,0,0,6000,0.0,141.0,360.0,1.0,2,1


In [5]:
immo_df.Dependents

0      0
1      1
2      0
3      0
4      0
      ..
609    0
610    3
611    1
612    2
613    0
Name: Dependents, Length: 614, dtype: int64

In [6]:
from pycaret.classification import *

In [7]:
exp_clf101 = setup(data = immo_df, target = 'Loan_Status', session_id=123)

Unnamed: 0,Description,Value
0,session_id,123
1,Target,Loan_Status
2,Target Type,Binary
3,Label Encoded,"0: 0, 1: 1"
4,Original Data,"(614, 13)"
5,Missing Values,False
6,Numeric Features,5
7,Categorical Features,7
8,Ordinal Features,False
9,High Cardinality Features,False


In [8]:
best = compare_models()

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
ridge,Ridge Classifier,0.8253,0.0,0.9867,0.8097,0.8885,0.503,0.5663,0.006
lda,Linear Discriminant Analysis,0.8253,0.7319,0.9867,0.8097,0.8885,0.503,0.5663,0.006
lr,Logistic Regression,0.8183,0.7323,0.9833,0.805,0.8841,0.4823,0.5487,0.44
rf,Random Forest Classifier,0.8044,0.7473,0.9534,0.8064,0.8731,0.4602,0.4909,0.043
nb,Naive Bayes,0.802,0.7228,0.9501,0.8064,0.871,0.4552,0.4971,0.005
gbc,Gradient Boosting Classifier,0.7904,0.7192,0.9303,0.805,0.8611,0.4369,0.4703,0.018
lightgbm,Light Gradient Boosting Machine,0.7859,0.7528,0.9037,0.8135,0.8552,0.4458,0.4588,0.099
ada,Ada Boost Classifier,0.7856,0.6955,0.9334,0.7984,0.8594,0.4171,0.4481,0.019
et,Extra Trees Classifier,0.7694,0.6846,0.9035,0.7974,0.8465,0.3903,0.4033,0.039
dt,Decision Tree Classifier,0.6994,0.6455,0.7808,0.79,0.7829,0.2886,0.2939,0.006


## Compare Models with Tuned Setup

In [9]:
exp_clf102 = setup(data = immo_df, target = 'Loan_Status', session_id=123,
                  normalize = True, 
                  transformation = True)

Unnamed: 0,Description,Value
0,session_id,123
1,Target,Loan_Status
2,Target Type,Binary
3,Label Encoded,"0: 0, 1: 1"
4,Original Data,"(614, 13)"
5,Missing Values,False
6,Numeric Features,5
7,Categorical Features,7
8,Ordinal Features,False
9,High Cardinality Features,False


In [10]:
best = compare_models()

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lr,Logistic Regression,0.8253,0.7379,0.9867,0.8097,0.8885,0.503,0.5663,0.007
ridge,Ridge Classifier,0.8253,0.0,0.9867,0.8097,0.8885,0.503,0.5663,0.005
lda,Linear Discriminant Analysis,0.8253,0.7369,0.9867,0.8097,0.8885,0.503,0.5663,0.006
nb,Naive Bayes,0.816,0.728,0.97,0.8091,0.8812,0.4862,0.5379,0.006
rf,Random Forest Classifier,0.809,0.7545,0.9601,0.8078,0.8764,0.4697,0.5068,0.047
lightgbm,Light Gradient Boosting Machine,0.7858,0.7565,0.9103,0.8101,0.8555,0.4411,0.4622,0.054
ada,Ada Boost Classifier,0.7856,0.6893,0.9334,0.7983,0.8596,0.4165,0.4444,0.022
gbc,Gradient Boosting Classifier,0.781,0.7156,0.9237,0.7988,0.8549,0.4117,0.4425,0.019
et,Extra Trees Classifier,0.7764,0.6895,0.9103,0.8011,0.8515,0.4064,0.4207,0.045
svm,SVM - Linear Kernel,0.7625,0.0,0.8767,0.801,0.8349,0.4057,0.4289,0.006


## Build Gradient Boost Classifier

In [11]:
log_regr_model = create_model('lr')

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,0.907,0.8683,1.0,0.8857,0.9394,0.7425,0.7684
1,0.814,0.8385,1.0,0.7895,0.8824,0.4658,0.551
2,0.7674,0.7154,0.9667,0.7632,0.8529,0.3323,0.3931
3,0.8372,0.7846,1.0,0.8108,0.8955,0.5446,0.6117
4,0.814,0.6615,1.0,0.7895,0.8824,0.4658,0.551
5,0.7907,0.6821,0.9,0.8182,0.8571,0.4691,0.4767
6,0.7907,0.5667,1.0,0.7692,0.8696,0.3828,0.4865
7,0.907,0.8667,1.0,0.8824,0.9375,0.7584,0.7816
8,0.7442,0.559,1.0,0.7317,0.8451,0.2024,0.3355
9,0.881,0.8361,1.0,0.8571,0.9231,0.6667,0.7071


## Evaluate Model

In [12]:
evaluate_model(log_regr_model)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…

## Predict Test Set 

In [14]:
predict_model(log_regr_model)

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Logistic Regression,0.773,0.786,0.9752,0.7516,0.8489,0.4217,0.4856


Unnamed: 0,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Gender_0,Married_1,Dependents_0,Dependents_1,Dependents_2,Dependents_3,Education_1,Self_Employed_1,Credit_History_1.0,Property_Area_0,Property_Area_1,Property_Area_2,Loan_Status,Label,Score
0,-1.507254,-0.993926,-1.595348,0.149303,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0,1,0.7532
1,-0.977890,0.636508,-0.359564,0.149303,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1,1,0.8474
2,2.484583,-0.993926,0.422083,0.149303,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1,1,0.7600
3,0.361840,0.715083,0.362563,0.149303,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1,1,0.7344
4,0.189173,0.620898,0.534488,0.149303,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1,0,0.8793
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
180,-0.838477,0.301392,-0.705689,0.149303,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1,1,0.6858
181,1.480796,-0.993926,0.862678,0.149303,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1,1,0.7008
182,-0.848405,1.451068,-0.109622,0.149303,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1,1,0.8269
183,-1.072210,1.215178,0.688949,0.149303,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1,1,0.8791


## Save Model

In [16]:
save_model(log_regr_model, model_name = 'Logis_Reg_model')

Transformation Pipeline and Model Succesfully Saved


(Pipeline(memory=None,
          steps=[('dtypes',
                  DataTypes_Auto_infer(categorical_features=[],
                                       display_types=True, features_todrop=[],
                                       id_columns=['Unnamed: 0'],
                                       ml_usecase='classification',
                                       numerical_features=[],
                                       target='Loan_Status', time_features=[])),
                 ('imputer',
                  Simple_Imputer(categorical_strategy='not_available',
                                 fill_value_categorical=None,
                                 fill_value_numerical=No...
                 ('feature_select', 'passthrough'), ('fix_multi', 'passthrough'),
                 ('dfs', 'passthrough'), ('pca', 'passthrough'),
                 ['trained_model',
                  LogisticRegression(C=1.0, class_weight=None, dual=False,
                                     fit_intercept