# Classification Template

# Variables

In [27]:
# --- Dataset ---
# Input file name, the relative path it is read from, and its file format.
dataset_name = 'Bank_Personal_Loan_Modelling_transformed.xlsx'
dataset_path = f'../../dataset/{dataset_name}'
dataset_format = 'xlsx'

# --- Target ---
# Name of the column the classifier should predict.
target_col = 'Personal Loan'

# --- Output ---
# Directory in which the trained models are stored.
model_store_location = '../../store/model/'

## Installing Pycaret

In [11]:
# Optional one-time install step, kept commented out so a normal run does not
# reinstall anything. Uncomment ONE of the lines below if PyCaret is missing:
# the first installs the regular release, the second the nightly build.
# !pip install pycaret --user
# !pip install pycaret-nightly --user

## Import Libraries

In [12]:
import os

import pandas as pd
from pycaret.classification import *

## Import Dataset

In [13]:
# Choose the pandas reader from the configured source format.
# 'csv' is read as-is; any other format is treated as an Excel workbook,
# with its first column used as the row index.
df = (
    pd.read_csv(dataset_path)
    if dataset_format == 'csv'
    else pd.read_excel(dataset_path, index_col=0)
)

In [14]:
# Preview the first five rows to sanity-check the load.
df.head(n=5)

Unnamed: 0_level_0,Age,Experience,Income,ZIP Code,Family,CCAvg,Education,Mortgage,Securities Account,CD Account,Online,CreditCard,Personal Loan
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1,-1.0,-0.95,-0.254237,-0.863923,1.0,0.055556,-0.5,0.0,1,0,-1,0,0
2,0.0,-0.05,-0.508475,-1.241379,0.5,0.0,-0.5,0.0,1,0,-1,0,0
3,-0.3,-0.25,-0.898305,0.475714,-0.5,-0.277778,-0.5,0.0,0,0,-1,0,0
4,-0.5,-0.55,0.610169,0.250278,-0.5,0.666667,0.0,0.0,0,0,-1,0,0
5,-0.5,-0.6,-0.322034,-0.781238,1.0,-0.277778,0.0,0.0,0,0,-1,1,0


## Data Setup
* See [here](https://github.com/pycaret/pycaret/blob/master/tutorials/Binary%20Classification%20Tutorial%20Level%20Beginner%20-%20%20CLF101.ipynb) for a beginner-level example notebook.
* See [here](https://pycaret.org/classification/) for classification documentation.

In [18]:
# Initialise the PyCaret classification experiment on `df`:
#   - target:               column named by `target_col`
#   - categorical_features: 'Family' and 'Education' are forced to categorical
#   - train_size:           80% of the rows are used for training
#   - fold:                 5 cross-validation folds
#   - session_id:           fixed random seed. Without it PyCaret draws a new
#                           seed on every run, so the split and every score
#                           below would change after a kernel restart.
#                           1204 is the seed shown in the recorded output.
data = setup(
    df,
    target=target_col,
    categorical_features=['Family', 'Education'],
    train_size=0.8,
    fold=5,
    session_id=1204,
)

Unnamed: 0,Description,Value
0,session_id,1204
1,Target,Personal Loan
2,Target Type,Binary
3,Label Encoded,"0: 0, 1: 1"
4,Original Data,"(5000, 13)"
5,Missing Values,False
6,Numeric Features,6
7,Categorical Features,6
8,Ordinal Features,False
9,High Cardinality Features,False


## Comparing models and selecting top 3

In [19]:
# Rank the candidate estimators with 5-fold cross-validation and keep the
# three best-ranked ones; these are the models carried forward for tuning.
top3_models = compare_models(
    fold=5,
    n_select=3,
)

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.989,0.9973,0.9008,0.9797,0.9383,0.9323,0.9335,0.132
gbc,Gradient Boosting Classifier,0.9887,0.9979,0.917,0.9608,0.9383,0.9322,0.9325,0.238
xgboost,Extreme Gradient Boosting,0.988,0.9966,0.9089,0.9601,0.9336,0.927,0.9275,0.35
rf,Random Forest Classifier,0.9875,0.9964,0.8794,0.9849,0.9289,0.9221,0.924,0.452
et,Extra Trees Classifier,0.9858,0.9908,0.858,0.9879,0.9177,0.91,0.9131,0.414
dt,Decision Tree Classifier,0.984,0.9503,0.9089,0.9195,0.9133,0.9045,0.9051,0.02
ada,Ada Boost Classifier,0.9708,0.9817,0.7721,0.9005,0.8307,0.8148,0.8181,0.116
svm,SVM - Linear Kernel,0.965,0.0,0.7214,0.8852,0.7925,0.7737,0.7799,0.022
lr,Logistic Regression,0.9637,0.9682,0.692,0.8964,0.7787,0.7594,0.7682,0.876
knn,K Neighbors Classifier,0.9595,0.9337,0.5711,0.9908,0.7217,0.7018,0.7343,0.134


In [21]:
# Display the selected estimators with their hyperparameters.
top3_models

[LGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
                importance_type='split', learning_rate=0.1, max_depth=-1,
                min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
                n_estimators=100, n_jobs=-1, num_leaves=31, objective=None,
                random_state=1204, reg_alpha=0.0, reg_lambda=0.0, silent=True,
                subsample=1.0, subsample_for_bin=200000, subsample_freq=0),
 GradientBoostingClassifier(ccp_alpha=0.0, criterion='friedman_mse', init=None,
                            learning_rate=0.1, loss='deviance', max_depth=3,
                            max_features=None, max_leaf_nodes=None,
                            min_impurity_decrease=0.0, min_impurity_split=None,
                            min_samples_leaf=1, min_samples_split=2,
                            min_weight_fraction_leaf=0.0, n_estimators=100,
                            n_iter_no_change=None, presort='deprecated',
           

In [22]:
# Next step: tune each of the top 3 models separately in the hyperparameter notebook,
# then take the tuned models to the last step, where we will try ensembling techniques.

Uncomment the cells below to plot each model's performance.

In [23]:
# plot_model(top3_models[0])

In [24]:
# plot_model(top3_models[1])

In [25]:
# plot_model(top3_models[2])

Uncomment the cells below to evaluate each model; different plots can be selected.

In [24]:
#evaluate_model(top3_models[0])

In [22]:
# evaluate_model(top3_models[1])

In [23]:
# evaluate_model(top3_models[2])

## Saving Top 3 Final models

In [26]:
# Persist each selected model under `model_store_location`, using the
# estimator's class name as the file name.

# Create the target directory first so saving does not fail when the store
# folder does not exist yet (e.g. on a fresh checkout).
os.makedirs(model_store_location, exist_ok=True)

for model in top3_models:
    model_name = model.__class__.__name__
    # os.path.join inserts a separator only when needed. The previous
    # `location + '/' + name` produced a doubled '//' because
    # model_store_location already ends with '/'.
    save_model(model, os.path.join(model_store_location, model_name))
    print('{0} model saved!'.format(model_name))

Transformation Pipeline and Model Succesfully Saved
LGBMClassifier model saved!
Transformation Pipeline and Model Succesfully Saved
GradientBoostingClassifier model saved!
Transformation Pipeline and Model Succesfully Saved
XGBClassifier model saved!
