In [8]:
# import packages
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from optbinning import BinningProcess, Scorecard
from optbinning.scorecard import plot_auc_roc, plot_ks

In [9]:
# load the raw dataset from ifood_df.csv (path is relative to the working directory)
ifood = pd.read_csv(filepath_or_buffer='ifood_df.csv')

In [10]:
# build a reduced frame without the columns regarded as highly correlated;
# `drop` returns a new DataFrame, so `ifood` itself is left untouched
ifood_rdu = ifood.drop(
    columns=[
        'Z_CostContact',
        'Z_Revenue',
        'MntTotal',
        'MntRegularProds',
        'AcceptedCmpOverall',
    ]
)

In [11]:
# predictor columns to be handled as categorical rather than numeric
ifood_cat = [
    'AcceptedCmp1', 'AcceptedCmp2', 'AcceptedCmp3', 'AcceptedCmp4', 'AcceptedCmp5',
    'education_2n Cycle', 'education_Basic', 'education_Graduation', 'education_Master', 'education_PhD',
    'marital_Divorced', 'marital_Married', 'marital_Single', 'marital_Together', 'marital_Widow',
    'Kidhome', 'Teenhome', 'Complain',
]

# cast the target column to pandas' category dtype (values are unchanged,
# only the dtype differs)
ifood_rdu['Response'] = ifood_rdu['Response'].astype('category')

# cast every listed predictor to category dtype as well
ifood_rdu[ifood_cat] = ifood_rdu[ifood_cat].astype('category')

In [12]:
# separate the target column from the remaining (predictor) columns
y = ifood_rdu['Response']
X = ifood_rdu.drop(columns='Response')

# hold out 30% of the rows for testing; the fixed random_state keeps the
# split reproducible across runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1234,
)

In [None]:
# names of all predictor columns handed to the binning process
colnames = X_train.columns.tolist()

# variable selection: keep variables whose IV lies between 0.05 and 1
selection_criteria = {"iv": {"min": 0.05, "max": 1}}

# configure the binning process; columns listed in ifood_cat are declared
# as categorical variables
bin_proc = BinningProcess(
    variable_names=colnames,
    categorical_variables=ifood_cat,
    selection_criteria=selection_criteria,
)

# fit on the training split only, then fetch the per-variable summary table
bin_proc.fit(X_train, y_train)
iv_all = bin_proc.summary()

# display the first six summary columns, ordered from highest to lowest IV
iv_all.iloc[:, :6].sort_values(by="iv", ascending=False)

Unnamed: 0,name,dtype,status,selected,n_bins,iv
4,MntWines,numerical,OPTIMAL,True,8,0.576877
6,MntMeatProducts,numerical,OPTIMAL,True,8,0.570438
12,NumCatalogPurchases,numerical,OPTIMAL,True,6,0.564378
17,AcceptedCmp5,categorical,OPTIMAL,True,2,0.542956
9,MntGoldProds,numerical,OPTIMAL,True,9,0.489834
0,Income,numerical,OPTIMAL,True,8,0.447777
18,AcceptedCmp1,categorical,OPTIMAL,True,2,0.438378
22,Customer_Days,numerical,OPTIMAL,True,9,0.418622
3,Recency,numerical,OPTIMAL,True,8,0.374457
15,AcceptedCmp3,categorical,OPTIMAL,True,2,0.353404
