In [1]:
import pandas as pd
from pandas.api.types import is_numeric_dtype

import py_iv_woe as piwo

## Load data

The dependent variable has to be called 'label'.

In [2]:
# Telco customer-churn sample CSV hosted in IBM's public GitHub repository.
url = 'https://raw.githubusercontent.com/IBM/telco-customer-churn-on-icp4d/master/data/Telco-Customer-Churn.csv'

# Cells containing a single space are read as missing values (NaN).
df = pd.read_csv(
    url,
    na_values=[' '],
)

In [3]:
# Prepare the frame in a single chain:
#   1. lower-case the first character of every column name,
#   2. derive the binary `label` target from `churn` ('Yes' -> 0, 'No' -> 1),
#   3. recode `seniorCitizen` from 1/0 to 'Yes'/'No' so it is no longer numeric,
#   4. drop the identifier column and the original `churn` column.
df = (
    df
    .rename(columns=lambda name: name[0].lower() + name[1:])
    .assign(
        label=lambda frame: frame['churn'].map({'Yes': 0, 'No': 1}),
        seniorCitizen=lambda frame: frame['seniorCitizen'].map({1: 'Yes', 0: 'No'}),
    )
    .drop(columns=['customerID', 'churn'])
)

In [4]:
# Preview five randomly chosen rows (no seed is set, so the rows differ between runs).
df.sample(n=5)

Unnamed: 0,gender,seniorCitizen,partner,dependents,tenure,phoneService,multipleLines,internetService,onlineSecurity,onlineBackup,deviceProtection,techSupport,streamingTV,streamingMovies,contract,paperlessBilling,paymentMethod,monthlyCharges,totalCharges,label
497,Female,No,Yes,No,70,Yes,No,No,No internet service,No internet service,No internet service,No internet service,No internet service,No internet service,Two year,No,Bank transfer (automatic),19.8,1397.65,1
817,Female,No,Yes,Yes,55,Yes,No,Fiber optic,No,Yes,Yes,Yes,Yes,Yes,One year,Yes,Bank transfer (automatic),103.7,5656.75,1
5318,Female,No,Yes,Yes,32,Yes,No,DSL,Yes,Yes,Yes,Yes,No,Yes,Two year,No,Mailed check,73.6,2316.85,1
5926,Male,No,No,No,6,Yes,No,DSL,Yes,No,Yes,Yes,Yes,Yes,Month-to-month,No,Credit card (automatic),78.65,483.3,1
3997,Female,No,Yes,No,69,Yes,No,DSL,No,Yes,No,No,No,Yes,Two year,Yes,Credit card (automatic),61.45,4131.2,1


## Create Feature objects

In [5]:
# Wrap every predictor column (everything except the `label` target) in a
# py_iv_woe feature object: numeric dtypes become ContinuousFeature, all other
# dtypes become CategoricalFeature. Keys are the column names.
feats_dict = {
    col: (
        piwo.ContinuousFeature if is_numeric_dtype(df[col]) else piwo.CategoricalFeature
    )(df, col)
    for col in df.columns
    if col != 'label'
}

# Flat list of the feature objects, in column order.
feats = list(feats_dict.values())

## Calculate Information Value

In [6]:
# Instantiate the three py_iv_woe helper objects needed for the analysis;
# `iv` and `s` are later handed to `ar.analyze(...)` together with the features.
# The right-hand side is evaluated left to right, i.e. IV first, then
# StatsSignificance, then AttributeRelevance.
iv, s, ar = piwo.IV(), piwo.StatsSignificance(), piwo.AttributeRelevance()

In [7]:
# Run the attribute-relevance analysis over all feature objects, using the IV
# and statistical-significance helpers; the returned value is shown as the
# cell output.
# NOTE(review): `interpretation=True` appears to add the `iv_interpretation` /
# `es_interpretation` columns seen in the output — confirm against py_iv_woe.
ar.analyze(feats, iv, s, interpretation=True)

Unnamed: 0,iv,p-value,effect_size,iv_interpretation,es_interpretation
contract,1.23856,5.863038e-258,0.410116,suspicious,strong
tenure,0.777926,7.4135150000000005e-205,0.369846,suspicious,medium
onlineSecurity,0.717777,2.6611499999999997e-185,0.3474,suspicious,medium
techSupport,0.699567,1.443084e-180,0.342916,suspicious,medium
internetService,0.617953,9.571788e-160,0.322455,suspicious,medium
onlineBackup,0.528634,2.079759e-131,0.292316,suspicious,medium
deviceProtection,0.499725,5.505219e-122,0.28158,strong,medium
paymentMethod,0.457109,3.682355e-140,0.303359,strong,medium
streamingMovies,0.381374,2.6677570000000003e-82,0.230951,strong,medium
streamingTV,0.380462,5.528994e-82,0.230502,strong,medium
