# Predicting customer churn - Modeling

## Setup

### Common imports

In [45]:
# Data processing libraries
import pandas as pd

# Data modeling library
from sklearn.tree import DecisionTreeClassifier

### Data loading

In [46]:
# Read the churn dataset from its CSV file and preview the first rows.
DATA_PATH = 'data/customer-churn.csv'
df = pd.read_csv(DATA_PATH)
df.head()

Unnamed: 0,COLLEGE,INCOME,OVERAGE,LEFTOVER,HOUSE,HANDSET_PRICE,OVER_15MINS_CALLS_PER_MONTH,AVERAGE_CALL_DURATION,REPORTED_SATISFACTION,REPORTED_USAGE_LEVEL,CONSIDERING_CHANGE_OF_PLAN,LEAVE
0,0,31953,0,6,313378,161,0,4,3,3,1,1
1,1,36147,0,13,800586,244,0,6,3,3,2,1
2,1,27273,230,0,305049,201,16,15,3,4,3,1
3,0,120070,38,33,788235,780,3,2,3,0,2,0
4,1,29215,208,85,224784,241,21,1,4,3,0,1


In [47]:
# Check the frame's dimensions as (rows, columns).
df.shape

(20000, 12)

In [48]:
# List the column labels available in the loaded frame.
df.columns

Index(['COLLEGE', 'INCOME', 'OVERAGE', 'LEFTOVER', 'HOUSE', 'HANDSET_PRICE',
       'OVER_15MINS_CALLS_PER_MONTH', 'AVERAGE_CALL_DURATION',
       'REPORTED_SATISFACTION', 'REPORTED_USAGE_LEVEL',
       'CONSIDERING_CHANGE_OF_PLAN', 'LEAVE'],
      dtype='object')

## Data Modeling

In [49]:
# Separate the target column ('LEAVE') from the remaining predictor columns.
labels = df['LEAVE']
training = df.drop(columns='LEAVE')

### Evaluating performance

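We can compute the accuracy — the share of correct predictions out of all predictions — as a simple measure of performance to compare different models. Note that the models below are scored on the same data they were trained on, so these figures show how well each tree fits the training set, not how well it generalizes to unseen customers.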

In [50]:
def evaluate_performance(model, training, labels):
    """Print the accuracy of ``model`` on the given samples.

    Accuracy is the fraction of samples for which ``model.predict`` returns
    the expected label, reported as a percentage with two decimals.

    Parameters
    ----------
    model
        Object exposing a ``predict`` method (e.g. a fitted estimator).
    training
        Samples handed to ``model.predict``.
    labels
        Expected label for each sample, in the same order as ``training``.

    Returns
    -------
    None
        The score is printed, not returned.
    """
    labels_pred = model.predict(training)
    # The mean of the element-wise match mask equals correct / total and is
    # computed in one vectorized pass, instead of summing the booleans one by
    # one with the Python built-in ``sum``.
    accuracy = (labels_pred == labels).mean()
    print(f"Model performance: {accuracy * 100:.2f}%")

## Decision tree with depth = 10

In [51]:
# Fit a tree limited to depth 10 on `training` / `labels`.
# scikit-learn permutes the candidate features at every split, so splits with
# identical criterion improvement can resolve differently between runs;
# pinning random_state keeps the fitted tree reproducible across kernel
# restarts.
tree_depth_10 = DecisionTreeClassifier(
    max_depth=10,
    random_state=42)

tree_depth_10.fit(training, labels)

DecisionTreeClassifier(max_depth=10)

In [52]:
# Score the depth-10 tree on the very same rows it was fitted on, so the
# printed figure is training accuracy rather than a held-out estimate.
evaluate_performance(tree_depth_10, training, labels)

Model performance: 73.93%


## Decision tree with depth = 20

In [53]:
# Fit a tree limited to depth 20 on `training` / `labels`.
# scikit-learn permutes the candidate features at every split, so splits with
# identical criterion improvement can resolve differently between runs;
# pinning random_state keeps the fitted tree reproducible across kernel
# restarts.
tree_depth_20 = DecisionTreeClassifier(
    max_depth=20,
    random_state=42)

tree_depth_20.fit(training, labels)

DecisionTreeClassifier(max_depth=20)

In [54]:
# Score the depth-20 tree on the very same rows it was fitted on, so the
# printed figure is training accuracy rather than a held-out estimate.
evaluate_performance(tree_depth_20, training, labels)

Model performance: 91.32%


## References

Provost, F., & Fawcett, T. (2013). Data science for business: what you need to know about data mining and data-analytic thinking. Chapter 3.

Géron, A. (2019). Hands-On Machine Learning with Scikit-Learn, Keras, and TensorFlow: Concepts, Tools, and Techniques to Build Intelligent Systems. Chapter 2.