# Model Evaluation

In this notebook we will evaluate the performance of our models for the `Telco Customer Churn` dataset. We will compare Logistic Regression, Logistic Regression with Tuned Hyperparameter C, Decision Tree, and Decision Tree with Tuned Hyperparameters.

By the end of this notebook, we will identify the strongest model for predicting customer churn for Telco.

In [13]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, cross_validate, cross_val_score
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.metrics import log_loss, roc_curve, roc_auc_score
import pickle
import warnings
# Notebook-wide settings.
# Every Python warning is silenced for the rest of the session; this also hides
# any warnings the modelling libraries would otherwise emit.
warnings.filterwarnings('ignore')
plt.style.use('ggplot')

# Show every column of wide frames and render floats with two decimals and
# thousands separators.
pd.options.display.max_columns = None
pd.options.display.float_format = '{:,.2f}'.format

# Load the encoded churn dataset and display it as the cell output.
df = pd.read_csv('../data/encoded_telco_churn.csv')
df

Unnamed: 0,Male,Partner,Dependents,SeniorCitizen,DurationMonths,PhoneService,MultipleLines,NoInternet,DSLInternet,FiberOpticInternet,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,MonthlyContract,AnnualContract,BiannualContract,AutoBankTransfer,AutoCreditCard,ElectronicCheck,MailedCheck,MonthlyCharges,Churn
0,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,29.85,0
1,1,0,0,0,34,1,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,0,1,56.95,0
2,1,0,0,0,2,1,0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,53.85,1
3,1,0,0,0,45,0,0,0,1,0,1,0,1,1,0,0,0,1,0,1,0,0,0,42.30,0
4,0,0,0,0,2,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,70.70,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7038,1,1,1,0,24,1,1,0,1,0,1,0,1,1,1,1,0,1,0,0,0,0,1,84.80,0
7039,0,1,1,0,72,1,1,0,0,1,0,1,1,0,1,1,0,1,0,0,1,0,0,103.20,0
7040,0,1,1,0,11,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,1,0,29.60,0
7041,1,1,0,1,4,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,74.40,1


In [10]:
def load_pickled_model(path):
    """Return the object deserialized from the pickle file at ``path``.

    Parameters
    ----------
    path : str
        Location of the pickle file to read.

    Returns
    -------
    object
        Whatever object was stored in the file.

    Raises
    ------
    FileNotFoundError
        If ``path`` does not exist.
    pickle.UnpicklingError
        If the file content is not a valid pickle.
    """
    # SECURITY: pickle.load can execute arbitrary code while deserializing.
    # Only load files that were produced by this project.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# One call per saved estimator; the bound names are the ones used by the rest
# of the notebook.
logreg_base = load_pickled_model('../data/logreg_base.pkl')
logreg_best = load_pickled_model('../data/logreg_best.pkl')
dtree_base = load_pickled_model('../data/dtree_base.pkl')
dtree_best = load_pickled_model('../data/dtree_best.pkl')

In [11]:
# Seed shared by the train/test split and the tree models so that a fresh
# Restart & Run All reproduces the same predictions.
RANDOM_STATE = 42

# 'Unnamed: 0' appears to be the row index that was written out with the CSV
# (it runs 0..7041 in the displayed frame). It identifies a row rather than
# describing a customer, so it is excluded from the features.
# errors='ignore' keeps this line valid if the column is already absent.
y = df['Churn']
X = df.drop(columns=['Churn', 'Unnamed: 0'], errors='ignore')
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=RANDOM_STATE
)

# NOTE(review): the four estimators below are fitted from scratch and rebind
# logreg_base / logreg_best / dtree_base / dtree_best, replacing whatever
# objects those names held before this cell ran - confirm that refitting here
# (rather than evaluating previously saved models) is intended.

# Baseline logistic regression with scikit-learn defaults.
logreg_base = LogisticRegression()
logreg_base.fit(X_train, y_train)
logreg_ypred = logreg_base.predict(X_test)
# Column 1 of predict_proba is the probability of class 1 (churn).
logreg_ypred_proba = logreg_base.predict_proba(X_test)[:, 1]

# Logistic regression with the tuned inverse-regularization strength C.
logreg_best = LogisticRegression(C=1e-2)
logreg_best.fit(X_train, y_train)
logbest_ypred = logreg_best.predict(X_test)
logbest_ypred_proba = logreg_best.predict_proba(X_test)[:, 1]

# Baseline decision tree; seeded because tree construction is randomized.
dtree_base = DecisionTreeClassifier(random_state=RANDOM_STATE)
dtree_base.fit(X_train, y_train)
dtree_base_ypred = dtree_base.predict(X_test)

# Decision tree with the tuned hyperparameters, seeded for the same reason.
dtree_best = DecisionTreeClassifier(
    max_depth=10,
    min_samples_split=100,
    min_samples_leaf=75,
    criterion='entropy',
    random_state=RANDOM_STATE,
)
dtree_best.fit(X_train, y_train)
dtree_best_ypred = dtree_best.predict(X_test)

In [12]:
# Sanity-check the predictions produced above before reporting success: every
# prediction array must contain exactly one entry per test-set row. A failed
# check raises AssertionError instead of being silently swallowed.
assert all(
    len(pred) == len(y_test)
    for pred in (
        logreg_ypred, logreg_ypred_proba,
        logbest_ypred, logbest_ypred_proba,
        dtree_base_ypred, dtree_best_ypred,
    )
), 'prediction arrays do not match the size of the test set'
print('Script Executed Successfully!')

Script Executed Successfully!
