In [1]:
# Basic libraries
import numpy as np
import pandas as pd

# Visualizations
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
from matplotlib import pyplot as plt
from pdpbox import pdp, get_dataset, info_plots
import seaborn as sns
import pickle
import plotly
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)
import plotly.io as pio
pio.templates.default = "plotly_white"

# Warnings
import warnings
warnings.filterwarnings('ignore')

# Sklearn libraries
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn import svm
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.linear_model import LogisticRegression

from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier
from sklearn import metrics
from sklearn.metrics import roc_curve
from sklearn.metrics import recall_score, confusion_matrix, precision_score, f1_score, accuracy_score, classification_report

from sklearn.ensemble import VotingClassifier
from sklearn.metrics import confusion_matrix, accuracy_score 
from sklearn.metrics import f1_score, precision_score, recall_score, fbeta_score
from statsmodels.stats.outliers_influence import variance_inflation_factor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import ShuffleSplit
from sklearn.model_selection import KFold
from sklearn import feature_selection
from sklearn import model_selection
from sklearn import metrics
from sklearn.metrics import classification_report, precision_recall_curve
from sklearn.metrics import auc, roc_auc_score, roc_curve
from sklearn.metrics import make_scorer, recall_score, log_loss
from sklearn.metrics import average_precision_score
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import plot_roc_curve


# Model explainability
import shap
import xgboost
from pdpbox import pdp, get_dataset, info_plots
import eli5
from eli5.sklearn import PermutationImportance
#from lime.lime_tabular import LimeTabularExplainer



In [2]:
# Data Read
# The CSV location can be overridden with the CHURN_DATA_PATH environment
# variable so the notebook is not tied to a single machine's directory layout.
# The original absolute path is kept as the fallback, so existing runs behave
# exactly as before.
import os

DATA_PATH = os.environ.get(
    'CHURN_DATA_PATH',
    '/Users/jasonrobinson/Documents/Projects/Customer-Churn/data/telecom_customer_churn.csv',
)
df = pd.read_csv(DATA_PATH)

In [None]:
# Data Preprocessing with Pandas Profiling
import pandas_profiling



In [None]:
# Normalise the column names: remove embedded spaces and lower-case them so
# columns can be addressed as plain attributes (e.g. df.customerstatus).
df.columns = [name.replace(" ", "").lower() for name in df.columns]

#### Dealing with Nulls

This dataset comes from a telecommunications company that offers two main services:

1- Phone Service <br>
2- Internet Service<br>

Not every customer uses both services, and this is what causes the nulls in the dataset.

So, for customers who only use internet services, I have replaced the nulls in the phone-service features with "No phone service"; similarly, for customers who only use phone services, the nulls in the internet-service features have been replaced with "No internet service".

In [None]:
df.avgmonthlylongdistancecharges=df.avgmonthlylongdistancecharges.fillna(0.0)

In [None]:
df.multiplelines=df.multiplelines.fillna('no phone service')

In [None]:
no_internet=['internettype','onlinesecurity','onlinebackup','deviceprotectionplan','premiumtechsupport','streamingtv',
             'streamingmovies','streamingmusic','unlimiteddata']
df[no_internet]=df[no_internet].fillna('no internet service')

In [None]:
df.avgmonthlygbdownload=df.avgmonthlygbdownload.fillna(0)

Dropping features that are of no importance for my objective: I have dropped the geographical features after finding no correlation with the other features. Columns like churn category and churn reason are also out of scope for the machine learning model; they can be useful in exploratory analysis, but EDA is not the focus of this notebook.

In [None]:
# Remove the identifier, the churn explanation columns, refunds and the
# geographical columns before modelling.
df = df.drop(
    [
        'customerid',
        'churncategory',
        'churnreason',
        'totalrefunds',
        'zipcode',
        'longitude',
        'latitude',
        'city',
    ],
    axis=1,
)

To avoid bias in the predictions, I have dropped the records of customers who joined the company recently.

In [None]:
# Keep only rows whose customer status does not contain 'Join', then rebuild a
# clean 0..n-1 index.
df = df[~df['customerstatus'].str.contains('Join')].reset_index(drop=True)

#### Exploratory Data Analysis

In [None]:
# Count customers per status once and derive the slice labels from the same
# Series, so labels and values cannot get out of step (the previous hard-coded
# label list only matched while 'Stayed' happened to be the larger group).
status_counts = df['customerstatus'].value_counts()
# Display names for the churn outcome; any other status keeps its raw name.
churn_labels = {'Stayed': 'No', 'Churned': 'yes'}
type_ = [churn_labels.get(status, status) for status in status_counts.index]
fig = make_subplots(rows=1, cols=1)

fig.add_trace(go.Pie(labels=type_, values=status_counts.values, name="customerstatus"))

# Use `hole` to create a donut-like pie chart
fig.update_traces(hole=.4, hoverinfo="label+percent+name", textfont_size=16)

fig.update_layout(
    title_text="Churn Distributions",
    # Add annotations in the center of the donut pies.
    annotations=[dict(text='Churn', x=0.5, y=0.5, font_size=20, showarrow=False)])
fig.show()

In [None]:
df.customerstatus[df.customerstatus == 'Stayed'].groupby(by = df.gender).count()

In [None]:
df.customerstatus[df.customerstatus == 'Churned'].groupby(by = df.gender).count()

In [None]:
# Grouped histogram of customer status, coloured by contract type.
# The title's bold markup is now closed with </b> (it previously opened a
# second <b> instead of closing the first).
fig = px.histogram(
    df,
    x="customerstatus",
    color="contract",
    barmode="group",
    title="<b>Customer contract distribution</b>",
)
fig.update_layout(width=700, height=500, bargap=0.2)
fig.show()

#### Feature Transformation and Feature Scaling

1- Features with two unique values were replaced by 1 and 0.<br>
2- Features with more than two unique values were encoded using a label encoder.<br>
3- Continuous features were standardized using scikit-learn's StandardScaler.

In [None]:
# Shared encoder, re-fitted for each qualifying column.
le = LabelEncoder()

# Encode every object-typed column (the first column is skipped) that has at
# most two distinct values, and count how many columns were converted.
# NOTE(review): LabelEncoder assigns codes in sorted label order, so if
# customerstatus holds 'Churned'/'Stayed' then 'Stayed' becomes class 1 and is
# the positive class for later precision/recall/F1 — confirm this is intended.
le_count = 0
for col in df.columns[1:]:
    if df[col].dtype != 'object' or len(df[col].unique()) > 2:
        continue
    df[col] = le.fit_transform(df[col])
    le_count += 1
print(f'{le_count} columns were label encoded.')

In [None]:
# Binary gender flag: 1 for 'Female', 0 for anything else.
df['gender'] = (df['gender'] == 'Female').astype('int64')

In [None]:
def encode_data(dataframe):
    """Label-encode a single column when it has object dtype.

    Despite the parameter name, this is applied column by column, so
    `dataframe` is one column of the frame. Columns with any other dtype are
    returned unchanged; object columns are replaced by the integer codes
    produced by a fresh LabelEncoder.
    """
    if dataframe.dtype != "object":
        return dataframe
    return LabelEncoder().fit_transform(dataframe)

# Run the encoder over every column; only object-typed columns are changed.
data = df.apply(encode_data)
data.head()

In [None]:
X = data.drop(columns = "customerstatus")
y = data["customerstatus"].values

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 4, stratify =y)

In [None]:
# Continuous columns to standardise. 'totallongdistancecharges' was listed
# twice before; each column now appears exactly once.
col = [
    'totalcharges',
    'avgmonthlylongdistancecharges',
    'monthlycharge',
    'totalrevenue',
    'totallongdistancecharges',
    'tenureinmonths',
    'totalextradatacharges',
]

In [None]:
# Work on explicit copies so the column assignments below do not write into
# slices of the original frame.
X_train = X_train.copy()
X_test = X_test.copy()

# Fit the scaler on the training split only and reuse those statistics for the
# test split. Fitting a second scaler on the test data (as before) leaks test
# information and puts the two splits on different scales.
scaler = StandardScaler()
X_train[col] = scaler.fit_transform(X_train[col])
X_test[col] = scaler.transform(X_test[col])

#### Model Building Without Optimization

Since it's a binary classification problem, I have tried building several classification models.
At first I have just used the base models and evaluated them to check how they are performing.

In [None]:
# Baseline classifiers, all with default hyper-parameters, as (name, estimator).
models = [
    ('Logistic Regression', LogisticRegression()),
    ('Kernel SVM', SVC()),
    ('KNN', KNeighborsClassifier()),
    ('Gaussian NB', GaussianNB()),
    ('Random Forest', RandomForestClassifier()),
    ('Decision Tree Classifier', DecisionTreeClassifier()),
]

In [None]:
# Per-model arrays of fold-level scores, in the same order as `names`.
acc_results = []
auc_results = []
pre_results = []
f1_results = []
names = []

result_col = ["Algorithm", "ROC AUC", "Accuracy", "Precision", "f1 Score"]
model_results = pd.DataFrame(columns=result_col)

# All four metrics come from one cross-validation pass per model, so they are
# measured on the same fitted estimators and folds (and each model is fitted
# once per fold rather than four times).
cv_metrics = ["accuracy", "roc_auc", "precision", "f1"]

for i, (name, model) in enumerate(models):
    names.append(name)

    cv_scores = model_selection.cross_validate(model, X_train, y_train,
                                               scoring=cv_metrics)
    cv_acc_results = cv_scores["test_accuracy"]
    cv_auc_results = cv_scores["test_roc_auc"]
    cv_pre_results = cv_scores["test_precision"]
    cv_f1_results = cv_scores["test_f1"]

    acc_results.append(cv_acc_results)
    auc_results.append(cv_auc_results)
    pre_results.append(cv_pre_results)
    f1_results.append(cv_f1_results)

    # Values follow the order of `result_col`: ROC AUC first, then accuracy.
    # (They were previously swapped, so the "ROC AUC" column held accuracy.)
    model_results.loc[i] = [name,
                            round(cv_auc_results.mean()*100, 2),
                            round(cv_acc_results.mean()*100, 2),
                            round(cv_pre_results.mean()*100, 2),
                            round(cv_f1_results.mean()*100, 2)]

model_results.sort_values(by=['ROC AUC'], ascending=False)

In [None]:
modelD = DecisionTreeClassifier()

In [None]:
modelD.fit(X_train, y_train)

In [None]:
pred_train_ID = modelD.predict(X_train)
pred_test_ID = modelD.predict(X_test)

In [None]:
acc_train = accuracy_score(y_train, pred_train_ID)
acc_test = accuracy_score(y_test, pred_test_ID)
print(f'Traning Accuracy: {acc_train}')
print(f'Testing Accuracy: {acc_test}')

In [None]:
# Search grid for the decision tree: both split criteria, and max_leaf_nodes
# from 5 up to 24.
tuned_parameters = [{
    'criterion': ['gini', 'entropy'],
    'max_leaf_nodes': range(5, 25),
}]

# Exhaustive grid search on the training split, using all available cores.
clf_D = GridSearchCV(DecisionTreeClassifier(), tuned_parameters,
                     n_jobs=-1, verbose=1)
clf_D.fit(X_train, y_train)

print("\nBest parameters found:")
print(clf_D.best_params_)

print("\nGrid scores:")
means_D = clf_D.cv_results_['mean_test_score']  # mean score across folds
stds_D = clf_D.cv_results_['std_test_score']    # standard deviation across folds
# One line per candidate: mean score with a +/- 2 standard deviation band.
for mean, std, params in zip(means_D, stds_D, clf_D.cv_results_['params']):
    print("{:.3f} (+/-{:.3f}) for {!r}".format(mean, std * 2, params))

Evaluating Decision Tree classifier with selected hyper-parameters
Based on our 5-fold-cross-validation, we use a model with the following hyper-parameters:

criterion = 'gini'
max_leaf_nodes = 19

In [None]:
# Hyper-parameter values chosen for the tuned decision tree.
criterion = 'gini'
max_leaf_nodes = 19

# Fit the tuned tree on the training split and predict the held-out split.
good_model_D = DecisionTreeClassifier(max_leaf_nodes=max_leaf_nodes,
                                      criterion=criterion)
print(good_model_D)
good_model_D.fit(X_train, y_train)
pred_D = good_model_D.predict(X_test)

print("Results on test data")
acc_D = accuracy_score(y_test, pred_D)
prec_D = precision_score(y_test, pred_D)
reca_D = recall_score(y_test, pred_D)
# Each headline metric is printed with four decimal places.
for metric_name, metric_value in (('accuracy', acc_D),
                                  ('precision', prec_D),
                                  ('recall', reca_D)):
    print(f'Test {metric_name} = {metric_value: .4f}')

print("Classification report:")
print(classification_report(y_test, pred_D))
print("Confusion matrix (Rows actual, Columns predicted):")
print(pd.DataFrame(confusion_matrix(y_test, pred_D)))
print('\nROC curve')
plot_roc_curve(good_model_D, X_test, y_test)
plt.show()

#### Random Forest Classifier

In [None]:
modelR = RandomForestClassifier()

In [None]:
modelR.fit(X_train, y_train)

In [None]:
pred_train = modelR.predict(X_train)
pred_test = modelR.predict(X_test)

In [None]:
acc_train = accuracy_score(pred_train, y_train)
acc_test = accuracy_score(pred_test, y_test)
print(f'Training accuracy {acc_train: .3f}') 
print(f'Testing accuracy {acc_test: .3f}')

In [None]:
# Search grid for the random forest: forest size, features considered per
# split, and the leaf budget per tree.
tuned_parameters = [{
    'n_estimators': [25, 50, 75],
    'max_features': [15, 20, 25],
    'max_leaf_nodes': [8, 16, 24],
}]

# Exhaustive grid search on the training split, using all available cores.
clfR = GridSearchCV(RandomForestClassifier(), tuned_parameters,
                    n_jobs=-1, verbose=1)
clfR.fit(X_train, y_train)

print("\nBest parameters found:")
print(clfR.best_params_)

print("\nGrid scores:")
means = clfR.cv_results_['mean_test_score']  # mean score across folds
stds = clfR.cv_results_['std_test_score']    # standard deviation across folds
# One line per candidate: mean score with a +/- 2 standard deviation band.
for mean, std, params in zip(means, stds, clfR.cv_results_['params']):
    print("{:.3f} (+/-{:.3f}) for {!r}".format(mean, std * 2, params))

In [None]:
# 5. Evaluating the random forest classifier with the selected hyper-parameters
# Based on our 5-fold cross-validation, we use a model with the following hyper-parameters:
#
# max_features = 15
# max_leaf_nodes = 16
# n_estimators = 50

In [None]:
# Non-default hyper-parameter values for the tuned random forest.
max_features = 15
max_leaf_nodes = 16
n_estimators = 50

# Fit the tuned forest on the training split and predict the held-out split.
good_model = RandomForestClassifier(n_estimators=n_estimators,
                                    max_features=max_features,
                                    max_leaf_nodes=max_leaf_nodes)
print(good_model)
good_model.fit(X_train, y_train)
pred = good_model.predict(X_test)

print("Results on test data")
acc = accuracy_score(y_test, pred)
prec = precision_score(y_test, pred)
reca = recall_score(y_test, pred)
# Each headline metric is printed with four decimal places.
for metric_name, metric_value in (('accuracy', acc),
                                  ('precision', prec),
                                  ('recall', reca)):
    print(f'Test {metric_name} = {metric_value: .4f}')

print("Classification report:")
print(classification_report(y_test, pred))
print("Confusion matrix (Rows actual, Columns predicted):")
print(pd.DataFrame(confusion_matrix(y_test, pred)))
print('\nROC curve')
plot_roc_curve(good_model, X_test, y_test)
plt.show()

#### Some models were skipped here; I will go back and demonstrate them at a later time

In [None]:
# Evaluate a support vector classifier with hand-picked hyper-parameters.
C = 1              # regularisation parameter passed to SVC
kernel = 'linear'  # kernel type
gamma = 0.1        # kernel coefficient passed to SVC

# Fit on the training split and predict the held-out split.
good_modelsvm = svm.SVC(kernel=kernel, C=C, gamma=gamma)
print(good_modelsvm)
good_modelsvm.fit(X_train, y_train)
predsvm = good_modelsvm.predict(X_test)

print("Results on test data")
accsvm = accuracy_score(y_test, predsvm)
precsvm = precision_score(y_test, predsvm)
recasvm = recall_score(y_test, predsvm)
# Each headline metric is printed with four decimal places.
for metric_name, metric_value in (('accuracy', accsvm),
                                  ('precision', precsvm),
                                  ('recall', recasvm)):
    print(f'Test {metric_name} = {metric_value: .4f}')

print("Classification report:")
print(classification_report(y_test, predsvm))
print("Confusion matrix (Rows actual, Columns predicted):")
print(pd.DataFrame(confusion_matrix(y_test, predsvm)))
print('\nROC curve')
plot_roc_curve(good_modelsvm, X_test, y_test)
plt.show()


In [None]:
# Classifiers with hand-set (non-default) hyper-parameters, as (name, estimator).
models_opt = [
    ('Logistic Regression',
     LogisticRegression(solver='liblinear', C=1000, max_iter=1000, random_state=0)),
    ('SVC',
     SVC(C=1, kernel='linear', gamma=1, random_state=0)),
    ('Kernel SVM',
     SVC(C=1, kernel='rbf', gamma=1, random_state=0)),
    ('KNN',
     KNeighborsClassifier(n_neighbors=20, metric='euclidean', p=2, algorithm='brute')),
    ('Gaussian NB',
     GaussianNB()),
    ('Decision Tree Classifier',
     DecisionTreeClassifier(criterion='gini', max_leaf_nodes=19, random_state=0)),
    ('Random Forest',
     RandomForestClassifier(max_leaf_nodes=16, max_features=15, n_estimators=50,
                            criterion='entropy', random_state=0)),
]

In [None]:
# Per-model arrays of fold-level scores, in the same order as `names_opt`.
acc_results_opt = []
auc_results_opt = []
pre_results_opt = []
f1_results_opt = []
names_opt = []

result_col_opt = ["Algorithm", "ROC AUC", "Accuracy", 'Precision', 'F1 Scores']
model_results_opt = pd.DataFrame(columns=result_col_opt)

# 10-fold cross validation. The splitter is unshuffled, so one instance can be
# shared by every model.
kfold = model_selection.KFold(n_splits=10)
cv_metrics_opt = ["accuracy", "roc_auc", "precision", "f1"]

# Iterate over the tuned estimators in `models_opt`. The loop previously walked
# the default-parameter `models` list, so the tuned settings were never scored.
for i, (name, model) in enumerate(models_opt):
    names_opt.append(name)

    # One cross-validation pass yields all four metrics on identical folds.
    cv_scores_opt = model_selection.cross_validate(model, X_train, y_train,
                                                   cv=kfold, scoring=cv_metrics_opt)
    cv_acc_results_opt = cv_scores_opt["test_accuracy"]
    cv_auc_results_opt = cv_scores_opt["test_roc_auc"]
    cv_pre_results_opt = cv_scores_opt["test_precision"]
    cv_f1_results_opt = cv_scores_opt["test_f1"]

    acc_results_opt.append(cv_acc_results_opt)
    auc_results_opt.append(cv_auc_results_opt)
    pre_results_opt.append(cv_pre_results_opt)
    f1_results_opt.append(cv_f1_results_opt)
    model_results_opt.loc[i] = [name,
                                round(cv_auc_results_opt.mean()*100, 2),
                                round(cv_acc_results_opt.mean()*100, 2),
                                round(cv_pre_results_opt.mean()*100, 2),
                                round(cv_f1_results_opt.mean()*100, 2)]

model_results_opt.sort_values(by=['ROC AUC'], ascending=False)

In [None]:
# Held-out evaluation of the tuned models: each estimator is fitted on the
# training split and scored once on the test split.
# Previously this cell cross-validated *inside* the test split (re-fitting the
# models on test data) and iterated the default-parameter `models` list.
acc_results_opt = []
auc_results_opt = []
pre_results_opt = []
f1_results_opt = []
names_opt = []

result_col_opt = ["Algorithm", "ROC AUC", "Accuracy", 'Precision', 'F1 Scores']
model_results_opt = pd.DataFrame(columns=result_col_opt)

# scikit-learn scorers pick the right prediction method per estimator (e.g.
# decision_function or predict_proba for ROC AUC).
test_scorers = {metric: metrics.get_scorer(metric)
                for metric in ("roc_auc", "accuracy", "precision", "f1")}

for i, (name, model) in enumerate(models_opt):
    names_opt.append(name)
    model.fit(X_train, y_train)

    test_scores = {metric: scorer(model, X_test, y_test)
                   for metric, scorer in test_scorers.items()}

    # Each list now holds one test-set score per model (not per-fold arrays).
    acc_results_opt.append(test_scores["accuracy"])
    auc_results_opt.append(test_scores["roc_auc"])
    pre_results_opt.append(test_scores["precision"])
    f1_results_opt.append(test_scores["f1"])
    model_results_opt.loc[i] = [name,
                                round(test_scores["roc_auc"]*100, 2),
                                round(test_scores["accuracy"]*100, 2),
                                round(test_scores["precision"]*100, 2),
                                round(test_scores["f1"]*100, 2)]

model_results_opt.sort_values(by=['ROC AUC'], ascending=False)

In [None]:
# Permutation importance of the tuned decision tree, measured on the test split.
perm = PermutationImportance(good_model_D, random_state=1)
perm.fit(X_test, y_test)
eli5.show_weights(perm, feature_names=list(X_test.columns))

In [None]:
# Keep only rows with a non-negative monthly charge and build the reduced model
# from that filtered frame. The filter result used to be ignored: X and y were
# taken from the unfiltered `data`.
data_d = data.query('monthlycharge >= 0')
y1_d = data_d.customerstatus
base_features_d = ['tenureinmonths', 'contract', 'numberofreferrals', 'age', 'monthlycharge']
X1_d = data_d[base_features_d]
train_X1_d, val_X1_d, train_y1_d, val_y1_d = train_test_split(X1_d, y1_d, random_state=1)
# Small decision tree on the selected features, used for partial-dependence plots.
good_model_D1 = DecisionTreeClassifier(criterion='entropy', max_leaf_nodes=19,
                                       random_state=0).fit(train_X1_d, train_y1_d)
print("Data sample:")
data_d.head()

In [None]:
# One partial-dependence plot per feature of the reduced decision tree,
# computed on its validation split.
for feat_name in base_features_d:
    pdp_dist = pdp.pdp_isolate(
        feature=feat_name,
        model_features=base_features_d,
        dataset=val_X1_d,
        model=good_model_D1,
    )
    pdp.pdp_plot(pdp_dist, feat_name)
    plt.show()

In [None]:
# SHAP values for the tuned decision tree.
explainer = shap.TreeExplainer(good_model_D)
# Explain the same rows that are passed to the plot. The values were previously
# computed on the full `X` but plotted against `X_train`, so the two inputs had
# different row counts.
shap_values = explainer.shap_values(X_train)

shap.summary_plot(shap_values, X_train)

In [None]:
# Permutation importance of the tuned random forest, measured on the test split.
perm = PermutationImportance(good_model, random_state=1)
perm.fit(X_test, y_test)
eli5.show_weights(perm, feature_names=list(X_test.columns))

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Keep only rows with a non-negative monthly charge and build the model from
# that filtered frame. The filter result used to be ignored: X, y and the
# displayed sample all came from the unfiltered `data`.
data1 = data.query('monthlycharge >= 0')
y1 = data1.customerstatus
base_features = ['tenureinmonths', 'contract', 'numberofreferrals', 'age',
                 'numberofdependents', 'monthlycharge']
X1 = data1[base_features]
train_X1, val_X1, train_y1, val_y1 = train_test_split(X1, y1, random_state=1)
# Random forest regressor on the encoded target, used for partial-dependence plots.
first_model = RandomForestRegressor(n_estimators=50, random_state=1).fit(train_X1, train_y1)
print("Data sample:")
data1.head()


In [None]:
# One partial-dependence plot per feature of the random forest regressor,
# computed on its validation split.
for feat_name in base_features:
    pdp_dist = pdp.pdp_isolate(
        feature=feat_name,
        model_features=base_features,
        dataset=val_X1,
        model=first_model,
    )
    pdp.pdp_plot(pdp_dist, feat_name)
    plt.show()

In [None]:
# Predict the class of the first test row; slicing with [:1] keeps the
# (1, n_features) shape the estimator expects.
good_model.predict(X_test.values[:1])

In [None]:
# Two-way partial dependence of the random forest regressor on tenure and contract.
fnames = ['tenureinmonths', 'contract']
partial_plot = pdp.pdp_interact(
    features=fnames,
    model_features=base_features,
    dataset=val_X1,
    model=first_model,
)
pdp.pdp_interact_plot(
    plot_type='contour',
    feature_names=fnames,
    pdp_interact_out=partial_plot,
)
plt.show()

In [None]:
# The two features singled out for the next cell.
imp_features = ["contract", "tenureinmonths"]

In [None]:
imp_features = ["contract", "tenureinmonths"]

# Non-default hyper-parameter values for the random forest.
max_features = 15
max_leaf_nodes = 16
n_estimators = 50

# Fit on the full training feature set and predict the held-out split.
good_model = RandomForestClassifier(n_estimators=n_estimators,
                                    max_features=max_features,
                                    max_leaf_nodes=max_leaf_nodes)
print(good_model)
good_model.fit(X_train, y_train)
pred = good_model.predict(X_test)

print("Results on test data")
acc = accuracy_score(y_test, pred)
prec = precision_score(y_test, pred)
reca = recall_score(y_test, pred)
# Each headline metric is printed with four decimal places.
for metric_name, metric_value in (('accuracy', acc),
                                  ('precision', prec),
                                  ('recall', reca)):
    print(f'Test {metric_name} = {metric_value: .4f}')

print("Classification report:")
print(classification_report(y_test, pred))
print("Confusion matrix (Rows actual, Columns predicted):")
print(pd.DataFrame(confusion_matrix(y_test, pred)))
print('\nROC curve')
plot_roc_curve(good_model, X_test, y_test)
plt.show()

In [None]:
# `good_modelL` is not defined anywhere earlier in this notebook, so the cell
# failed with NameError on a fresh kernel. It is built here with the
# logistic-regression settings listed in `models_opt`.
# NOTE(review): confirm these are the hyper-parameters originally intended.
good_modelL = LogisticRegression(solver='liblinear', C=1000, max_iter=1000, random_state=0)
good_modelL.fit(X_train, y_train)

# Permutation importance of the logistic regression, measured on the test split.
perm = PermutationImportance(good_modelL, random_state=1).fit(X_test, y_test)
eli5.show_weights(perm, feature_names = X_test.columns.tolist())

In [None]:
y2 = data.customerstatus
base_features2=['tenureinmonths','contract','numberofreferrals','married','totalcharges']
X2 = data[base_features]
train_X2, val_X2, train_y2, val_y2 = train_test_split(X2, y, random_state=1)
second_model= LogisticRegression(C=1000, max_iter= 1000).fit(train_X2, train_y2)
print("Data sample:")
data.head()

In [None]:
for feat_name in base_features:
    pdp_dist =  pdp.pdp_isolate(model=second_model, dataset=val_X2,
                               model_features=base_features, feature=feat_name)
    pdp.pdp_plot(pdp_dist, feat_name)
    plt.show()

In [None]:
# Logistic regression restricted to the features judged most important.
imp_features = ['contract', 'tenureinmonths', 'numberofreferrals', 'married']
model_impL = LogisticRegression(C=1000, max_iter=1000)
model_impL.fit(X_train[imp_features], y_train)
# Predict the held-out split and store the result in `predL`, the name every
# metric below reads. Previously the training split was predicted into `pred`,
# leaving `predL` undefined.
predL = model_impL.predict(X_test[imp_features])
print("Results on test data")
accL = accuracy_score(y_test, predL)    # accuracy on test examples
precL = precision_score(y_test, predL)  # precision on test examples
recaL = recall_score(y_test, predL)     # recall on test examples
print(f'Test accuracy = {accL: .4f}')   # four decimal places
print(f'Test precision = {precL: .4f}')
print(f'Test recall = {recaL: .4f}')
print("Classification report:")
print(classification_report(y_test, predL))
print("Confusion matrix (Rows actual, Columns predicted):")
print(pd.DataFrame(confusion_matrix(y_test, predL)))
print('\nROC curve')
plot_roc_curve(model_impL, X_test[imp_features], y_test)
plt.show()

In [None]:
# `good_modelknn` is not defined anywhere earlier in this notebook, so the cell
# failed with NameError on a fresh kernel. It is built here with the KNN
# settings listed in `models_opt`.
# NOTE(review): confirm these are the hyper-parameters originally intended.
good_modelknn = KNeighborsClassifier(n_neighbors=20, metric='euclidean', p=2, algorithm='brute')
good_modelknn.fit(X_train, y_train)

# Permutation importance of the KNN classifier, measured on the test split.
perm = PermutationImportance(good_modelknn, random_state=1).fit(X_test, y_test)
eli5.show_weights(perm, feature_names = X_test.columns.tolist())

In [None]:
# Permutation importance of the support vector classifier, measured on the test split.
perm = PermutationImportance(good_modelsvm, random_state=1)
perm.fit(X_test, y_test)
eli5.show_weights(perm, feature_names=list(X_test.columns))

#### Conclusion
SIX MACHINE LEARNING MODELS WERE BUILT AND TRAINED

RANDOM FOREST OUTPERFORMED THE OTHER MODELS

THE MOST IMPORTANT FEATURES ARE

    CONTRACT
    MONTHLY CHARGE
    TENURE IN MONTHS
    NUMBER OF REFERRALS
    NUMBER OF DEPENDENTS

Further work can be done on model explanation using LIME and SHAP methods, which would build more trust in, and reliability of, the models used above.