In [None]:
import pandas as pd
import numpy as np
import missingno as msno
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import tensorflow as tf
from functools import partial
from tensorflow import keras
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from statistics import mean
import warnings
warnings.filterwarnings('ignore')

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.ensemble import VotingClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from functools import partial
from tensorflow import keras
from imblearn.over_sampling import SMOTE
from sklearn import metrics
from sklearn.metrics import roc_curve,auc,roc_auc_score
from sklearn.metrics import recall_score, confusion_matrix, precision_score, f1_score, accuracy_score, classification_report
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.pipeline import Pipeline
from keras.layers import Dense
from keras.models import Sequential

In [None]:
data = pd.read_csv("data.csv")

In [None]:
##-----------------Understanding the data-----------------------

In [None]:
##Note : Each row represents a customer, each column 
##contains customer’s attributes described on the column Metadata.

In [None]:
data.head()

In [None]:
##---------------The data set includes information about-----------------
##-1-Customers who left within the last month: the column is called Churn
##-2-Services that each customer has signed up for: phone, multiple lines, internet, online security, online backup, device protection, tech support, and streaming TV and movies
##-3-Customer account information : how long they’ve been a customer, contract, payment method, paperless billing, monthly charges, and total charges
##-4-Demographic info about customers : gender, age range, and if they have partners and dependents

In [None]:
data.shape

In [None]:
data.info()

In [None]:
data.columns.values

In [None]:
data.dtypes

In [None]:
##-------The target that we will use to guide the exploration is Churn----------

In [None]:
##---------------Visualize missing values---------------------

In [None]:
msno.matrix(data);

In [None]:
##--------Using this matrix we can very quickly find the pattern of missingness in the dataset.----


In [None]:
##--------From the above visualisation we can observe that it has no
## peculiar pattern that stands out. In fact there is no missing data.

In [None]:
##---------------------Data Manipulation ------------------------

In [None]:
data=data.drop(['customerID'],axis=1)
data.head()

In [None]:
##------On deep analysis, we can find some indirect missingness in our data 
## (which can be in form of blankspaces). Let's see that!

In [None]:
data['TotalCharges'] = pd.to_numeric(data.TotalCharges, errors='coerce')
data.isnull().sum()

In [None]:
##---------Here we see that the TotalCharges has 11 missing values. Let's check this data.-----

In [None]:
data[np.isnan(data['TotalCharges'])]

In [None]:
##----It can also be noted that the Tenure column is 0 for these 
## entries even though the MonthlyCharges column is not empty. -----

In [None]:
##-------Let's see if there are any other 0 values in the tenure column.------

In [None]:
data[data['tenure'] == 0].index

In [None]:
##-----There are no additional zero values in the tenure column beyond those 11 rows.

In [None]:
##------Let's delete the rows where tenure is 0 (the rows whose TotalCharges is missing),
## since there are only 11 of them and deleting them will not affect the data.

In [None]:
data.drop(labels=data[data['tenure'] == 0].index, axis=0, inplace=True)
data[data['tenure'] == 0].index

In [None]:
##-----To solve the problem of missing values in TotalCharges 
## column, I decided to fill it with the mean of TotalCharges values.

In [None]:
# fillna returns a new object instead of modifying `data`, so the result must be
# assigned back. Only TotalCharges is imputed, using that column's own mean.
data["TotalCharges"] = data["TotalCharges"].fillna(data["TotalCharges"].mean())

In [None]:
data.isnull().sum()

In [None]:
data.head()

In [None]:
data["SeniorCitizen"]= data["SeniorCitizen"].map({0: "No", 1: "Yes"})
data.head()

In [None]:
##---------------- Data Visualization ------------------------

In [None]:
# Take the labels from value_counts() itself so every label is paired with its
# own count; value_counts() orders by frequency, not by a fixed category order.
gender_counts = data['gender'].value_counts()
churn_counts = data['Churn'].value_counts()
g_labels = gender_counts.index.tolist()
c_labels = churn_counts.index.tolist()
# Create subplots: use 'domain' type for Pie subplot
fig = make_subplots(rows=1, cols=2, specs=[[{'type':'domain'}, {'type':'domain'}]])
fig.add_trace(go.Pie(labels=g_labels, values=gender_counts.values, name="Gender"),
              1, 1)
fig.add_trace(go.Pie(labels=c_labels, values=churn_counts.values, name="Churn"),
              1, 2)

# Use `hole` to create a donut-like pie chart
fig.update_traces(hole=.4, hoverinfo="label+percent+name", textfont_size=24)

fig.update_layout(
    title_text="Gender and Churn Distributions",font_size=24,
    # Add annotations in the center of the donut pies.
    annotations=[dict(text='Gender', x=0.16, y=0.5, font_size=26, showarrow=False),
                 dict(text='Churn', x=0.84, y=0.5, font_size=26, showarrow=False)])
fig.show()

In [None]:
# -------------------------Result----------------------------------
# 26.6 % of customers switched to another firm.
# Customers are 49.5 % female and 50.5 % male.
# -----------------------------------------------------------


In [None]:
data["Churn"][data["Churn"]=="No"].groupby(by=data["gender"]).count()

In [None]:
data["Churn"][data["Churn"]=="Yes"].groupby(by=data["gender"]).count()

In [None]:
plt.figure(figsize=(10, 10))

# Derive the counts from `data` instead of hard-coding them, so the chart cannot
# drift out of sync with the rows actually present. This cell expects the raw
# 'Yes'/'No' and 'Female'/'Male' labels, i.e. it must run before any encoding.
churned = data["Churn"] == "Yes"
retained = data["Churn"] == "No"
female = data["gender"] == "Female"
male = data["gender"] == "Male"

labels =["Churn: Yes","Churn:No"]
values = [int(churned.sum()), int(retained.sum())]
labels_gender = ["F","M","F","M"]
# Same order as labels_gender: churned F, churned M, retained F, retained M.
sizes_gender = [int((churned & female).sum()), int((churned & male).sum()),
                int((retained & female).sum()), int((retained & male).sum())]
colors = ['#ff6666', '#66b3ff']
colors_gender = ['#c2c2f0','#ffb3e6', '#c2c2f0','#ffb3e6']
explode = (0.3,0.3) 
explode_gender = (0.1,0.1,0.1,0.1)
textprops = {"fontsize":24}
# Outer ring: churn split; inner ring: gender split within each churn group.
plt.pie(values, labels=labels,autopct='%1.1f%%',pctdistance=1.08, labeldistance=0.8,colors=colors, startangle=90,frame=True, explode=explode,radius=10, textprops =textprops, counterclock = True, )
plt.pie(sizes_gender,labels=labels_gender,colors=colors_gender,startangle=90, explode=explode_gender,radius=7, textprops =textprops, counterclock = True, )
# Draw a white circle in the middle to turn the pies into rings.
centre_circle = plt.Circle((0,0),5,color='black', fc='white',linewidth=0)
fig = plt.gcf()
fig.gca().add_artist(centre_circle)

plt.title('Churn Distribution Gender: Male(M), Female(F)', fontsize=26, y=1.1)

# show plot 
 
plt.axis('equal')
plt.tight_layout()
plt.show()

In [None]:
# -------------------------Result----------------------------------
# There is negligible difference in customer percentage/ count who 
# changed the service provider. Both genders behaved in similar 
#fashion when it comes to migrating to another service provider/firm.
# -----------------------------------------------------------


In [None]:
fig=px.histogram(data, x="Churn", color="Contract", barmode="group", title="Customer contract distribution")
fig.update_layout(width=1000, height=600, bargap=0.1,font_size=24,plot_bgcolor='rgba(0,0,0,0)')
fig.update_xaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.show()

In [None]:
# -------------------------Result----------------------------------
# About 75% of customers with a Month-to-Month Contract opted to 
# move out, as compared to 13% of customers with a One Year Contract 
# and 3% with a Two Year Contract
# -----------------------------------------------------------


In [None]:
# Labels and values must come from the same value_counts() result: unique()
# returns categories in order of first appearance while value_counts() sorts by
# frequency, so pairing the two can attach a label to the wrong slice.
payment_counts = data['PaymentMethod'].value_counts()
labels = payment_counts.index.to_numpy()
values = payment_counts

fig = go.Figure(data=[go.Pie(labels=labels, values=values, hole=.3)])
fig.update_layout(title_text="Payment Method Distribution",font_size=24)
fig.show()

In [None]:
fig = px.histogram(data, x="Churn", color="PaymentMethod", title="Customer Payment Method distribution Churn")
fig.update_layout(width=1000, height=600, bargap=0.1,font_size=24,plot_bgcolor='rgba(0,0,0,0)')
fig.update_xaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.show()

In [None]:
# -------------------------Result----------------------------------
# - Major customers who moved out were having Electronic Check as
#   Payment Method.
#
# - Customers who opted for Credit-Card automatic transfer or Bank 
#   Automatic Transfer and Mailed Check as Payment Method were less 
#   likely to move out. 
# -----------------------------------------------------------


In [None]:
data["InternetService"].unique()

In [None]:
data[data["gender"]=="Male"][["InternetService", "Churn"]].value_counts()

In [None]:
data[data["gender"]=="Female"][["InternetService", "Churn"]].value_counts()

In [None]:
# Grouped bar chart of customer counts per internet service, split by churn
# status (outer x category) and gender (inner x category).
# NOTE(review): the y values are hard-coded; presumably they were copied from
# the two value_counts() cells above and will go stale if `data` changes -
# confirm against those outputs.
fig = go.Figure()
# One trace per internet service; each y list follows the order
# (No, Female), (No, Male), (Yes, Female), (Yes, Male) given in x.
fig.add_trace(go.Bar(
  x = [['Churn:No', 'Churn:No', 'Churn:Yes', 'Churn:Yes'],
       ["Female", "Male", "Female", "Male"]],
  y = [965, 992, 219, 240],
  name = 'DSL',
))

fig.add_trace(go.Bar(
  x = [['Churn:No', 'Churn:No', 'Churn:Yes', 'Churn:Yes'],
       ["Female", "Male", "Female", "Male"]],
  y = [889, 910, 664, 633],
  name = 'Fiber optic',
))

fig.add_trace(go.Bar(
  x = [['Churn:No', 'Churn:No', 'Churn:Yes', 'Churn:Yes'],
       ["Female", "Male", "Female", "Male"]],
  y = [690, 717, 56, 57],
  name = 'No Internet',
))
# Transparent plot background with a black frame drawn on both axes.
fig.update_layout(title_text="Churn Distribution Internet Service and Gender",font_size=24,plot_bgcolor='rgba(0,0,0,0)')
fig.update_xaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.show()

In [None]:
# -------------------------Result----------------------------------
# - A lot of customers choose the Fiber optic service and it's also 
#   evident that the customers who use Fiber optic have high churn 
#   rate, this might suggest a dissatisfaction with this type of internet
#   service.
#
# - Customers having DSL service are fewer in number than Fiber optic 
#   customers and have a lower churn rate compared to Fiber optic service.
# -----------------------------------------------------------


In [None]:
color_map = {"Yes": "#FF97FF", "No": "#AB63FA"}
fig = px.histogram(data, x="Churn", color="Dependents", barmode="group", title="Dependents distribution", color_discrete_map=color_map)
fig.update_layout(width=1000, height=600, bargap=0.1,font_size=24,plot_bgcolor='rgba(0,0,0,0)')
fig.update_xaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.show()

In [None]:
# -------------------------Result----------------------------------
# - Customers without dependents are more likely to churn
# -----------------------------------------------------------


In [None]:
# Grouped churn counts, coloured by whether the customer has a partner.
color_map = {"Yes": '#FFA15A', "No": '#00CC96'}
# Title typo fixed ("Chrun" -> "Churn"); it is shown to the reader on the figure.
fig = px.histogram(data, x="Churn", color="Partner", barmode="group", title="Churn distribution Partners", color_discrete_map=color_map)
fig.update_layout(width=1000, height=600, bargap=0.1,font_size=24,plot_bgcolor='rgba(0,0,0,0)')
fig.update_xaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.show()

In [None]:
# -------------------------Result----------------------------------
# - Customers that don't have partners are more likely to churn
# -----------------------------------------------------------

In [None]:
# Churn counts coloured by senior-citizen status.
color_map = {"Yes": '#00CC96', "No": '#B6E880'}
# Title typo fixed ("Chrun" -> "Churn"); it is shown to the reader on the figure.
fig = px.histogram(data, x="Churn", color="SeniorCitizen", title="Churn distribution Senior Citizen", color_discrete_map=color_map)
fig.update_layout(width=1000, height=600, bargap=0.1,font_size=24,plot_bgcolor='rgba(0,0,0,0)')
fig.update_xaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.show()

In [None]:
# -------------------------Result----------------------------------
# - It can be observed that the fraction of senior citizen is very less.
# - Most of the senior citizens churn.
# -----------------------------------------------------------

In [None]:
color_map = {"Yes": "#FF97FF", "No": "#AB63FA"}
fig = px.histogram(data, x="Churn", color="OnlineSecurity", barmode="group", title="Churn Online Security", color_discrete_map=color_map)
fig.update_layout(width=1000, height=600, bargap=0.1,font_size=24,plot_bgcolor='rgba(0,0,0,0)')
fig.update_xaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.show()

In [None]:
# -------------------------Result----------------------------------
# - Most customers churn in the absence of online security
# -----------------------------------------------------------

In [None]:
# Churn counts coloured by paperless-billing status.
color_map = {"Yes": '#FFA15A', "No": '#00CC96'}
# Title typo fixed ("Chrun" -> "Churn"); it is shown to the reader on the figure.
fig = px.histogram(data, x="Churn", color="PaperlessBilling",  title="Churn distribution Paperless Billing", color_discrete_map=color_map)
fig.update_layout(width=1000, height=600, bargap=0.1,font_size=24,plot_bgcolor='rgba(0,0,0,0)')
fig.update_xaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.show()

In [None]:
# -------------------------Result----------------------------------
# - Customers with Paperless Billing are most likely to churn
# -----------------------------------------------------------

In [None]:
# Grouped churn counts, coloured by tech-support subscription.
# Title typo fixed ("Chrun" -> "Churn"); it is shown to the reader on the figure.
fig = px.histogram(data, x="Churn", color="TechSupport",barmode="group",  title="Churn distribution TechSupport")
fig.update_layout(width=1000, height=600, bargap=0.1,font_size=24,plot_bgcolor='rgba(0,0,0,0)')
fig.update_xaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.show()

In [None]:
# -------------------------Result----------------------------------
# -Customers with no TechSupport are most likely to migrate to 
#  another service provider.
# -----------------------------------------------------------

In [None]:
# Churn counts coloured by phone-service subscription.
color_map = {"Yes": '#00CC96', "No": '#B6E880'}
# Title typo fixed ("Chrun" -> "Churn"); it is shown to the reader on the figure.
fig = px.histogram(data, x="Churn", color="PhoneService", title="Churn distribution Phone Service", color_discrete_map=color_map)
fig.update_layout(width=1000, height=600, bargap=0.1,font_size=24,plot_bgcolor='rgba(0,0,0,0)')
fig.update_xaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.show()

In [None]:
# -------------------------Result----------------------------------
# -Very small fraction of customers don't have a phone service and out  
#  of that, 1/3rd Customers are more likely to churn.
# -----------------------------------------------------------

In [None]:
# Overlaid density estimates of MonthlyCharges for retained vs. churned customers.
plt.figure(figsize=(10, 6))
sns.set_context("paper",font_scale=2)
# `fill=True` replaces the deprecated `shade=True` keyword of sns.kdeplot.
ax = sns.kdeplot(data.MonthlyCharges[(data["Churn"] == 'No') ],
                color="Red", fill = True);
ax = sns.kdeplot(data.MonthlyCharges[(data["Churn"] == 'Yes') ],
                ax =ax, color="Blue", fill= True);
# Legend entries follow the plotting order above: not churned first, then churned.
ax.legend(["Not Churn","Churn"],loc='upper right');
label_font = {'size':'24'}  # Adjust to fit
ax.set_ylabel('Density', fontdict=label_font);
ax.set_xlabel('Monthly Charges', fontdict=label_font);
title_font = {'size':'24'}  # Adjust to fit
ax.set_title('Distribution of monthly charges by churn',fontdict=title_font);

In [None]:
# Side-by-side histogram of MonthlyCharges for churned vs. retained customers.
plt.figure(figsize=(10, 6))
mc_churn_no = data[data.Churn=='No'].MonthlyCharges
mc_churn_yes = data[data.Churn=='Yes'].MonthlyCharges

plt.xlabel('Monthly Charges',fontsize=24)
plt.ylabel('Number of Customers',fontsize=24)
plt.title('Customer Churn Prediction Visualization',fontsize=24)

# The unused blood_sugar_men / blood_sugar_women lists (leftovers unrelated to
# this dataset) were removed; nothing in this cell read them.
plt.hist([mc_churn_yes,mc_churn_no],rwidth=0.95,color=['green','red'],label=['Churn=Yes','Churn=No'])
plt.legend()

In [None]:
# -------------------------Result----------------------------------
# -Customers with higher Monthly Charges are also more likely 
#  to churn
# -----------------------------------------------------------

In [None]:
# Overlaid density estimates of TotalCharges for retained vs. churned customers.
plt.figure(figsize=(10, 6))
# `fill=True` replaces the deprecated `shade=True` keyword of sns.kdeplot.
ax = sns.kdeplot(data.TotalCharges[(data["Churn"] == 'No') ],
                color="Gold", fill = True);
ax = sns.kdeplot(data.TotalCharges[(data["Churn"] == 'Yes') ],
                ax =ax, color="Green", fill= True);
# Legend entries follow the plotting order above: not churned first, then churned.
ax.legend(["Not Churn","Churn"],loc='upper right');
label_font = {'size':'24'}  # Adjust to fit
ax.set_ylabel('Density',fontdict=label_font);
ax.set_xlabel('Total Charges',fontdict=label_font);
title_font = {'size':'24'}  # Adjust to fit
ax.set_title('Distribution of total charges by churn',fontdict=title_font);

In [None]:
fig = px.box(data, x='Churn', y = 'tenure')

# Update yaxis properties
fig.update_yaxes(title_text='Tenure (Months)', row=1, col=1)
# Update xaxis properties
fig.update_xaxes(title_text='Churn', row=1, col=1)
# Update size and title
fig.update_layout(width=1000, height=600,bargap=0.1,font_size=24,
    title='Tenure vs Churn',plot_bgcolor='rgba(0,0,0,0)')
fig.update_xaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.show()

In [None]:
# -------------------------Result----------------------------------
# -New customers are more likely to churn
# -----------------------------------------------------------

In [None]:
plt.figure(figsize=(25, 10))

# Integer-encode only the non-numeric columns. Numeric columns (dtype kind
# i/u/f) keep their real values: factorizing them would replace each number
# with an arbitrary order-of-appearance code and make its correlations meaningless.
encoded = data.copy()
for column in encoded.columns:
    if encoded[column].dtype.kind not in 'iuf':
        encoded[column] = pd.factorize(encoded[column])[0]
corr = encoded.corr()

# Hide the upper triangle (including the diagonal): the matrix is symmetric.
mask = np.triu(np.ones_like(corr, dtype=bool))

ax = sns.heatmap(corr, mask=mask, xticklabels=corr.columns, yticklabels=corr.columns, annot=True, linewidths=.2, cmap='coolwarm', vmin=-1, vmax=1)

In [None]:
def print_unique_col_value(data):
    """Print ``<column>:<unique values>`` for every object-dtype column of ``data``.

    Columns whose dtype is not ``object`` are skipped. Nothing is returned.
    """
    object_columns = (name for name in data if data[name].dtypes == 'object')
    for name in object_columns:
        distinct_values = data[name].unique()
        print(f'{name}:{distinct_values}')

In [None]:
print_unique_col_value(data)

In [None]:
##-----------------Get Clone from data to clean ---------------

In [None]:
# Take a real copy: plain assignment only creates a second name for the same
# DataFrame, so the in-place replacements below would also modify `data`.
processData = data.copy()

In [None]:
processData.replace('No internet service','No',inplace=True)
processData.replace('No phone service','No',inplace=True)

In [None]:
print_unique_col_value(processData)

In [None]:
#-----------Now lets replace yes and no with 0 or 1 ------

In [None]:
#---------all columns with yes and no
yes_no_columns = ['SeniorCitizen','Partner','Dependents','PhoneService','MultipleLines',
                 'OnlineSecurity','OnlineBackup','DeviceProtection','TechSupport',
                 'StreamingTV','StreamingMovies','PaperlessBilling','Churn'] 

# Assign the replaced column back instead of calling replace(..., inplace=True)
# on processData[col]: that operates on a temporary Series and is not guaranteed
# to write through to the DataFrame. The cast makes the 0/1 columns integer-typed
# and fails loudly if any value other than 'Yes'/'No' is still present.
for col in yes_no_columns:
    processData[col] = processData[col].replace({'Yes':1,'No':0}).astype('int64')

In [None]:
for col in processData:
    print(f'{col}:{processData[col].unique()}')

In [None]:
#//--------Change Gender: Female -> 1, Male -> 0
# Assign the result back; replace(..., inplace=True) on processData['gender']
# acts on a temporary Series and may leave the DataFrame unchanged.
processData['gender'] = processData['gender'].replace({'Female':1,'Male':0}).astype('int64')

In [None]:
processData['gender'].unique()

In [None]:
# One-hot encode the remaining multi-category columns. dtype=int keeps the
# indicator columns as 0/1 integers; recent pandas versions default to bool,
# which would leave the frame with mixed bool/numeric columns.
processData = pd.get_dummies(data=processData,columns=['InternetService','Contract','PaymentMethod'],dtype=int)

In [None]:
#---------------- Now our data look quite ----------------------
processData.sample(4)

In [None]:
processData.head()

In [None]:
#---------------------------------------------------------
# Now lets see the datatype... as we can see all are number
# which is quite great
#--------------------------------
processData.dtypes

In [None]:
#-----------------------------------------------------------
# Now let's scale our data.
# The columns to be scaled are tenure, MonthlyCharges and TotalCharges,
# as they are not in the range 0-1.
# We will use min-max normalization.
#-------------------------------------------------

In [None]:
cols_to_scale = ['tenure','MonthlyCharges','TotalCharges']
scaler = MinMaxScaler()
# Fit and transform exactly once. A second fit_transform would refit `scaler`
# on the already-scaled values, so it would no longer hold the original
# min/max of these columns.
processData[cols_to_scale] = scaler.fit_transform(processData[cols_to_scale])

In [None]:
#-------------------------------------------------------------
#Now our dataframe is scaled and ready to be used for prediction
#-------------------------------------

In [None]:
for col in processData:
    print(f'{col}:{processData[col].unique()}')

In [None]:
#------------------ We are done with preprocessing,------------------

In [None]:
#-------Machine Learning Model Evaluations and Predictions-------

In [None]:
#-------------------------Predicted By ANN--------------------------------------------
def GetNeuralNetwork(x_train,y_train,x_test,y_test):
    """Train a small dense binary classifier and report its test-set metrics.

    The network is Dense(20) -> Dense(10) -> Dense(5) -> Dense(1, sigmoid) with
    30% dropout after each hidden layer, compiled with Adam and binary
    cross-entropy and trained for 100 epochs with batch size 8.

    Args:
        x_train: training feature matrix; must expose ``.shape`` (the input
            layer is sized from ``x_train.shape[1]``).
        y_train: training labels.
        x_test: test feature matrix.
        y_test: test labels.

    Returns:
        tuple: accuracy, precision, recall and F1 as percentages rounded to
        three decimals, followed by the fitted model, the raw outputs of
        ``model.predict(x_test)`` and the list of thresholded 0/1 predictions.
    """
    # Size the input layer from the data instead of hard-coding 26 features, so
    # the function keeps working if the feature set changes.
    n_features = x_train.shape[1]

    model = keras.Sequential(
        [
            keras.layers.Dense(20,input_shape=(n_features,),activation='relu'),
            keras.layers.Dropout(0.3),
            keras.layers.Dense(10,activation='relu'),
            keras.layers.Dropout(0.3),
            keras.layers.Dense(5,activation='relu'),
            keras.layers.Dropout(0.3),
            keras.layers.Dense(1,activation='sigmoid'),
        ]
    )

    model.compile(optimizer ='adam',
                 loss='binary_crossentropy',
                 metrics=['accuracy'])
    
    model.fit(x_train,y_train,epochs=100,batch_size=8)

    model.evaluate(x_test,y_test)
    y_pred = model.predict(x_test)
    # Turn each sigmoid output into a hard label: above 0.5 -> 1, otherwise 0.
    y_pred_actual = [1 if ele > 0.5 else 0 for ele in y_pred]
    accuracy_ann=accuracy_score(y_test, y_pred_actual)
    precision = precision_score(y_test, y_pred_actual)
    recall = recall_score(y_test, y_pred_actual)
    f1 = f1_score(y_test, y_pred_actual)
    print("Neural network accuracy is :",accuracy_ann)
    print("Neural network precision is :",precision)
    print("Neural network recall is :",recall)
    print("Neural network f1 is :",f1)
    print("Neural Network Classification Reports is:\n",classification_report(y_test,y_pred_actual))
    return round(accuracy_ann*100, 3),round(precision*100, 3),round(recall*100, 3),round(f1*100, 3),model,y_pred,y_pred_actual

In [None]:
#-------------------------Predicted By KNN--------------------------------------------
def GetKNN(x_train,y_train,x_test,y_test):
    """Fit an 11-nearest-neighbours classifier and report its test-set metrics.

    Returns a 6-tuple: accuracy, precision, recall and F1 (percentages rounded
    to three decimals), the fitted classifier and its test-set predictions.
    """
    classifier = KNeighborsClassifier(n_neighbors = 11)
    classifier.fit(x_train, y_train)
    test_predictions = classifier.predict(x_test)

    # Accuracy comes from the estimator's own score(); the rest from sklearn.metrics.
    accuracy = classifier.score(x_test, y_test)
    precision = precision_score(y_test, test_predictions)
    recall = recall_score(y_test, test_predictions)
    f1 = f1_score(y_test, test_predictions)

    for label, value in (
        ("KNN accuracy is :", accuracy),
        ("KNN precision is :", precision),
        ("KNN recall is :", recall),
        ("KNN f1 is :", f1),
    ):
        print(label, value)
    print("KNN Classification Reports is:\n", classification_report(y_test, test_predictions))

    percentages = tuple(round(score * 100, 3) for score in (accuracy, precision, recall, f1))
    return (*percentages, classifier, test_predictions)

In [None]:
#-------------------------Predicted By SVM--------------------------------------------
def GetSVC(x_train,y_train,x_test,y_test):
    """Fit a probability-enabled SVC (random_state=1) and report its test-set metrics.

    Returns a 6-tuple: accuracy, precision, recall and F1 (percentages rounded
    to three decimals), the fitted classifier and its test-set predictions.
    """
    classifier = SVC(random_state = 1, probability=True)
    classifier.fit(x_train, y_train)
    test_predictions = classifier.predict(x_test)

    accuracy = classifier.score(x_test, y_test)
    precision = precision_score(y_test, test_predictions)
    recall = recall_score(y_test, test_predictions)
    f1 = f1_score(y_test, test_predictions)

    # Report each metric on its own line, then the full per-class report.
    summary = [
        ("SVM accuracy is :", accuracy),
        ("SVM precision is :", precision),
        ("SVM recall is :", recall),
        ("SVM f1 is :", f1),
    ]
    for label, value in summary:
        print(label, value)
    print("SVM Classification Reports is:\n", classification_report(y_test, test_predictions))

    return (
        round(accuracy * 100, 3),
        round(precision * 100, 3),
        round(recall * 100, 3),
        round(f1 * 100, 3),
        classifier,
        test_predictions,
    )

In [None]:
#-------------------------Predicted By Random Forest-----------------------
def GetRandomForest(x_train,y_train,x_test,y_test):
    """Fit a 100-tree random forest and report its test-set metrics.

    Args:
        x_train, y_train: training features and labels.
        x_test, y_test: test features and labels.

    Returns:
        tuple: accuracy, precision, recall and F1 as percentages rounded to
        three decimals, followed by the fitted forest and its test predictions.
    """
    # max_features="sqrt" is what "auto" meant for classifiers; the "auto"
    # spelling is rejected by current scikit-learn releases.
    model_rf = RandomForestClassifier(n_estimators=100 , oob_score = True, n_jobs = -1,
                                      random_state =50, max_features = "sqrt",
                                      max_leaf_nodes = 30)
    
    model_rf.fit(x_train, y_train)

    # Make predictions
    prediction_test = model_rf.predict(x_test)
    accuracy_randomForest = metrics.accuracy_score(y_test, prediction_test)
    precision = precision_score(y_test, prediction_test)
    recall = recall_score(y_test, prediction_test)
    f1 = f1_score(y_test, prediction_test)
    print("Random Forest accuracy is :",accuracy_randomForest)
    print("Random Forest precision is :",precision)
    print("Random Forest recall is :",recall)
    print("Random Forest f1 is :",f1)
    print("Random Forest Classification Reports is:\n",classification_report(y_test, prediction_test))
    return round(accuracy_randomForest*100, 3),round(precision*100, 3),round(recall*100, 3),round(f1*100, 3),model_rf ,prediction_test

In [None]:
#-----------------------Predicted By Logistic Regression-----------------
def GetLogisticRegression(x_train,y_train,x_test,y_test):
    """Fit a default LogisticRegression and report its test-set metrics.

    Returns a 6-tuple: accuracy, precision, recall and F1 (percentages rounded
    to three decimals), the fitted classifier and its test-set predictions.
    """
    classifier = LogisticRegression()
    classifier.fit(x_train, y_train)

    # Accuracy is scored and printed before the explicit predictions are made.
    accuracy = classifier.score(x_test, y_test)
    print("Logistic Regression accuracy is :", accuracy)

    test_predictions = classifier.predict(x_test)
    precision = precision_score(y_test, test_predictions)
    recall = recall_score(y_test, test_predictions)
    f1 = f1_score(y_test, test_predictions)
    for label, value in (
        ("Logistic Regression precision is :", precision),
        ("Logistic Regression recall is :", recall),
        ("Logistic Regression f1 is :", f1),
    ):
        print(label, value)
    print("Logistic Regression Classification Reports is:\n", classification_report(y_test, test_predictions))

    percentages = tuple(round(score * 100, 3) for score in (accuracy, precision, recall, f1))
    return (*percentages, classifier, test_predictions)

In [None]:
#-----------------------Predicted By Decision Tree-----------------
def GetDecisionTree(x_train,y_train,x_test,y_test):
    """Fit a depth-4 decision tree and report its test-set metrics.

    Returns a 6-tuple: accuracy, precision, recall and F1 (percentages rounded
    to three decimals), the fitted tree and its test-set predictions.
    """
    tree = DecisionTreeClassifier(max_depth=4)
    tree.fit(x_train, y_train)
    test_predictions = tree.predict(x_test)

    accuracy = tree.score(x_test, y_test)
    precision = precision_score(y_test, test_predictions)
    recall = recall_score(y_test, test_predictions)
    f1 = f1_score(y_test, test_predictions)

    # Report each metric on its own line, then the full per-class report.
    summary = [
        ("Decision Tree accuracy is :", accuracy),
        ("Decision Tree precision is :", precision),
        ("Decision Tree recall is :", recall),
        ("Decision Tree f1 is :", f1),
    ]
    for label, value in summary:
        print(label, value)
    print("Decision Tree Classification Reports is:\n", classification_report(y_test, test_predictions))

    return (
        round(accuracy * 100, 3),
        round(precision * 100, 3),
        round(recall * 100, 3),
        round(f1 * 100, 3),
        tree,
        test_predictions,
    )

In [None]:
def GetNeuralNetworkModel(n_features=26):
    """Build and compile the dense binary classifier (untrained).

    Architecture: Dense(20) -> Dense(10) -> Dense(5) -> Dense(1, sigmoid), with
    30% dropout after each hidden layer; compiled with Adam and binary
    cross-entropy, tracking accuracy.

    Args:
        n_features: number of input features. Defaults to 26, the value that
            was previously hard-coded, so calling the function with no
            arguments behaves exactly as before.

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    model =keras.Sequential(
        [
            keras.layers.Dense(20,input_shape=(n_features,),activation='relu'),
            keras.layers.Dropout(0.3),
            keras.layers.Dense(10,activation='relu'),
            keras.layers.Dropout(0.3),
            keras.layers.Dense(5,activation='relu'),
            keras.layers.Dropout(0.3),
            keras.layers.Dense(1,activation='sigmoid'),
        ]
    )
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [None]:
#-----------------------Predicted By Voting with 'NeuralNetwork' 'AdaBoost' 'XGBoost' -----------------
def GetVoting_NeuralNetwork_Boosting(x_train,y_train,x_test,y_test):
    """Soft-voting ensemble of a Keras network, AdaBoost and XGBoost.

    Fits the ensemble on the training split, predicts the test split and
    prints accuracy, precision, recall, F1 and the classification report.

    Returns:
        tuple: accuracy, precision, recall and F1 as percentages rounded to
        three decimals, followed by the fitted VotingClassifier and its test
        predictions.
    """
    neuralNetworkClassifier = KerasClassifier(build_fn=GetNeuralNetworkModel)
    # Mark the wrapper as a classifier - presumably required for
    # VotingClassifier to accept it alongside the sklearn estimators.
    neuralNetworkClassifier._estimator_type = "classifier"
    # The Pipeline that used to be built here was never passed to the ensemble,
    # so it has been removed; the wrapper itself is the first voter.

    clf1 = neuralNetworkClassifier
    clf2 = AdaBoostClassifier()
    clf3 = XGBClassifier(n_estimators=800, n_jobs=-1)
    eclf1 = VotingClassifier(estimators=[('nn', clf1), ('ada', clf2), ('xgb', clf3)], voting='soft')
    eclf1.fit(x_train, y_train)
    predictions = eclf1.predict(x_test)
    accuracy_voting = accuracy_score(y_test, predictions)
    precision = precision_score(y_test, predictions)
    recall = recall_score(y_test, predictions)
    f1 = f1_score(y_test, predictions)
    print("NeuralNetwork , AdaBoost , XGBoost  Voting Classifier accuracy is :",accuracy_voting)
    print("NeuralNetwork , AdaBoost , XGBoost  Voting Classifier precision is :",precision)
    print("NeuralNetwork , AdaBoost , XGBoost  Voting Classifier recall is :",recall)
    print("NeuralNetwork , AdaBoost , XGBoost  Voting Classifier f1 is :",f1)
    print("NeuralNetwork,AdaBoost,XGBoost Voting Classification Reports is:\n",classification_report(y_test, predictions))
    return round(accuracy_voting*100, 3),round(precision*100, 3),round(recall*100, 3),round(f1*100, 3),eclf1,predictions

In [None]:
#-----------------------Predicted By 'KNN' Boosting -----------------
def GetVoting_KNN_Boosting(x_train,y_train,x_test,y_test):
    """Soft-voting ensemble of 11-NN, AdaBoost and XGBoost, scored on the test split.

    Returns a 6-tuple: accuracy, precision, recall and F1 (percentages rounded
    to three decimals), the fitted VotingClassifier and its test predictions.
    """
    voters = [
        ('knn', KNeighborsClassifier(n_neighbors = 11)),
        ('ada', AdaBoostClassifier()),
        ('xgb', XGBClassifier(n_estimators=800, n_jobs=-1)),
    ]
    ensemble = VotingClassifier(estimators=voters, voting='soft')
    ensemble.fit(x_train, y_train)
    predictions = ensemble.predict(x_test)

    accuracy = accuracy_score(y_test, predictions)
    precision = precision_score(y_test, predictions)
    recall = recall_score(y_test, predictions)
    f1 = f1_score(y_test, predictions)

    # Report each metric on its own line, then the full per-class report.
    for label, value in (
        ("KNN , AdaBoost , XGBoost  Voting Classifier accuracy is :", accuracy),
        ("KNN , AdaBoost , XGBoost  Voting Classifier precision is :", precision),
        ("KNN , AdaBoost , XGBoost  Voting Classifier recall is :", recall),
        ("KNN , AdaBoost , XGBoost  Voting Classifier f1 is :", f1),
    ):
        print(label, value)
    print("KNN,AdaBoost,XGBoost Voting Classification Reports is:\n", classification_report(y_test, predictions))

    percentages = tuple(round(score * 100, 3) for score in (accuracy, precision, recall, f1))
    return (*percentages, ensemble, predictions)

In [None]:
#-----------------------Predicted By 'SVM' Boosting -----------------
def GetVoting_SVM_Boosting(x_train,y_train,x_test,y_test):
    """Soft-voting ensemble of SVC, AdaBoost and XGBoost, scored on the test split.

    Returns a 6-tuple: accuracy, precision, recall and F1 (percentages rounded
    to three decimals), the fitted VotingClassifier and its test predictions.
    """
    svm_voter = SVC(random_state = 1, probability=True)
    ada_voter = AdaBoostClassifier()
    xgb_voter = XGBClassifier(n_estimators=800, n_jobs=-1)
    ensemble = VotingClassifier(
        estimators=[('svm', svm_voter), ('ada', ada_voter), ('xgb', xgb_voter)],
        voting='soft',
    )
    ensemble.fit(x_train, y_train)
    predictions = ensemble.predict(x_test)

    accuracy = accuracy_score(y_test, predictions)
    precision = precision_score(y_test, predictions)
    recall = recall_score(y_test, predictions)
    f1 = f1_score(y_test, predictions)

    # Report each metric on its own line, then the full per-class report.
    summary = [
        ("SVM , AdaBoost , XGBoost  Voting Classifier accuracy is :", accuracy),
        ("SVM , AdaBoost , XGBoost  Voting Classifier precision is :", precision),
        ("SVM , AdaBoost , XGBoost  Voting Classifier recall is :", recall),
        ("SVM , AdaBoost , XGBoost  Voting Classifier f1 is :", f1),
    ]
    for label, value in summary:
        print(label, value)
    print("SVM,AdaBoost,XGBoost Voting Classification Reports is:\n", classification_report(y_test, predictions))

    return (
        round(accuracy * 100, 3),
        round(precision * 100, 3),
        round(recall * 100, 3),
        round(f1 * 100, 3),
        ensemble,
        predictions,
    )

In [None]:
#-----------------------Predicted By 'Random Forest' Boosting -----------------
def GetVoting_RandomForest_Boosting(x_train,y_train,x_test,y_test):
    """Fit a soft-voting ensemble of Random Forest, AdaBoost and XGBoost and score it.

    The ensemble is trained on (x_train, y_train). Accuracy, precision, recall
    and F1 are computed on (x_test, y_test) and printed together with the
    classification report.

    Returns:
        tuple: (accuracy, precision, recall, f1) as percentages rounded to
        3 decimals, followed by the fitted VotingClassifier and the labels
        predicted for x_test.
    """
    # max_features="sqrt" is what "auto" meant for classifiers; "auto" itself
    # is deprecated and rejected by recent scikit-learn releases.
    clf1 = RandomForestClassifier(n_estimators=500, oob_score=True, n_jobs=-1,
                                  random_state=50, max_features="sqrt",
                                  max_leaf_nodes=30)
    clf2 = AdaBoostClassifier()
    clf3 = XGBClassifier(n_estimators=800, n_jobs=-1)
    eclf1 = VotingClassifier(estimators=[('rf', clf1), ('ada', clf2), ('xgb', clf3)], voting='soft')
    eclf1.fit(x_train, y_train)
    predictions = eclf1.predict(x_test)
    accuracy_voting = accuracy_score(y_test, predictions)
    precision = precision_score(y_test, predictions)
    recall = recall_score(y_test, predictions)
    f1 = f1_score(y_test, predictions)
    print("Random Forest , AdaBoost , XGBoost  Voting Classifier accuracy is :",accuracy_voting)
    print("Random Forest , AdaBoost , XGBoost  Voting Classifier precision is :",precision)
    print("Random Forest , AdaBoost , XGBoost  Voting Classifier recall is :",recall)
    print("Random Forest , AdaBoost , XGBoost  Voting Classifier f1 is :",f1)
    print("Random Forest,AdaBoost,XGBoost Voting Classification Reports is:\n",classification_report(y_test, predictions))
    return round(accuracy_voting*100, 3),round(precision*100, 3),round(recall*100, 3),round(f1*100, 3),eclf1,predictions

In [None]:
#-----------------------Predicted By 'LogisticRegression' Boosting -----------------
def GetVoting_LogisticRegression_Boosting(x_train,y_train,x_test,y_test):
    """Fit a soft-voting ensemble of Logistic Regression, AdaBoost and XGBoost and score it.

    The ensemble is trained on (x_train, y_train). Accuracy, precision, recall
    and F1 are computed on (x_test, y_test) and printed together with the
    classification report.

    Returns:
        tuple: (accuracy, precision, recall, f1) as percentages rounded to
        3 decimals, followed by the fitted VotingClassifier and the labels
        predicted for x_test.
    """
    logistic = LogisticRegression()
    ada = AdaBoostClassifier()
    xgb = XGBClassifier(n_estimators=800, n_jobs=-1)
    voter = VotingClassifier(
        estimators=[('lr', logistic), ('ada', ada), ('xgb', xgb)],
        voting='soft',
    )
    voter.fit(x_train, y_train)
    predicted = voter.predict(x_test)

    # Raw scores in [0, 1]; they are printed as-is and returned as percentages.
    acc_score = accuracy_score(y_test, predicted)
    prec_score = precision_score(y_test, predicted)
    rec_score = recall_score(y_test, predicted)
    f1_value = f1_score(y_test, predicted)

    print("LogisticRegression , AdaBoost , XGBoost  Voting Classifier accuracy is :",acc_score)
    print("LogisticRegression , AdaBoost , XGBoost  Voting Classifier precision is :",prec_score)
    print("LogisticRegression , AdaBoost , XGBoost  Voting Classifier recall is :",rec_score)
    print("LogisticRegression , AdaBoost , XGBoost  Voting Classifier f1 is :",f1_value)
    print("LogisticRegression,AdaBoost,XGBoost Voting Classification Reports is:\n",classification_report(y_test, predicted))

    return (
        round(acc_score*100, 3),
        round(prec_score*100, 3),
        round(rec_score*100, 3),
        round(f1_value*100, 3),
        voter,
        predicted,
    )

In [None]:
#-----------------------Predicted By 'DecisionTree' Boosting -----------------
def GetVoting_DecisionTree_Boosting(x_train,y_train,x_test,y_test):
    """Fit a soft-voting ensemble of Decision Tree, AdaBoost and XGBoost and score it.

    The ensemble is trained on (x_train, y_train). Accuracy, precision, recall
    and F1 are computed on (x_test, y_test) and printed together with the
    classification report.

    Returns:
        tuple: (accuracy, precision, recall, f1) as percentages rounded to
        3 decimals, followed by the fitted VotingClassifier and the labels
        predicted for x_test.
    """
    base_estimators = [
        ('dt', DecisionTreeClassifier()),
        ('ada', AdaBoostClassifier()),
        ('xgb', XGBClassifier(n_estimators=800, n_jobs=-1)),
    ]
    voting_model = VotingClassifier(estimators=base_estimators, voting='soft')
    voting_model.fit(x_train, y_train)
    test_predictions = voting_model.predict(x_test)

    accuracy_value = accuracy_score(y_test, test_predictions)
    precision_value = precision_score(y_test, test_predictions)
    recall_value = recall_score(y_test, test_predictions)
    f1_value = f1_score(y_test, test_predictions)

    print("DecisionTree , AdaBoost , XGBoost  Voting Classifier accuracy is :",accuracy_value)
    print("DecisionTree , AdaBoost , XGBoost  Voting Classifier precision is :",precision_value)
    print("DecisionTree , AdaBoost , XGBoost  Voting Classifier recall is :",recall_value)
    print("DecisionTree , AdaBoost , XGBoost  Voting Classifier f1 is :",f1_value)
    print("DecisionTree,AdaBoost,XGBoost Voting Classification Reports is:\n",classification_report(y_test, test_predictions))

    as_percent = [round(value*100, 3) for value in (accuracy_value, precision_value, recall_value, f1_value)]
    return as_percent[0], as_percent[1], as_percent[2], as_percent[3], voting_model, test_predictions

In [None]:
#--------------------Create CM
def GetConfusionMatrix(y_test,y_pred_actual,title):
    """Show the confusion matrix of observed vs. predicted labels as a heatmap.

    Args:
        y_test: observed labels.
        y_pred_actual: predicted labels.
        title: text used as the plot title.

    The tick labels are fixed to 'False'/'True', i.e. two classes are expected.
    Note that sns.set changes the seaborn font scale globally.
    """
    matrix = confusion_matrix(y_test, y_pred_actual)
    plt.figure(figsize=(9,7))
    axes = plt.subplot()
    sns.set(font_scale=2.0) # Adjust to fit
    sns.heatmap(matrix, annot=True, ax=axes, cmap="Blues", fmt="g")
    # Axis labels, title and tick styling
    axis_label_font = {'size':'28'}  # Adjust to fit
    heading_font = {'size':'24'}  # Adjust to fit
    axes.set_xlabel('Predicted labels', fontdict=axis_label_font)
    axes.set_ylabel('Observed labels', fontdict=axis_label_font)
    axes.set_title(title, fontdict=heading_font)
    axes.tick_params(axis='both', which='major', labelsize=20)  # Adjust to fit
    for axis in (axes.xaxis, axes.yaxis):
        axis.set_ticklabels(['False', 'True'])
    plt.show()

In [None]:
#-----------------Create ROC
def GetROC(model,x_test,y_test,title):
    """Plot the ROC curve of a fitted classifier and return its AUC and curve points.

    Args:
        model: fitted estimator exposing ``predict_proba``; column 1 of its
            output is used as the positive-class score.
        x_test: features passed to ``model.predict_proba``.
        y_test: observed labels.
        title: text used as the plot title.

    Returns:
        tuple: (AUC score, false-positive rates, true-positive rates).

    Side effects: prints the AUC, writes the figure to the file 'ROC'
    (overwriting any previous one) and sets the global Matplotlib
    ``axes.facecolor`` rcParam to 'none'.
    """
    label_font = {'size':'28'}  # Adjust to fit
    title_font = {'size':'24'}  # Adjust to fit
    y_pred_prob = model.predict_proba(x_test)[:,1]
    fpr_rf, tpr_rf, thresholds = roc_curve(y_test, y_pred_prob)
    aucScore = roc_auc_score(y_test, y_pred_prob)
    aucLabel="AUC="+format(round(aucScore*100, 2), '.2f')
    print("AUC Score is:" ,aucScore)

    # Draw on one explicit axes. Calling fig.add_subplot(111) after plotting
    # adds a second, empty axes on recent Matplotlib instead of returning the
    # one that holds the curve.
    fig, ax = plt.subplots(facecolor=(1, 1, 1), figsize=(9,7))
    ax.plot([0, 1], [0, 1], 'k--')  # chance diagonal
    # The former 'b' format string conflicted with color='r'; red was the
    # colour actually drawn.
    ax.plot(fpr_rf, tpr_rf, label=aucLabel, marker='.', color='r')
    ax.set_xlabel('False Positive Rate', fontdict=label_font)
    ax.set_ylabel('True Positive Rate', fontdict=label_font)
    ax.set_title(title, fontdict=title_font)
    for side in ('bottom', 'top', 'right', 'left'):
        ax.spines[side].set_color('black')
    ax.legend()
    plt.rcParams['axes.facecolor'] = 'none'
    # Save only after the legend and styling exist, so the file matches the
    # figure that is displayed.
    fig.savefig('ROC',dpi=300)
    plt.show()
    return aucScore,fpr_rf,tpr_rf
    

In [None]:
def GetNNROC(model,x_test,y_test,y_pred_prob,title):
    """Plot a ROC curve from precomputed scores and return its AUC and curve points.

    Args:
        model: unused; kept so existing calls keep working.
        x_test: unused; kept so existing calls keep working.
        y_test: observed labels.
        y_pred_prob: predicted scores for the positive class.
        title: text used as the plot title.

    Returns:
        tuple: (AUC score, false-positive rates, true-positive rates).

    Side effects: prints the AUC, writes the figure to the file 'ROC'
    (overwriting any previous one) and sets the global Matplotlib
    ``axes.facecolor`` rcParam to 'none'.
    """
    label_font = {'size':'28'}  # Adjust to fit
    title_font = {'size':'24'}  # Adjust to fit
    fpr_rf, tpr_rf, thresholds = roc_curve(y_test, y_pred_prob)
    aucScore = roc_auc_score(y_test, y_pred_prob)
    aucLabel="AUC="+format(round(aucScore*100, 2), '.2f')
    print("AUC Score is:" ,aucScore)

    # Draw on one explicit axes. Calling fig.add_subplot(111) after plotting
    # adds a second, empty axes on recent Matplotlib instead of returning the
    # one that holds the curve.
    fig, ax = plt.subplots(facecolor=(1, 1, 1), figsize=(9,7))
    ax.plot([0, 1], [0, 1], 'k--')  # chance diagonal
    # The former 'b' format string conflicted with color='r'; red was the
    # colour actually drawn.
    ax.plot(fpr_rf, tpr_rf, label=aucLabel, marker='.', color='r')
    ax.set_xlabel('False Positive Rate', fontdict=label_font)
    ax.set_ylabel('True Positive Rate', fontdict=label_font)
    ax.set_title(title, fontdict=title_font)
    for side in ('bottom', 'top', 'right', 'left'):
        ax.spines[side].set_color('black')
    ax.legend()
    plt.rcParams['axes.facecolor'] = 'none'
    # Save only after the legend and styling exist, so the file matches the
    # figure that is displayed.
    fig.savefig('ROC',dpi=300)
    plt.show()
    return aucScore,fpr_rf,tpr_rf

In [None]:
# Separate the 'Churn' target from the remaining columns, then hold out 20%
# of the rows for testing. stratify=churnData splits in a stratified way on
# the target.
churnData = processData['Churn']
nonChurnData = processData.drop('Churn', axis=1)

x_train, x_test, y_train, y_test = train_test_split(
    nonChurnData,
    churnData,
    test_size=0.2,
    random_state=15,
    stratify=churnData,
)

In [None]:
# Number of rows per target class in the training split.
y_train.value_counts()

In [None]:
# Number of rows per target class in the test split.
y_test.value_counts()

In [None]:
#---------------------------------------------------------
#As we can see, our dataset is imbalanced. Next, compare training on the
#imbalanced dataset with training on the balanced dataset.
#------------------------------------------

In [None]:
#------------------------------Neural Network---------------------------------------
# Train and evaluate the neural network on the original (imbalanced) split.
(nn_accuracy, nn_precision, nn_recall, nn_f1,
 nn_model, nn_yPrediction, nn_yPredictionActual) = GetNeuralNetwork(
    x_train, y_train, x_test, y_test)

In [None]:
#--------------------------KNN------------------------------------

In [None]:
# Train and evaluate KNN on the original (imbalanced) split.
(knn_accuracy, knn_precision, knn_recall, knn_f1,
 knn_model, knn_yPrediction) = GetKNN(x_train, y_train, x_test, y_test)

In [None]:
#---------------------------SVM--------------------------------------

In [None]:
# Train and evaluate the SVM on the original (imbalanced) split.
(svm_accuracy, svm_precision, svm_recall, svm_f1,
 svm_model, svm_yPrediction) = GetSVC(x_train, y_train, x_test, y_test)

In [None]:
#----------------------------Random Forest---------------------------

In [None]:
# Train and evaluate the random forest on the original (imbalanced) split.
(rf_accuracy, rf_precision, rf_recall, rf_f1,
 rf_model, rf_yPrediction) = GetRandomForest(x_train, y_train, x_test, y_test)

In [None]:
#------------------------Logistic Regression------------------------

In [None]:
# Train and evaluate logistic regression on the original (imbalanced) split.
(lr_accuracy, lr_precision, lr_recall, lr_f1,
 lr_model, lr_yPrediction) = GetLogisticRegression(x_train, y_train, x_test, y_test)

In [None]:
#-------------------------Decision Tree Classifier-------------------

In [None]:
# Train and evaluate the decision tree on the original (imbalanced) split.
(dt_accuracy, dt_precision, dt_recall, dt_f1,
 dt_model, dt_yPrediction) = GetDecisionTree(x_train, y_train, x_test, y_test)

In [None]:
# Accuracy of the six classifiers trained on the original (imbalanced) split:
# a horizontal bar chart followed by the table sorted from best to worst.
models_voting_Accuracy = pd.DataFrame({
    'Classifiers':['Neural network ','KNN', 'SVM', 'Random forest','Logistic regression','Decision tree'],
    'Accuracy' :[nn_accuracy,knn_accuracy,svm_accuracy,rf_accuracy,lr_accuracy,dt_accuracy]
})
# Create the axes up front and hand it to seaborn. fig.add_subplot(111) after
# plotting adds a second, empty axes on top of the bars on recent Matplotlib.
fig, ax = plt.subplots(facecolor=(1, 1, 1), figsize=(10,6))
sns.barplot(x='Accuracy', y='Classifiers', data=models_voting_Accuracy, ax=ax)
for side in ('bottom', 'top', 'right', 'left'):
    ax.spines[side].set_color('black')
models_voting_Accuracy.sort_values(by='Accuracy', ascending=False)

In [None]:
#-------------------Let's balance our dataset and see the difference----------

In [None]:
# Oversample the minority class with SMOTE.
# random_state pins the synthetic samples so the balanced results are
# reproducible between runs.
# NOTE(review): the resampling is applied to the full dataset, before the
# train/test split of x_sm/y_sm, so synthetic rows built from future test rows
# can end up in the training data. Consider resampling the training split only.
smote = SMOTE(sampling_strategy='minority', random_state=15)
x_sm, y_sm = smote.fit_resample(nonChurnData, churnData)

In [None]:
# Class counts after SMOTE; the dataset should now be balanced.
y_sm.value_counts() 

In [None]:
# Hold out 20% of the resampled data for testing, stratified on the resampled target.
x_train_balance, x_test_balance, y_train_balance, y_test_balance = train_test_split(
    x_sm,
    y_sm,
    test_size=0.2,
    random_state=15,
    stratify=y_sm,
)

In [None]:
# Class counts in the training split of the balanced dataset.
y_train_balance.value_counts() 

In [None]:
# Class counts in the test split of the balanced dataset.
y_test_balance.value_counts()

In [None]:
#---------Balance ANN--------------------------------

In [None]:
# Train and evaluate the neural network on the balanced split.
(nn_balance_accuracy, nn_balance_precision, nn_balance_recall, nn_balance_f1,
 nn_balance_model, nn_balance_yPrediction, nn_balance_yPredictionActual) = GetNeuralNetwork(
    x_train_balance, y_train_balance, x_test_balance, y_test_balance)

In [None]:
# Train and evaluate KNN on the balanced split.
(knn_balance_accuracy, knn_balance_precision, knn_balance_recall, knn_balance_f1,
 knn_balance_model, knn_balance_yPrediction) = GetKNN(
    x_train_balance, y_train_balance, x_test_balance, y_test_balance)

In [None]:
# Train and evaluate the SVM on the balanced split.
(svm_balance_accuracy, svm_balance_precision, svm_balance_recall, svm_balance_f1,
 svm_balance_model, svm_balance_yPrediction) = GetSVC(
    x_train_balance, y_train_balance, x_test_balance, y_test_balance)

In [None]:
# Train and evaluate the random forest on the balanced split.
(rf_balance_accuracy, rf_balance_precision, rf_balance_recall, rf_balance_f1,
 rf_balance_model, rf_balance_yPrediction) = GetRandomForest(
    x_train_balance, y_train_balance, x_test_balance, y_test_balance)

In [None]:
# Train and evaluate logistic regression on the balanced split.
(lr_balance_accuracy, lr_balance_precision, lr_balance_recall, lr_balance_f1,
 lr_balance_model, lr_balance_yPrediction) = GetLogisticRegression(
    x_train_balance, y_train_balance, x_test_balance, y_test_balance)

In [None]:
# Train and evaluate the decision tree on the balanced split.
(dt_balance_accuracy, dt_balance_precision, dt_balance_recall, dt_balance_f1,
 dt_balance_model, dt_balance_yPrediction) = GetDecisionTree(
    x_train_balance, y_train_balance, x_test_balance, y_test_balance)

In [None]:
# Accuracy of the six classifiers trained on the balanced split: a horizontal
# bar chart followed by the table sorted from best to worst.
models_voting_Accuracy_balance = pd.DataFrame({
    'Classifiers':['Neural network ','KNN', 'SVM', 'Random forest','Logistic regression','Decision tree'],
    'Accuracy' :[nn_balance_accuracy,knn_balance_accuracy , svm_balance_accuracy , rf_balance_accuracy,lr_balance_accuracy,dt_balance_accuracy]
})
# Create the axes up front and hand it to seaborn. fig.add_subplot(111) after
# plotting adds a second, empty axes on top of the bars on recent Matplotlib.
fig, ax = plt.subplots(facecolor=(1, 1, 1), figsize=(10,6))
sns.barplot(x='Accuracy', y='Classifiers', data=models_voting_Accuracy_balance, ax=ax)
for side in ('bottom', 'top', 'right', 'left'):
    ax.spines[side].set_color('black')
models_voting_Accuracy_balance.sort_values(by='Accuracy', ascending=False)

In [None]:
# Neural network + boosting voting ensemble on the balanced split.
(voting_nn_accuracy, voting_nn_precision, voting_nn_recall, voting_nn_f1,
 voting_nn_model, voting_nn_yPrediction) = GetVoting_NeuralNetwork_Boosting(
    x_train_balance, y_train_balance, x_test_balance, y_test_balance)

In [None]:
# KNN + boosting voting ensemble on the balanced split.
(voting_knn_accuracy, voting_knn_precision, voting_knn_recall, voting_knn_f1,
 voting_knn_model, voting_knn_yPrediction) = GetVoting_KNN_Boosting(
    x_train_balance, y_train_balance, x_test_balance, y_test_balance)

In [None]:
# SVM + boosting voting ensemble on the balanced split.
(voting_svm_accuracy, voting_svm_precision, voting_svm_recall, voting_svm_f1,
 voting_svm_model, voting_svm_yPrediction) = GetVoting_SVM_Boosting(
    x_train_balance, y_train_balance, x_test_balance, y_test_balance)

In [None]:
# Random forest + boosting voting ensemble on the balanced split.
(voting_rf_accuracy, voting_rf_precision, voting_rf_recall, voting_rf_f1,
 voting_rf_model, voting_rf_yPrediction) = GetVoting_RandomForest_Boosting(
    x_train_balance, y_train_balance, x_test_balance, y_test_balance)

In [None]:
# Logistic regression + boosting voting ensemble on the balanced split.
(voting_lr_accuracy, voting_lr_precision, voting_lr_recall, voting_lr_f1,
 voting_lr_model, voting_lr_yPrediction) = GetVoting_LogisticRegression_Boosting(
    x_train_balance, y_train_balance, x_test_balance, y_test_balance)

In [None]:
# Decision tree + boosting voting ensemble on the balanced split.
(voting_dt_accuracy, voting_dt_precision, voting_dt_recall, voting_dt_f1,
 voting_dt_model, voting_dt_yPrediction) = GetVoting_DecisionTree_Boosting(
    x_train_balance, y_train_balance, x_test_balance, y_test_balance)

In [None]:
# Accuracy of the six voting ensembles on the balanced split: a horizontal bar
# chart followed by the table sorted from best to worst.
models_voting_Accuracy_voting_balance = pd.DataFrame({
    'Ensemble Learning':['Neural network-XGBoost-AdaBoost','KNN-XGBoost-AdaBoost', 'SVM-XGBoost-AdaBoost', 'Random forest-XGBoost-AdaBoost','Logistic regression-XGBoost-AdaBoost','Decision tree-XGBoost-AdaBoost'],
    'Accuracy' :[voting_nn_accuracy,voting_knn_accuracy , voting_svm_accuracy , voting_rf_accuracy,voting_lr_accuracy,voting_dt_accuracy]
})
# Create the axes up front and hand it to seaborn. fig.add_subplot(111) after
# plotting adds a second, empty axes on top of the bars on recent Matplotlib.
fig, ax = plt.subplots(facecolor=(1, 1, 1), figsize=(10,6))
sns.barplot(x='Accuracy', y='Ensemble Learning', data=models_voting_Accuracy_voting_balance, ax=ax)
for side in ('bottom', 'top', 'right', 'left'):
    ax.spines[side].set_color('black')
models_voting_Accuracy_voting_balance.sort_values(by='Accuracy', ascending=False)

In [None]:
# Fit every voting ensemble 10 times on the balanced split, collect the
# accuracy of each fit, and average the accuracies per ensemble.
accuracy_Boosted_balance_nn = []
accuracy_Boosted_balance_knn = []
accuracy_Boosted_balance_svm = []
accuracy_Boosted_balance_rf = []
accuracy_Boosted_balance_lr = []
accuracy_Boosted_balance_dt = []
for x in range(10):
    #---------Neural Network
    (voting_nn_accuracy_mean, voting_nn_precision_mean, voting_nn_recall_mean,
     voting_nn_f1_mean, voting_nn_model_mean, voting_nn_yPrediction_mean) = GetVoting_NeuralNetwork_Boosting(
        x_train_balance, y_train_balance, x_test_balance, y_test_balance)
    accuracy_Boosted_balance_nn.append(voting_nn_accuracy_mean)
    #-------- KNN
    (voting_knn_accuracy_mean, voting_knn_precision_mean, voting_knn_recall_mean,
     voting_knn_f1_mean, voting_knn_model_mean, voting_knn_yPrediction_mean) = GetVoting_KNN_Boosting(
        x_train_balance, y_train_balance, x_test_balance, y_test_balance)
    accuracy_Boosted_balance_knn.append(voting_knn_accuracy_mean)
    #-------- Svm
    (voting_svm_accuracy_mean, voting_svm_precision_mean, voting_svm_recall_mean,
     voting_svm_f1_mean, voting_svm_model_mean, voting_svm_yPrediction_mean) = GetVoting_SVM_Boosting(
        x_train_balance, y_train_balance, x_test_balance, y_test_balance)
    accuracy_Boosted_balance_svm.append(voting_svm_accuracy_mean)
    #-------- Random Forest
    (voting_rf_accuracy_mean, voting_rf_precision_mean, voting_rf_recall_mean,
     voting_rf_f1_mean, voting_rf_model_mean, voting_rf_yPrediction_mean) = GetVoting_RandomForest_Boosting(
        x_train_balance, y_train_balance, x_test_balance, y_test_balance)
    accuracy_Boosted_balance_rf.append(voting_rf_accuracy_mean)
    #-------- Logistic Regression
    (voting_lr_accuracy_mean, voting_lr_precision_mean, voting_lr_recall_mean,
     voting_lr_f1_mean, voting_lr_model_mean, voting_lr_yPrediction_mean) = GetVoting_LogisticRegression_Boosting(
        x_train_balance, y_train_balance, x_test_balance, y_test_balance)
    accuracy_Boosted_balance_lr.append(voting_lr_accuracy_mean)
    #-------- Decision Tree
    (voting_dt_accuracy_mean, voting_dt_precision_mean, voting_dt_recall_mean,
     voting_dt_f1_mean, voting_dt_model_mean, voting_dt_yPrediction_mean) = GetVoting_DecisionTree_Boosting(
        x_train_balance, y_train_balance, x_test_balance, y_test_balance)
    accuracy_Boosted_balance_dt.append(voting_dt_accuracy_mean)

# Average accuracy per ensemble (the neural-network mean used to be computed twice).
mean_accuracy_Boosted_nn=mean(accuracy_Boosted_balance_nn)
mean_accuracy_Boosted_knn=mean(accuracy_Boosted_balance_knn)
mean_accuracy_Boosted_svm=mean(accuracy_Boosted_balance_svm)
mean_accuracy_Boosted_rf=mean(accuracy_Boosted_balance_rf)
mean_accuracy_Boosted_lr=mean(accuracy_Boosted_balance_lr)
mean_accuracy_Boosted_dt=mean(accuracy_Boosted_balance_dt)

In [None]:
# Average accuracy of each voting ensemble over the repeated fits: a horizontal
# bar chart followed by the table sorted from best to worst.
# NOTE(review): this rebinds models_voting_Accuracy, which an earlier cell
# used for the single-classifier accuracies.
models_voting_Accuracy = pd.DataFrame({
    'Ensamble Learning Average':['Neural network-XGBoost-AdaBoost','KNN-XGBoost-AdaBoost', 'SVM-XGBoost-AdaBoost', 'Random forest-XGBoost-AdaBoost','Logistic regression-XGBoost-AdaBoost','Decision tree-XGBoost-AdaBoost'],
    'Accuracy' :[mean_accuracy_Boosted_nn,mean_accuracy_Boosted_knn , mean_accuracy_Boosted_svm , mean_accuracy_Boosted_rf,mean_accuracy_Boosted_lr,mean_accuracy_Boosted_dt]
})
# Create the axes up front and hand it to seaborn. fig.add_subplot(111) after
# plotting adds a second, empty axes on top of the bars on recent Matplotlib.
fig, ax = plt.subplots(facecolor=(1, 1, 1), figsize=(10,6))
sns.barplot(x='Accuracy', y='Ensamble Learning Average', data=models_voting_Accuracy, ax=ax)
for side in ('bottom', 'top', 'right', 'left'):
    ax.spines[side].set_color('black')
models_voting_Accuracy.sort_values(by='Accuracy', ascending=False)

In [None]:
#---------------Predict with Best Classifiers------

In [None]:
#-----------------------Predicted By Voting with 'NeuralNetwork' 'LogisticRegression' 'XGBoost' -----------------
def GetVoting_NeuralNetwork_LogisticRegression_XGBoost(x_train,y_train,x_test,y_test):
    """Fit a soft-voting ensemble of neural network, Logistic Regression and XGBoost and score it.

    The ensemble is trained on (x_train, y_train). Accuracy, precision, recall
    and F1 are computed on (x_test, y_test) and printed together with the
    classification report.

    Returns:
        tuple: (accuracy, precision, recall, f1) as percentages rounded to
        3 decimals, followed by the fitted VotingClassifier and the labels
        predicted for x_test.
    """
    neuralNetworkClassifier = KerasClassifier(build_fn=GetNeuralNetworkModel)
    # Mark the Keras wrapper as a classifier; presumably needed for
    # VotingClassifier to accept it -- TODO confirm.
    neuralNetworkClassifier._estimator_type = "classifier"
    # NOTE(review): a scaler+network Pipeline used to be built here but was
    # never passed to the ensemble, so it has been removed. If the network is
    # meant to see scaled inputs, the pipeline should be the 'nn' estimator.
    clf1 = neuralNetworkClassifier
    clf2 = LogisticRegression()
    clf3 = XGBClassifier(n_estimators=800, n_jobs=-1)
    eclf1 = VotingClassifier(estimators=[('nn', clf1), ('lr', clf2), ('xgb', clf3)], voting='soft')
    eclf1.fit(x_train, y_train)
    predictions = eclf1.predict(x_test)
    accuracy_voting = accuracy_score(y_test, predictions)
    precision = precision_score(y_test, predictions)
    recall = recall_score(y_test, predictions)
    f1 = f1_score(y_test, predictions)
    print("NeuralNetwork , LogisticRegression , XGBoost  Voting Classifier accuracy is :",accuracy_voting)
    print("NeuralNetwork , LogisticRegression , XGBoost  Voting Classifier precision is :",precision)
    print("NeuralNetwork , LogisticRegression , XGBoost  Voting Classifier recall is :",recall)
    print("NeuralNetwork , LogisticRegression , XGBoost  Voting Classifier f1 is :",f1)
    print("NeuralNetwork,LogisticRegression,XGBoost Voting Classification Reports is:\n",classification_report(y_test, predictions))
    return round(accuracy_voting*100, 3),round(precision*100, 3),round(recall*100, 3),round(f1*100, 3),eclf1 ,predictions

In [None]:
# Neural network + logistic regression + XGBoost voting ensemble on the balanced split.
(voting_nn_lr_ab_accuracy, voting_nn_lr_ab_precision, voting_nn_lr_ab_recall, voting_nn_lr_ab_f1,
 voting_nn_lr_ab_model, voting_nn_lr_ab_yPrediction) = GetVoting_NeuralNetwork_LogisticRegression_XGBoost(
    x_train_balance, y_train_balance, x_test_balance, y_test_balance)

In [None]:
# Fit the neural network / logistic regression / XGBoost ensemble 10 times on
# the balanced split and average the accuracies of the fits.
accuracy_Boosted_balance_nn_lr_xgb = []
for x in range(10):
    (voting_nn_lr_ab_accuracy_mean, voting_nn_lr_ab_precision_mean,
     voting_nn_lr_ab_recall_mean, voting_nn_lr_ab_f1_mean,
     voting_nn_lr_ab_model_mean, voting_nn_lr_ab_yPrediction_mean) = GetVoting_NeuralNetwork_LogisticRegression_XGBoost(
        x_train_balance, y_train_balance, x_test_balance, y_test_balance)
    accuracy_Boosted_balance_nn_lr_xgb.append(voting_nn_lr_ab_accuracy_mean)

mean_accuracy_Boosted_nn_lr_xgb = mean(accuracy_Boosted_balance_nn_lr_xgb)

In [None]:
#-----------------Elected Classifier Accuracy Average

In [None]:
# Average accuracy (in %) of the neural network / logistic regression / XGBoost ensemble.
mean_accuracy_Boosted_nn_lr_xgb

In [None]:
# Donut chart of the average accuracies of the six voting ensembles; each
# slice is labelled with the ensemble's first member.
labels = [
    'Neural network ',
    'KNN',
    'SVM',
    'Random forest',
    'Logistic regression',
    'Decision tree',
]
values = [
    mean_accuracy_Boosted_nn,
    mean_accuracy_Boosted_knn,
    mean_accuracy_Boosted_svm,
    mean_accuracy_Boosted_rf,
    mean_accuracy_Boosted_lr,
    mean_accuracy_Boosted_dt,
]

pie_trace = go.Pie(labels=labels, values=values, hole=.3)
fig = go.Figure(data=[pie_trace])
fig.update_layout(title_text="Average accuracy Score Distribution", font_size=22)
fig.show()

In [None]:
#------------------ROC--------------------------

In [None]:
# ROC of the neural network on the imbalanced test split (from its stored predictions).
roc_auc_nn, roc_fpr_nn, roc_tpr_nn = GetNNROC(
    nn_model, x_test, y_test, nn_yPrediction, 'Neural network')

In [None]:
# ROC of KNN on the imbalanced test split.
roc_auc_knn, roc_fpr_knn, roc_tpr_knn = GetROC(
    knn_model, x_test, y_test, 'KNN')

In [None]:
# ROC of the SVM on the imbalanced test split.
roc_auc_svm, roc_fpr_svm, roc_tpr_svm = GetROC(
    svm_model, x_test, y_test, 'SVM')

In [None]:
# ROC of the random forest on the imbalanced test split.
roc_auc_rf, roc_fpr_rf, roc_tpr_rf = GetROC(
    rf_model, x_test, y_test, 'Random forest')

In [None]:
# ROC of logistic regression on the imbalanced test split.
roc_auc_lr, roc_fpr_lr, roc_tpr_lr = GetROC(
    lr_model, x_test, y_test, 'Logistic regression')

In [None]:
# ROC of the decision tree on the imbalanced test split.
roc_auc_dt, roc_fpr_dt, roc_tpr_dt = GetROC(
    dt_model, x_test, y_test, 'Decision tree')

In [None]:
#-------------------ROC After Balanced Data Set

In [None]:
# ROC of the neural network on the balanced test split (from its stored predictions).
roc_auc_nn_balance, roc_fpr_nn_balance, roc_tpr_nn_balance = GetNNROC(
    nn_balance_model, x_test_balance, y_test_balance, nn_balance_yPrediction, 'Neural network')

In [None]:
# ROC of KNN on the balanced test split.
roc_auc_knn_balance, roc_fpr_knn_balance, roc_tpr_knn_balance = GetROC(
    knn_balance_model, x_test_balance, y_test_balance, "KNN")

In [None]:
# ROC of the SVM on the balanced test split.
roc_auc_svm_balance, roc_fpr_svm_balance, roc_tpr_svm_balance = GetROC(
    svm_balance_model, x_test_balance, y_test_balance, "SVM")

In [None]:
# ROC of the random forest on the balanced test split.
roc_auc_rf_balance, roc_fpr_rf_balance, roc_tpr_rf_balance = GetROC(
    rf_balance_model, x_test_balance, y_test_balance, "Random forest")

In [None]:
# ROC of logistic regression on the balanced test split.
roc_auc_lr_balance, roc_fpr_lr_balance, roc_tpr_lr_balance = GetROC(
    lr_balance_model, x_test_balance, y_test_balance, "Logistic regression")

In [None]:
# ROC of the decision tree on the balanced test split.
roc_auc_dt_balance, roc_fpr_dt_balance, roc_tpr_dt_balance = GetROC(
    dt_balance_model, x_test_balance, y_test_balance, "Decision tree")

In [None]:
#-------------- Compare Before and after balancing

In [None]:
    # Neural network: ROC on the imbalanced vs. the balanced dataset.
    # The axes is created up front; fig.add_subplot(111) after plotting adds a
    # second, empty axes over the curves on recent Matplotlib.
    fig, ax = plt.subplots(facecolor=(1, 1, 1), figsize=(9,7))
    label_font = {'size':'28'}  # Adjust to fit
    title_font = {'size':'24'}  # Adjust to fit
    ax.plot([0, 1], [0, 1], 'k--')  # chance diagonal
    ax.plot(roc_fpr_nn, roc_tpr_nn, label="Imbalanced")
    ax.plot(roc_fpr_nn_balance, roc_tpr_nn_balance, label="Balanced")
    ax.legend()
    ax.set_xlabel('False Positive Rate', fontdict=label_font)
    ax.set_ylabel('True Positive Rate', fontdict=label_font)
    ax.set_title("Neural network", fontdict=title_font)
    for side in ('bottom', 'top', 'right', 'left'):
        ax.spines[side].set_color('black')
    plt.rcParams['axes.facecolor'] = 'none'
    fig.savefig('ROC',dpi=300)
    plt.show()

In [None]:
    # KNN: ROC on the imbalanced vs. the balanced dataset.
    # The axes is created up front; fig.add_subplot(111) after plotting adds a
    # second, empty axes over the curves on recent Matplotlib.
    fig, ax = plt.subplots(facecolor=(1, 1, 1), figsize=(9,7))
    label_font = {'size':'28'}  # Adjust to fit
    title_font = {'size':'24'}  # Adjust to fit
    ax.plot([0, 1], [0, 1], 'k--')  # chance diagonal
    ax.plot(roc_fpr_knn, roc_tpr_knn, label="Imbalanced")
    ax.plot(roc_fpr_knn_balance, roc_tpr_knn_balance, label="Balanced")
    ax.legend()
    ax.set_xlabel('False Positive Rate', fontdict=label_font)
    ax.set_ylabel('True Positive Rate', fontdict=label_font)
    ax.set_title("KNN", fontdict=title_font)
    for side in ('bottom', 'top', 'right', 'left'):
        ax.spines[side].set_color('black')
    plt.rcParams['axes.facecolor'] = 'none'
    fig.savefig('ROC',dpi=300)
    plt.show()

In [None]:
    # SVM: ROC on the imbalanced vs. the balanced dataset.
    # The axes is created up front; fig.add_subplot(111) after plotting adds a
    # second, empty axes over the curves on recent Matplotlib.
    fig, ax = plt.subplots(facecolor=(1, 1, 1), figsize=(9,7))
    label_font = {'size':'28'}  # Adjust to fit
    title_font = {'size':'24'}  # Adjust to fit
    ax.plot([0, 1], [0, 1], 'k--')  # chance diagonal
    ax.plot(roc_fpr_svm, roc_tpr_svm, label="Imbalanced")
    ax.plot(roc_fpr_svm_balance, roc_tpr_svm_balance, label="Balanced")
    ax.legend()
    ax.set_xlabel('False Positive Rate', fontdict=label_font)
    ax.set_ylabel('True Positive Rate', fontdict=label_font)
    ax.set_title("SVM", fontdict=title_font)
    for side in ('bottom', 'top', 'right', 'left'):
        ax.spines[side].set_color('black')
    plt.rcParams['axes.facecolor'] = 'none'
    fig.savefig('ROC',dpi=300)
    plt.show()

In [None]:
    # Random forest: ROC on the imbalanced vs. the balanced dataset.
    # The axes is created up front; fig.add_subplot(111) after plotting adds a
    # second, empty axes over the curves on recent Matplotlib.
    fig, ax = plt.subplots(facecolor=(1, 1, 1), figsize=(9,7))
    label_font = {'size':'28'}  # Adjust to fit
    title_font = {'size':'24'}  # Adjust to fit
    ax.plot([0, 1], [0, 1], 'k--')  # chance diagonal
    ax.plot(roc_fpr_rf, roc_tpr_rf, label="Imbalanced")
    ax.plot(roc_fpr_rf_balance, roc_tpr_rf_balance, label="Balanced")
    ax.legend()
    ax.set_xlabel('False Positive Rate', fontdict=label_font)
    ax.set_ylabel('True Positive Rate', fontdict=label_font)
    ax.set_title("Random Forest", fontdict=title_font)
    for side in ('bottom', 'top', 'right', 'left'):
        ax.spines[side].set_color('black')
    plt.rcParams['axes.facecolor'] = 'none'
    fig.savefig('ROC',dpi=300)
    plt.show()

In [None]:
    # Logistic regression: ROC on the imbalanced vs. the balanced dataset.
    # The axes is created up front; fig.add_subplot(111) after plotting adds a
    # second, empty axes over the curves on recent Matplotlib.
    fig, ax = plt.subplots(facecolor=(1, 1, 1), figsize=(9,7))
    label_font = {'size':'28'}  # Adjust to fit
    title_font = {'size':'24'}  # Adjust to fit
    ax.plot([0, 1], [0, 1], 'k--')  # chance diagonal
    ax.plot(roc_fpr_lr, roc_tpr_lr, label="Imbalanced")
    ax.plot(roc_fpr_lr_balance, roc_tpr_lr_balance, label="Balanced")
    ax.legend()
    ax.set_xlabel('False Positive Rate', fontdict=label_font)
    ax.set_ylabel('True Positive Rate', fontdict=label_font)
    ax.set_title("Logistic regression", fontdict=title_font)
    for side in ('bottom', 'top', 'right', 'left'):
        ax.spines[side].set_color('black')
    plt.rcParams['axes.facecolor'] = 'none'
    fig.savefig('ROC',dpi=300)
    plt.show()

In [None]:
    # Decision tree: ROC on the imbalanced vs. the balanced dataset.
    # The axes is created up front; fig.add_subplot(111) after plotting adds a
    # second, empty axes over the curves on recent Matplotlib.
    fig, ax = plt.subplots(facecolor=(1, 1, 1), figsize=(9,7))
    label_font = {'size':'28'}  # Adjust to fit
    title_font = {'size':'24'}  # Adjust to fit
    ax.plot([0, 1], [0, 1], 'k--')  # chance diagonal
    ax.plot(roc_fpr_dt, roc_tpr_dt, label="Imbalanced")
    ax.plot(roc_fpr_dt_balance, roc_tpr_dt_balance, label="Balanced")
    ax.legend()
    ax.set_xlabel('False Positive Rate', fontdict=label_font)
    ax.set_ylabel('True Positive Rate', fontdict=label_font)
    ax.set_title("Decision tree", fontdict=title_font)
    for side in ('bottom', 'top', 'right', 'left'):
        ax.spines[side].set_color('black')
    plt.rcParams['axes.facecolor'] = 'none'
    fig.savefig('ROC',dpi=300)
    plt.show()

In [None]:
# Grouped bars comparing each classifier's AUC (in %, rounded to 2 decimals)
# on the imbalanced and the balanced dataset. One trace is added per
# classifier from a single table instead of six copy-pasted add_trace calls.
auc_by_classifier = [
    ('Neural network', roc_auc_nn, roc_auc_nn_balance),
    ('KNN', roc_auc_knn, roc_auc_knn_balance),
    ('SVM', roc_auc_svm, roc_auc_svm_balance),
    ('Random Forest', roc_auc_rf, roc_auc_rf_balance),
    ('Logistic regression', roc_auc_lr, roc_auc_lr_balance),
    ('Decision tree', roc_auc_dt, roc_auc_dt_balance),
]

fig = go.Figure()
for classifier_name, auc_imbalanced, auc_balanced in auc_by_classifier:
    auc_percent = [round(auc_imbalanced*100, 2), round(auc_balanced*100, 2)]
    fig.add_trace(go.Bar(
        x = ['Imbalanced dataset', 'Balanced dataset'],
        y = auc_percent,
        name = classifier_name,
        text = auc_percent,  # the value is also shown as the bar label
    ))
fig.update_layout(title_text="Compare AUC before and after data set balancing",font_size=18,plot_bgcolor='rgba(0,0,0,0)')
fig.update_xaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.show()

In [None]:
#--------------Voting ROC----------

In [None]:
# ROC of the neural network / XGBoost / AdaBoost ensemble on the balanced test split.
roc_auc_nn_balance_voting, roc_fpr_nn_balance_voting, roc_tpr_nn_balance_voting = GetROC(
    voting_nn_model, x_test_balance, y_test_balance,
    "EML with neural network-XGBoost-AdaBoost")

In [None]:
# ROC of the KNN / XGBoost / AdaBoost ensemble on the balanced test split.
roc_auc_knn_balance_voting, roc_fpr_knn_balance_voting, roc_tpr_knn_balance_voting = GetROC(
    voting_knn_model, x_test_balance, y_test_balance,
    "EML with KNN-XGBoost-AdaBoost")

In [None]:
# ROC of the SVM / XGBoost / AdaBoost ensemble on the balanced test split.
roc_auc_svm_balance_voting, roc_fpr_svm_balance_voting, roc_tpr_svm_balance_voting = GetROC(
    voting_svm_model, x_test_balance, y_test_balance,
    "EML with SVM-XGBoost-AdaBoost")

In [None]:
# ROC of the random forest / XGBoost / AdaBoost ensemble on the balanced test split.
roc_auc_rf_balance_voting, roc_fpr_rf_balance_voting, roc_tpr_rf_balance_voting = GetROC(
    voting_rf_model, x_test_balance, y_test_balance,
    "EML with Random forest-XGBoost-AdaBoost")

In [None]:
# ROC of the logistic regression / XGBoost / AdaBoost ensemble on the balanced test split.
roc_auc_lr_balance_voting, roc_fpr_lr_balance_voting, roc_tpr_lr_balance_voting = GetROC(
    voting_lr_model, x_test_balance, y_test_balance,
    "EML with Logistic regression-XGBoost-AdaBoost")

In [None]:
# ROC of the decision tree / XGBoost / AdaBoost ensemble on the balanced test split.
roc_auc_dt_balance_voting, roc_fpr_dt_balance_voting, roc_tpr_dt_balance_voting = GetROC(
    voting_dt_model, x_test_balance, y_test_balance,
    "EML with Decision tree-XGBoost-AdaBoost")

In [None]:
# ROC of the neural network / logistic regression / XGBoost ensemble on the
# balanced test split. The results get their own names: assigning them to the
# roc_*_dt_balance_voting variables would overwrite the decision-tree ensemble
# results that the "Decision tree" curve in the comparison plot relies on.
roc_auc_nn_lr_xgb_balance_voting, roc_fpr_nn_lr_xgb_balance_voting, roc_tpr_nn_lr_xgb_balance_voting = GetROC(
    voting_nn_lr_ab_model, x_test_balance, y_test_balance,
    "EML with Neural network-Logistic regression-XGBoost")

In [None]:
    # ROC curves of all six classifiers trained on the imbalanced dataset.
    # The axes is created up front; fig.add_subplot(111) after plotting adds a
    # second, empty axes over the curves on recent Matplotlib.
    fig, ax = plt.subplots(facecolor=(1, 1, 1), figsize=(10,10))
    label_font = {'size':'28'}  # Adjust to fit
    title_font = {'size':'24'}  # Adjust to fit
    ax.plot([0, 1], [0, 1], 'k--')  # chance diagonal
    ax.plot(roc_fpr_nn, roc_tpr_nn, label= "Neural network")
    ax.plot(roc_fpr_knn, roc_tpr_knn, label= "KNN")
    ax.plot(roc_fpr_svm, roc_tpr_svm, label= "SVM")
    ax.plot(roc_fpr_rf, roc_tpr_rf, label= "Random forest")
    ax.plot(roc_fpr_lr, roc_tpr_lr, label= "Logistic regression")
    ax.plot(roc_fpr_dt, roc_tpr_dt, label= "Decision tree ")
    ax.legend()
    ax.set_xlabel('False Positive Rate', fontdict=label_font)
    ax.set_ylabel('True Positive Rate', fontdict=label_font)
    ax.set_title("Comparison of Classifiers with Imbalanced dataset", fontdict=title_font)
    for side in ('bottom', 'top', 'right', 'left'):
        ax.spines[side].set_color('black')
    plt.rcParams['axes.facecolor'] = 'none'
    fig.savefig('ROC',dpi=300)
    plt.show()

In [None]:
    # ROC curves of all six classifiers trained on the balanced dataset.
    # The axes is created up front; fig.add_subplot(111) after plotting adds a
    # second, empty axes over the curves on recent Matplotlib.
    fig, ax = plt.subplots(facecolor=(1, 1, 1), figsize=(10,10))
    label_font = {'size':'28'}  # Adjust to fit
    title_font = {'size':'24'}  # Adjust to fit
    ax.plot([0, 1], [0, 1], 'k--')  # chance diagonal
    ax.plot(roc_fpr_nn_balance, roc_tpr_nn_balance, label= "Neural network")
    ax.plot(roc_fpr_knn_balance, roc_tpr_knn_balance, label= "KNN")
    ax.plot(roc_fpr_svm_balance, roc_tpr_svm_balance, label= "SVM")
    ax.plot(roc_fpr_rf_balance, roc_tpr_rf_balance, label= "Random forest")
    ax.plot(roc_fpr_lr_balance, roc_tpr_lr_balance, label= "Logistic regression")
    ax.plot(roc_fpr_dt_balance, roc_tpr_dt_balance, label= "Decision tree ")
    ax.legend()
    ax.set_xlabel('False Positive Rate', fontdict=label_font)
    ax.set_ylabel('True Positive Rate', fontdict=label_font)
    ax.set_title("Comparison of Classifiers with balanced dataset", fontdict=title_font)
    for side in ('bottom', 'top', 'right', 'left'):
        ax.spines[side].set_color('black')
    plt.rcParams['axes.facecolor'] = 'none'
    fig.savefig('ROC',dpi=300)
    plt.show()

In [None]:
    # ROC curves of the six voting ensembles, drawn from the roc_fpr_*_balance_voting /
    # roc_tpr_*_balance_voting arrays computed earlier in the notebook (balanced dataset).
    label_font = {'size':'28'}  # Adjust to fit
    title_font = {'size':'24'}  # Adjust to fit
    # Create the figure and its single axes together so every call below targets the same
    # axes. Calling fig.add_subplot(111) after plotting can stack a second, empty axes on
    # top of the curves in recent matplotlib releases instead of returning the plotted one.
    fig, ax = plt.subplots(facecolor=(1, 1, 1), figsize=(10, 10))
    ax.plot([0, 1], [0, 1], 'k--')  # chance-level diagonal
    # Legend entries name the whole ensemble (same wording as the ensemble AUC table),
    # not just the base classifier, because these curves come from the voting models.
    ax.plot(roc_fpr_nn_balance_voting, roc_tpr_nn_balance_voting, label="Neural network-XGBoost-AdaBoost")
    ax.plot(roc_fpr_knn_balance_voting, roc_tpr_knn_balance_voting, label="KNN-XGBoost-AdaBoost")
    ax.plot(roc_fpr_svm_balance_voting, roc_tpr_svm_balance_voting, label="SVM-XGBoost-AdaBoost")
    ax.plot(roc_fpr_rf_balance_voting, roc_tpr_rf_balance_voting, label="Random forest-XGBoost-AdaBoost")
    ax.plot(roc_fpr_lr_balance_voting, roc_tpr_lr_balance_voting, label="Logistic regression-XGBoost-AdaBoost")
    ax.plot(roc_fpr_dt_balance_voting, roc_tpr_dt_balance_voting, label="Decision tree-XGBoost-AdaBoost")
    ax.legend()
    ax.set_xlabel('False Positive Rate', fontdict=label_font)
    ax.set_ylabel('True Positive Rate', fontdict=label_font)
    ax.set_title("Comparison ensemble learning with balanced dataset", fontdict=title_font)
    for side in ('bottom', 'top', 'right', 'left'):
        ax.spines[side].set_color('black')
    # Global default, kept for the cells that follow: only axes created after this line
    # pick up the transparent background.
    plt.rcParams['axes.facecolor'] = 'none'
    # Save last so the file holds the finished figure, under a name unique to this plot
    # so it cannot be overwritten by another figure.
    fig.savefig('ROC_ensemble', dpi=300)
    plt.show();

In [None]:
# ROC for the Neural network-Logistic regression-XGBoost voting ensemble on the balanced
# test split. The results get their own names: storing them in roc_*_dt_balance_voting
# would overwrite the Decision tree ensemble's values, which the ensemble AUC table
# below reads under the 'Decision tree-XGBoost-AdaBoost' label.
roc_auc_nn_lr_ab_balance_voting,roc_fpr_nn_lr_ab_balance_voting,roc_tpr_nn_lr_ab_balance_voting=GetROC(voting_nn_lr_ab_model,x_test_balance,y_test_balance,"EML with Neural network-Logistic regression-XGBoost")

In [None]:
# AUC of each base classifier (roc_auc_* values computed earlier, imbalanced dataset):
# horizontal bar chart plus the same table sorted from best to worst AUC.
models_Imbalanced_AUC = pd.DataFrame({
    'Classifiers':['Neural network','KNN', 'SVM', 'Random forest','Logistic regression','Decision tree'],
    'AUC' :[roc_auc_nn,roc_auc_knn,roc_auc_svm,roc_auc_rf,roc_auc_lr,roc_auc_dt]
})
# Create the axes up front and hand it to seaborn. Calling fig.add_subplot(111) after
# plotting can lay a second, empty axes over the bars in recent matplotlib releases.
fig, ax = plt.subplots(facecolor=(1, 1, 1), figsize=(10, 6))
sns.barplot(x='AUC', y='Classifiers', data=models_Imbalanced_AUC, ax=ax)
for side in ('bottom', 'top', 'right', 'left'):
    ax.spines[side].set_color('black')
# Last expression of the cell: rendered as the cell's table output.
models_Imbalanced_AUC.sort_values(by='AUC', ascending=False)

In [None]:
# AUC of each base classifier (roc_auc_*_balance values computed earlier, balanced dataset):
# horizontal bar chart plus the same table sorted from best to worst AUC.
# The 'Neural network' label no longer carries a trailing space, so it matches the label
# used in the imbalanced AUC table.
models_balanced_AUC = pd.DataFrame({
    'Classifiers':['Neural network','KNN', 'SVM', 'Random forest','Logistic regression','Decision tree'],
    'AUC' :[roc_auc_nn_balance,roc_auc_knn_balance,roc_auc_svm_balance,roc_auc_rf_balance,roc_auc_lr_balance,roc_auc_dt_balance]
})
# Create the axes up front and hand it to seaborn. Calling fig.add_subplot(111) after
# plotting can lay a second, empty axes over the bars in recent matplotlib releases.
fig, ax = plt.subplots(facecolor=(1, 1, 1), figsize=(10, 6))
sns.barplot(x='AUC', y='Classifiers', data=models_balanced_AUC, ax=ax)
for side in ('bottom', 'top', 'right', 'left'):
    ax.spines[side].set_color('black')
# Last expression of the cell: rendered as the cell's table output.
models_balanced_AUC.sort_values(by='AUC', ascending=False)

In [None]:
# AUC of each voting ensemble (roc_auc_*_balance_voting values computed earlier, balanced
# dataset): horizontal bar chart plus the same table sorted from best to worst AUC.
# The first label no longer carries a trailing space.
models_balanced_voting_AUC = pd.DataFrame({
    'Ensemble Learning':['Neural network-XGBoost-AdaBoost','KNN-XGBoost-AdaBoost', 'SVM-XGBoost-AdaBoost', 'Random forest-XGBoost-AdaBoost','Logistic regression-XGBoost-AdaBoost','Decision tree-XGBoost-AdaBoost'],
    'AUC' :[roc_auc_nn_balance_voting,roc_auc_knn_balance_voting,roc_auc_svm_balance_voting,roc_auc_rf_balance_voting,roc_auc_lr_balance_voting,roc_auc_dt_balance_voting]
})
# Create the axes up front and hand it to seaborn. Calling fig.add_subplot(111) after
# plotting can lay a second, empty axes over the bars in recent matplotlib releases.
fig, ax = plt.subplots(facecolor=(1, 1, 1), figsize=(10, 6))
sns.barplot(x='AUC', y='Ensemble Learning', data=models_balanced_voting_AUC, ax=ax)
for side in ('bottom', 'top', 'right', 'left'):
    ax.spines[side].set_color('black')
# Last expression of the cell: rendered as the cell's table output.
models_balanced_voting_AUC.sort_values(by='AUC', ascending=False)

In [None]:
#-----------Confusion Matrix----------------------

In [None]:
# Confusion matrix: neural network predictions vs. the original (non-balanced) test labels.
# GetConfusionMatrix is defined earlier in the notebook; the last argument looks like a title.
GetConfusionMatrix(y_test, nn_yPredictionActual,'Neural network')

In [None]:
# Confusion matrix: KNN predictions vs. the original (non-balanced) test labels.
GetConfusionMatrix(y_test, knn_yPrediction,'KNN')

In [None]:
# Confusion matrix: SVM predictions vs. the original (non-balanced) test labels.
GetConfusionMatrix(y_test, svm_yPrediction,'SVM')

In [None]:
# Confusion matrix: random forest predictions vs. the original (non-balanced) test labels.
GetConfusionMatrix(y_test, rf_yPrediction,'Random forest')

In [None]:
# Confusion matrix: logistic regression predictions vs. the original (non-balanced) test labels.
GetConfusionMatrix(y_test, lr_yPrediction,'Logistic regression')

In [None]:
# Confusion matrix: decision tree predictions vs. the original (non-balanced) test labels.
GetConfusionMatrix(y_test, dt_yPrediction,'Decision tree')

In [None]:
#-----------balanced dataset Confusion Matrix

In [None]:
# Confusion matrix: neural network predictions vs. the balanced test labels.
# GetConfusionMatrix is defined earlier in the notebook; the last argument looks like a title.
GetConfusionMatrix(y_test_balance, nn_balance_yPredictionActual,'Neural network')

In [None]:
# Confusion matrix: KNN predictions vs. the balanced test labels.
GetConfusionMatrix(y_test_balance, knn_balance_yPrediction,'KNN')

In [None]:
# Confusion matrix: SVM predictions vs. the balanced test labels.
GetConfusionMatrix(y_test_balance, svm_balance_yPrediction,'SVM')

In [None]:
# Confusion matrix: random forest predictions vs. the balanced test labels.
GetConfusionMatrix(y_test_balance, rf_balance_yPrediction,'Random forest')

In [None]:
# Confusion matrix: logistic regression predictions vs. the balanced test labels.
GetConfusionMatrix(y_test_balance, lr_balance_yPrediction,'Logistic regression')

In [None]:
# Confusion matrix: decision tree predictions vs. the balanced test labels.
GetConfusionMatrix(y_test_balance, dt_balance_yPrediction,'Decision tree')

In [None]:
#----------------Voting Confusion Matrix

In [None]:
# Confusion matrix: neural network-XGBoost-AdaBoost voting ensemble vs. the balanced test labels.
# GetConfusionMatrix is defined earlier in the notebook; the last argument looks like a title.
GetConfusionMatrix(y_test_balance, voting_nn_yPrediction,'EML with neural network-XGBoost-AdaBoost')

In [None]:
# Confusion matrix: KNN-XGBoost-AdaBoost voting ensemble vs. the balanced test labels.
GetConfusionMatrix(y_test_balance, voting_knn_yPrediction,'EML with KNN-XGBoost-AdaBoost')

In [None]:
# Confusion matrix: SVM-XGBoost-AdaBoost voting ensemble vs. the balanced test labels.
GetConfusionMatrix(y_test_balance, voting_svm_yPrediction,'EML with SVM-XGBoost-Adaboost')

In [None]:
# Confusion matrix: random forest-XGBoost-AdaBoost voting ensemble vs. the balanced test labels.
GetConfusionMatrix(y_test_balance, voting_rf_yPrediction,'EML with random forest-XGBoost-AdaBoost')

In [None]:
# Confusion matrix: logistic regression-XGBoost-AdaBoost voting ensemble vs. the balanced test labels.
GetConfusionMatrix(y_test_balance, voting_lr_yPrediction,'EML with logistic regression-XGBoost-AdaBoost')

In [None]:
# Confusion matrix: decision tree-XGBoost-AdaBoost voting ensemble vs. the balanced test labels.
GetConfusionMatrix(y_test_balance, voting_dt_yPrediction,'EML with decision tree-XGBoost-AdaBoost')

In [None]:
#---------NN-LR-XGB ------------

In [None]:
 # Confusion matrix: neural network-logistic regression-XGBoost voting ensemble vs. the
 # balanced test labels. NOTE(review): this cell is indented by one space; IPython appears
 # to tolerate that, but a plain .py export of the cell may not -- confirm before exporting.
 GetConfusionMatrix(y_test_balance, voting_nn_lr_ab_yPrediction,'EML with neural network-logistic regression-XGBoost')

In [None]:
# Grouped bar chart: accuracy of every base classifier without and with dataset balancing.
# One trace per classifier, each with two bars (not balanced / balanced).
fig = go.Figure(data=[
    go.Bar(
        x=['Balanced dataset :No', 'Balanced dataset:Yes'],
        y=[accuracy_before, accuracy_after],
        name=classifier,
    )
    for classifier, accuracy_before, accuracy_after in (
        ('Neural network', nn_accuracy, nn_balance_accuracy),
        ('KNN', knn_accuracy, knn_balance_accuracy),
        ('SVM', svm_accuracy, svm_balance_accuracy),
        ('Random forest', rf_accuracy, rf_balance_accuracy),
        ('Logistic regression', lr_accuracy, lr_balance_accuracy),
        ('Decision tree', dt_accuracy, dt_balance_accuracy),
    )
])
fig.update_layout(
    title_text="<b>Comparing the accuracy of classifiers before and after data set balancing </b>",
    font_size=16,
    plot_bgcolor='rgba(0,0,0,0)',  # transparent plot area
)
# Thin black frame on all four sides of the plot area.
fig.update_xaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.show()

In [None]:
# Neural network: accuracy, recall, precision and F1 on the imbalanced vs. balanced dataset.
# One trace per metric; each bar is annotated with its value to two decimals.
fig = go.Figure(data=[
    go.Bar(
        x=['Imbalanced dataset', 'Balanced dataset'],
        y=[imbalanced, balanced],
        name=metric,
        text=[format(imbalanced, '.2f'), format(balanced, '.2f')],
    )
    for metric, imbalanced, balanced in (
        ('Accuracy', nn_accuracy, nn_balance_accuracy),
        ('Recall', nn_recall, nn_balance_recall),
        ('Precision', nn_precision, nn_balance_precision),
        ('F1', nn_f1, nn_balance_f1),
    )
])
fig.update_layout(
    title_text="Compare Neural network before and after data set balancing",
    font_size=18,
    plot_bgcolor='rgba(0,0,0,0)',  # transparent plot area
)
# Thin black frame on all four sides of the plot area.
fig.update_xaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.show()

In [None]:
# KNN: accuracy, recall, precision and F1 on the imbalanced vs. balanced dataset.
# One trace per metric; each bar is annotated with its value to two decimals.
fig = go.Figure(data=[
    go.Bar(
        x=['Imbalanced dataset', 'Balanced dataset'],
        y=[imbalanced, balanced],
        name=metric,
        text=[format(imbalanced, '.2f'), format(balanced, '.2f')],
    )
    for metric, imbalanced, balanced in (
        ('Accuracy', knn_accuracy, knn_balance_accuracy),
        ('Recall', knn_recall, knn_balance_recall),
        ('Precision', knn_precision, knn_balance_precision),
        ('F1', knn_f1, knn_balance_f1),
    )
])
fig.update_layout(
    title_text="Compare KNN before and after data set balancing",
    font_size=18,
    plot_bgcolor='rgba(0,0,0,0)',  # transparent plot area
)
# Thin black frame on all four sides of the plot area.
fig.update_xaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.show()

In [None]:
# SVM: accuracy, recall, precision and F1 on the imbalanced vs. balanced dataset.
# One trace per metric; each bar is annotated with its value to two decimals.
fig = go.Figure(data=[
    go.Bar(
        x=['Imbalanced dataset', 'Balanced dataset'],
        y=[imbalanced, balanced],
        name=metric,
        text=[format(imbalanced, '.2f'), format(balanced, '.2f')],
    )
    for metric, imbalanced, balanced in (
        ('Accuracy', svm_accuracy, svm_balance_accuracy),
        ('Recall', svm_recall, svm_balance_recall),
        ('Precision', svm_precision, svm_balance_precision),
        ('F1', svm_f1, svm_balance_f1),
    )
])
fig.update_layout(
    title_text="Compare SVM before and after data set balancing",
    font_size=18,
    plot_bgcolor='rgba(0,0,0,0)',  # transparent plot area
)
# Thin black frame on all four sides of the plot area.
fig.update_xaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.show()

In [None]:
# Random forest: accuracy, recall, precision and F1 on the imbalanced vs. balanced dataset.
# One trace per metric; each bar is annotated with its value to two decimals.
fig = go.Figure(data=[
    go.Bar(
        x=['Imbalanced dataset', 'Balanced dataset'],
        y=[imbalanced, balanced],
        name=metric,
        text=[format(imbalanced, '.2f'), format(balanced, '.2f')],
    )
    for metric, imbalanced, balanced in (
        ('Accuracy', rf_accuracy, rf_balance_accuracy),
        ('Recall', rf_recall, rf_balance_recall),
        ('Precision', rf_precision, rf_balance_precision),
        ('F1', rf_f1, rf_balance_f1),
    )
])
fig.update_layout(
    title_text="Compare Random forest before and after data set balancing",
    font_size=18,
    plot_bgcolor='rgba(0,0,0,0)',  # transparent plot area
)
# Thin black frame on all four sides of the plot area.
fig.update_xaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.show()

In [None]:
# Logistic regression: accuracy, recall, precision and F1 on the imbalanced vs. balanced dataset.
# One trace per metric; each bar is annotated with its value to two decimals.
fig = go.Figure(data=[
    go.Bar(
        x=['Imbalanced dataset', 'Balanced dataset'],
        y=[imbalanced, balanced],
        name=metric,
        text=[format(imbalanced, '.2f'), format(balanced, '.2f')],
    )
    for metric, imbalanced, balanced in (
        ('Accuracy', lr_accuracy, lr_balance_accuracy),
        ('Recall', lr_recall, lr_balance_recall),
        ('Precision', lr_precision, lr_balance_precision),
        ('F1', lr_f1, lr_balance_f1),
    )
])
fig.update_layout(
    title_text="Compare Logistic regression before and after data set balancing",
    font_size=18,
    plot_bgcolor='rgba(0,0,0,0)',  # transparent plot area
)
# Thin black frame on all four sides of the plot area.
fig.update_xaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.show()

In [None]:
# Decision tree: accuracy, recall, precision and F1 on the imbalanced vs. balanced dataset.
# One trace per metric; each bar is annotated with its value to two decimals.
fig = go.Figure(data=[
    go.Bar(
        x=['Imbalanced dataset', 'Balanced dataset'],
        y=[imbalanced, balanced],
        name=metric,
        text=[format(imbalanced, '.2f'), format(balanced, '.2f')],
    )
    for metric, imbalanced, balanced in (
        ('Accuracy', dt_accuracy, dt_balance_accuracy),
        ('Recall', dt_recall, dt_balance_recall),
        ('Precision', dt_precision, dt_balance_precision),
        ('F1', dt_f1, dt_balance_f1),
    )
])
fig.update_layout(
    title_text="Compare Decision tree before and after data set balancing",
    font_size=18,
    plot_bgcolor='rgba(0,0,0,0)',  # transparent plot area
)
# Thin black frame on all four sides of the plot area.
fig.update_xaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.show()

In [None]:
# Neural network: AUC (scaled by 100) on the imbalanced vs. balanced dataset,
# each bar annotated with its value to two decimals.
fig = go.Figure(data=[
    go.Bar(
        x=['Imbalanced dataset', 'Balanced dataset'],
        y=[roc_auc_nn * 100, roc_auc_nn_balance * 100],
        name='AUC',
        text=[format(roc_auc_nn * 100, '.2f'), format(roc_auc_nn_balance * 100, '.2f')],
    )
])
fig.update_layout(
    title_text="Compare Neural network AUC before and after data set balancing",
    font_size=18,
    plot_bgcolor='rgba(0,0,0,0)',  # transparent plot area
)
# Thin black frame on all four sides of the plot area.
fig.update_xaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.show()

In [None]:
# KNN: AUC (scaled by 100) on the imbalanced vs. balanced dataset,
# each bar annotated with its value to two decimals.
fig = go.Figure(data=[
    go.Bar(
        x=['Imbalanced dataset', 'Balanced dataset'],
        y=[roc_auc_knn * 100, roc_auc_knn_balance * 100],
        name='AUC',
        text=[format(roc_auc_knn * 100, '.2f'), format(roc_auc_knn_balance * 100, '.2f')],
    )
])
fig.update_layout(
    title_text="Compare KNN AUC before and after data set balancing",
    font_size=18,
    plot_bgcolor='rgba(0,0,0,0)',  # transparent plot area
)
# Thin black frame on all four sides of the plot area.
fig.update_xaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.show()

In [None]:
# SVM: AUC (scaled by 100) on the imbalanced vs. balanced dataset,
# each bar annotated with its value to two decimals.
fig = go.Figure(data=[
    go.Bar(
        x=['Imbalanced dataset', 'Balanced dataset'],
        y=[roc_auc_svm * 100, roc_auc_svm_balance * 100],
        name='AUC',
        text=[format(roc_auc_svm * 100, '.2f'), format(roc_auc_svm_balance * 100, '.2f')],
    )
])
fig.update_layout(
    title_text="Compare SVM AUC before and after data set balancing",
    font_size=18,
    plot_bgcolor='rgba(0,0,0,0)',  # transparent plot area
)
# Thin black frame on all four sides of the plot area.
fig.update_xaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.show()

In [None]:
# Random forest: AUC (scaled by 100) on the imbalanced vs. balanced dataset,
# each bar annotated with its value to two decimals.
fig = go.Figure(data=[
    go.Bar(
        x=['Imbalanced dataset', 'Balanced dataset'],
        y=[roc_auc_rf * 100, roc_auc_rf_balance * 100],
        name='AUC',
        text=[format(roc_auc_rf * 100, '.2f'), format(roc_auc_rf_balance * 100, '.2f')],
    )
])
fig.update_layout(
    title_text="Compare Random forest AUC before and after data set balancing",
    font_size=18,
    plot_bgcolor='rgba(0,0,0,0)',  # transparent plot area
)
# Thin black frame on all four sides of the plot area.
fig.update_xaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.show()

In [None]:
# Logistic regression: AUC (scaled by 100) on the imbalanced vs. balanced dataset,
# each bar annotated with its value to two decimals.
fig = go.Figure(data=[
    go.Bar(
        x=['Imbalanced dataset', 'Balanced dataset'],
        y=[roc_auc_lr * 100, roc_auc_lr_balance * 100],
        name='AUC',
        text=[format(roc_auc_lr * 100, '.2f'), format(roc_auc_lr_balance * 100, '.2f')],
    )
])
fig.update_layout(
    title_text="Compare Logistic regression AUC before and after data set balancing",
    font_size=18,
    plot_bgcolor='rgba(0,0,0,0)',  # transparent plot area
)
# Thin black frame on all four sides of the plot area.
fig.update_xaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.show()

In [None]:
# Decision tree: AUC (scaled by 100) on the imbalanced vs. balanced dataset,
# each bar annotated with its value to two decimals.
fig = go.Figure(data=[
    go.Bar(
        x=['Imbalanced dataset', 'Balanced dataset'],
        y=[roc_auc_dt * 100, roc_auc_dt_balance * 100],
        name='AUC',
        text=[format(roc_auc_dt * 100, '.2f'), format(roc_auc_dt_balance * 100, '.2f')],
    )
])
fig.update_layout(
    title_text="Compare Decision tree AUC before and after data set balancing",
    font_size=18,
    plot_bgcolor='rgba(0,0,0,0)',  # transparent plot area
)
# Thin black frame on all four sides of the plot area.
fig.update_xaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.show()