---

<h1 style="text-align: center;font-size: 30px; color: #013b86;">Simple Churn prediction with Deep-Learning VS XGBoost</h1>

---

<center><img style="width: 700px;" src="https://i.ibb.co/jWvLTtM/vs-image-2.jpg"></center>

---
<i>Source: base image from Google, edited and then uploaded to imgbb</i>

# Introduction

This notebook is a simple comparison of an artificial neural network and XGBoost. For this comparison I use the churn-modelling data set.

We are not doing a detailed EDA here, since that is not the goal of this notebook. A simple EDA is all that is needed to complete this comparison.

Deep learning usually works best when there is a large amount of information to learn from. Here we want to see how both approaches behave in a situation like this one.

I hope you like this simple comparison.

In [None]:
import numpy as np 
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(folder, file_name))


In [None]:
# Load the churn-modelling CSV from the Kaggle input directory and preview the first rows.
dataset = pd.read_csv('/kaggle/input/churn-modelling/Churn_Modelling.csv')
dataset.head()

In [None]:
# Correlation heatmap of the numeric columns.
# The raw table still contains text columns (e.g. 'Geography', 'Gender'), which
# cannot be correlated; recent pandas versions raise on them instead of silently
# dropping them, so the numeric columns are selected explicitly first.
plt.figure(figsize=(20,20))
churn_corr = dataset.select_dtypes(include='number').corr()
churn_corr_top = churn_corr.index
# churn_corr already is the matrix to plot; recomputing it from
# dataset[churn_corr_top] would give the same result.
sns.heatmap(churn_corr, annot=True)

In [None]:
# Features: the columns at positions 3..12 of the raw table; target: the column at position 13.
# NOTE(review): the first three columns are skipped, presumably because they are
# identifiers rather than predictors; confirm against the CSV header.
X = dataset.iloc[:, 3:13]
y = dataset.iloc[:, 13]

In [None]:
X.head()

In [None]:
# One-hot encode the two text columns. drop_first=True omits the first category
# of each column, so the remaining dummy columns are not perfectly collinear.
geography = pd.get_dummies(dataset['Geography'], drop_first=True)
gender = pd.get_dummies(dataset['Gender'], drop_first=True)
gender.head()

In [None]:
# Append the dummy columns to the feature table. The original 'Geography' and
# 'Gender' text columns are still in X at this point.
X = pd.concat([X, geography, gender], axis=1)

In [None]:
# Remove the original text columns from the feature table.
# errors='ignore' keeps this cell re-runnable: on a second run the columns are
# already gone, and the drop becomes a no-op instead of raising KeyError.
X = X.drop(columns=['Geography', 'Gender'], errors='ignore')

In [None]:
X

### Splitting the dataset into the Training set and Test set

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# Artificial Neural Network

---

<center><img style="width: 700px;" src="https://www.mdpi.com/genes/genes-10-00553/article_deploy/html/images/genes-10-00553-g001.png"></center>

---


### Feature Scaling

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
# Learn the scaling statistics from the training rows only, then apply that
# same transformation to both the training and the test features.
SC = StandardScaler()
SC.fit(X_train)
X_train = SC.transform(X_train)
X_test = SC.transform(X_test)

### Importing the Keras libraries and packages

In [None]:
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LeakyReLU, PReLU, ELU
from keras.layers import Dropout

### Initialising the ANN

In [None]:
# Start from an empty sequential model; the layers are added in the cells that follow.
classifier = Sequential()

### Building Deep layer 

In [None]:
# Adding the input layer and the first hidden layer.
# The input width is read from the training matrix instead of being hard-coded,
# so the layer stays correct if the feature set changes (create_model, used for
# the hyper-parameter search, derives it the same way).
classifier.add(Dense(6, kernel_initializer = 'he_uniform',activation='relu',input_dim = X_train.shape[1]))
# Randomly zero 10% of this layer's outputs during training.
classifier.add(Dropout(0.1))

In [None]:
# Second hidden layer: 6 ReLU units with He-uniform initial weights,
# followed by 10% dropout.
classifier.add(
    Dense(
        6,
        activation='relu',
        kernel_initializer='he_uniform',
    )
)
classifier.add(Dropout(0.1))

In [None]:
# Output layer: a single sigmoid unit with Glorot-uniform initial weights.
classifier.add(
    Dense(
        1,
        activation='sigmoid',
        kernel_initializer='glorot_uniform',
    )
)

In [None]:
# Compile the ANN: Adamax optimizer, binary cross-entropy loss,
# with accuracy tracked as the reported metric.
classifier.compile(
    loss='binary_crossentropy',
    optimizer='Adamax',
    metrics=['accuracy'],
)

In [None]:
# Train for 100 epochs in mini-batches of 10, holding back 33% of the training
# rows for validation. The returned history object is what the accuracy and
# loss plots read from.
model_history = classifier.fit(
    X_train,
    y_train,
    validation_split=0.33,
    batch_size=10,
    epochs=100,
)


### list all data in history

In [None]:
print(model_history.history.keys())

In [None]:
# summarize history for accuracy
plt.figure(figsize=(10, 7))
plt.plot(model_history.history['accuracy'])
plt.plot(model_history.history['val_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
# The second curve comes from the validation_split rows carved out of the
# training data, not from the held-out test set, so it is labelled accordingly.
plt.legend(['train', 'validation'], loc='upper left')
plt.show()

In [None]:
# summarize history for loss
plt.figure(figsize=(10, 7))
plt.plot(model_history.history['loss'])
plt.plot(model_history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
# The second curve comes from the validation_split rows carved out of the
# training data, not from the held-out test set, so it is labelled accordingly.
plt.legend(['train', 'validation'], loc='upper left')
plt.show()

## Part 3 - Making the predictions and evaluating the model

In [None]:
# Score the test rows and threshold the sigmoid outputs at 0.5,
# giving a boolean prediction per row.
y_pred = classifier.predict(X_test) > 0.5
y_pred

In [None]:
# Build the confusion matrix for the ANN on the test set: true labels (y_test)
# against the thresholded predictions (y_pred).
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)

In [None]:
cm

In [None]:
# Accuracy of the ANN on the held-out test set: the fraction of test rows whose
# thresholded prediction matches the true label. This value is reused in the
# final comparison table.
from sklearn.metrics import accuracy_score
score = accuracy_score(y_test, y_pred)

In [None]:
score

## Hyper parameter Tuning for ANN

In [None]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV

In [None]:
from keras.layers import Dense, Activation, Embedding, Flatten, LeakyReLU, BatchNormalization, Dropout
from keras.activations import relu, sigmoid

In [None]:
def create_model(layers, activation):
    """Build and compile a dense binary classifier for the grid search.

    Parameters
    ----------
    layers : iterable of int
        Number of units in each hidden layer, in order. One Dense layer
        followed by a 10% Dropout layer is added per entry.
    activation : str
        Activation used by every hidden layer.

    Returns
    -------
    A compiled Sequential model with a single sigmoid output unit, using the
    Adamax optimizer, binary cross-entropy loss and the accuracy metric.

    Notes
    -----
    The input width is read from the notebook-level ``X_train``.
    """
    model = Sequential()

    for position, units in enumerate(layers):
        # Only the first hidden layer is told the input width.
        first_layer_args = {'input_dim': X_train.shape[1]} if position == 0 else {}
        model.add(
            Dense(
                units,
                kernel_initializer='he_uniform',
                activation=activation,
                **first_layer_args,
            )
        )
        model.add(Dropout(0.1))

    # Output layer: one sigmoid unit.
    model.add(Dense(1, kernel_initializer='glorot_uniform', activation='sigmoid'))

    model.compile(optimizer='Adamax', loss='binary_crossentropy', metrics=['accuracy'])
    return model

In [None]:
# Wrap the Keras builder so scikit-learn's grid search can drive it.
model = KerasClassifier(build_fn=create_model, verbose=0)

# Candidate hidden-layer layouts (one integer per layer) and activations.
layers = [(6, 3, 3), (10, 10), (45, 30, 15)]
activations = ['sigmoid', 'relu']

param_grid = {
    'layers': layers,
    'activation': activations,
    'batch_size': [128, 256],
    'epochs': [30],
}

# 5-fold cross-validated search over every combination in the grid.
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=5)
grid_result = grid.fit(X_train, y_train)

In [None]:
[grid_result.best_score_,grid_result.best_params_]

# XGBoost

---

<center><img style="width: 700px;" src="https://d1rwhvwstyk9gu.cloudfront.net/2020/02/XG-Boost-FINAL-01.png"></center>

---

In [None]:
X.head()

In [None]:
y.head()

## Hyper Parameter Optimization for xgboost

In [None]:
# Search space for the XGBoost randomized search: each key is an XGBClassifier
# hyperparameter and each list holds the candidate values to sample from.
params = {
    "max_depth": [3, 4, 5, 6, 8, 10, 12, 15],
    # Ascending grid of step sizes. The first entry was 0.5, which broke the
    # otherwise ascending 0.10-0.30 sequence and was almost certainly a typo
    # for 0.05.
    "learning_rate": [0.05, 0.10, 0.15, 0.20, 0.25, 0.30],
    "min_child_weight": [1, 3, 5, 7],
    "gamma": [0.0, 0.1, 0.2, 0.3, 0.4],
    "colsample_bytree": [0.3, 0.4, 0.5, 0.7]
}

In [None]:
## Hyperparameter optimization using RandomizedSearchCV
from sklearn.model_selection import RandomizedSearchCV
import xgboost

In [None]:
# Classifier with library-default settings; it serves as the base estimator
# whose hyperparameters the randomized search varies.
xgb_init = xgboost.XGBClassifier()

In [None]:
# Sample 5 parameter combinations from `params` and score each one with
# 5-fold cross-validated ROC AUC.
# random_state pins which combinations are drawn, so re-running the notebook
# repeats the same search instead of a different random one each time.
random_cv = RandomizedSearchCV(xgb_init, param_distributions=params, n_iter=5, scoring="roc_auc", n_jobs=1, cv=5, verbose=3, random_state=0)

In [None]:
# Run the search on the full, unscaled feature table (X, y), not on the
# scaled train split that the ANN was trained on.
random_cv.fit(X, y)

In [None]:
random_cv.best_estimator_

In [None]:
random_cv.best_params_

In [None]:
# Build the final classifier from the hyperparameters the randomized search
# actually selected, rather than from an estimator repr hand-copied out of one
# earlier run (which goes stale whenever the search picks something else and
# pins version-specific constructor arguments).
# random_state=0 is carried over from the previous hand-written configuration.
xgb_classifier = xgboost.XGBClassifier(random_state=0, **random_cv.best_params_)

In [None]:
from sklearn.model_selection import cross_val_score

In [None]:
# 10-fold cross-validation of the XGBoost classifier on the full data set.
# No scoring argument is given, so the estimator's default score is used
# (accuracy for a classifier). `predict` holds one score per fold.
predict = cross_val_score(xgb_classifier, X, y, cv=10)
predict

In [None]:
# Collapse the per-fold scores into their mean. Note that `predict` is rebound
# here from an array of fold scores to a single number, which is the value the
# final comparison table uses.
predict = predict.mean()
predict

---

<h1 style="text-align: center;font-size: 30px; color: #e34b4b;">Deep-Learning VS XGBoost</h1>

---

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import HTML

In [None]:
# CSS properties for the header (th) cells of the results table
th_props = list({
    'font-size': '28px',
    'text-align': 'center',
    'font-weight': 'bold',
    'color': '#021755',
    'background-color': '#fbe4e5',
}.items())

# CSS properties for the data (td) cells of the results table
td_props = list({
    'font-size': '25px',
    'background-color': '#f7f2ed',
}.items())

# Table styles: one selector/props entry per cell type
styles = [
    {'selector': tag, 'props': css}
    for tag, css in (('th', th_props), ('td', td_props))
]

In [None]:
# Final comparison table.
# `score` is the ANN accuracy on the held-out test set, while `predict` is the
# mean 10-fold cross-validation score of XGBoost on the full data, so the two
# numbers come from different evaluation protocols.
d = {'ML Technique': ["Deep-Learning", "XGBoost"], 'Score': [score, predict]}
df = pd.DataFrame(data=d)

# The format key must name a column that exists in df: 'Score' is rendered as
# a percentage with two decimals. (The previous key matched no column, so the
# formatter was never applied. The unused palette assignment was also removed
# because it overwrote the confusion matrix stored in `cm`.)
(df.style
  .set_caption('Deep-Learning VS XGBoost.')
  .format({'Score': "{:.2%}"})
  .set_table_styles(styles))

Note: the deep-learning score changes from run to run. Sometimes it exceeds the XGBoost score, but most of the time it does not, so please don't treat these numbers as definitive values. Try it yourself; I hope you get some useful experience from it.

### Acknowledgement

* https://www.youtube.com/watch?v=9HomdnM12o4
* https://www.youtube.com/watch?v=HdlDYng8g9s&t=100s
* https://www.youtube.com/watch?v=OTTOglLJxLU&list=PLZoTAELRMXVPGU70ZGsckrMdr0FteeRUi&index=17
* https://mode.com/example-gallery/python_dataframe_styling/