# Stacking Classifier

Documentations -

1.   [Sklearn Documentation](https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.StackingClassifier.html)
2.   [Mlxtend](http://rasbt.github.io/mlxtend/user_guide/classifier/StackingClassifier/)




# 1. Importing Libraries and Dataset


In [2]:
!pip install category_encoders

Collecting category_encoders
[?25l  Downloading https://files.pythonhosted.org/packages/44/57/fcef41c248701ee62e8325026b90c432adea35555cbc870aff9cfba23727/category_encoders-2.2.2-py2.py3-none-any.whl (80kB)
[K     |████████████████████████████████| 81kB 2.2MB/s 
Installing collected packages: category-encoders
Successfully installed category-encoders-2.2.2


In [None]:
# Importing Libraries

## For Data Operations and Visualizations
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder, StandardScaler
from category_encoders import TargetEncoder
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, roc_auc_score

## For Classifiers
from sklearn.ensemble import RandomForestClassifier as rfc
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier

In [4]:
import warnings
warnings.filterwarnings('ignore')

In [5]:
from google.colab import files
uploaded = files.upload()

Saving Churn_Modelling.csv to Churn_Modelling.csv


In [6]:
# Importing Dataset
df = pd.read_csv('Churn_Modelling.csv')

# 2. Feature Engineering and Selection 

In [7]:
df.columns

Index(['RowNumber', 'CustomerId', 'Surname', 'CreditScore', 'Geography',
       'Gender', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard',
       'IsActiveMember', 'EstimatedSalary', 'Exited'],
      dtype='object')

In [8]:
# Dropping off redundant columns
df.drop(['RowNumber', 'CustomerId', 'Surname'], inplace = True, axis = 1)  

In [9]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 11 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   CreditScore      10000 non-null  int64  
 1   Geography        10000 non-null  object 
 2   Gender           10000 non-null  object 
 3   Age              10000 non-null  int64  
 4   Tenure           10000 non-null  int64  
 5   Balance          10000 non-null  float64
 6   NumOfProducts    10000 non-null  int64  
 7   HasCrCard        10000 non-null  int64  
 8   IsActiveMember   10000 non-null  int64  
 9   EstimatedSalary  10000 non-null  float64
 10  Exited           10000 non-null  int64  
dtypes: float64(2), int64(7), object(2)
memory usage: 859.5+ KB


In [10]:
# Check for Imbalance
df.groupby('Exited')['Geography'].count()

Exited
0    7963
1    2037
Name: Geography, dtype: int64

# 3. Data Preprocessing  

Encoding Categorical Variables

In [11]:
l = LabelEncoder()
df['Gender'] = l.fit_transform(df['Gender'])

In [None]:
encoder = TargetEncoder()
df['country'] = encoder.fit_transform(df['Geography'], df['Exited'])

In [12]:
df.drop(['Geography'], inplace = True, axis = 1)

In [13]:
df

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,0,42,2,0.00,1,1,1,101348.88,1
1,608,0,41,1,83807.86,1,0,1,112542.58,0
2,502,0,42,8,159660.80,3,1,0,113931.57,1
3,699,0,39,1,0.00,2,0,0,93826.63,0
4,850,0,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...
9995,771,1,39,5,0.00,2,1,0,96270.64,0
9996,516,1,35,10,57369.61,1,1,1,101699.77,0
9997,709,0,36,7,0.00,1,0,1,42085.58,1
9998,772,1,42,3,75075.31,2,1,0,92888.52,1


In [29]:
# Spliting into dependent and independent vectors
x = df.drop(['Exited'], axis = 1)
y = df.Exited

In [30]:
# Standard Scaling
S = StandardScaler()
x = S.fit_transform(x)

# 4. Splitting the dataset into training set and test set  

In [31]:
x_train, x_test, y_train, y_test = train_test_split(x, y ,test_size = 0.25, 
                                                    random_state = 0)

# 5. Random Forest Classifier

In [32]:
# fitting my model
classifier = rfc(n_estimators = 100, random_state = 0, criterion = 'entropy')
classifier.fit(x_train, y_train)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='entropy', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=0, verbose=0,
                       warm_start=False)

In [33]:
# predicting the test set results
y_pred = classifier.predict(x_test)

In [34]:
# Checking Accuracy
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.87      0.96      0.91      1991
           1       0.72      0.45      0.56       509

    accuracy                           0.85      2500
   macro avg       0.80      0.70      0.73      2500
weighted avg       0.84      0.85      0.84      2500



# 6. Logistic Regression Classifier

In [35]:
# fitting my model
classifier = LogisticRegression(random_state = 0)
classifier.fit(x_train, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=0, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [36]:
# predicting the test set results
y_pred = classifier.predict(x_test)

In [37]:
# Checking Accuracy
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.82      0.97      0.89      1991
           1       0.60      0.17      0.27       509

    accuracy                           0.81      2500
   macro avg       0.71      0.57      0.58      2500
weighted avg       0.77      0.81      0.76      2500



# 7. Naive Bayes Classifier 

In [38]:
# fitting my model
classifier = GaussianNB()
classifier.fit(x_train, y_train)

GaussianNB(priors=None, var_smoothing=1e-09)

In [39]:
# predicting the test set results
y_pred = classifier.predict(x_test)

In [40]:
# Checking Accuracy
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.83      0.97      0.90      1991
           1       0.70      0.24      0.36       509

    accuracy                           0.82      2500
   macro avg       0.77      0.61      0.63      2500
weighted avg       0.81      0.82      0.79      2500



# 8. Multi-Layer Perceptron Classifier 

In [61]:
# fitting my model
classifier = MLPClassifier(activation = "relu", alpha = 0.05, random_state = 0)
classifier.fit(x_train, y_train)

MLPClassifier(activation='relu', alpha=0.05, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=200,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=0, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [62]:
# predicting the test set results
y_pred = classifier.predict(x_test)

In [63]:
# Checking Accuracy
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.88      0.96      0.92      1991
           1       0.75      0.47      0.58       509

    accuracy                           0.86      2500
   macro avg       0.81      0.72      0.75      2500
weighted avg       0.85      0.86      0.85      2500



# 9. Stacking Classifier

In [41]:
# Importing Necessary Libraries
from sklearn.ensemble import StackingClassifier

Note : You can setup any classifier as stacking classifiers or meta-classifiers according to your choice or performance.

In [72]:
# Initialising the Stacking Algorithms
estimators = [
        ('naive-bayes', GaussianNB()),
        ('random-forest', rfc(n_estimators = 100, random_state = 0)),
        ('mlp', MLPClassifier(activation = "relu", alpha = 0.05, random_state = 0))
        ]

In [73]:
# Setting up the Meta-Classifier
clf = StackingClassifier(
        estimators = estimators, 
        final_estimator = LogisticRegression(random_state = 0)
        )

In [74]:
clf.fit(x_train, y_train)

StackingClassifier(cv=None,
                   estimators=[('naive-bayes',
                                GaussianNB(priors=None, var_smoothing=1e-09)),
                               ('random-forest',
                                RandomForestClassifier(bootstrap=True,
                                                       ccp_alpha=0.0,
                                                       class_weight=None,
                                                       criterion='gini',
                                                       max_depth=None,
                                                       max_features='auto',
                                                       max_leaf_nodes=None,
                                                       max_samples=None,
                                                       min_impurity_decrease=0.0,
                                                       min_impurity_split=None,
                                                   

In [75]:
clf.get_params()

{'cv': None,
 'estimators': [('naive-bayes', GaussianNB(priors=None, var_smoothing=1e-09)),
  ('random-forest',
   RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                          criterion='gini', max_depth=None, max_features='auto',
                          max_leaf_nodes=None, max_samples=None,
                          min_impurity_decrease=0.0, min_impurity_split=None,
                          min_samples_leaf=1, min_samples_split=2,
                          min_weight_fraction_leaf=0.0, n_estimators=100,
                          n_jobs=None, oob_score=False, random_state=0, verbose=0,
                          warm_start=False)),
  ('mlp',
   MLPClassifier(activation='relu', alpha=0.05, batch_size='auto', beta_1=0.9,
                 beta_2=0.999, early_stopping=False, epsilon=1e-08,
                 hidden_layer_sizes=(100,), learning_rate='constant',
                 learning_rate_init=0.001, max_fun=15000, max_iter=200,
                 mo

In [97]:
# predicting the test set results
y_pred = clf.predict(x_test)

In [77]:
# Checking Accuracy
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.88      0.96      0.92      1991
           1       0.76      0.46      0.58       509

    accuracy                           0.86      2500
   macro avg       0.82      0.71      0.75      2500
weighted avg       0.85      0.86      0.85      2500



The below was for Simple Random Forest Classifier,
```
 precision    recall  f1-score   support

           0       0.87      0.96      0.91      1991
           1       0.72      0.45      0.56       509

    accuracy                           0.85      2500
   macro avg       0.80      0.70      0.73      2500
weighted avg       0.84      0.85      0.84      2500
```
So, by comparison on a naive basis, we can say that we can obtain better results by merging two or more algorithms together forming ensemble based learning and obtain better results without hyperparameter tuning as compared to 1-Base Algo Ensembled Based Learning.


# 10. Tuning the Meta-Classifier

In [86]:
# Defining Parameter Grid
params = {'final_estimator__C': [1.0,1.1,1.5],
          'final_estimator__max_iter': [50,100,150,200],
          'final_estimator__n_jobs': [1,-1,5],
          'final_estimator__penalty': ['l1','l2'],
          'final_estimator__random_state': [0],
          }

In [88]:
# Initialize GridSearchCV
grid = GridSearchCV(estimator = clf, 
                    param_grid = params, 
                    cv = 5,
                    scoring = "roc_auc",
                    verbose = 10,
                    n_jobs = -1)

In [90]:
# Fit GridSearchCV
grid.fit(x_train, y_train)

Fitting 5 folds for each of 72 candidates, totalling 360 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:   48.6s
[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:  4.0min
[Parallel(n_jobs=-1)]: Done  14 tasks      | elapsed:  5.5min
[Parallel(n_jobs=-1)]: Done  21 tasks      | elapsed:  8.7min
[Parallel(n_jobs=-1)]: Done  28 tasks      | elapsed: 11.0min
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed: 15.0min
[Parallel(n_jobs=-1)]: Done  46 tasks      | elapsed: 18.2min
[Parallel(n_jobs=-1)]: Done  57 tasks      | elapsed: 22.9min
[Parallel(n_jobs=-1)]: Done  68 tasks      | elapsed: 27.0min
[Parallel(n_jobs=-1)]: Done  81 tasks      | elapsed: 32.5min
[Parallel(n_jobs=-1)]: Done  94 tasks      | elapsed: 37.3min
[Parallel(n_jobs=-1)]: Done 109 tasks      | elapsed: 43.5min
[Parallel(n_jobs=-1)]: Done 124 tasks      | elapsed: 49.2min
[Parallel(n_jobs=-1)]: Done 141 tasks      | elapsed: 56

GridSearchCV(cv=5, error_score=nan,
             estimator=StackingClassifier(cv=None,
                                          estimators=[('naive-bayes',
                                                       GaussianNB(priors=None,
                                                                  var_smoothing=1e-09)),
                                                      ('random-forest',
                                                       RandomForestClassifier(bootstrap=True,
                                                                              ccp_alpha=0.0,
                                                                              class_weight=None,
                                                                              criterion='gini',
                                                                              max_depth=None,
                                                                              max_features='auto',
                              

In [91]:
# predicting the test set results
y_pred = grid.predict(x_test)

In [92]:
# Checking Accuracy
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.88      0.96      0.92      1991
           1       0.75      0.46      0.57       509

    accuracy                           0.86      2500
   macro avg       0.81      0.71      0.75      2500
weighted avg       0.85      0.86      0.85      2500



On a Naive Note, Regardless of the improvement/demotion we got after hyperparameter tuning, the basic idea was to demonstrate how it's done.

# End