# Stacking Classifier

Documentation:

1.   [Sklearn Documentation](https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.StackingClassifier.html)
2.   [Mlxtend](http://rasbt.github.io/mlxtend/user_guide/classifier/StackingClassifier/)




# 1. Importing Libraries and Dataset


In [None]:
!pip install category_encoders

In [None]:
# Importing Libraries

## For Data Operations and Visualizations
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder, StandardScaler
from category_encoders import TargetEncoder
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, roc_auc_score

## For Classifiers
from sklearn.ensemble import RandomForestClassifier as rfc
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier

In [None]:
import warnings

# Silence only deprecation-style noise. Warnings about failed or non-converged
# fits are left visible on purpose: they point at real modelling problems
# (e.g. invalid hyperparameter combinations during a grid search).
warnings.filterwarnings('ignore', category=DeprecationWarning)
warnings.filterwarnings('ignore', category=FutureWarning)

In [None]:
# Listing every file available under the Kaggle input directory
import os
for root, _subdirs, files in os.walk('/kaggle/input'):
    for path in (os.path.join(root, name) for name in files):
        print(path)

In [None]:
# Loading the churn dataset from the Kaggle input directory into a DataFrame
df = pd.read_csv(
    filepath_or_buffer='/kaggle/input/churn-modelling/Churn_Modelling.csv'
)

# 2. Feature Engineering and Selection 

In [None]:
# Inspecting the column names of the loaded dataset
df.columns

In [None]:
# Removing, in place, the columns treated as redundant for modelling
df.drop(columns=['RowNumber', 'CustomerId', 'Surname'], inplace=True)

In [None]:
# Summarising the remaining columns of the DataFrame
df.info()

In [None]:
# Check for Imbalance
# Counting rows per target class directly on the target column, so the result
# does not depend on missing values in an unrelated column.
df['Exited'].value_counts()

# 3. Data Preprocessing  

Encoding Categorical Variables

In [None]:
# Integer-encoding the Gender column: learn the label mapping, then apply it
l = LabelEncoder()
l.fit(df['Gender'])
df['Gender'] = l.transform(df['Gender'])

In [None]:
# Target-encoding Geography into a new 'country' column.
# The encoder is fitted on the training rows only, so the targets of the
# test rows never influence the encoding (avoids target leakage).
# NOTE: the row selection mirrors the train_test_split call made later in the
# notebook (test_size = 0.25, random_state = 0); keep the two in sync.
train_rows = train_test_split(np.arange(len(df)), test_size = 0.25,
                              random_state = 0)[0]
encoder = TargetEncoder()
encoder.fit(df['Geography'].iloc[train_rows], df['Exited'].iloc[train_rows])
df['country'] = encoder.transform(df['Geography'])

In [None]:
# The raw Geography column is no longer needed once 'country' holds its encoding
df.drop(columns=['Geography'], inplace=True)

In [None]:
# Displaying the DataFrame after the encoding steps
df

In [None]:
# Splitting into the target vector (y) and the feature matrix (x)
y = df['Exited']
x = df.drop(columns=['Exited'])

In [None]:
# Standard Scaling
# The scaler's statistics are learned from the training rows only, so the
# test rows do not leak into preprocessing; all rows are then transformed.
# NOTE: the row selection mirrors the train_test_split call made later in the
# notebook (test_size = 0.25, random_state = 0); keep the two in sync.
train_rows = train_test_split(np.arange(len(x)), test_size = 0.25,
                              random_state = 0)[0]
S = StandardScaler()
S.fit(x.iloc[train_rows])
x = S.transform(x)

# 4. Splitting the dataset into training set and test set  

In [None]:
# Holding out 25% of the rows as the test set, with a fixed seed
x_train, x_test, y_train, y_test = train_test_split(
    x, y, random_state=0, test_size=0.25
)

# 5. Random Forest Classifier

In [None]:
# Training a 100-tree random forest that uses the entropy split criterion
classifier = rfc(
    criterion='entropy',
    n_estimators=100,
    random_state=0,
)
classifier.fit(x_train, y_train)

In [None]:
# Predicting class labels for the test set with the fitted random forest
y_pred = classifier.predict(x_test)

In [None]:
# Printing the classification report (true labels vs. predictions) for the test set
print(classification_report(y_test, y_pred))

# 6. Logistic Regression Classifier

In [None]:
# Fitting a logistic regression model (fixed seed) on the training set
classifier = LogisticRegression(random_state = 0)
classifier.fit(x_train, y_train)

In [None]:
# Predicting class labels for the test set with the fitted logistic regression
y_pred = classifier.predict(x_test)

In [None]:
# Printing the classification report (true labels vs. predictions) for the test set
print(classification_report(y_test, y_pred))

# 7. Naive Bayes Classifier 

In [None]:
# Fitting a Gaussian naive Bayes model on the training set
classifier = GaussianNB()
classifier.fit(x_train, y_train)

In [None]:
# Predicting class labels for the test set with the fitted naive Bayes model
y_pred = classifier.predict(x_test)

In [None]:
# Printing the classification report (true labels vs. predictions) for the test set
print(classification_report(y_test, y_pred))

# 8. Multi-Layer Perceptron Classifier 

In [None]:
# Training a multi-layer perceptron (ReLU activation, alpha = 0.05, fixed seed)
classifier = MLPClassifier(
    random_state=0,
    alpha=0.05,
    activation='relu',
)
classifier.fit(x_train, y_train)

In [None]:
# Predicting class labels for the test set with the fitted MLP
y_pred = classifier.predict(x_test)

In [None]:
# Printing the classification report (true labels vs. predictions) for the test set
print(classification_report(y_test, y_pred))

# 9. Stacking Classifier

In [None]:
# Importing Necessary Libraries
from sklearn.ensemble import StackingClassifier

Note: You can use any classifiers as the base (stacked) estimators or as the meta-classifier, depending on your preference or on their performance.

In [None]:
# Base learners of the stack, each given as a (name, estimator) pair
estimators = [
    ('naive-bayes', GaussianNB()),
    ('random-forest', rfc(random_state=0, n_estimators=100)),
    ('mlp', MLPClassifier(random_state=0, alpha=0.05, activation='relu')),
]

In [None]:
# Building the stack: logistic regression acts as the meta-classifier
# on top of the base learners listed in `estimators`
meta_classifier = LogisticRegression(random_state=0)
clf = StackingClassifier(estimators=estimators, final_estimator=meta_classifier)

In [None]:
# Fitting the stacking classifier on the training set
clf.fit(x_train, y_train)

In [None]:
# Listing the hyperparameters of the stacking classifier
clf.get_params()

In [None]:
# Predicting class labels for the test set with the fitted stacking classifier
y_pred = clf.predict(x_test)

In [None]:
# Printing the classification report (true labels vs. predictions) for the test set
print(classification_report(y_test, y_pred))

For comparison, the standalone Random Forest Classifier produced the following report:
```
 precision    recall  f1-score   support

           0       0.87      0.96      0.91      1991
           1       0.72      0.45      0.56       509

    accuracy                           0.85      2500
   macro avg       0.80      0.70      0.73      2500
weighted avg       0.84      0.85      0.84      2500
```
So, on a naive comparison, combining two or more different algorithms into a stacked ensemble can give better results, even without hyperparameter tuning, than an ensemble built from a single base algorithm (such as Random Forest).


# 10. Tuning the Meta-Classifier

In [None]:
# Defining Parameter Grid
# The keys use the `final_estimator__` prefix, so only the logistic-regression
# meta-classifier is tuned.
# The solver is pinned to 'liblinear' because it supports both the 'l1' and
# 'l2' penalties; with the default 'lbfgs' solver every 'l1' candidate fails
# to fit and is scored as NaN.
# `n_jobs` is intentionally not searched: it only controls parallelism, not
# the fitted model, and would just multiply the number of candidates.
params = {
    'final_estimator__C': [1.0, 1.1, 1.5],
    'final_estimator__max_iter': [50, 100, 150, 200],
    'final_estimator__penalty': ['l1', 'l2'],
    'final_estimator__solver': ['liblinear'],
    'final_estimator__random_state': [0],
}

In [None]:
# Setting up a 5-fold grid search over `params` for the stacking classifier,
# scored by ROC AUC and run on all available cores with verbose progress output
grid = GridSearchCV(
    clf,
    params,
    scoring='roc_auc',
    cv=5,
    n_jobs=-1,
    verbose=10,
)

In [None]:
# Running the grid search on the training set
grid.fit(x_train, y_train)

In [None]:
# Predicting class labels for the test set through the fitted grid search
y_pred = grid.predict(x_test)

In [None]:
# Checking test-set performance of the tuned model
print(classification_report(y_test, y_pred))
# The search was scored by ROC AUC, so report that metric as well; it is
# computed from the predicted probability of the positive class.
print('ROC AUC:', roc_auc_score(y_test, grid.predict_proba(x_test)[:, 1]))

Note: regardless of whether hyperparameter tuning improved or degraded the scores, the aim of this section was simply to demonstrate how it is done.

# End