# Ensembling

In [1]:
import warnings
# Install a blanket "ignore" filter: from here on, warnings of every category
# are suppressed in this kernel.
warnings.simplefilter("ignore")

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

from sklearn.tree import DecisionTreeClassifier


In [3]:
# Read the dataset from "bank.csv" (path is relative to the notebook's working directory).
df = pd.read_csv("bank.csv")

In [4]:
# Preview the first rows to check which columns were loaded.
df.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,deposit
0,59,0,1,1,0,2343,1,0,2,5,8,1042,1,-1,0,3,1
1,56,0,1,1,0,45,0,0,2,5,8,1467,1,-1,0,3,1
2,41,9,1,1,0,1270,1,0,2,5,8,1389,1,-1,0,3,1
3,55,7,1,1,0,2476,1,0,2,5,8,579,1,-1,0,3,1
4,54,0,1,2,0,184,0,0,2,5,8,673,2,-1,0,3,1


In [5]:
# Features are every column except the last; the last column is used as the label.
x, y = df.iloc[:, :-1], df.iloc[:, -1]

In [6]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=1,
)

In [7]:
def create_model(model):
    """Fit ``model`` on the training split and print its test-set report.

    Reads the notebook-level ``x_train``, ``y_train``, ``x_test`` and
    ``y_test``, so the train/test split cell must have been run first.

    Parameters
    ----------
    model : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``.

    Returns
    -------
    estimator
        The same ``model`` object after fitting, so its repr can be shown
        as the cell output.
    """
    model.fit(x_train, y_train)
    # Evaluate on the held-out rows only; the report is printed, not returned.
    report = classification_report(y_test, model.predict(x_test))
    print(report)
    return model

## Naive Aggregation

In [8]:
# Base learners for the voting ensemble: one linear model and two trees that
# differ only in their split criterion.
lr = LogisticRegression()
# Decision trees permute the candidate features at every split, so without a
# seed the fitted trees (and the ensemble's scores) can change between runs.
# Pin the same seed used for the train/test split to keep reruns reproducible.
dt1 = DecisionTreeClassifier(random_state=1)
dt2 = DecisionTreeClassifier(criterion="entropy", random_state=1)

In [9]:
# VotingClassifier takes its members as (name, estimator) pairs.
model_list = [
    ("lr", lr),
    ("dt1", dt1),
    ("dt2", dt2),
]

In [10]:
from sklearn.ensemble import VotingClassifier

In [11]:
# Majority-vote ensemble; "hard" is the library default, spelled out here so
# the contrast with the soft-voting model is visible in the code.
vc = VotingClassifier(estimators=model_list, voting="hard")

### 1) Hard Voting

In [12]:
# Fit the hard-voting ensemble and print its test-set classification report.
create_model(vc)

              precision    recall  f1-score   support

           0       0.83      0.84      0.83      1760
           1       0.82      0.81      0.82      1589

    accuracy                           0.83      3349
   macro avg       0.83      0.83      0.83      3349
weighted avg       0.83      0.83      0.83      3349



VotingClassifier(estimators=[('lr',
                              LogisticRegression(C=1.0, class_weight=None,
                                                 dual=False, fit_intercept=True,
                                                 intercept_scaling=1,
                                                 l1_ratio=None, max_iter=100,
                                                 multi_class='warn',
                                                 n_jobs=None, penalty='l2',
                                                 random_state=None,
                                                 solver='warn', tol=0.0001,
                                                 verbose=0, warm_start=False)),
                             ('dt1',
                              DecisionTreeClassifier(class_weight=None,
                                                     criterion='gini',
                                                     max_depth=None...
                             ('dt2',
   

### 2) Soft Voting

In [13]:
# Same members as the hard-voting model, but the prediction comes from the
# members' predicted class probabilities rather than from counting votes.
vc2 = VotingClassifier(
    estimators=model_list,
    voting="soft",
)

In [14]:
# Fit the soft-voting ensemble and print its test-set classification report.
create_model(vc2)

              precision    recall  f1-score   support

           0       0.83      0.83      0.83      1760
           1       0.81      0.81      0.81      1589

    accuracy                           0.82      3349
   macro avg       0.82      0.82      0.82      3349
weighted avg       0.82      0.82      0.82      3349



VotingClassifier(estimators=[('lr',
                              LogisticRegression(C=1.0, class_weight=None,
                                                 dual=False, fit_intercept=True,
                                                 intercept_scaling=1,
                                                 l1_ratio=None, max_iter=100,
                                                 multi_class='warn',
                                                 n_jobs=None, penalty='l2',
                                                 random_state=None,
                                                 solver='warn', tol=0.0001,
                                                 verbose=0, warm_start=False)),
                             ('dt1',
                              DecisionTreeClassifier(class_weight=None,
                                                     criterion='gini',
                                                     max_depth=None...
                             ('dt2',
   

## Bootstrap Aggregation

In [15]:
from sklearn.ensemble import BaggingClassifier

### 1) Bagging

In [16]:
# Bagging: 10 logistic regressions, each fit on rows sampled with replacement
# (bootstrap is left at its default).
# NOTE(review): an integer max_samples is a row count, so every estimator is
# trained on only 10 rows; pass a float (e.g. 0.8) if a fraction of the
# training data was intended — confirm.
bc = BaggingClassifier(
    LogisticRegression(),
    n_estimators=10,
    max_samples=10,
    random_state=1,
)

In [17]:
# Fit the bagging ensemble and print its test-set classification report.
create_model(bc)

              precision    recall  f1-score   support

           0       0.76      0.76      0.76      1760
           1       0.73      0.73      0.73      1589

    accuracy                           0.75      3349
   macro avg       0.74      0.74      0.74      3349
weighted avg       0.75      0.75      0.75      3349



BaggingClassifier(base_estimator=LogisticRegression(C=1.0, class_weight=None,
                                                    dual=False,
                                                    fit_intercept=True,
                                                    intercept_scaling=1,
                                                    l1_ratio=None, max_iter=100,
                                                    multi_class='warn',
                                                    n_jobs=None, penalty='l2',
                                                    random_state=None,
                                                    solver='warn', tol=0.0001,
                                                    verbose=0,
                                                    warm_start=False),
                  bootstrap=True, bootstrap_features=False, max_features=1.0,
                  max_samples=10, n_estimators=10, n_jobs=None, oob_score=False,
                  random_state=1, verb

### 2) Pasting

In [18]:
# Pasting: same configuration as the bagging model, except bootstrap=False so
# each estimator's rows are drawn without replacement.
# NOTE(review): an integer max_samples is a row count, so every estimator is
# trained on only 10 rows; pass a float (e.g. 0.8) if a fraction of the
# training data was intended — confirm.
bc2 = BaggingClassifier(
    LogisticRegression(),
    n_estimators=10,
    max_samples=10,
    random_state=1,
    bootstrap=False,
)

In [19]:
# Fit the pasting ensemble and print its test-set classification report.
create_model(bc2)

              precision    recall  f1-score   support

           0       0.76      0.76      0.76      1760
           1       0.73      0.73      0.73      1589

    accuracy                           0.75      3349
   macro avg       0.74      0.74      0.74      3349
weighted avg       0.75      0.75      0.75      3349



BaggingClassifier(base_estimator=LogisticRegression(C=1.0, class_weight=None,
                                                    dual=False,
                                                    fit_intercept=True,
                                                    intercept_scaling=1,
                                                    l1_ratio=None, max_iter=100,
                                                    multi_class='warn',
                                                    n_jobs=None, penalty='l2',
                                                    random_state=None,
                                                    solver='warn', tol=0.0001,
                                                    verbose=0,
                                                    warm_start=False),
                  bootstrap=False, bootstrap_features=False, max_features=1.0,
                  max_samples=10, n_estimators=10, n_jobs=None, oob_score=False,
                  random_state=1, ver

In [20]:
#bc3 = BaggingClassifier(DecisionTreeClassifier(),n_estimators = 10,max_samples=10,random_state = 1)

## Random Forest

In [21]:
from sklearn.ensemble import RandomForestClassifier

In [22]:
# Forest of 10 trees. max_features=10 caps how many features are examined when
# searching for each split; it is not a per-tree feature subset.
rf = RandomForestClassifier(
    n_estimators=10,
    max_features=10,
    random_state=1,
)

In [23]:
# Fit the random forest and print its test-set classification report.
create_model(rf)

              precision    recall  f1-score   support

           0       0.84      0.83      0.84      1760
           1       0.82      0.83      0.82      1589

    accuracy                           0.83      3349
   macro avg       0.83      0.83      0.83      3349
weighted avg       0.83      0.83      0.83      3349



RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features=10, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=10,
                       n_jobs=None, oob_score=False, random_state=1, verbose=0,
                       warm_start=False)

## Stacking

In [24]:
from mlxtend.classifier import StackingClassifier

In [25]:
# New first-level learners for the stacking ensemble: one linear model and two
# trees that differ only in their split criterion.
lr = LogisticRegression()
# Decision trees permute the candidate features at every split, so without a
# seed the stacked model's scores can change between runs. Pin the same seed
# used for the train/test split to keep reruns reproducible.
dt1 = DecisionTreeClassifier(random_state=1)
dt2 = DecisionTreeClassifier(criterion="entropy", random_state=1)

In [26]:
# Rebinds model_list: the voting section stored (name, estimator) tuples under
# this name, whereas the stacking classifier below is given bare estimators.
model_list = [lr,dt1,dt2]

In [28]:
# Second-level model; passed as meta_classifier to the StackingClassifier below.
meta = LogisticRegression()

In [29]:
# Stack the three first-level learners under the logistic-regression meta-classifier.
# NOTE(review): mlxtend's StackingClassifier appears to fit the first-level and
# meta models on the same training rows, which can overfit — confirm whether
# StackingCVClassifier would be the better fit here.
sc = StackingClassifier(
    classifiers=model_list,
    meta_classifier=meta,
)

In [30]:
# Fit the stacking ensemble and print its test-set classification report.
create_model(sc)

              precision    recall  f1-score   support

           0       0.87      0.73      0.80      1760
           1       0.75      0.88      0.81      1589

    accuracy                           0.80      3349
   macro avg       0.81      0.81      0.80      3349
weighted avg       0.81      0.80      0.80      3349



StackingClassifier(average_probas=False,
                   classifiers=[LogisticRegression(C=1.0, class_weight=None,
                                                   dual=False,
                                                   fit_intercept=True,
                                                   intercept_scaling=1,
                                                   l1_ratio=None, max_iter=100,
                                                   multi_class='warn',
                                                   n_jobs=None, penalty='l2',
                                                   random_state=None,
                                                   solver='warn', tol=0.0001,
                                                   verbose=0,
                                                   warm_start=False),
                                DecisionTreeClassifier(class_weight=None,
                                                       criterion='gini',
                    