This notebook included a small example of Bagging and Boosting with AdaBoost and their comparisons

### Import Libraries

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
#################################
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.metrics import f1_score
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, BaggingClassifier
###################################
from sklearn.datasets import load_digits

### Load Data

In [2]:
data = load_digits()

In [3]:
# Define Data
x = data.data
y = data.target

In [4]:
print("X shape : {}".format(x.shape))
print("Y shape : {}".format(y.shape))

X shape : (1797, 64)
Y shape : (1797,)


### Preprocessing

In [5]:
# Scale Data
scale = StandardScaler()
# Fit Data
x = scale.fit_transform(x)

In [6]:
# Train Test Split
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=42, test_size=0.33)

### Model

In [7]:
rfc = RandomForestClassifier(max_depth=4)

In [8]:
rfc.fit(x_train, y_train)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=4, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [9]:
# predict data
y_pred = rfc.predict(x_test)

In [10]:
## Confusion Matrix And Classification Report
print("Confusion Matrix :\n")
print(confusion_matrix(y_test, y_pred))
print()
print("Classification Report :\n")
print(classification_report(y_test, y_pred))
print()
print("F1 SCORE  : {0:.2f}".format(f1_score(y_test, y_pred, average='micro')))

Confusion Matrix :

[[54  0  0  0  1  0  0  0  0  0]
 [ 0 44  4  2  0  1  0  0  0  4]
 [ 0  0 49  2  0  0  0  0  1  0]
 [ 0  0  0 53  0  0  0  0  3  0]
 [ 0  0  0  0 60  0  0  4  0  0]
 [ 1  0  0  0  1 61  1  0  0  9]
 [ 1  0  0  0  0  0 56  0  0  0]
 [ 0  0  0  0  2  0  0 59  0  1]
 [ 0  3  0  0  0  1  0  0 46  2]
 [ 0  0  0  2  0  0  0  5  0 61]]

Classification Report :

              precision    recall  f1-score   support

           0       0.96      0.98      0.97        55
           1       0.94      0.80      0.86        55
           2       0.92      0.94      0.93        52
           3       0.90      0.95      0.92        56
           4       0.94      0.94      0.94        64
           5       0.97      0.84      0.90        73
           6       0.98      0.98      0.98        57
           7       0.87      0.95      0.91        62
           8       0.92      0.88      0.90        52
           9       0.79      0.90      0.84        68

    accuracy               

### Bagging And Boosting

In [11]:
# Define Bagging
bg = BaggingClassifier(rfc, n_estimators=20)

In [12]:
bg.fit(x_train, y_train)

BaggingClassifier(base_estimator=RandomForestClassifier(bootstrap=True,
                                                        ccp_alpha=0.0,
                                                        class_weight=None,
                                                        criterion='gini',
                                                        max_depth=4,
                                                        max_features='auto',
                                                        max_leaf_nodes=None,
                                                        max_samples=None,
                                                        min_impurity_decrease=0.0,
                                                        min_impurity_split=None,
                                                        min_samples_leaf=1,
                                                        min_samples_split=2,
                                                        min_weight_fraction_leaf=0.0,
        

In [13]:
# Predict Data
bg_pred = bg.predict(x_test)

In [14]:
## Confusion Matrix And Classification Report
print("Confusion Matrix :\n")
print(confusion_matrix(y_test, bg_pred))
print()
print("Classification Report :\n")
print()
print(classification_report(y_test, bg_pred))
print("F1 SCORE  : {0:.2f}".format(f1_score(y_test, bg_pred, average='micro')))



print("""

     Accuracy in  RandomForest Classifier => 0.83 
     Accuracy in  Bagging      Classifier => 0.93


""")


Confusion Matrix :

[[54  0  0  0  1  0  0  0  0  0]
 [ 0 47  5  0  0  1  0  0  0  2]
 [ 0  0 51  1  0  0  0  0  0  0]
 [ 0  0  0 52  0  1  0  0  3  0]
 [ 0  0  0  0 62  0  0  2  0  0]
 [ 0  0  0  0  2 64  1  0  0  6]
 [ 1  0  0  0  0  0 56  0  0  0]
 [ 0  0  0  0  0  0  0 61  0  1]
 [ 0  3  0  0  0  1  0  1 46  1]
 [ 0  0  0  1  1  1  0  4  0 61]]

Classification Report :


              precision    recall  f1-score   support

           0       0.98      0.98      0.98        55
           1       0.94      0.85      0.90        55
           2       0.91      0.98      0.94        52
           3       0.96      0.93      0.95        56
           4       0.94      0.97      0.95        64
           5       0.94      0.88      0.91        73
           6       0.98      0.98      0.98        57
           7       0.90      0.98      0.94        62
           8       0.94      0.88      0.91        52
           9       0.86      0.90      0.88        68

    accuracy              

### Boosting

In [15]:
# Define Boosting
bs = AdaBoostClassifier(bg, n_estimators=50)

In [16]:
# Fit Data
bs.fit(x_train, y_train)

AdaBoostClassifier(algorithm='SAMME.R',
                   base_estimator=BaggingClassifier(base_estimator=RandomForestClassifier(bootstrap=True,
                                                                                          ccp_alpha=0.0,
                                                                                          class_weight=None,
                                                                                          criterion='gini',
                                                                                          max_depth=4,
                                                                                          max_features='auto',
                                                                                          max_leaf_nodes=None,
                                                                                          max_samples=None,
                                                                                          min_impur

In [17]:
# predict data
bs_pred = bs.predict(x_test)

In [18]:
## Confusion Matrix And Classification Report
print("Confusion Matrix :\n")
print(confusion_matrix(y_test, bs_pred))
print()
print("Classification Report :\n")
print(classification_report(y_test, bs_pred))
print("F1 SCORE  : {0:.2f}".format(f1_score(y_test, bs_pred, average='micro')))

print("""

     Accuracy in  RandomForest Classifier => 0.83 
     Accuracy in  Bagging Classifier      => 0.93
     Accuracy in  AdaBoostClassifier      => 0.97


""")

Confusion Matrix :

[[55  0  0  0  0  0  0  0  0  0]
 [ 0 54  0  0  0  0  0  0  0  1]
 [ 0  0 50  0  0  0  0  0  2  0]
 [ 0  0  0 51  0  1  0  0  4  0]
 [ 0  0  0  0 64  0  0  0  0  0]
 [ 0  0  0  0  1 69  1  0  0  2]
 [ 0  0  0  0  0  1 56  0  0  0]
 [ 0  0  0  0  0  0  0 61  0  1]
 [ 0  0  0  0  0  1  0  0 51  0]
 [ 0  0  0  0  0  1  0  0  3 64]]

Classification Report :

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        55
           1       1.00      0.98      0.99        55
           2       1.00      0.96      0.98        52
           3       1.00      0.91      0.95        56
           4       0.98      1.00      0.99        64
           5       0.95      0.95      0.95        73
           6       0.98      0.98      0.98        57
           7       1.00      0.98      0.99        62
           8       0.85      0.98      0.91        52
           9       0.94      0.94      0.94        68

    accuracy               