<a href="https://colab.research.google.com/github/sharminislamshroddha/machine_learning/blob/main/ensemble_learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## **Import Libraries**

In [None]:
import numpy as np
from sklearn.datasets import make_moons
from sklearn.ensemble import RandomForestClassifier, StackingClassifier, VotingClassifier, BaggingClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.svm import SVC
import xgboost as xgb
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

## **Loading and Splitting the Dataset**

In [None]:
# Synthetic "moons" dataset: 500 noisy points, seeded so every run sees the same data.
X, y = make_moons(
    n_samples=500,
    noise=0.30,
    random_state=42,
)

# Hold out a test set. No test_size is passed, so the library default split ratio applies.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
)

In [None]:
# Display the distinct label values present in y.
np.unique(y)

array([0, 1])

## **Voting Classifiers**

In [None]:
# Voting ensemble over three different model families, each seeded for repeatability.
# The `voting` argument is not passed, so the library default voting mode is used.
voting_clf = VotingClassifier(estimators=[
    ('LogisticRegression', LogisticRegression(random_state=42)),
    ('RandomForestClassifier', RandomForestClassifier(random_state=42)),
    ('SVC', SVC(random_state=42)),
])

voting_clf.fit(X_train, y_train)

In [None]:
# Report the test-set score of every fitted member of the voting ensemble, one per line.
print('Individual score: ')

for name, classifier in voting_clf.named_estimators_.items():
    print('{0} = {1}'.format(name, classifier.score(X_test, y_test)))

Individual score: 
LogisticRegression = 0.864
RandomForestClassifier = 0.896
SVC = 0.896


In [None]:
# Ask each fitted member of the ensemble for its prediction on the first test sample.
# The sample is sliced (not indexed) so the input keeps its leading sample axis.
[member.predict(X_test[0:1]) for member in voting_clf.estimators_]

[array([1]), array([1]), array([0])]

In [None]:
# Test-set score of the combined voting ensemble.
print('Voting classification score {0}'.format(voting_clf.score(X_test, y_test)))

Voting classification score 0.912


## **Bagging and Pasting**

#### **Bagging**

In [None]:
# Bagging ensemble: 500 decision trees, each trained on 100 samples drawn from the
# training set. `bootstrap` is not passed, so the library default sampling mode applies.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=100,
    n_jobs=-1,  # use all available cores
    random_state=42,
)

bag_clf.fit(X_train, y_train)

In [None]:
# Predict the held-out samples with the bagging ensemble and report the accuracy.
y_pred = bag_clf.predict(X_test)

print('Accuracy score: {0}'.format(accuracy_score(y_true=y_test, y_pred=y_pred)))

Accuracy score: 0.904


#### **Pasting**

In [None]:
# Pasting ensemble: same setup as bagging (500 trees, 100 samples each), but with
# bootstrap switched off so each tree's samples are drawn without replacement.
pas_clf = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=100,
    bootstrap=False,  # indicates that pasting is used
    n_jobs=-1,  # use all available cores
    random_state=42,
)

pas_clf.fit(X_train, y_train)

In [None]:
# Predict the held-out samples with the pasting ensemble and report the accuracy.
y_pred = pas_clf.predict(X_test)

print('Accuracy score: {0}'.format(accuracy_score(y_true=y_test, y_pred=y_pred)))

Accuracy score: 0.92


#### **A Random Forest is equivalent to a bag of decision trees:**

In [None]:
# Random forest reference model: 500 trees, each capped at 16 leaf nodes.
rf_clf = RandomForestClassifier(
    n_estimators=500,
    max_leaf_nodes=16,
    n_jobs=-1,  # use all available cores
    random_state=42,
)
rf_clf.fit(X_train, y_train)

# Keep the test-set predictions so they can be compared with the bagging ensemble's.
y_pred_rf = rf_clf.predict(X_test)

In [None]:
# Bagging ensemble configured to mirror the random forest: 500 trees, each capped at
# 16 leaf nodes and considering sqrt(n_features) candidate features per split.
# NOTE: this rebinds `bag_clf`, replacing the bagging model fitted in the
# "Bagging" section; re-running earlier cells afterwards will see this model.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(max_features="sqrt", max_leaf_nodes=16),
    n_estimators=500,
    n_jobs=-1,  # use all available cores
    random_state=42,
)
bag_clf.fit(X_train, y_train)

# Keep the test-set predictions so they can be compared with the random forest's.
y_pred_bag = bag_clf.predict(X_test)

In [None]:
# Compare the two prediction arrays element by element and collapse to a single flag.
print('Are prediction of both classifire same? {0}'.format((y_pred_bag == y_pred_rf).all()))

Are prediction of both classifire same? True


## **Boosting**

#### **Adaptive Boosting (AdaBoost)**

In [None]:
# AdaBoost over depth-1 decision trees: 30 boosting rounds with a learning rate of 0.5.
ada_clf = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=1),
    n_estimators=30,
    learning_rate=0.5,
    random_state=42,
)

ada_clf.fit(X_train, y_train)

In [None]:
# Predict the held-out samples with the AdaBoost model and report the accuracy.
y_pred = ada_clf.predict(X_test)

print('Accuracy score: {0}'.format(accuracy_score(y_true=y_test, y_pred=y_pred)))

Accuracy score: 0.904


#### **Gradient Boosting**

In [None]:
# Gradient boosting: 500 depth-2 trees combined with a learning rate of 0.05.
gbc = GradientBoostingClassifier(
    max_depth=2,
    n_estimators=500,
    learning_rate=0.05,
    random_state=42,
)

gbc.fit(X_train, y_train)

In [None]:
# Predict the held-out samples with the gradient boosting model and report the accuracy.
y_pred = gbc.predict(X_test)

print('Accuracy score: {0}'.format(accuracy_score(y_true=y_test, y_pred=y_pred)))

Accuracy score: 0.896


#### **Stacking**

In [None]:
# Stacking ensemble: three base models whose outputs feed a random forest
# final estimator. The SVC is created with probability=True.
stacking_clf = StackingClassifier(
    estimators=[
        ('LogisticRegression', LogisticRegression(random_state=42)),
        ('RandomForestClassifier', RandomForestClassifier(random_state=42)),
        ('SVC', SVC(probability=True, random_state=42)),
    ],
    final_estimator=RandomForestClassifier(random_state=42),
    cv=5,  # number of cross-validation folds
)

stacking_clf.fit(X_train, y_train)

In [None]:
# Test-set score of the fitted stacking ensemble.
print('Accuracy Score: {0}'.format(stacking_clf.score(X_test, y_test)))

Accuracy Score: 0.912


#### **XGBoost**

In [None]:
# Show the distinct labels in y; the XGBoost objective chosen below depends on them.
print('Unique classes in the dataset: {0}\n'.format(np.unique(y)))

Unique classes in the dataset: [0 1]



In [None]:
# XGBoost classifier with 100 boosting rounds, seeded for repeatability.
xgb_clf = xgb.XGBClassifier(
    objective='binary:logistic',  # use 'multi:softmax' for multiclass classification
    n_estimators=100,
    random_state=42,
)

xgb_clf.fit(X_train, y_train)

In [None]:
# Predict the held-out samples with the XGBoost model and report the accuracy.
y_pred = xgb_clf.predict(X_test)

print('Accuracy Score: {0}'.format(accuracy_score(y_true=y_test, y_pred=y_pred)))

Accuracy Score: 0.872
