In [41]:
# Here, we will use the digits dataset from the scikit-learn datasets module
# Let's load the libraries
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

# Load scikit-learn's digits dataset and pull out the features and targets.
digits = load_digits()
data, labels = digits.data, digits.target

# Hold out 30% of the samples for evaluation; the fixed random_state keeps
# the split identical from run to run.
xtrain, xtest, ytrain, ytest = train_test_split(
    data, labels, test_size=0.3, random_state=42
)

In [42]:
## Here, we will use three classifiers (KNeighbors, SVC and MLP) and then combine them with a Voting classifier

from sklearn.ensemble import VotingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier 

# Three base classifiers, each constructed with its library defaults.
knn_clf = KNeighborsClassifier()
svc_clf = SVC()
mlp_clf = MLPClassifier()

# Combine the three models; voting="hard" selects majority voting on the
# predicted class labels.
voting_clf = VotingClassifier(
    estimators=[
        ("knn", knn_clf),
        ("mlp", mlp_clf),
        ("svc", svc_clf),
    ],
    voting="hard",
)

In [45]:
# Fit every model on the training split and report its test accuracy in percent.
for clf in (knn_clf, svc_clf, mlp_clf, voting_clf):
    # fit() returns the fitted estimator, so predict() can be chained on it.
    y_pred = clf.fit(xtrain, ytrain).predict(xtest)
    accuracy_pct = accuracy_score(ytest, y_pred) * 100
    print("{}:\t{:.2f}%".format(type(clf).__name__, accuracy_pct))
    

KNeighborsClassifier:	99.26%
SVC:	98.70%
MLPClassifier:	98.52%
VotingClassifier:	98.89%


In [54]:
## Bagging
## We are using the same digits dataset

# RandomForestClassifier is imported here because no visible earlier cell
# brings it into scope; without it this cell raises NameError on a fresh kernel.
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier

# 1000 random forests, each trained on 64 samples drawn WITH replacement
# (bootstrap=True is what makes this bagging); n_jobs=-1 uses all cores.
bag_clf = BaggingClassifier(RandomForestClassifier(), n_estimators = 1000,
                           max_samples = 64, bootstrap =True, n_jobs = -1)

bag_clf.fit(xtrain, ytrain)
y_pred = bag_clf.predict(xtest)
print("Accuracy score if we apply bootstrapping (Bagging):{}".format(accuracy_score(ytest, y_pred)))


Accuracy score if we apply bootstrapping (Bagging):0.9314814814814815


In [55]:
## Pasting
## We are using the same digits dataset

# RandomForestClassifier is imported here because no visible earlier cell
# brings it into scope; without it this cell raises NameError on a fresh kernel.
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier

# Same ensemble as bagging, but bootstrap=False draws each 64-sample subset
# WITHOUT replacement, which is what turns bagging into pasting.
bag_clf = BaggingClassifier(RandomForestClassifier(), n_estimators = 1000,
                           max_samples = 64, bootstrap =False, n_jobs = -1)

bag_clf.fit(xtrain, ytrain)
y_pred = bag_clf.predict(xtest)
print("Accuracy score if we use Pasting as an ensembling technique:{}".format(accuracy_score(ytest, y_pred)))

Accuracy score if we use Pasting as an ensembling technique:0.9314814814814815


In [56]:
## Ada Boost

# RandomForestClassifier is imported here because no visible earlier cell
# brings it into scope; without it this cell raises NameError on a fresh kernel.
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier

# Boost 500 random-forest base learners with a small learning rate.
ada_clf = AdaBoostClassifier(RandomForestClassifier(), n_estimators = 500,learning_rate = 0.01)
ada_clf.fit(xtrain, ytrain)
y_pred = ada_clf.predict(xtest)
# Call str.format so the score replaces the "{}" placeholder; passing the score
# as a second print() argument left the literal "{}" in the output.
print("Accuracy score of Ada Boost Classifier:{}".format(accuracy_score(ytest, y_pred)))

0.9740740740740741


In [35]:
## Gradient Boosting
from sklearn.ensemble import GradientBoostingClassifier

# Gradient boosting with 500 stages and a small learning rate; fit() returns
# the fitted estimator, so construction and training are chained.
gb_clf = GradientBoostingClassifier(
    learning_rate=0.01,
    n_estimators=500,
).fit(xtrain, ytrain)
y_pred = gb_clf.predict(xtest)

print("Accuracy score of Gradient Boosting: ", accuracy_score(ytest, y_pred))

Accuracy score of Gradient Boosting:  0.9611111111111111


In [37]:
## XGBoost 
from xgboost import XGBClassifier

# "softmax" is not one of XGBoost's documented objective names; the multiclass
# objectives are "multi:softmax" and "multi:softprob". "multi:softprob" is used
# here; predict() below is still scored against the integer labels in ytest.
# NOTE(review): confirm the behaviour against the installed xgboost version.
xgb_clf = XGBClassifier(objective= "multi:softprob")
xgb_clf.fit(xtrain, ytrain)
y_pred = xgb_clf.predict(xtest)
print("Accuracy score of XGBoost: ", accuracy_score(ytest, y_pred))

Accuracy score of XGBoost:  0.9648148148148148


In [58]:
# Stacking Classifier

# RandomForestClassifier is imported here because no visible earlier cell
# brings it into scope; without it this cell raises NameError on a fresh kernel.
# SVC and MLPClassifier come from the imports in the voting-classifier cell.
from sklearn.ensemble import RandomForestClassifier, StackingClassifier

# Fresh, unfitted base learners for the stack.
svc_clf = SVC()
rf_clf = RandomForestClassifier()
mlp_clf = MLPClassifier()

estimators = [("svc", svc_clf), ("rf_clf", rf_clf), ("mlp_clf", mlp_clf)]

# A random forest serves as the final (meta) estimator on top of the base learners.
stacking_clf = StackingClassifier(estimators = estimators, final_estimator = RandomForestClassifier())
stacking_clf.fit(xtrain, ytrain)
y_pred = stacking_clf.predict(xtest)
print("Accuracy score of Stacking Classifier: ", accuracy_score(ytest, y_pred))

Accuracy score of Stacking Classifier:  0.9888888888888889
