In [2]:
# Environment checks and global configuration for the notebook.
import re

# ensure the Python version is at least 3.5
import sys
assert sys.version_info >= (3, 5)

# ensure the scikit-learn version is at least 0.20
import sklearn
# Compare (major, minor) numerically: a plain string comparison is
# lexicographic, so an old release such as "0.9" would wrongly satisfy
# `>= "0.20"`. Only the leading numeric components are used, so suffixes
# like "rc1" or ".dev0" do not break the check.
_sklearn_major_minor = tuple(
    int(number) for number in re.findall(r"\d+", sklearn.__version__)[:2]
)
assert _sklearn_major_minor >= (0, 20)

import numpy as np
import os

# seed NumPy's global random generator so repeated runs give the same numbers
np.random.seed(42)

## Data generation

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_moons

# Build a noisy two-moons dataset to use for the classification experiments.
X, y = make_moons(
    n_samples=500,
    noise=0.3,
    random_state=42,
)
# Hold out part of the data for evaluation; the split is seeded so it is
# repeatable and uses train_test_split's default proportions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
)

## Hard voting

In [4]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import RandomForestClassifier

# Three different base learners, each seeded for repeatable results.
lr_clf = LogisticRegression(solver='lbfgs', random_state=42)
rf_clf = RandomForestClassifier(n_estimators=100, random_state=42)
svm_clf = SVC(gamma='scale', random_state=42)

# Hard voting: the ensemble combines the class labels predicted by its members.
voting_clf = VotingClassifier(
    estimators=[
        ('lr', lr_clf),
        ('rf', rf_clf),
        ('svm', svm_clf),
    ],
    voting='hard',
)

In [5]:
# Fit the hard-voting ensemble on the training split.
# NOTE(review): the evaluation loop in the next cell calls fit() on
# voting_clf again, so this training step is repeated there.
voting_clf.fit(X_train, y_train)

In [6]:
from sklearn.metrics import accuracy_score

# Train each model on the training split, then report its class name and
# its accuracy on the held-out test split.
for clf in (lr_clf, rf_clf, svm_clf, voting_clf):
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(type(clf).__name__, accuracy_score(y_test, y_pred))

LogisticRegression 0.864
RandomForestClassifier 0.896
SVC 0.896
VotingClassifier 0.912


## Soft voting

In [7]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import RandomForestClassifier

# Fresh instances of the three base learners, each seeded for repeatable results.
lr_clf = LogisticRegression(solver='lbfgs', random_state=42)
rf_clf = RandomForestClassifier(n_estimators=100, random_state=42)
# probability=True enables class-probability estimates on the SVC,
# which soft voting averages across the ensemble members.
svm_clf = SVC(gamma='scale', probability=True, random_state=42)

# Soft voting: the ensemble combines the members' predicted class probabilities.
voting_clf = VotingClassifier(
    estimators=[
        ('lr', lr_clf),
        ('rf', rf_clf),
        ('svm', svm_clf),
    ],
    voting='soft',
)

In [8]:
from sklearn.metrics import accuracy_score

# Train each model on the training split, then report its class name and
# its accuracy on the held-out test split.
for clf in (lr_clf, rf_clf, svm_clf, voting_clf):
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(type(clf).__name__, accuracy_score(y_test, y_pred))

LogisticRegression 0.864
RandomForestClassifier 0.896
SVC 0.896
VotingClassifier 0.92


## Bagging ensembles

In [9]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Bagging ensemble of 500 decision trees, each trained on 100 samples drawn
# from the training split (sampling scheme left at BaggingClassifier defaults).
bag_clf = BaggingClassifier(DecisionTreeClassifier(), n_estimators=500,
                            max_samples=100, random_state=42)
bag_clf.fit(X_train, y_train)
y_pred = bag_clf.predict(X_test)
# accuracy_score is documented as (y_true, y_pred); pass the arguments in that
# order, matching the voting-classifier cells.
print('accuracy_score : {}'.format(accuracy_score(y_test, y_pred)))

accuracy_score : 0.904


In [10]:
# Baseline: a single seeded decision tree trained on the same split, for
# comparison with the bagging ensemble's score.
dt_clf = DecisionTreeClassifier(random_state=42)
dt_clf.fit(X_train, y_train)
y_pred = dt_clf.predict(X_test)
# accuracy_score is documented as (y_true, y_pred); pass the arguments in that
# order, matching the voting-classifier cells.
print('accuracy_score: {}'.format(accuracy_score(y_test, y_pred)))

accuracy_score: 0.856


## Random Forests

In [12]:
# Create a manual random forest: a bagging ensemble of 500 decision trees in
# which every tree is configured with max_features='sqrt'.
rf_clf = BaggingClassifier(DecisionTreeClassifier(max_features='sqrt'),
                           n_estimators=500, random_state=42)
rf_clf.fit(X_train, y_train)
y_pred = rf_clf.predict(X_test)
# accuracy_score is documented as (y_true, y_pred); pass the arguments in that
# order, matching the voting-classifier cells.
print('accuracy_score: {}'.format(accuracy_score(y_test, y_pred)))

accuracy_score: 0.896


## AdaBoost