In [1]:
import numpy as np
from sklearn.metrics import accuracy_score

In [39]:
from sklearn.base import clone
from collections import Counter

class EnsembleBagging:
  def __init__(self, model, model_num=10, model_type = 'Classification'):
    self.model = model
    self.model_num = model_num
    self.model_type = model_type
    self.models = []

  # def model_type(self, model_type):
  #   if model_type != 'Classification' or self.model_type != 'Regression':
  #     raise ValueError('model_type must be "Classification" or "Regression"')

  @staticmethod
  def reSample(X, y):
      n_rows, n_cols = X.shape
      samples = np.random.choice(a=n_rows, size=n_rows, replace=True)
      return X[samples], y[samples]

  def fit(self, X, y):
    for i in range(self.model_num):
      # Resample for X (rows, features) using bootstraped technique
      X_sample, y_sample = self.reSample(X, y)
      classifier = clone(self.model)
      classifier.fit(X_sample, y_sample)
      self.models.append(classifier)

  def predict(self, X):
    y_preds = np.array([model.predict(X) for model in self.models]).T
    if self.model_type == 'classification':
      major_votes = []
      for y in y_preds:
        major_votes.append(np.bincount(y).argmax())
    else:
      return np.mean(y_preds, axis=1)
    return np.array(major_votes)

In [46]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

dataset = load_iris()
X = dataset.data
y = dataset.target
X[:5,:]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
classifier = EnsembleBagging(model=DecisionTreeClassifier(min_samples_split=2, max_depth=5), model_num=5, model_type='classification')
classifier.fit(X_train, y_train)

In [47]:
print(y_test)
preds = classifier.predict(X_test)
preds

[2 1 1 0 2 2 1 2 0 0 2 2 2 0 0 0 1 1 0 2 2 0 1 0 2 2 2 0 2 1]


array([2, 1, 1, 0, 2, 2, 1, 2, 0, 0, 2, 2, 2, 0, 0, 0, 1, 1, 0, 2, 2, 0,
       1, 0, 2, 2, 2, 0, 2, 1])

In [48]:
accuracy_score(y_test, preds)

1.0