In [6]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from collections import Counter


class sample:
    """Bagging sampler: bootstrap rows restricted to a random feature subspace.

    The feature subspace is drawn once, at construction time. Every call then
    draws a fresh bootstrap sample of the rows and projects it onto that
    subspace.
    """

    def __init__(self, X: np.ndarray, n_subspace: int, rng=None):
        """Choose the feature subspace.

        Args:
            X: 2-D array; only ``X.shape[1]`` is read here.
            n_subspace: Number of distinct feature columns to keep.
            rng: Optional random source exposing ``choice`` (for example a
                ``np.random.RandomState`` or ``np.random.Generator``). When
                omitted, the global ``np.random`` functions are used.
        """
        self._rng = rng
        self.idx_subspace = self.random_subspace(X, n_subspace, rng)

    def __call__(self, X: np.ndarray, y: np.ndarray):
        """Return ``(X_sampled, y_sampled)`` for a new bootstrap draw."""
        idx_obj = self.bootstrap_sample(X, self._rng)
        return self.get_subsample(X, y, self.idx_subspace, idx_obj)

    @staticmethod
    def bootstrap_sample(X: np.ndarray, rng=None):
        """Draw ``X.shape[0]`` row indices uniformly with replacement.

        Repeated indices are kept on purpose: the multiplicities are what
        makes this a bootstrap sample, and they weight the rows when a model
        is fitted on the result.
        """
        source = np.random if rng is None else rng
        n_obj = X.shape[0]
        return source.choice(n_obj, n_obj, replace=True)

    @staticmethod
    def random_subspace(X: np.ndarray, n_subspace: int, rng=None):
        """Pick ``n_subspace`` distinct column indices, returned sorted."""
        source = np.random if rng is None else rng
        return np.sort(source.choice(X.shape[1], n_subspace, replace=False))

    @staticmethod
    def get_subsample(
        X: np.ndarray,
        y: np.ndarray,
        idx_subspace: np.ndarray,
        idx_obj: np.ndarray,
    ):
        """Select rows ``idx_obj`` of ``X`` and ``y``, and columns ``idx_subspace`` of ``X``."""
        return X[idx_obj][:, idx_subspace], y[idx_obj]
class random_forest:
    """Random-subspace bagging ensemble of decision trees with majority voting.

    Each tree is trained on a bootstrap draw of the rows, restricted to its
    own random subset of ``subspaces_dim`` feature columns.
    """

    def __init__(
        self,
        n_estimators: int,
        max_depth: int,
        subspaces_dim: int,
        random_state: int,
    ):
        """Store the hyperparameters; no training happens here.

        Args:
            n_estimators: Number of trees to train.
            max_depth: Maximum depth passed to every tree.
            subspaces_dim: Number of feature columns each tree sees.
            random_state: Seed that makes ``fit`` reproducible. ``None``
                leaves the global NumPy random state untouched.
        """
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.subspaces_dim = subspaces_dim
        self.random_state = random_state
        self.Classifier = []  # fitted trees, one per estimator
        self.subspace_idx = []  # feature columns used by the matching tree

    def fit(self, X: np.ndarray, y: np.ndarray):
        """Train ``n_estimators`` trees, replacing any previously fitted ones.

        The sampler draws from the global ``np.random`` state, so that state
        is seeded with ``random_state`` for the duration of the fit and
        restored afterwards. This touches process-global state and is
        therefore not safe to run concurrently from several threads.
        """
        # Build into locals so a failure mid-way cannot leave a half-trained
        # ensemble, and so refitting never keeps stale trees around.
        trees = []
        subspaces = []

        seeded = self.random_state is not None
        if seeded:
            saved_state = np.random.get_state()
            np.random.seed(self.random_state)
        try:
            for _ in range(self.n_estimators):
                s = sample(X, self.subspaces_dim)
                X_sample, y_sample = s(X, y)
                # Give every tree its own seed derived from the seeded stream.
                tree_seed = int(np.random.randint(np.iinfo(np.int32).max))
                tree = DecisionTreeClassifier(
                    max_depth=self.max_depth, random_state=tree_seed
                )
                trees.append(tree.fit(X_sample, y_sample))
                subspaces.append(s.idx_subspace)
        finally:
            if seeded:
                np.random.set_state(saved_state)

        self.Classifier = trees
        self.subspace_idx = subspaces

    def predict(self, X: np.ndarray):
        """Return the majority-vote label for every row of ``X``.

        Ties go to the label voted first in estimator order. The result keeps
        the dtype of the trees' predictions, so non-integer and string labels
        are returned unchanged.

        Raises:
            RuntimeError: If there are no fitted estimators.
        """
        if not self.Classifier:
            raise RuntimeError(
                "random_forest has no fitted estimators; "
                "call fit() with n_estimators >= 1 first"
            )

        # Shape (n_estimators, n_samples); each tree sees only its own columns.
        votes = np.array(
            [
                tree.predict(X[:, cols])
                for tree, cols in zip(self.Classifier, self.subspace_idx)
            ]
        )

        predicted = np.empty(X.shape[0], dtype=votes.dtype)
        for i, sample_votes in enumerate(votes.T):
            predicted[i] = Counter(sample_votes).most_common(1)[0][0]

        return predicted
# Load iris and hold out 30% of the rows for evaluation.
X, y = load_iris(return_X_y=True)
split = train_test_split(X, y, test_size=0.3, shuffle=True, random_state=42)
X_train, x_test, y_train, y_test = split

# accuracy[i, j, k] is the test accuracy for
# n_estimators = i + 1, max_depth = j + 1, subspace_dim = k + 1.
accuracy = np.empty((20, 20, 4), dtype=np.float64)

# np.ndindex walks the grid in C order, i.e. the last axis varies fastest.
for cell in np.ndindex(*accuracy.shape):
    n_estimators, max_depth, subspace_dim = (i + 1 for i in cell)
    clf = random_forest(n_estimators, max_depth, subspace_dim, 42)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(x_test)
    accuracy[cell] = accuracy_score(y_test, y_pred)

# Zero-based grid position of the best score, followed by the score itself.
ind = np.unravel_index(accuracy.argmax(), accuracy.shape)
print(ind)
print(accuracy[ind])

(0, 1, 0)
1.0
