In [1]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier


In [2]:
class Bagging:
    """Bootstrap-aggregated ensemble of decision trees with majority voting.

    Every tree is trained on a bootstrap sample (same size as the training
    set, drawn with replacement) and the ensemble predicts the label that
    most trees voted for.

    Parameters
    ----------
    n_estimators : int, default=10
        Number of trees to train.
    random_state : None, int or numpy.random.RandomState, default=None
        Source of randomness for the bootstrap draws and the trees.
        ``None`` keeps using NumPy's global random state (so a prior
        ``np.random.seed(...)`` still applies); an int or ``RandomState``
        makes ``fit`` reproducible on its own.

    Attributes
    ----------
    estimators : list
        The fitted trees; replaced on every call to ``fit``.
    """

    def __init__(self, n_estimators=10, random_state=None):
        self.n_estimators = n_estimators
        self.random_state = random_state
        self.estimators = []

    @staticmethod
    def _resolve_rng(random_state):
        """Return an object exposing ``choice`` for the requested seed."""
        if random_state is None:
            # The module-level functions draw from NumPy's global state.
            return np.random
        if isinstance(random_state, np.random.RandomState):
            return random_state
        return np.random.RandomState(random_state)

    @staticmethod
    def _majority_vote(predictions):
        """Return the most frequent label in each column of ``predictions``.

        ``predictions`` has one row per estimator and one column per sample.
        Labels are first encoded as 0..k-1 so that negative, float or string
        labels can be counted too. Ties go to the smallest label, which is
        what ``np.bincount(...).argmax()`` gives for non-negative integers.
        """
        classes, encoded = np.unique(predictions, return_inverse=True)
        # The shape of the inverse differs between NumPy versions.
        encoded = encoded.reshape(predictions.shape)
        counts = np.apply_along_axis(
            np.bincount, 0, encoded, minlength=len(classes)
        )
        return classes[counts.argmax(axis=0)]

    def fit(self, X, y):
        """Train ``n_estimators`` trees, each on its own bootstrap sample.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,)

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``X`` and ``y`` do not have the same number of rows.
        """
        # Plain arrays make the positional fancy indexing below work for
        # lists and pandas objects as well.
        X = np.asarray(X)
        y = np.asarray(y)
        n_samples = len(X)
        if len(y) != n_samples:
            raise ValueError(
                "X and y have inconsistent lengths: %d != %d" % (n_samples, len(y))
            )

        rng = self._resolve_rng(self.random_state)
        # Sharing the generator with the trees keeps them different from one
        # another while the whole fit stays reproducible.
        tree_random_state = None if self.random_state is None else rng

        # Start from scratch so that refitting does not grow the ensemble.
        self.estimators = []
        for _ in range(self.n_estimators):
            # Bootstrap: draw n_samples row indices with replacement.
            indices = rng.choice(n_samples, n_samples, replace=True)
            base_estimator = DecisionTreeClassifier(random_state=tree_random_state)
            base_estimator.fit(X[indices], y[indices])
            self.estimators.append(base_estimator)
        return self

    def predict(self, X):
        """Predict labels for ``X`` by majority vote over the fitted trees.

        Raises
        ------
        ValueError
            If the ensemble has no fitted estimators.
        """
        if not self.estimators:
            raise ValueError("This Bagging instance is not fitted yet; call fit() first.")
        # One row of predictions per estimator.
        predictions = np.array([estimator.predict(X) for estimator in self.estimators])
        return self._majority_vote(predictions)


In [3]:
class BaggingWithUndersampling:
    """Bagging ensemble of decision trees trained on undersampled data.

    Labels are expected to be binary: rows with ``y == 1`` are treated as the
    minority class and are all kept for every tree, while rows with
    ``y == 0`` are treated as the majority class and are randomly
    undersampled without replacement.

    Parameters
    ----------
    n_estimators : int, default=10
        Number of trees to train.
    max_samples : float, default=1.0
        Size of the majority-class sample as a multiple of the number of
        minority rows. The sample never exceeds the number of majority rows
        that are available.
    max_features : int, float, str or None, default=1.0
        Passed through to ``DecisionTreeClassifier(max_features=...)``.
    random_state : None, int or numpy.random.RandomState, default=None
        Source of randomness for both the undersampling and the trees.
        ``None`` keeps using NumPy's global random state; an int or
        ``RandomState`` makes ``fit`` reproducible.

    Attributes
    ----------
    estimators : list
        The fitted trees; replaced on every call to ``fit``.
    """

    def __init__(self, n_estimators=10, max_samples=1.0, max_features=1.0, random_state=None):
        self.n_estimators = n_estimators
        self.max_samples = max_samples
        self.max_features = max_features
        self.random_state = random_state
        self.estimators = []

    @staticmethod
    def _resolve_rng(random_state):
        """Return an object exposing ``choice`` for the requested seed."""
        if random_state is None:
            # The module-level functions draw from NumPy's global state.
            return np.random
        if isinstance(random_state, np.random.RandomState):
            return random_state
        return np.random.RandomState(random_state)

    def fit(self, X, y):
        """Train ``n_estimators`` trees, each on a freshly undersampled subset.

        Parameters
        ----------
        X : array-like or sparse matrix of shape (n_samples, n_features)
        y : array-like of shape (n_samples,) holding the labels 0 and 1

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``y`` lacks rows of class 0 or of class 1.
        """
        y = np.asarray(y)
        if not hasattr(X, "tocsr"):
            # Anything that is not a SciPy sparse matrix becomes a plain
            # array so positional row indexing works for lists and pandas.
            X = np.asarray(X)

        # The class split does not change between iterations.
        minority_class_indices = np.where(y == 1)[0]
        majority_class_indices = np.where(y == 0)[0]
        if len(minority_class_indices) == 0 or len(majority_class_indices) == 0:
            raise ValueError("y must contain samples of both class 0 and class 1.")

        # Sampling without replacement cannot draw more rows than exist.
        n_majority_draws = min(
            int(len(minority_class_indices) * self.max_samples),
            len(majority_class_indices),
        )

        rng = self._resolve_rng(self.random_state)
        # Sharing the generator with the trees (instead of handing every tree
        # the same integer seed) keeps them different from one another while
        # the whole fit stays reproducible.
        tree_random_state = None if self.random_state is None else rng

        # Start from scratch so that refitting does not grow the ensemble.
        self.estimators = []
        for _ in range(self.n_estimators):
            # Keep every minority row and a random subset of the majority rows.
            majority_class_indices_sampled = rng.choice(
                majority_class_indices, n_majority_draws, replace=False
            )
            indices = np.concatenate((minority_class_indices, majority_class_indices_sampled))
            base_estimator = DecisionTreeClassifier(
                max_features=self.max_features, random_state=tree_random_state
            )
            base_estimator.fit(X[indices], y[indices])
            self.estimators.append(base_estimator)
        return self

    def predict(self, X):
        """Predict 0/1 labels for ``X`` by majority vote over the fitted trees.

        Ties go to the smaller label.

        Raises
        ------
        ValueError
            If the ensemble has no fitted estimators.
        """
        if not self.estimators:
            raise ValueError(
                "This BaggingWithUndersampling instance is not fitted yet; call fit() first."
            )
        # One row of predictions per estimator.
        predictions = np.array([estimator.predict(X) for estimator in self.estimators])
        # Count the votes per sample (column) and keep the most frequent label.
        return np.apply_along_axis(lambda votes: np.bincount(votes).argmax(), axis=0, arr=predictions)


In [4]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier


class AdaBoostWithUndersampling:
    """AdaBoost-style ensemble of shallow trees trained on undersampled data.

    Labels are expected to be binary: rows with ``y == 1`` are treated as the
    minority class and are all kept for every round, while rows with
    ``y == 0`` are treated as the majority class and are uniformly
    undersampled (without replacement) to the size of the minority class.
    The boosting sample weights are passed to the tree as ``sample_weight``;
    they do not influence which rows are drawn.

    Parameters
    ----------
    n_estimators : int, default=10
        Number of boosting rounds.
    max_depth : int, default=1
        Depth of every tree (1 gives decision stumps).
    random_state : None, int or numpy.random.RandomState, default=None
        Source of randomness for the undersampling and the trees. ``None``
        keeps using NumPy's global random state; an int or ``RandomState``
        makes ``fit`` reproducible.

    Attributes
    ----------
    estimators : list
        The fitted trees; replaced on every call to ``fit``.
    estimator_weights : list of float
        Vote weight of each tree, aligned with ``estimators``.
    """

    # Keeps the weighted error away from 0 and 1 so that the estimator
    # weight stays finite.
    _ERROR_EPS = 1e-10

    def __init__(self, n_estimators=10, max_depth=1, random_state=None):
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.random_state = random_state
        self.estimators = []
        self.estimator_weights = []

    @staticmethod
    def _resolve_rng(random_state):
        """Return an object exposing ``choice`` for the requested seed."""
        if random_state is None:
            # The module-level functions draw from NumPy's global state.
            return np.random
        if isinstance(random_state, np.random.RandomState):
            return random_state
        return np.random.RandomState(random_state)

    def fit(self, X, y):
        """Run ``n_estimators`` boosting rounds on undersampled data.

        Parameters
        ----------
        X : array-like or sparse matrix of shape (n_samples, n_features)
        y : array-like of shape (n_samples,) holding the labels 0 and 1

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``y`` lacks rows of class 0 or of class 1.
        """
        y = np.asarray(y)
        if not hasattr(X, "tocsr"):
            # Anything that is not a SciPy sparse matrix becomes a plain
            # array so positional row indexing works for lists and pandas.
            X = np.asarray(X)

        # The class split does not change between rounds.
        minority_class_indices = np.where(y == 1)[0]
        majority_class_indices = np.where(y == 0)[0]
        if len(minority_class_indices) == 0 or len(majority_class_indices) == 0:
            raise ValueError("y must contain samples of both class 0 and class 1.")
        # Sampling without replacement cannot draw more rows than exist.
        n_majority_draws = min(len(minority_class_indices), len(majority_class_indices))

        n_samples = X.shape[0]
        # Every sample starts with the same weight.
        sample_weights = np.full(n_samples, (1 / n_samples))

        rng = self._resolve_rng(self.random_state)
        tree_random_state = None if self.random_state is None else rng

        # Start from scratch so that refitting does not grow the ensemble.
        self.estimators = []
        self.estimator_weights = []
        for _ in range(self.n_estimators):
            # Keep every minority row and a uniform random subset of the
            # majority rows.
            majority_class_indices_sampled = rng.choice(
                majority_class_indices, n_majority_draws, replace=False
            )
            indices = np.concatenate((minority_class_indices, majority_class_indices_sampled))

            base_estimator = DecisionTreeClassifier(
                max_depth=self.max_depth, random_state=tree_random_state
            )
            # The current boosting weights steer the tree towards the rows
            # that earlier rounds got wrong.
            base_estimator.fit(X[indices], y[indices], sample_weight=sample_weights[indices])

            # The error is measured on the full training set.
            misclassified = base_estimator.predict(X) != y
            weighted_error = np.sum(sample_weights * misclassified)
            # A perfect (or perfectly wrong) learner would otherwise divide
            # by zero, giving an infinite vote weight and NaN sample weights.
            weighted_error = np.clip(weighted_error, self._ERROR_EPS, 1 - self._ERROR_EPS)
            estimator_weight = 0.5 * np.log((1 - weighted_error) / weighted_error)

            # Boost the misclassified rows, then renormalize to sum to 1.
            sample_weights *= np.exp(estimator_weight * misclassified)
            sample_weights /= np.sum(sample_weights)

            self.estimators.append(base_estimator)
            self.estimator_weights.append(estimator_weight)
        return self

    def predict(self, X):
        """Predict 0/1 labels for ``X`` by weighted vote of the fitted trees.

        A sample is labelled 1 when the summed weight of the trees voting 1
        reaches at least half of the total estimator weight.

        Raises
        ------
        ValueError
            If the ensemble has no fitted estimators.
        """
        if not self.estimators:
            raise ValueError(
                "This AdaBoostWithUndersampling instance is not fitted yet; call fit() first."
            )
        # One row of predictions per estimator.
        predictions = np.array([estimator.predict(X) for estimator in self.estimators])
        weights = np.asarray(self.estimator_weights, dtype=float)
        # Because predictions are 0/1, this sums the weights of the trees
        # that voted for class 1.
        weighted_votes = np.sum(predictions.T * weights, axis=1)
        binary_predictions = np.where(weighted_votes >= 0.5 * np.sum(weights), 1, 0)
        return binary_predictions
