In [1]:
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score , hamming_loss
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.utils import resample
import numpy as np
import pandas as pd



In [2]:
class MyDecisionTreeClassifier:
    def __init__(self):
        self.model = DecisionTreeClassifier()

    def fit(self, X, y):
        self.model.fit(X, y)

    def predict(self, X):
        return self.model.predict(X)
    
class MyLogisticClassifier:
    def __init__(self, num_classes=9, learning_rate=0.005, num_epochs=5000):
        self.num_classes = num_classes
        self.learning_rate = learning_rate
        self.num_epochs = num_epochs
        self.weights = None
        self.bias = None

    def softmax(self, z):
        exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)

    def cross_entropy_loss(self, y_pred, y_true):
        m = y_pred.shape[0]
        return -np.sum(np.log(y_pred) * y_true) / m

    def fit(self, X_train, y_train):
        m, n = X_train.shape
        self.weights = np.zeros((n, self.num_classes))
        self.bias = np.zeros((1, self.num_classes))

        for epoch in range(self.num_epochs):
            z = np.dot(X_train, self.weights) + self.bias
            y_pred = self.softmax(z)

            y_train_encoded = np.eye(self.num_classes)[y_train]  # One-hot encoding
            loss = self.cross_entropy_loss(y_pred, y_train_encoded)

            dz = y_pred - y_train_encoded
            dw = np.dot(X_train.T, dz) / m
            db = np.sum(dz, axis=0, keepdims=True) / m

            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

    def predict(self, X):
        z = np.dot(X, self.weights) + self.bias
        y_pred = self.softmax(z)
        return np.argmax(y_pred, axis=1)

class MyMLPClassifier(object):
    def __init__(self, learning_rate=0.001,epochs = 5000, inputLayerSize: int = 12, hiddenLayerSize = [21] , outputLayerSize: int = 9):
        self.inputLayerSize = inputLayerSize
        self.outputLayerSize = outputLayerSize
        self.hiddenLayerSize = hiddenLayerSize
        self.activation = self.sigmoid
        self.activationPrime = self.sigmoidPrime
        self.learning_rate = learning_rate
        self.num_layers = len(hiddenLayerSize)+1
        self.weights = []
        self.weights.append(np.random.randn(self.inputLayerSize, self.hiddenLayerSize[0]))
        for i in range(1,self.num_layers-1):
            self.weights.append(np.random.randn(self.hiddenLayerSize[i-1], self.hiddenLayerSize[i]))
        self.weights.append(np.random.randn(self.hiddenLayerSize[-1], self.outputLayerSize))
        self.optimizer = 'SGD'
        self.max_iterations = epochs

    def forward(self, X):
        self.z = [None] * (len(self.weights))
        self.a = [None] * (len(self.weights)-1)
        
        self.z[0] = np.dot(X, self.weights[0])
        for i in range(0,len(self.a)):
            self.a[i] = self.activation(self.z[i])
            self.z[i+1] = np.dot(self.a[i],self.weights[i+1])
        yHat = self.softmax(self.z[-1])
        return yHat

    def relu(self, z):
        return np.maximum(0, z)  # ReLU activation

    def tanh(self, z):
        return np.tanh(z)  # Tanh activation

    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))

    def sigmoidPrime(self, z):
        return np.exp(-z) / ((1 + np.exp(-z))**2)

    def softmax(self, z):
        exp_z = np.exp(z - np.max(z))  # Subtracting max(z) for numerical stability
        return exp_z / exp_z.sum(axis=1, keepdims=True)

    def costFunction(self, X, y):
        self.yHat = self.forward(X)
        J = -np.sum(y * np.log(self.yHat)) / len(X)
        return J

    def costFunctionPrime(self, X, y):
        self.yHat = self.forward(X)
        deltas = [None] * (len(self.weights))
        deltas[-1] = self.yHat - y
        dJdWs = [None] * (len(self.weights))
        for i in range(len(deltas)-2,-1,-1):
            dJdWs[i+1] = np.dot(self.a[i].T, deltas[i+1])
            deltas[i] = np.dot(deltas[i+1],self.weights[i+1].T)*self.activationPrime(self.z[i])
        dJdWs[0] = np.dot(X.T, deltas[0])
        return dJdWs

    def reluPrime(self, z):
        return np.where(z > 0, 1, 0)  # Derivative of ReLU

    def tanhPrime(self, z):
        return 1 - np.tanh(z)**2  # Derivative of Tanh

    def backward(self, X, y, learning_rate):
        dJdWs = self.costFunctionPrime(X, y)
        for i in range(len(self.weights)):
            self.weights[i] -= learning_rate * dJdWs[i]
    def fit(self, X, y, batch_size = 32, learning_rate=0.1, max_iterations=10000,wand = 0):
        if(self.optimizer == 'SGD'):
            return self.train_sgd(X=X, y=y,wand=wand)
        elif(self.optimizer == 'Batch'):
            return self.train_batch(X=X, y=y,wand=wand)
        elif(self.optimizer == 'MiniBatch'):
            return self.train_mini_batch(X=X, y=y,wand=wand)
    def train_sgd(self, X, y, learning_rate=0.1, max_iterations=10000, wand = 0):
        if(wand == 1):
            wandb.init(project="Reporting loss on MLP Classifier for WineQT Dataset")
        for i in range(self.max_iterations):
            idx = np.random.randint(len(X))
            self.backward(X[idx:idx+1], y[idx:idx+1], self.learning_rate)
            if i % 1 == 0:
                loss = self.costFunction(X, y)
                # print("Iteration %d: loss = %f" % (i, loss))
                if(wand == 1):
                    wandb.log({"Loss": loss, "Epoch": i})
        loss = self.costFunction(X, y)
        return loss
        print("Training completed.")
    def train_batch(self, X, y, learning_rate=0.0001, max_iterations=10000, wand = 0):
        if(wand == 1):
            wandb.init(project="Reporting loss on MLP Classifier for WineQT Dataset")
        for i in range(self.max_iterations):
            self.backward(X, y, self.learning_rate)
            if i % 1 == 0:
                loss = self.costFunction(X, y)
                # print("Iteration %d: loss = %f" % (i, loss))
                if(wand == 1):
                    wandb.log({"Loss": loss, "Epoch": i})
        loss = self.costFunction(X, y)
        return loss
        print("Training completed.")
    def train_mini_batch(self, X, y, batch_size=32, learning_rate=0.01, max_iterations=5000, wand = 0):
        if(wand == 1):
            wandb.init(project="Reporting loss on MLP Classifier for WineQT Dataset")
        for i in range(self.max_iterations):
            indices = np.arange(len(X))
            np.random.shuffle(indices)
            for j in range(0, len(X), batch_size):
                batch_indices = indices[j:j+batch_size]
                self.backward(X[batch_indices], y[batch_indices], self.learning_rate)
            if i % 1 == 0:
                loss = self.costFunction(X, y)
                # print("Iteration %d: loss = %f" % (i, loss))
                if(wand == 1):
                    wandb.log({"Loss": loss, "Epoch": i})
        loss = self.costFunction(X, y)
        return loss
        print("Training completed.")

    def predict(self, X):
        yHat = self.forward(X)
        # return yHat
        binary_predictions = np.zeros_like(yHat)
        binary_predictions[np.arange(len(yHat)), yHat.argmax(axis=1)] = 1
        return binary_predictions
        # return np.argmax(yHat, axis=1)
    def set_learning_rate(self, learning_rate):
        self.learning_rate = learning_rate

    def set_activation_function(self, activation_function):
        if(activation_function == 'sigmoid'):
            self.activation = self.sigmoid
            self.activationPrime = self.sigmoidPrime
        if(activation_function == 'relu'):
            self.activation = self.relu
            self.activationPrime = self.reluPrime
        if(activation_function == 'tanh'):
            self.activation = self.tanh
            self.activationPrime = self.tanhPrime

    def set_optimizer(self, optimizer):
        self.optimizer = optimizer

    def set_hidden_layers(self, hidden_layer_sizes):
        self.hiddenLayerSize = hidden_layer_sizes
        self.num_layers = len(self.hiddenLayerSize)+1
        self.weights = []
        self.weights.append(np.random.randn(self.inputLayerSize, self.hiddenLayerSize[0]))
        for i in range(1,self.num_layers-1):
            self.weights.append(np.random.randn(self.hiddenLayerSize[i-1], self.hiddenLayerSize[i]))
        self.weights.append(np.random.randn(self.hiddenLayerSize[-1], self.outputLayerSize))
        # self.W1 = np.random.randn(self.inputLayerSize, self.hiddenLayerSize)
        # self.W2 = np.random.randn(self.hiddenLayerSize, self.outputLayerSize)
        # self.weights, self.biases = self.initialize_weights()
    def set_epochs(self, epcohs):
        self.max_iterations = epochs
        print(self.max_iterations)


In [3]:
class MyDecisionTreeRegressor:
    def __init__(self):
        self.model = DecisionTreeRegressor(random_state=42)

    def fit(self, X, y):
        self.model.fit(X, y)

    def predict(self, X):
        return self.model.predict(X)
    
class MyLinearRegressor:
    def __init__(self):
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        num_samples, num_features = X.shape
        self.weights = np.zeros(num_features)
        self.bias = 0
        # print(X.T)
        # Closed-form solution for linear regression
        X_transpose = X.T
        X = np.array(X)
        y = np.array(y)
        # print(y.shape)
        # print(X.shape)
        # print(X_transpose)
        self.weights = np.dot(np.linalg.inv(np.dot(X_transpose, X)), np.dot(X_transpose, y))
        # print(self.weights)
    def predict(self, X):
        linear_model = np.dot(X, self.weights) + self.bias
        return linear_model
    
class MyMLPRegressor(object):
    def __init__(self, learning_rate=0.001,epochs = 5000, inputLayerSize: int = 13, hiddenLayerSize = [13,13,13,13,13,13,13,13,13] , outputLayerSize: int = 1):
        self.inputLayerSize = inputLayerSize
        self.outputLayerSize = outputLayerSize
        self.hiddenLayerSize = hiddenLayerSize
        self.activation = self.sigmoid
        self.activationPrime = self.sigmoidPrime
        self.learning_rate = learning_rate
        # Weights (parameters)
        self.num_layers = len(hiddenLayerSize)+1
        self.weights = []
        self.weights.append(np.random.randn(self.inputLayerSize, self.hiddenLayerSize[0]))
        for i in range(1,self.num_layers-1):
            self.weights.append(np.random.randn(self.hiddenLayerSize[i-1], self.hiddenLayerSize[i]))
        self.weights.append(np.random.randn(self.hiddenLayerSize[-1], self.outputLayerSize))
        self.optimizer = 'MiniBatch'
        self.max_iterations = epochs

    def forward(self, X):
        self.z = [None] * (len(self.weights))
        self.a = [None] * (len(self.weights) - 1)

        self.z[0] = np.dot(X, self.weights[0])
        for i in range(0, len(self.a)):
            self.a[i] = self.activation(self.z[i])
            self.z[i + 1] = np.dot(self.a[i], self.weights[i + 1])
        yHat = self.z[-1]  # Linear activation for regression
        return yHat

    def relu(self, z):
        return np.maximum(0, z)  # ReLU activation

    def tanh(self, z):
        return np.tanh(z)  # Tanh activation

    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))

    def sigmoidPrime(self, z):
        return np.exp(-z) / ((1 + np.exp(-z))**2)

    def softmax(self, z):
        exp_z = np.exp(z - np.max(z))  # Subtracting max(z) for numerical stability
        return exp_z / exp_z.sum(axis=1, keepdims=True)

    def costFunction(self, X, y):
        self.yHat = self.forward(X)
        J = np.sum((y - self.yHat) ** 2) / (2 * len(X))  # Mean Squared Error
        return J

    def costFunctionPrime(self, X, y):
        self.yHat = self.forward(X)
        deltas = [None] * (len(self.weights))
        deltas[-1] = -(y - self.yHat)
        dJdWs = [None] * (len(self.weights))
        for i in range(len(deltas) - 2, -1, -1):
            dJdWs[i + 1] = np.dot(self.a[i].T, deltas[i + 1])
            deltas[i] = np.dot(deltas[i + 1], self.weights[i + 1].T) * self.activationPrime(self.z[i])
        dJdWs[0] = np.dot(X.T, deltas[0])
        return dJdWs

    def reluPrime(self, z):
        return np.where(z > 0, 1, 0)  # Derivative of ReLU

    def tanhPrime(self, z):
        return 1 - np.tanh(z)**2  # Derivative of Tanh

    def backward(self, X, y, learning_rate):
        dJdWs = self.costFunctionPrime(X, y)
        for i in range(len(self.weights)):
            self.weights[i] -= learning_rate * dJdWs[i]
        # self.W1 -= learning_rate * dJdW1
        # self.W2 -= learning_rate * dJdW2
    def fit(self, X, y, batch_size = 32, learning_rate=0.1, max_iterations=10000,wand = 0):
        if(self.optimizer == 'SGD'):
            return self.train_sgd(X=X, y=y,wand=wand)
        elif(self.optimizer == 'Batch'):
            return self.train_batch(X=X, y=y,wand=wand)
        elif(self.optimizer == 'MiniBatch'):
            return self.train_mini_batch(X=X, y=y,wand=wand)
    def train_sgd(self, X, y, learning_rate=0.1, max_iterations=10000, wand = 0):
        if(wand == 1):
            wandb.init(project="Reporting loss on Housing Dataset in MLP Regression")
        for i in range(self.max_iterations):
            idx = np.random.randint(len(X))
            self.backward(X[idx:idx+1], y[idx:idx+1], self.learning_rate)
            if i % 100 == 0:
                loss = self.costFunction(X, y)
                print("Iteration %d: loss = %f" % (i, loss))
                if(wand == 1):
                    wandb.log({"Loss": loss, "Epoch": i})
        loss = self.costFunction(X, y)
        return loss
        print("Training completed.")
    def train_batch(self, X, y, learning_rate=0.0001, max_iterations=10000, wand = 0):
        if(wand == 1):
            wandb.init(project="Reporting loss on Housing Dataset in MLP Regression")
        for i in range(self.max_iterations):
            self.backward(X, y, self.learning_rate)
            if i % 1 == 0:
                loss = self.costFunction(X, y)
                # print("Iteration %d: loss = %f" % (i, loss))
                if(wand == 1):
                    wandb.log({"Loss": loss, "Epoch": i})
        loss = self.costFunction(X, y)
        return loss
        print("Training completed.")
    def train_mini_batch(self, X, y, batch_size=32, learning_rate=0.01, max_iterations=5000, wand = 0):
        if(wand == 1):
            wandb.init(project="Reporting loss on Housing Dataset in MLP Regression")
        for i in range(self.max_iterations):
            # Randomly shuffle the data and split into mini-batches
            indices = np.arange(len(X))
            np.random.shuffle(indices)
            for j in range(0, len(X), batch_size):
                batch_indices = indices[j:j+batch_size]
                self.backward(X[batch_indices], y[batch_indices], self.learning_rate)
            if i % 1 == 0:
                loss = self.costFunction(X, y)
                # print("Iteration %d: loss = %f" % (i, loss))
                if(wand == 1):
                    wandb.log({"Loss": loss, "Epoch": i})
        loss = self.costFunction(X, y)
        return loss
        print("Training completed.")

    def predict(self, X):
        yHat = self.forward(X)
        return yHat
    def set_learning_rate(self, learning_rate):
        self.learning_rate = learning_rate

    def set_activation_function(self, activation_function):
        if(activation_function == 'sigmoid'):
            self.activation = self.sigmoid
            self.activationPrime = self.sigmoidPrime
        if(activation_function == 'relu'):
            self.activation = self.relu
            self.activationPrime = self.reluPrime
        if(activation_function == 'tanh'):
            self.activation = self.tanh
            self.activationPrime = self.tanhPrime

    def set_optimizer(self, optimizer):
        self.optimizer = optimizer

    def set_hidden_layers(self, hidden_layer_sizes):
        self.hiddenLayerSize = hidden_layer_sizes
        self.num_layers = len(self.hiddenLayerSize)+1
        self.weights = []
        self.weights.append(np.random.randn(self.inputLayerSize, self.hiddenLayerSize[0]))
        for i in range(1,self.num_layers-1):
            self.weights.append(np.random.randn(self.hiddenLayerSize[i-1], self.hiddenLayerSize[i]))
        self.weights.append(np.random.randn(self.hiddenLayerSize[-1], self.outputLayerSize))
        # self.W1 = np.random.randn(self.inputLayerSize, self.hiddenLayerSize)
        # self.W2 = np.random.randn(self.hiddenLayerSize, self.outputLayerSize)
        # self.weights, self.biases = self.initialize_weights()
    def set_epochs(self, epcohs):
        self.max_iterations = epochs
        print(self.max_iterations)






In [4]:
data = pd.read_csv('HousingData.csv')
imputer = SimpleImputer(strategy='mean')
data = pd.DataFrame(imputer.fit_transform(data), columns=data.columns)
data = data.fillna(data.mean())
X = data.drop(columns=['MEDV'])
y = data['MEDV']
# X = data.iloc[1:, :-1].reset_index(drop=True)
# y = data.iloc[1:, -1].reset_index(drop=True)

# scaler = StandardScaler()
# X = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)



In [5]:
std_scaler = StandardScaler()
minmax_scaler = MinMaxScaler()

X_train_std = std_scaler.fit_transform(X_train)
X_test_std = std_scaler.transform(X_test)

X_train_norm = minmax_scaler.fit_transform(X_train_std)
X_test_norm = minmax_scaler.transform(X_test_std)
X_train = X_train_norm
X_test = X_test_norm
y_train = np.array(y_train)
y_test = np.array(y_test)
print(X_train)
print(y_train)

[[1.68762759e-01 0.00000000e+00 6.42962963e-01 ... 8.08510638e-01
  8.80427656e-01 6.39624724e-01]
 [6.95009416e-03 0.00000000e+00 2.74074074e-01 ... 8.93617021e-01
  9.96772404e-01 1.85982340e-01]
 [2.87746689e-04 1.12119342e-01 1.97037037e-01 ... 4.57446809e-01
  9.12627969e-01 1.68322296e-01]
 ...
 [6.68786251e-05 8.00000000e-01 4.70370370e-02 ... 4.68085106e-01
  9.84971506e-01 1.17549669e-01]
 [1.25342233e-01 0.00000000e+00 6.42962963e-01 ... 8.08510638e-01
  2.76186394e-01 5.94370861e-01]
 [2.46945108e-03 0.00000000e+00 2.89629630e-01 ... 8.82978723e-01
  1.77719502e-01 2.45584989e-01]]
[12.  19.9 19.4 13.4 18.2 24.6 21.1 24.7  8.7 27.5 20.7 36.2 31.6 11.7
 39.8 13.9 21.8 23.7 17.6 24.4  8.8 19.2 25.3 20.4 23.1 37.9 15.6 45.4
 15.7 22.6 14.5 18.7 17.8 16.1 20.6 31.6 29.1 15.6 17.5 22.5 19.4 19.3
  8.5 20.6 17.  17.1 14.5 50.  14.3 12.6 28.7 21.2 19.3 23.1 19.1 25.
 33.4  5.  29.6 18.7 21.7 23.1 22.8 21.  48.8 14.6 16.6 27.1 20.1 19.8
 21.  41.3 23.2 20.4 18.5 29.4 36.4 24.4 11.8 

In [6]:
def bagging_ensemble_regression(base_estimator, n_estimators, sample_fraction,with_replacement, voting):
    ensemble_models = []
    
    sample_size = int(sample_fraction * len(X_train))
    # np.random.seed(42)
    for _ in range(n_estimators):
        sample_indices = np.random.choice(len(X_train), size=sample_size, replace=with_replacement)
        sample_X = X_train[sample_indices]
        sample_y = y_train[sample_indices]
        sample_y = np.array(sample_y)
        if base_estimator == "DecisionTree":
            base_model = MyDecisionTreeRegressor()
            base_model.fit(sample_X, sample_y)
        elif base_estimator == "Linear":
            base_model = MyLinearRegressor()
            base_model.fit(sample_X, sample_y)
        elif base_estimator == "MLP":
            base_model = MyMLPRegressor()
            
            y_train_array = np.array([[x] for x in sample_y])
            base_model.fit(sample_X, y_train_array)
            

        # base_model.fit(sample_X, sample_y)
        ensemble_models.append(base_model)

    def ensemble_predict(X):
        predictions = [model.predict(X) for model in ensemble_models]

        if voting == "hard":
            return np.round(np.mean(predictions, axis=0))
        elif voting == "soft":
            confidences = [1 / (mean_squared_error(y_test, model.predict(X_test)) + 1e-10) for model in ensemble_models]  
            weighted_predictions = [prediction * confidence for prediction, confidence in zip(predictions, confidences)]
            return np.sum(weighted_predictions, axis=0) / np.sum(confidences)

    return ensemble_predict

# Example usage for bagging ensemble without replacement with Decision Tree regressor and soft voting
bagged_regressor_decision_tree_soft = bagging_ensemble_regression(base_estimator="DecisionTree", n_estimators=10, sample_fraction=0.75, with_replacement=True, voting="soft")

# Make predictions using the bagged ensemble
y_pred = bagged_regressor_decision_tree_soft(X_test)

# Evaluate the performance, e.g., by calculating the Mean Squared Error (MSE)
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

Mean Squared Error: 7.500156976167759


In [7]:
data = pd.read_csv('WineQT.csv', header=0)
train_data, test_data = train_test_split(data, test_size=0.2,random_state = 42)

scaler = StandardScaler()
minmax_scaler = MinMaxScaler()

imputer = SimpleImputer(strategy='mean')  

train_data_scaled = scaler.fit_transform(imputer.fit_transform(train_data.drop(columns=['quality'])))
train_labels = train_data['quality']

test_data_scaled = scaler.transform(imputer.transform(test_data.drop(columns=['quality'])))

train_data_scaled_normalized = minmax_scaler.fit_transform(train_data_scaled)
test_data_scaled_normalized = minmax_scaler.transform(test_data_scaled)

X_train = train_data_scaled
y_train = train_labels
X_test = test_data_scaled
y_test = test_data['quality']
y_train = np.array(y_train)
y_test = np.array(y_test)
# y_train = pd.get_dummies(y_train).astype(int)
# y_train = np.eye(9)[y_train]
# y_train = np.array(y_train)
# y_test = pd.get_dummies(y_test).astype(int)
# print(X_train)
# print(y_train)
# y_test = np.eye(9)[y_test]
# y_test = np.array(y_test)
y_test = np.eye(9)[y_test]
y_test = np.array(y_test)


In [9]:
def bagging_ensemble_classification(base_classifier, n_estimators, sample_fraction, with_replacement, voting):
    ensemble_models = []
    
    sample_size = int(sample_fraction * len(X_train))
    # np.random.seed(42)
    for _ in range(n_estimators):
        sample_indices = np.random.choice(len(X_train), size=sample_size, replace=with_replacement)
        sample_X = X_train[sample_indices]  # Assuming X_train is a DataFrame
        sample_y = y_train[sample_indices]  # Assuming y_train is a Series

        if base_classifier == "DecisionTree":
            base_model = MyDecisionTreeClassifier()
            sample_y = np.eye(9)[sample_y]
            sample_y = np.array(sample_y)
            base_model.fit(sample_X, sample_y)
        elif base_classifier == "Logistic":
            base_model = MyLogisticClassifier()
            base_model.fit(sample_X, sample_y)
        elif base_classifier == "MLP":
            base_model = MyMLPClassifier()
            sample_y = np.eye(9)[sample_y]
            sample_y = np.array(sample_y)
            base_model.fit(sample_X, sample_y)
            
        ensemble_models.append(base_model)

    def ensemble_predict(X):
        predictions = [model.predict(X) for model in ensemble_models]
        predictions = np.array(predictions)
        
        print(predictions.shape)
        # return predictions
        if(base_classifier == "DecisionTree"):
            if voting == "hard":
                array_2d = np.sum(predictions, axis=0)
                for array in range(len(array_2d)):
                    k = np.argmax(array_2d[array])
                    array_2d[array] = np.zeros(9)
                    array_2d[array][k] = 1
                return array_2d

            elif voting == "soft":
                # For soft voting, use the class probabilities (if available) and average them
                class_probs = [model.predict(X) for model in ensemble_models]
                return np.mean(class_probs, axis=0)
        elif(base_classifier == "Logistic"):
            if voting == "hard":
                # print(predictions[: , 0])
                # print(predictions[: , 1])
                array_2d = np.zeros((229, 9))
                # print(array_2d[0])
                # print(array_2d[1])
                # return array_2d
                for idx in range(len(predictions[0])):
                    uniq, cnt = np.unique(predictions[:, idx], return_counts=True)
                    # print(uniq)
                    # print(cnt)
                    k = np.argmax(cnt)
                    # print(k)
                    array_2d[idx][uniq[k]] = 1
                return array_2d

            elif voting == "soft":
                # For soft voting, use the class probabilities (if available) and average them
                class_probs = [model.predict(X) for model in ensemble_models]
                return np.mean(class_probs, axis=0)
        elif(base_classifier == "MLP"):
            if voting == "hard":
                array_2d = np.sum(predictions, axis=0)
                for array in range(len(array_2d)):
                    k = np.argmax(array_2d[array])
                    array_2d[array] = np.zeros(9)
                    array_2d[array][k] = 1
                return array_2d

            elif voting == "soft":
                # For soft voting, use the class probabilities (if available) and average them
                class_probs = [model.predict(X) for model in ensemble_models]
                return np.mean(class_probs, axis=0)

    return ensemble_predict

# Example usage for bagging ensemble with Decision Tree classifier and soft voting
bagged_classifier_decision_tree_soft = bagging_ensemble_classification(base_classifier="MLP", n_estimators=10, sample_fraction=0.75, with_replacement=True, voting="hard")

# Make predictions using the bagged ensemble
y_pred = bagged_classifier_decision_tree_soft(X_test)
y_pred = np.array(y_pred)
print(y_test.shape)
print(y_pred.shape)
for i in range(len(y_pred)):
    print(y_pred[i])
    print(y_test[i])
print(y_pred[1])
y_test = np.array(y_test)
y_pred = np.array(y_pred)
# print("Pred :", y_pred)
# print("Test :", y_test)
# Evaluate the performance, e.g., by calculating the accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

MLP
MLP
MLP
MLP
MLP
MLP
MLP
MLP
MLP
MLP
(10, 229, 9)
(229, 9)
(229, 9)
[0. 0. 0. 0. 0. 1. 0. 0. 0.]
[0. 0. 0. 0. 0. 1. 0. 0. 0.]
[0. 0. 0. 0. 0. 1. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 1. 0. 0.]
[0. 0. 0. 0. 0. 1. 0. 0. 0.]
[0. 0. 0. 0. 0. 1. 0. 0. 0.]
[0. 0. 0. 0. 0. 1. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 1. 0. 0.]
[0. 0. 0. 0. 0. 1. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 1. 0. 0.]
[0. 0. 0. 0. 0. 0. 1. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 1.]
[0. 0. 0. 0. 0. 1. 0. 0. 0.]
[0. 0. 0. 0. 0. 1. 0. 0. 0.]
[0. 0. 0. 0. 0. 1. 0. 0. 0.]
[0. 0. 0. 0. 0. 1. 0. 0. 0.]
[0. 0. 0. 0. 0. 1. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 1. 0. 0.]
[0. 0. 0. 0. 0. 1. 0. 0. 0.]
[0. 0. 0. 0. 0. 1. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 1. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 1. 0.]
[0. 0. 0. 0. 0. 0. 1. 0. 0.]
[0. 0. 0. 0. 0. 0. 1. 0. 0.]
[0. 0. 0. 0. 0. 1. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 1. 0. 0.]
[0. 0. 0. 0. 0. 0. 1. 0. 0.]
[0. 0. 0. 0. 0. 0. 1. 0. 0.]
[0. 0. 0. 0. 0. 0. 1. 0. 0.]
[0. 0. 0. 0. 0. 1. 0. 0. 0.]
[0. 0. 0. 0. 0. 1. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 1. 0. 0.]
[