# Defining MLP

In [86]:
import os
os.chdir('/Users/mikolajmroz/Developer/Computational_Intelligence_Methods')
print(os.getcwd())

/Users/mikolajmroz/Developer/Computational_Intelligence_Methods


In [87]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import f1_score

In [88]:
def relu(x):
    return np.maximum(0, x)

In [89]:
def relu_derivative(x):
    return np.where(x > 0, 1, 0)

In [90]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [91]:
def sigmoid_derivative(x):
    # Corrected sigmoid derivative to properly compute the derivative
    return sigmoid(x) * (1 - sigmoid(x))

In [92]:
def mse(predictions, targets):
    return np.mean((predictions - targets) ** 2)

In [93]:
def softmax(x):
    exp_x = np.exp(x - np.max(x))  # Subtract max for numerical stability
    return exp_x / exp_x.sum(axis=0, keepdims=True)

In [94]:
def cross_entropy_loss(softmax_output, y_true):
    # Assuming y_true is one-hot encoded
    m = y_true.shape[1]  # Number of examples
    log_likelihood = -np.log(softmax_output[y_true.argmax(axis=0), range(m)])
    loss = np.sum(log_likelihood) / m
    return loss

In [113]:
def cross_entropy_loss_derivative(softmax_output, y_true):

    corrected_softmax_output = softmax_output - y_true
    
    return corrected_softmax_output
        


In [114]:
def softmax_derivative(softmax_output):
    # For softmax combined with cross-entropy loss, the derivative simplifies
    # the gradient calculation in backpropagation, directly using output error.
    return softmax_output

In [115]:
class MLP:
    """
    Multilayer Perceptron (MLP) Class:
    A simple implementation of a feedforward neural network that uses backpropagation for training.

    Attributes:
    - layer_sizes (list): The sizes of the layers in the neural network.
    - layer_weights (list): Weights for each layer in the neural network, initialized based on He initialization.
    - layer_biases (list): Biases for each layer in the neural network, initialized to zeros.
    """
    
    def __init__(self, sizes, activation_fn=relu, activation_fn_derivative=relu_derivative):
        """
        Initializes a new MLP instance.
        
        Parameters:
        - sizes (list): A list containing the size (number of neurons) of each layer in the network.
        - activation_fn (str): The name of the activation function to use ('sigmoid' by default).
        """
        self.layer_sizes = sizes
        
        self.activation_fn = activation_fn
        self.activation_fn_derivative = activation_fn_derivative
        
        # Initialize weights with He initialization for ReLU activations; suitable for layers not using ReLU, but common.
        self.layer_weights = [
            np.random.randn(next_layer, prev_layer) * np.sqrt(2.0 / prev_layer)
            for prev_layer, next_layer in zip(sizes[:-1], sizes[1:])
        ]
        
        # Initialize biases for all layers (except input layer) to zeros.
        self.layer_biases = [np.zeros((neurons, 1)) for neurons in sizes[1:]]

    def display_weights_biases(self):
        """Prints the weights and biases of each layer in the network."""
        print("Final Weights and Biases:")
        for layer_index, (weights, biases) in enumerate(zip(self.layer_weights, self.layer_biases)):
            print(f"Layer {layer_index + 1} Weights:\n{weights}")
            print(f"Layer {layer_index + 1} Biases:\n{biases}")

    def propagate_forward(self, input_activation):
        activations = [input_activation]
        for biases, weights in zip(self.layer_biases[:-1], self.layer_weights[:-1]):
            input_activation = self.activation_fn(np.dot(weights, input_activation) + biases)
            activations.append(input_activation)
        # Use softmax for the final layer for multi-class classification
        final_input = np.dot(self.layer_weights[-1], input_activation) + self.layer_biases[-1]

        final_output = softmax(final_input)
        activations.append(final_output)
        return final_output, activations

    def backward_propagation(self, input_val, true_val):
        weight_gradients = [np.zeros(weight.shape) for weight in self.layer_weights]
        bias_gradients = [np.zeros(bias.shape) for bias in self.layer_biases]
        
        final_act, activations = self.propagate_forward(input_val)
        
        # Error at the output layer for softmax combined with cross-entropy
        error = cross_entropy_loss_derivative(final_act, true_val)
        bias_gradients[-1] = error
        weight_gradients[-1] = np.dot(error, activations[-2].T)
        
        for l in range(2, len(self.layer_sizes)):
            # Calculate 'z' using the transposed weights of the next layer and the current error
            z = np.dot(self.layer_weights[-l+1].T, error)
            # Recalculate error for the current layer
            error = self.activation_fn_derivative(z)
            # Update gradients
            bias_gradients[-l] = error
            weight_gradients[-l] = np.dot(error, activations[-l-1].T)

        
        return weight_gradients, bias_gradients
    
    def update_batch(self, batch, learn_rate, regularization, total_size, optimization_method, beta, epsilon=None):
        """
        Updates the weights and biases of the network for a single batch using gradient descent with regularization
        and optionally applies momentum or RMSprop as optimization methods to accelerate convergence.
    
        Parameters:
        - batch (list of tuples): Each tuple contains input data and true labels/values for a batch of samples.
        - learn_rate (float): Learning rate for the optimization.
        - regularization (float): Regularization factor to reduce overfitting by penalizing large weights.
        - total_size (int): Total number of samples in the dataset, used for regularization calculation.
        - optimization_method (str): Specifies the optimization method ('momentum' or 'rmsprop').
        - beta (float): Hyperparameter for the optimization methods, affecting the weighting of past gradients.
        - epsilon (float, optional): A small number to avoid division by zero in 'rmsprop', default is None.
    
        Returns:
        - None, but updates the network's weights and biases in place.
        """
        # Initialize gradients for weights and biases with zeros
        gradient_w = [np.zeros(weight.shape) for weight in self.layer_weights]
        gradient_b = [np.zeros(bias.shape) for bias in self.layer_biases]
        
        # Loop through each data point in the batch
        for input_val, true_val in batch:
            # Perform backpropagation to compute gradients for current data point
            delta_gradient_w, delta_gradient_b = self.backward_propagation(input_val, true_val)
            # Accumulate gradients over the batch
            gradient_w = [w + dw for w, dw in zip(gradient_w, delta_gradient_w)]
            gradient_b = [b + db for b, db in zip(gradient_b, delta_gradient_b)]

        # Apply optimization method if specified
        if optimization_method == 'momentum':
            # Initialize velocity terms for weights and biases if not already initialized
            if not hasattr(self, 'velocity_weights'):
                self.velocity_weights = [np.zeros_like(w) for w in self.layer_weights]
                self.velocity_biases = [np.zeros_like(b) for b in self.layer_biases]
                
            # Update velocities based on gradients and apply to weights and biases
            self.velocity_weights = [beta * vw + (1 - beta) * gw for vw, gw in zip(self.velocity_weights, gradient_w)]
            self.velocity_biases = [beta * vb + (1 - beta) * gb for vb, gb in zip(self.velocity_biases, gradient_b)]
            # Update weights and biases using momentum method
            self.layer_weights = [(1 - learn_rate * (regularization / total_size)) * w - learn_rate * vw
                                  for w, vw in zip(self.layer_weights, self.velocity_weights)]
            self.layer_biases = [b - learn_rate * vb for b, vb in zip(self.layer_biases, self.velocity_biases)]
            
        elif optimization_method == 'rmsprop':
            # Initialize squared gradient terms for weights and biases if not already initialized
            if not hasattr(self, 'squared_gradients_weights'):
                self.squared_gradients_weights = [np.zeros_like(w) for w in self.layer_weights]
                self.squared_gradients_biases = [np.zeros_like(b) for b in self.layer_biases]
                
            # Update squared gradients based on new gradients
            self.squared_gradients_weights = [beta * sgw + (1 - beta) * (gw**2)
                                              for sgw, gw in zip(self.squared_gradients_weights, gradient_w)]
            self.squared_gradients_biases = [beta * sgb + (1 - beta) * (gb**2)
                                             for sgb, gb in zip(self.squared_gradients_biases, gradient_b)]
            # Update weights and biases using rmsprop method
            self.layer_weights = [(1 - learn_rate * (regularization / total_size)) * w - 
                                  (learn_rate / (np.sqrt(sgw) + epsilon)) * gw
                                  for w, sgw, gw in zip(self.layer_weights, self.squared_gradients_weights, gradient_w)]
            self.layer_biases = [b - (learn_rate / (np.sqrt(sgb) + epsilon)) * gb
                                 for b, sgb, gb in zip(self.layer_biases, self.squared_gradients_biases, gradient_b)]
    
    def train(self, training_set, epoch_count, learn_rate, batch_size_input, regularization=0.0,
              visual_interval=100, optimization_method='momentum', beta=0.9, epsilon=1e-8, 
              X_val_scaled=None, y_val_scaled=None, X_val=None, y_val=None,
              scaler_X=None, scaler_y=None, mse_limit=None):
        """
        Trains the neural network over a specified number of epochs using mini-batch gradient descent,
        and tracks performance over epochs through mean squared error on a validation set. Supports learning
        rate scheduling, regularization, and early stopping.
    
        Parameters:
        - training_set (list): Training data consisting of tuples of input values and true labels.
        - epoch_count (int): Total number of epochs to train the network.
        - learn_rate (float): Initial learning rate for optimization.
        - batch_size_input (int or float): Size of the mini-batches for training. If float, it represents
          the percentage of the total dataset size.
        - regularization (float): Regularization factor for reducing overfitting.
        - visual_interval (int): Frequency of epochs at which to calculate and print the MSE for tracking.
        - optimization_method (str): Optimization method to use ('momentum' or 'rmsprop').
        - beta (float): Parameter for the optimization method that controls the momentum or the weighted average.
        - epsilon (float): Small value to prevent division by zero in 'rmsprop'.
        - X_val_scaled, y_val_scaled, X_val, y_val (ndarray, optional): Validation datasets for performance evaluation.
        - scaler_X, scaler_y (preprocessing scaler objects, optional): Scalers used for transforming data back to original scale for MSE calculation.
        - mse_limit (float, optional): MSE value at which training can be stopped early for performance.
    
        Returns:
        - mse_history (list): History of mean squared error values at each visual interval.
        """
    
        # Initialize an empty list to store the history of mean squared errors (MSE) for each epoch
        loss_history = []
        # Determine the total number of samples in the training set
        data_size = len(training_set)
        # Store the initial learning rate, which may be adjusted during training
        rate_init = learn_rate
    
        # Determine mini-batch size based on whether the batch_size_input is a percentage or fixed value
        if isinstance(batch_size_input, float):  # If batch_size_input is a float, treat it as a percentage
            mini_batch_size = max(1, min(data_size, int(data_size * batch_size_input / 100)))
        elif isinstance(batch_size_input, int):  # If batch_size_input is an integer, treat it as a fixed size
            mini_batch_size = max(1, min(data_size, batch_size_input))
        else:  # Raise an error if batch_size_input is neither float nor int
            raise ValueError("batch_size_input must be an integer (fixed size) or a float (percentage of dataset)")
    
        # Iterate through each epoch for training
        for epoch in range(epoch_count):
            # Randomly shuffle the training set to ensure randomness of mini-batches
            np.random.shuffle(training_set)
            # Create mini-batches from the training set
            mini_batches = [training_set[k:k + mini_batch_size] for k in range(0, data_size, mini_batch_size)]
            # Update the model's weights and biases for each mini-batch
            for mini_batch in mini_batches:
                self.update_batch(mini_batch, learn_rate, regularization, data_size, optimization_method, beta, epsilon)
    
            # Adjust the learning rate after each epoch (simple learning rate decay)
            learn_rate = rate_init / (1 + 0.01 * epoch)
    
            # At specified intervals or the last epoch, evaluate and print the model's performance
            if epoch % visual_interval == 0 or epoch == epoch_count - 1:
                '''
                # Generate predictions for the validation set
                predictions = np.array([self.propagate_forward(x.reshape(-1, 1))[0] for x in X_val])

                # Calculate the cross-entropy loss on the validation set
                # Assuming y_val is one-hot encoded and predictions are the output of the softmax layer
                cross_entropy_loss_val = cross_entropy_loss(predictions, y_val)

                # Replace mse_history with a more appropriately named variable, like loss_history
                loss_history.append((epoch, cross_entropy_loss_val))

                # Print the current epoch and its cross-entropy loss
                print(f'Epoch {epoch}, Cross-Entropy Loss: {cross_entropy_loss_val}')


                # If a threshold for MSE is set and the current MSE is below this threshold, stop training
                if mse_limit is not None and cross_entropy_loss_val < mse_limit:
                    break 
                '''
                predictions = np.argmax(np.array([self.propagate_forward(x.reshape(-1, 1))[0] for x in X_val]), axis=1)
                # Calculate accuracy or other metrics
                accuracy = np.mean(predictions == y_val)
                print(f'epoch: {epoch}', f'Test accuracy: {accuracy}')
                
                # Calculate F1 Score
                f1_weighted = f1_score(y_val, predictions, average='weighted')
                print(f"F1 Score (Weighted): {f1_weighted}")
        # 
        # # Return the history of MSE values
        # return loss_history



In [116]:
class DataScaler:
    def __init__(self, method="standardization"):
        self.method = method
        self.min = None
        self.max = None
        self.mean = None
        self.std = None

    def fit_transform(self, data):
        if self.method == "min_max":
            return self.fit_transform_min_max(data)
        elif self.method == "standardization":
            return self.fit_transform_standardization(data)
        else:
            raise ValueError("Unsupported scaling method")

    def transform(self, data):
        if self.method == "min_max":
            return self.transform_min_max(data)
        elif self.method == "standardization":
            return self.transform_standardization(data)
        else:
            raise ValueError("Unsupported scaling method")

    def inverse_transform(self, data):
        if self.method == "min_max":
            return self.inverse_transform_min_max(data)
        elif self.method == "standardization":
            return self.inverse_transform_standardization(data)
        else:
            raise ValueError("Unsupported scaling method")

    def fit_transform_min_max(self, data):
        self.min = np.min(data, axis=0)
        self.max = np.max(data, axis=0)
        return (data - self.min) / (self.max - self.min)

    def transform_min_max(self, data):
        return (data - self.min) / (self.max - self.min)

    def inverse_transform_min_max(self, data):
        return data * (self.max - self.min) + self.min

    def fit_transform_standardization(self, data):
        self.mean = np.mean(data, axis=0)
        self.std = np.std(data, axis=0)
        return (data - self.mean) / self.std

    def transform_standardization(self, data):
        return (data - self.mean) / self.std

    def inverse_transform_standardization(self, data):
        return data * self.std + self.mean

In [117]:
def plot_mse(mse_history):
    plt.plot(mse_history)
    plt.title('MSE Over Epochs')
    plt.xlabel('Epoch')
    plt.ylabel('Mean Squared Error')
    plt.show()

# Loading data

In [1]:
df_train_rings3_regular = pd.read_csv('./data/classification/rings3-regular-training.csv')
df_test_rings3_regular = pd.read_csv('./data/classification/rings3-regular-test.csv')

NameError: name 'pd' is not defined

In [None]:
df_train_easy = pd.read_csv('./data/classification/easy-training.csv')
df_test_easy = pd.read_csv('./data/classification/easy-test.csv')

In [None]:
df_train_xor3 = pd.read_csv('./data/classification/xor3-training.csv')
df_test_xor3 = pd.read_csv('./data/classification/xor3-test.csv')

### rings 3 regular dataset

In [121]:
scaler_X = DataScaler("standardization")

In [122]:
# Scale features
X1_train_rings = df_train_rings3_regular[['x']].values.reshape(-1, 1)
X1_test_rings = df_test_rings3_regular[['x']].values.reshape(-1, 1)


In [123]:
X2_train_rings = df_train_rings3_regular[['y']].values.reshape(-1, 1)
X2_test_rings = df_test_rings3_regular[['y']].values.reshape(-1, 1)

In [124]:
X_train_rings = np.hstack((X1_train_rings, X2_train_rings))
X_test_rings = np.hstack((X1_test_rings, X2_test_rings))

In [125]:
X_train_rings_scaled = np.hstack((scaler_X.fit_transform(X1_train_rings), scaler_X.fit_transform(X2_train_rings)))
X_test_rings_scaled = np.hstack((scaler_X.transform(X1_test_rings), scaler_X.transform(X2_test_rings)))

In [126]:
y_train_rings = df_train_rings3_regular['c'].values.reshape(-1, 1)
y_test_rings = df_test_rings3_regular['c'].values.reshape(-1, 1)

In [127]:
# Encode the 'c' column into one-hot vectors for the training and test datasets
encoder = OneHotEncoder(sparse_output=False)
y_train_encoded_rings = encoder.fit_transform(y_train_rings)
y_test_encoded_rings = encoder.transform(y_test_rings)

In [128]:
num_classes_rings = y_train_encoded_rings.shape[1] 
num_classes_rings

3

In [129]:
training_data_rings = [
    (X_train_rings[i].reshape(-1, 1), y_train_encoded_rings[i].reshape(-1, 1))
    for i in range(len(X_train_rings))
]

In [130]:
# Initialize the MLP with the proper layer sizes.
# For example, with 2 input features, a hidden layer with 10 neurons, and output layer matching the number of classes
# num_classes_rings = y_train_encoded_rings.shape[1]  # Assuming y_train_encoded is one-hot encoded
mlp_rings = MLP(sizes=[2, 10, 3])  # Example layer setup

print()

# Train the MLP using your training data
# This step will vary depending on the exact implementation of your `train` method
# For example:
mlp_rings.train(training_set=training_data_rings, epoch_count=1000, learn_rate=1, batch_size_input=32, X_val=X_train_rings, y_val=y_train_rings, visual_interval=10)



ValueError: shapes (10,1) and (3,1) not aligned: 1 (dim 1) != 3 (dim 0)

In [49]:
# Predict on the test set
# This might involve looping through X_test_scaled and using your model's predict method
predictions_rings = np.argmax(np.array([mlp_rings.propagate_forward(x.reshape(-1, 1))[0] for x in X_test_rings]), axis=1)

In [29]:
# Calculate accuracy or other metrics
accuracy_rings = np.mean(predictions_rings == y_test_rings)
print(f'Test accuracy: {accuracy_rings}')


Test accuracy: 0.3975


In [336]:
# Calculate F1 Score
f1_weighted = f1_score(y_test_rings, predictions_rings, average='weighted')

print(f"F1 Score (Weighted): {f1_weighted}")

F1 Score (Weighted): 0.1731459555511997


## scaled

In [281]:
training_data_rings_scaled = [
    (X_train_rings_scaled[i].reshape(-1, 1), y_train_encoded_rings[i].reshape(-1, 1))
    for i in range(len(X_train_rings_scaled))
]

In [50]:
# Initialize the MLP with the proper layer sizes.
# For example, with 2 input features, a hidden layer with 10 neurons, and output layer matching the number of classes
# num_classes_rings = y_train_encoded_rings.shape[1]  # Assuming y_train_encoded is one-hot encoded
mlp_rings = MLP(sizes=[2, 10, 3])  # Example layer setup

print()

# Train the MLP using your training data
# This step will vary depending on the exact implementation of your `train` method
# For example:
mlp_rings.train(training_set=training_data_rings_scaled, epoch_count=1000, learn_rate=1, batch_size_input=20, X_val=X_train_rings, y_val=y_train_rings, visual_interval=10)




NameError: name 'training_data_rings_scaled' is not defined

In [294]:
# Predict on the test set
# This might involve looping through X_test_scaled and using your model's predict method
predictions_rings = np.argmax(np.array([mlp_rings.propagate_forward(x.reshape(-1, 1))[0] for x in X_test_rings]), axis=1)

In [295]:
# Calculate accuracy or other metrics
accuracy_rings = np.mean(predictions_rings == y_test_rings)
print(f'Test accuracy: {accuracy_rings}')


Test accuracy: 0.421


In [296]:
# Calculate F1 Score
f1_weighted = f1_score(y_test_rings, predictions_rings, average='weighted')

print(f"F1 Score (Weighted): {f1_weighted}")

F1 Score (Weighted): 0.35879514074384916


### easy dataset

In [69]:
# Scale features
X1_train_easy = df_train_easy[['x']].values.reshape(-1, 1)
X1_test_easy = df_test_easy[['x']].values.reshape(-1, 1)


In [70]:
X2_train_easy = df_train_easy[['y']].values.reshape(-1, 1)
X2_test_easy = df_test_easy[['y']].values.reshape(-1, 1)

In [71]:
X_train_easy= np.hstack((X1_train_easy, X2_train_easy))
X_test_easy = np.hstack((X1_test_easy, X2_test_easy))

In [72]:
y_train_easy = df_train_easy['c'].values.reshape(-1, 1)
y_test_easy = df_test_easy['c'].values.reshape(-1, 1)

In [73]:
# Encode the 'c' column into one-hot vectors for the training and test datasets
encoder = OneHotEncoder(sparse_output=False)
y_train_encoded_easy = encoder.fit_transform(y_train_easy)
y_test_encoded_easy = encoder.transform(y_test_easy)

In [74]:
num_classes_easy = y_train_encoded_easy.shape[1] 
num_classes_easy

2

In [75]:
training_data_easy = [
    (X_train_easy[i].reshape(-1, 1), y_train_encoded_easy[i].reshape(-1, 1))
    for i in range(len(X_train_easy))
]

In [85]:
# Initialize the MLP with the proper layer sizes.
# For example, with 2 input features, a hidden layer with 10 neurons, and output layer matching the number of classes
# num_classes_rings = y_train_encoded_rings.shape[1]  # Assuming y_train_encoded is one-hot encoded
mlp_easy = MLP(sizes=[2, 5, 2])  # Example layer setup

# Train the MLP using your training data
# This step will vary depending on the exact implementation of your `train` method
# For example:
mlp_easy.train(training_set=training_data_easy, epoch_count=5000, learn_rate=0.01, batch_size_input=24, X_val=X_test_easy, y_val=y_train_easy, visual_interval=100)

epoch: 0 Test accuracy: 0.492
F1 Score (Weighted): 0.4919674859190989
epoch: 100 Test accuracy: 0.488
F1 Score (Weighted): 0.4752291772577445
epoch: 200 Test accuracy: 0.49
F1 Score (Weighted): 0.4769423898194528
epoch: 300 Test accuracy: 0.49
F1 Score (Weighted): 0.4769423898194528
epoch: 400 Test accuracy: 0.492
F1 Score (Weighted): 0.4786535303776683
epoch: 500 Test accuracy: 0.492
F1 Score (Weighted): 0.4786535303776683
epoch: 600 Test accuracy: 0.492
F1 Score (Weighted): 0.4786535303776683
epoch: 700 Test accuracy: 0.492
F1 Score (Weighted): 0.4786535303776683
epoch: 800 Test accuracy: 0.492
F1 Score (Weighted): 0.4786535303776683
epoch: 900 Test accuracy: 0.492
F1 Score (Weighted): 0.4786535303776683
epoch: 1000 Test accuracy: 0.492
F1 Score (Weighted): 0.4786535303776683
epoch: 1100 Test accuracy: 0.492
F1 Score (Weighted): 0.4786535303776683
epoch: 1200 Test accuracy: 0.492
F1 Score (Weighted): 0.4786535303776683
epoch: 1300 Test accuracy: 0.492
F1 Score (Weighted): 0.478653530

In [318]:
# Predict on the test set
# This might involve looping through X_test_scaled and using your model's predict method
predictions_easy = np.argmax(np.array([mlp_easy.propagate_forward(x.reshape(-1, 1))[0] for x in X_test_easy]), axis=1)
predictions_easy

array([[0],
       [0],
       [0],
       [1],
       [0],
       [1],
       [1],
       [0],
       [1],
       [1],
       [0],
       [1],
       [0],
       [0],
       [1],
       [1],
       [1],
       [0],
       [0],
       [0],
       [0],
       [0],
       [1],
       [0],
       [1],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [1],
       [0],
       [1],
       [0],
       [1],
       [0],
       [0],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0],
       [0],
       [1],
       [1],
       [0],
       [1],
       [1],
       [0],
       [0],
       [1],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [1],
       [0],
       [1],
       [0],
       [0],
       [0],
    

In [319]:

# Calculate accuracy or other metrics
accuracy_easy = np.mean(predictions_easy == y_test_easy)
print(f'Test accuracy: {accuracy_easy}')


Test accuracy: 0.81


In [320]:
# Calculate F1 Score
f1_weighted = f1_score(y_test_easy, predictions_easy, average='weighted')

print(f"F1 Score (Weighted): {f1_weighted}")

F1 Score (Weighted): 0.8032047129104457


### xor3 dataset

In [30]:
scaler_X = DataScaler("standardization")

In [31]:
# Scale features
X_train_xor = df_train_xor3[['x', 'y']].values
X_test_xor = df_test_xor3[['x', 'y']].values


In [32]:
# Use your custom scaler to scale the X values
X_train_scaled_xor = scaler_X.fit_transform(X_train_xor)
X_test_scaled_xor = scaler_X.transform(X_test_xor)

In [33]:
# Encode the 'c' column into one-hot vectors for the training and test datasets
encoder = OneHotEncoder(sparse_output=False)
y_train_encoded_xor = encoder.fit_transform(df_train_xor3[['c']])
y_test_encoded_xor = encoder.transform(df_test_xor3[['c']])

In [34]:
# Initialize the MLP with the proper layer sizes.
# For example, with 2 input features, a hidden layer with 10 neurons, and output layer matching the number of classes
num_classes_xor = y_train_encoded_xor.shape[1]  # Assuming y_train_encoded is one-hot encoded
mlp_xor = MLP(sizes=[2, 10, num_classes_xor])  # Example layer setup

# Train the MLP using your training data
# This step will vary depending on the exact implementation of your `train` method
# For example:
# mlp.train(training_set=list(zip(X_train_scaled, y_train_encoded)), epoch_count=100, learn_rate=0.01, batch_size_input=32)


In [35]:
# Predict on the test set
# This might involve looping through X_test_scaled and using your model's predict method
predictions_xor = np.argmax(np.array([mlp_xor.propagate_forward(x.reshape(-1, 1))[0] for x in X_test_scaled_xor]), axis=1)

# Convert one-hot encoded y_test_encoded back to labels for comparison
y_test_labels_xor = np.argmax(y_test_encoded_xor, axis=1)

# Calculate accuracy or other metrics
accuracy_xor = np.mean(predictions_xor == y_test_labels_xor)
print(f'Test accuracy: {accuracy_xor}')


Test accuracy: 0.588


In [36]:
# Calculate F1 Score
f1_macro = f1_score(y_test_labels_xor, predictions_xor, average='macro')
f1_micro = f1_score(y_test_labels_xor, predictions_xor, average='micro')
f1_weighted = f1_score(y_test_labels_xor, predictions_xor, average='weighted')

print(f"F1 Score (Macro): {f1_macro}")
print(f"F1 Score (Micro): {f1_micro}")
print(f"F1 Score (Weighted): {f1_weighted}")

F1 Score (Macro): 0.3702770780856423
F1 Score (Micro): 0.588
F1 Score (Weighted): 0.4354458438287153
