# Defining MLP

In [1]:
# import os
# os.chdir('/Users/mikolajmroz/Developer/Computational_Intelligence_Methods')
# print(os.getcwd())

In [2]:
import os
os.chdir('c:\\Users\\mikol\\Documents\\Computational_Intelligence_Methods\\Computational_Intelligence_Methods')
print(os.getcwd())

c:\Users\mikol\Documents\Computational_Intelligence_Methods\Computational_Intelligence_Methods


In [3]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import f1_score

In [4]:
def linear(x):
    """Linear activation function: f(x) = x"""
    return x

In [5]:
def linear_derivative(x):
    """Derivative of linear activation function: f'(x) = 1"""
    return 1

In [6]:
def relu(x):
    return np.maximum(0, x)

In [7]:
def relu_derivative(x):
    return np.where(x > 0, 1, 0)

In [8]:
def sigmoid(x):
    x = np.clip(x, -500, 500)  # Avoid overflow
    return np.where(x > 0, 1 / (1 + np.exp(-x)), np.exp(x) / (1 + np.exp(x)))

In [9]:
def sigmoid_derivative(sigmoid_output):
    # Assumes that sigmoid_output is the result of sigmoid(x)
    return sigmoid_output * (1 - sigmoid_output)

In [10]:
def tanh(x):
    """Tanh activation function: f(x) = tanh(x)"""
    return np.tanh(x)

In [11]:
def tanh_derivative(x):
    """Derivative of tanh activation function: f'(x) = 1 - tanh(x)^2"""
    return 1 - np.tanh(x)**2

In [12]:
def mse(predictions, targets):
    return np.mean((predictions - targets) ** 2)

In [13]:
def softmax(x):
    exp_x = np.exp(x - np.max(x))  # Subtract max for numerical stability
    return exp_x / exp_x.sum(axis=0, keepdims=True)

In [14]:
def cross_entropy(softmax_output, y_true):
    # Assuming y_true is one-hot encoded
    m = y_true.shape[1]  # Number of examples
    log_likelihood = -np.log(softmax_output[y_true.argmax(axis=0), range(m)] + 1e-9)  # Small constant added
    loss = np.sum(log_likelihood) / m
    return loss

In [15]:
def cross_entropy_derivative(softmax_output, y_true):

    corrected_softmax_output = softmax_output - y_true
    
    return corrected_softmax_output
        


In [16]:
def softmax_derivative(softmax_output):
    # For softmax combined with cross-entropy loss, the derivative simplifies
    # the gradient calculation in backpropagation, directly using output error.
    return softmax_output

In [17]:
class MLP:
    def __init__(self, sizes, activation_fn=sigmoid, activation_fn_derivative=sigmoid_derivative):
        self.layer_sizes = sizes
        self.activation_fn = activation_fn
        self.layer_weights = [np.random.randn(y, x) * np.sqrt(2. / x) / 10 for x, y in zip(sizes[:-1], sizes[1:])]
        self.layer_biases = [np.zeros((y, 1)) for y in sizes[1:]]
        self.activation_fn_derivative = activation_fn_derivative

    def display_weights_biases(self):
        print("Final Weights and Biases:")
        for layer_index, (weights, biases) in enumerate(zip(self.layer_weights, self.layer_biases)):
            print(f"Layer {layer_index + 1} Weights:\n{weights}")
            print(f"Layer {layer_index + 1} Biases:\n{biases}")

    def propagate_forward(self, input_activation):
        activations = [input_activation]
        for biases, weights in zip(self.layer_biases, self.layer_weights[:-1]):
            input_activation = self.activation_fn(np.dot(weights, input_activation) + biases)
            activations.append(input_activation)
        final_input = np.dot(self.layer_weights[-1], input_activation) + self.layer_biases[-1]
        output_activation = softmax(final_input)
        activations.append(output_activation)
        # change
        return output_activation, activations

    def backward_propagation(self, input_val, true_val):
        weight_gradients = [np.zeros(weight.shape) for weight in self.layer_weights]
        bias_gradients = [np.zeros(bias.shape) for bias in self.layer_biases]
        
        # Forward pass to get activations
        final_act, activations = self.propagate_forward(input_val)
        
        # Start with the derivative of the loss function w.r.t. the final activation
        error = cross_entropy_derivative(final_act, true_val)
        
        # Update gradients for the output layer
        bias_gradients[-1] = error
        weight_gradients[-1] = np.dot(error, activations[-2].T)
        
        # Backpropagate the error
        for l in range(2, len(self.layer_sizes)):
            # The derivative of the activation function is applied to the output of the activation function
            # from the forward pass, hence 'activations[-l]'
            activation_derivative = self.activation_fn_derivative(activations[-l])
            
            # Correct error propagation
            error = np.dot(self.layer_weights[-l+1].T, error) * activation_derivative
            
            bias_gradients[-l] = error
            weight_gradients[-l] = np.dot(error, activations[-l-1].T)
        
        return weight_gradients, bias_gradients

    
    def update_batch(self, batch, learn_rate, regularization, total_size, optimization_method, beta, epsilon=1e-8):
        gradient_w = [np.zeros(weight.shape) for weight in self.layer_weights]
        gradient_b = [np.zeros(bias.shape) for bias in self.layer_biases]
        
        for input_val, true_val in batch:
            delta_gradient_w, delta_gradient_b = self.backward_propagation(input_val, true_val)
            gradient_w = [w + dw for w, dw in zip(gradient_w, delta_gradient_w)]
            gradient_b = [b + db for b, db in zip(gradient_b, delta_gradient_b)]

        # Update rule for weights and biases based on the optimization method
        if optimization_method == 'momentum':
            # Momentum initialization
            if not hasattr(self, 'velocity_weights'):
                self.velocity_weights = [np.zeros_like(w) for w in self.layer_weights]
                self.velocity_biases = [np.zeros_like(b) for b in self.layer_biases]

            # Update velocities
            self.velocity_weights = [beta * vw + (1 - beta) * gw / len(batch) for vw, gw in zip(self.velocity_weights, gradient_w)]
            self.velocity_biases = [beta * vb + (1 - beta) * gb / len(batch) for vb, gb in zip(self.velocity_biases, gradient_b)]
            
            # Update weights and biases
            self.layer_weights = [(1 - learn_rate * (regularization / total_size)) * w - learn_rate * vw
                                  for w, vw in zip(self.layer_weights, self.velocity_weights)]
            self.layer_biases = [b - learn_rate * vb for b, vb in zip(self.layer_biases, self.velocity_biases)]
        elif optimization_method == 'rmsprop':
            # RMSprop initialization
            if not hasattr(self, 'squared_gradients_weights'):
                self.squared_gradients_weights = [np.zeros_like(w) for w in self.layer_weights]
                self.squared_gradients_biases = [np.zeros_like(b) for b in self.layer_biases]

            # Update squared gradients
            self.squared_gradients_weights = [beta * sgw + (1 - beta) * (gw**2) / len(batch)
                                              for sgw, gw in zip(self.squared_gradients_weights, gradient_w)]
            self.squared_gradients_biases = [beta * sgb + (1 - beta) * (gb**2) / len(batch)
                                             for sgb, gb in zip(self.squared_gradients_biases, gradient_b)]
            
            # Update weights and biases
            self.layer_weights = [(1 - learn_rate * (regularization / total_size)) * w - 
                                  learn_rate * gw / (np.sqrt(sgw) + epsilon)
                                  for w, sgw, gw in zip(self.layer_weights, self.squared_gradients_weights, gradient_w)]
            self.layer_biases = [b - learn_rate * gb / (np.sqrt(sgb) + epsilon)
                                 for b, sgb, gb in zip(self.layer_biases, self.squared_gradients_biases, gradient_b)]

    def train(self, training_data, epochs, learn_rate, batch_size, regularization=0.0, optimization_method='rmsprop', beta=0.9, epsilon=1e-8, visual_interval=10, X_val=None, y_val=None, target = None,adaptive_learn_rate = True, decay_rate=0.1, decay_step=100):
        n = len(training_data)

        f1_history = []
        
        # Determine mini-batch size based on whether the batch_size_input is a percentage or fixed value
        if isinstance(batch_size, float):  # If batch_size_input is a float, treat it as a percentage
            batch_size = max(1, min(n, int(n * batch_size / 100)))
        elif isinstance(batch_size, int):  # If batch_size_input is an integer, treat it as a fixed size
            batch_size = max(1, min(n, batch_size))
        else:  # Raise an error if batch_size_input is neither float nor int
            raise ValueError("batch_size_input must be an integer (fixed size) or a float (percentage of dataset)")
        
        
        for epoch in range(epochs):
            np.random.shuffle(training_data)
            mini_batches = [training_data[k:k + batch_size] for k in range(0, n, batch_size)]
    
            for mini_batch in mini_batches:
                self.update_batch(mini_batch, learn_rate, regularization, n, optimization_method, beta, epsilon)
            if adaptive_learn_rate:
                # Decay the learning rate every decay_step epochs
                if epoch % decay_step == 0 and epoch > 0:
                    learn_rate *= (1. / (1. + decay_rate * epoch))
    
            if epoch % visual_interval == 0:
                predictions = np.argmax(np.array([self.propagate_forward(x.reshape(-1, 1))[0] for x in X_val]), axis=1)
                accuracy = np.mean(predictions == y_val)
                print(f'epoch: {epoch}', f'Test accuracy: {accuracy}')
                
                f1_weighted = f1_score(y_val, predictions, average='weighted')
                f1_history.append((epoch,f1_weighted))
                print(f"F1 Score (Weighted): {f1_weighted}")
                
                if f1_weighted > target:
                    break
        return f1_history

    def visualize_network(self):
        fig, ax = plt.subplots()
    
        # For each layer
        for i in range(len(self.layer_sizes)):
            # Draw the nodes of the layer
            ax.scatter([i]*self.layer_sizes[i], range(self.layer_sizes[i]))
    
            # Draw the weights connecting the nodes of the current layer to the next layer
            if i < len(self.layer_sizes) - 1:
                for j in range(self.layer_sizes[i]):
                    for k in range(self.layer_sizes[i+1]):
                        weight = self.layer_weights[i][k, j]
                        color = 'g' if weight >= 0 else 'r'
                        ax.plot([i, i+1], [j, k], color=color)
                        # Add weight value on the line
                        ax.text(i + 0.5, (j + k) / 2, f'{weight:.2f}', color=color, ha='center')
    
            
    
        plt.show()




In [18]:
class DataScaler:
    def __init__(self, method="standardization"):
        self.method = method
        self.min = None
        self.max = None
        self.mean = None
        self.std = None

    def fit_transform(self, data):
        if self.method == "min_max":
            return self.fit_transform_min_max(data)
        elif self.method == "standardization":
            return self.fit_transform_standardization(data)
        else:
            raise ValueError("Unsupported scaling method")

    def transform(self, data):
        if self.method == "min_max":
            return self.transform_min_max(data)
        elif self.method == "standardization":
            return self.transform_standardization(data)
        else:
            raise ValueError("Unsupported scaling method")

    def inverse_transform(self, data):
        if self.method == "min_max":
            return self.inverse_transform_min_max(data)
        elif self.method == "standardization":
            return self.inverse_transform_standardization(data)
        else:
            raise ValueError("Unsupported scaling method")

    def fit_transform_min_max(self, data):
        self.min = np.min(data, axis=0)
        self.max = np.max(data, axis=0)
        return (data - self.min) / (self.max - self.min)

    def transform_min_max(self, data):
        return (data - self.min) / (self.max - self.min)

    def inverse_transform_min_max(self, data):
        return data * (self.max - self.min) + self.min

    def fit_transform_standardization(self, data):
        self.mean = np.mean(data, axis=0)
        self.std = np.std(data, axis=0)
        return (data - self.mean) / self.std

    def transform_standardization(self, data):
        return (data - self.mean) / self.std

    def inverse_transform_standardization(self, data):
        return data * self.std + self.mean

In [19]:
def plot_mse(mse_history):
    plt.plot(mse_history)
    plt.title('MSE Over Epochs')
    plt.xlabel('Epoch')
    plt.ylabel('Mean Squared Error')
    plt.show()

# Loading data

In [20]:
df_train_rings3_regular = pd.read_csv('./data/classification/rings3-regular-training.csv')
df_test_rings3_regular = pd.read_csv('./data/classification/rings3-regular-test.csv')

In [21]:
df_train_easy = pd.read_csv('./data/classification/easy-training.csv')
df_test_easy = pd.read_csv('./data/classification/easy-test.csv')

In [22]:
df_train_xor3 = pd.read_csv('./data/classification/xor3-training.csv')
df_test_xor3 = pd.read_csv('./data/classification/xor3-test.csv')

In [23]:
df_train_rings5_regular = pd.read_csv('./data/classification/rings5-regular-training.csv')
df_test_rings5_regular = pd.read_csv('./data/classification/rings5-regular-test.csv')

### rings 3 regular dataset

In [24]:
scaler_X = DataScaler("standardization")

In [25]:
# Scale features
X1_train_rings = df_train_rings3_regular[['x']].values.reshape(-1, 1)
X1_test_rings = df_test_rings3_regular[['x']].values.reshape(-1, 1)


In [26]:
X2_train_rings = df_train_rings3_regular[['y']].values.reshape(-1, 1)
X2_test_rings = df_test_rings3_regular[['y']].values.reshape(-1, 1)

In [27]:
X_train_rings = np.hstack((X1_train_rings, X2_train_rings))
X_test_rings = np.hstack((X1_test_rings, X2_test_rings))

In [28]:
X_train_rings_scaled = np.hstack((scaler_X.fit_transform(X1_train_rings), scaler_X.fit_transform(X2_train_rings)))
X_test_rings_scaled = np.hstack((scaler_X.transform(X1_test_rings), scaler_X.transform(X2_test_rings)))

In [29]:
y_train_rings = df_train_rings3_regular['c'].values.reshape(-1, 1)
y_test_rings = df_test_rings3_regular['c'].values.reshape(-1, 1)

In [30]:
# Encode the 'c' column into one-hot vectors for the training and test datasets
encoder = OneHotEncoder(sparse_output=False)
y_train_encoded_rings = encoder.fit_transform(y_train_rings)
y_test_encoded_rings = encoder.transform(y_test_rings)

In [31]:
num_classes_rings = y_train_encoded_rings.shape[1] 
num_classes_rings

3

In [32]:
training_data_rings = [
    (X_train_rings[i].reshape(-1, 1), y_train_encoded_rings[i].reshape(-1, 1))
    for i in range(len(X_train_rings))
]

In [33]:
mlp_rings_sigmoid = MLP(sizes=[2, 10, 10, 3], activation_fn=sigmoid,
                activation_fn_derivative=sigmoid_derivative)  # Example layer setup

# Train the MLP using your training data

f1_sigmoid = mlp_rings_sigmoid.train(training_data=training_data_rings, epochs=1000, learn_rate=0.01, batch_size=64, X_val=X_test_rings,
                y_val=y_test_rings, visual_interval=10, target=0.75, decay_rate=0.01, adaptive_learn_rate=False)

epoch: 0 Test accuracy: 0.585
F1 Score (Weighted): 0.5216714539031263


epoch: 10 Test accuracy: 0.6085
F1 Score (Weighted): 0.6105352864582114
epoch: 20 Test accuracy: 0.794
F1 Score (Weighted): 0.7915422324741564


In [34]:
mlp_rings_relu = MLP(sizes=[2, 10, 10, 3], activation_fn=relu,
                activation_fn_derivative=relu_derivative)  # Example layer setup

# Train the MLP using your training data

f1_relu = mlp_rings_relu.train(training_data=training_data_rings, epochs=1000, learn_rate=0.01, batch_size=64, X_val=X_test_rings,
                y_val=y_test_rings, visual_interval=10, target=0.75, decay_rate=0.01, adaptive_learn_rate=False)

epoch: 0 Test accuracy: 0.384
F1 Score (Weighted): 0.21336416184971096
epoch: 10 Test accuracy: 0.4075
F1 Score (Weighted): 0.23595914742451154
epoch: 20 Test accuracy: 0.4075
F1 Score (Weighted): 0.23595914742451154
epoch: 30 Test accuracy: 0.208
F1 Score (Weighted): 0.07162913907284768
epoch: 40 Test accuracy: 0.208
F1 Score (Weighted): 0.07162913907284768
epoch: 50 Test accuracy: 0.208
F1 Score (Weighted): 0.07162913907284768
epoch: 60 Test accuracy: 0.4075
F1 Score (Weighted): 0.23595914742451154
epoch: 70 Test accuracy: 0.3845
F1 Score (Weighted): 0.21356482484651498
epoch: 80 Test accuracy: 0.208
F1 Score (Weighted): 0.07162913907284768
epoch: 90 Test accuracy: 0.208
F1 Score (Weighted): 0.07162913907284768
epoch: 100 Test accuracy: 0.3845
F1 Score (Weighted): 0.21356482484651498
epoch: 110 Test accuracy: 0.208
F1 Score (Weighted): 0.07162913907284768
epoch: 120 Test accuracy: 0.208
F1 Score (Weighted): 0.07162913907284768
epoch: 130 Test accuracy: 0.4075
F1 Score (Weighted): 0.2

KeyboardInterrupt: 

In [185]:
mlp_rings_tanh = MLP(sizes=[2, 10, 10, 3], activation_fn=tanh,
                activation_fn_derivative=tanh_derivative)  # Example layer setup

# Train the MLP using your training data

f1_tanh = mlp_rings_tanh.train(training_data=training_data_rings, epochs=1000, learn_rate=0.01, batch_size=64, X_val=X_test_rings,
                y_val=y_test_rings, visual_interval=10, target=0.75, decay_rate=0.01, adaptive_learn_rate=False)

epoch: 0 Test accuracy: 0.4475
F1 Score (Weighted): 0.4453187956870763
epoch: 10 Test accuracy: 0.557
F1 Score (Weighted): 0.5121815168180229
epoch: 20 Test accuracy: 0.467
F1 Score (Weighted): 0.45612421598716346
epoch: 30 Test accuracy: 0.5515
F1 Score (Weighted): 0.5368322549359558
epoch: 40 Test accuracy: 0.384
F1 Score (Weighted): 0.3237195033451202
epoch: 50 Test accuracy: 0.4725
F1 Score (Weighted): 0.47680612123847055
epoch: 60 Test accuracy: 0.4285
F1 Score (Weighted): 0.34408223667297766
epoch: 70 Test accuracy: 0.395
F1 Score (Weighted): 0.3200558256274899
epoch: 80 Test accuracy: 0.599
F1 Score (Weighted): 0.5297909852453242
epoch: 90 Test accuracy: 0.46
F1 Score (Weighted): 0.46041553318120954
epoch: 100 Test accuracy: 0.532
F1 Score (Weighted): 0.5143005632440422
epoch: 110 Test accuracy: 0.4905
F1 Score (Weighted): 0.47672854109470014
epoch: 120 Test accuracy: 0.339
F1 Score (Weighted): 0.31388012720417413
epoch: 130 Test accuracy: 0.412
F1 Score (Weighted): 0.3261529781

In [186]:
mlp_rings_linear = MLP(sizes=[2, 10, 10, 3], activation_fn=linear,
                activation_fn_derivative=linear_derivative)  # Example layer setup

# Train the MLP using your training data

f1_linear = mlp_rings_linear.train(training_data=training_data_rings, epochs=1000, learn_rate=0.01, batch_size=64, X_val=X_test_rings,
                y_val=y_test_rings, visual_interval=10, target=0.75, decay_rate=0.01, adaptive_learn_rate=False)

epoch: 0 Test accuracy: 0.389
F1 Score (Weighted): 0.36142709881682705
epoch: 10 Test accuracy: 0.393
F1 Score (Weighted): 0.391524981262858
epoch: 20 Test accuracy: 0.467
F1 Score (Weighted): 0.46195998830501567
epoch: 30 Test accuracy: 0.413
F1 Score (Weighted): 0.36929225049328845
epoch: 40 Test accuracy: 0.5445
F1 Score (Weighted): 0.48279220968491254
epoch: 50 Test accuracy: 0.401
F1 Score (Weighted): 0.32438783518262976
epoch: 60 Test accuracy: 0.4005
F1 Score (Weighted): 0.31994056311949626
epoch: 70 Test accuracy: 0.578
F1 Score (Weighted): 0.5227157838663086
epoch: 80 Test accuracy: 0.3515
F1 Score (Weighted): 0.3196537057376341
epoch: 90 Test accuracy: 0.4565
F1 Score (Weighted): 0.45232138557321117
epoch: 100 Test accuracy: 0.499
F1 Score (Weighted): 0.47782672024667866
epoch: 110 Test accuracy: 0.507
F1 Score (Weighted): 0.4495386711241431
epoch: 120 Test accuracy: 0.331
F1 Score (Weighted): 0.3319395751724311
epoch: 130 Test accuracy: 0.314
F1 Score (Weighted): 0.313672623

In [187]:
# Predict on the test set
# This might involve looping through X_test_scaled and using your model's predict method
predictions_rings_sigmoid = np.argmax(np.array([mlp_rings_sigmoid.propagate_forward(x.reshape(-1, 1))[0] for x in X_test_rings]), axis=1)

In [188]:
# Calculate accuracy or other metrics
accuracy_rings = np.mean(predictions_rings_sigmoid == y_test_rings)
print(f'Test accuracy: {accuracy_rings}')


Test accuracy: 0.777


In [189]:
# Calculate F1 Score
f1_weighted_rings = f1_score(y_test_rings, predictions_rings_sigmoid, average='weighted')

print(f"F1 Score (Weighted): {f1_weighted_rings}")

F1 Score (Weighted): 0.7758968530554419


In [190]:
# Predict on the test set
# This might involve looping through X_test_scaled and using your model's predict method
predictions_rings_relu = np.argmax(np.array([mlp_rings_relu.propagate_forward(x.reshape(-1, 1))[0] for x in X_test_rings]), axis=1)

In [191]:
# Calculate accuracy or other metrics
accuracy_rings = np.mean(predictions_rings_relu == y_test_rings)
print(f'Test accuracy: {accuracy_rings}')


Test accuracy: 0.3845


In [192]:
# Calculate F1 Score
f1_weighted_rings = f1_score(y_test_rings, predictions_rings_relu, average='weighted')

print(f"F1 Score (Weighted): {f1_weighted_rings}")

F1 Score (Weighted): 0.21356482484651498


### rings 5 regular dataset

In [195]:
scaler_X = DataScaler("standardization")

In [196]:
# Scale features
X1_train_rings = df_train_rings5_regular[['x']].values.reshape(-1, 1)
X1_test_rings = df_test_rings5_regular[['x']].values.reshape(-1, 1)


In [197]:
X2_train_rings = df_train_rings5_regular[['y']].values.reshape(-1, 1)
X2_test_rings = df_test_rings5_regular[['y']].values.reshape(-1, 1)

In [198]:
X_train_rings = np.hstack((X1_train_rings, X2_train_rings))
X_test_rings = np.hstack((X1_test_rings, X2_test_rings))

In [199]:
X_train_rings_scaled = np.hstack((scaler_X.fit_transform(X1_train_rings), scaler_X.fit_transform(X2_train_rings)))
X_test_rings_scaled = np.hstack((scaler_X.transform(X1_test_rings), scaler_X.transform(X2_test_rings)))

In [200]:
y_train_rings = df_train_rings5_regular['c'].values.reshape(-1, 1)
y_test_rings = df_test_rings5_regular['c'].values.reshape(-1, 1)

In [201]:
# Encode the 'c' column into one-hot vectors for the training and test datasets
encoder = OneHotEncoder(sparse_output=False)
y_train_encoded_rings = encoder.fit_transform(y_train_rings)
y_test_encoded_rings = encoder.transform(y_test_rings)

In [203]:
num_classes_rings = y_train_encoded_rings.shape[1] 
num_classes_rings

5

In [204]:
training_data_rings = [
    (X_train_rings[i].reshape(-1, 1), y_train_encoded_rings[i].reshape(-1, 1))
    for i in range(len(X_train_rings))
]

In [205]:
mlp_rings_sigmoid_2 = MLP(sizes=[2, 10, 10, 5], activation_fn=sigmoid,
                activation_fn_derivative=sigmoid_derivative)  # Example layer setup

# Train the MLP using your training data

f1_sigmoid_2 = mlp_rings_sigmoid_2.train(training_data=training_data_rings, epochs=1000, learn_rate=0.01, batch_size=64, X_val=X_test_rings,
                y_val=y_test_rings, visual_interval=10, target=0.75, decay_rate=0.01, adaptive_learn_rate=False)

epoch: 0 Test accuracy: 0.3865
F1 Score (Weighted): 0.327492427666276
epoch: 10 Test accuracy: 0.517
F1 Score (Weighted): 0.49566571246812774
epoch: 20 Test accuracy: 0.645
F1 Score (Weighted): 0.6342029991949544
epoch: 30 Test accuracy: 0.6595
F1 Score (Weighted): 0.6465113317170362
epoch: 40 Test accuracy: 0.644
F1 Score (Weighted): 0.6418488188416405
epoch: 50 Test accuracy: 0.655
F1 Score (Weighted): 0.65409953449881
epoch: 60 Test accuracy: 0.654
F1 Score (Weighted): 0.656715836580395
epoch: 70 Test accuracy: 0.5905
F1 Score (Weighted): 0.5835914714166471
epoch: 80 Test accuracy: 0.657
F1 Score (Weighted): 0.65422311954933
epoch: 90 Test accuracy: 0.6075
F1 Score (Weighted): 0.5795123141300352
epoch: 100 Test accuracy: 0.6525
F1 Score (Weighted): 0.6439482286786011
epoch: 110 Test accuracy: 0.661
F1 Score (Weighted): 0.657582159974674
epoch: 120 Test accuracy: 0.5695
F1 Score (Weighted): 0.5618587915211405
epoch: 130 Test accuracy: 0.6555
F1 Score (Weighted): 0.6541351381752766
ep

In [206]:
mlp_rings_relu_2 = MLP(sizes=[2, 10, 10, 5], activation_fn=relu,
                activation_fn_derivative=relu_derivative)  # Example layer setup

# Train the MLP using your training data

f1_relu_2 = mlp_rings_relu_2.train(training_data=training_data_rings, epochs=1000, learn_rate=0.01, batch_size=64, X_val=X_test_rings,
                y_val=y_test_rings, visual_interval=10, target=0.75, decay_rate=0.01, adaptive_learn_rate=False)

epoch: 0 Test accuracy: 0.149
F1 Score (Weighted): 0.1308927361605541
epoch: 10 Test accuracy: 0.32
F1 Score (Weighted): 0.15515151515151515
epoch: 20 Test accuracy: 0.32
F1 Score (Weighted): 0.15515151515151515
epoch: 30 Test accuracy: 0.054
F1 Score (Weighted): 0.005533206831119544
epoch: 40 Test accuracy: 0.054
F1 Score (Weighted): 0.005533206831119544
epoch: 50 Test accuracy: 0.1255
F1 Score (Weighted): 0.027988005330964013
epoch: 60 Test accuracy: 0.346
F1 Score (Weighted): 0.17788410104011884
epoch: 70 Test accuracy: 0.1545
F1 Score (Weighted): 0.04135166738847986
epoch: 80 Test accuracy: 0.1545
F1 Score (Weighted): 0.04135166738847986
epoch: 90 Test accuracy: 0.1545
F1 Score (Weighted): 0.04135166738847986
epoch: 100 Test accuracy: 0.32
F1 Score (Weighted): 0.15515151515151515
epoch: 110 Test accuracy: 0.32
F1 Score (Weighted): 0.15515151515151515
epoch: 120 Test accuracy: 0.32
F1 Score (Weighted): 0.15515151515151515
epoch: 130 Test accuracy: 0.1255
F1 Score (Weighted): 0.02798

In [207]:
mlp_rings_tanh_2 = MLP(sizes=[2, 10, 10, 5], activation_fn=tanh,
                activation_fn_derivative=tanh_derivative)  # Example layer setup

# Train the MLP using your training data

f1_tanh_2 = mlp_rings_tanh_2.train(training_data=training_data_rings, epochs=1000, learn_rate=0.01, batch_size=64, X_val=X_test_rings,
                y_val=y_test_rings, visual_interval=10, target=0.75, decay_rate=0.01, adaptive_learn_rate=False)

epoch: 0 Test accuracy: 0.261
F1 Score (Weighted): 0.22648686450496275
epoch: 10 Test accuracy: 0.3175
F1 Score (Weighted): 0.23466942598763205
epoch: 20 Test accuracy: 0.33
F1 Score (Weighted): 0.33966421076335834
epoch: 30 Test accuracy: 0.31
F1 Score (Weighted): 0.2934706126847865
epoch: 40 Test accuracy: 0.365
F1 Score (Weighted): 0.33309472642702576
epoch: 50 Test accuracy: 0.4025
F1 Score (Weighted): 0.37989024567877194
epoch: 60 Test accuracy: 0.4415
F1 Score (Weighted): 0.39839079442031605
epoch: 70 Test accuracy: 0.3435
F1 Score (Weighted): 0.27763011803899484
epoch: 80 Test accuracy: 0.43
F1 Score (Weighted): 0.40474476042420854
epoch: 90 Test accuracy: 0.3255
F1 Score (Weighted): 0.23872326447573655
epoch: 100 Test accuracy: 0.4055
F1 Score (Weighted): 0.36819335784060486
epoch: 110 Test accuracy: 0.359
F1 Score (Weighted): 0.34922718941109654
epoch: 120 Test accuracy: 0.318
F1 Score (Weighted): 0.2763762624125083
epoch: 130 Test accuracy: 0.4465
F1 Score (Weighted): 0.39795

In [208]:
mlp_rings_linear_2 = MLP(sizes=[2, 10, 10, 5], activation_fn=linear,
                activation_fn_derivative=linear_derivative)  # Example layer setup

# Train the MLP using your training data

f1_linear_2 = mlp_rings_linear_2.train(training_data=training_data_rings, epochs=1000, learn_rate=0.01, batch_size=64, X_val=X_test_rings,
                y_val=y_test_rings, visual_interval=10, target=0.75, decay_rate=0.01, adaptive_learn_rate=False)

epoch: 0 Test accuracy: 0.383
F1 Score (Weighted): 0.28441312889004655
epoch: 10 Test accuracy: 0.3195
F1 Score (Weighted): 0.2386646829176723
epoch: 20 Test accuracy: 0.175
F1 Score (Weighted): 0.1876981485446615
epoch: 30 Test accuracy: 0.2565
F1 Score (Weighted): 0.1627732645699262
epoch: 40 Test accuracy: 0.1855
F1 Score (Weighted): 0.2238263609977174
epoch: 50 Test accuracy: 0.261
F1 Score (Weighted): 0.18086192807268628
epoch: 60 Test accuracy: 0.265
F1 Score (Weighted): 0.23549470427921562
epoch: 70 Test accuracy: 0.411
F1 Score (Weighted): 0.37807308312386
epoch: 80 Test accuracy: 0.32
F1 Score (Weighted): 0.26578508261490663
epoch: 90 Test accuracy: 0.3995
F1 Score (Weighted): 0.37221949028691204
epoch: 100 Test accuracy: 0.394
F1 Score (Weighted): 0.3431656175368678
epoch: 110 Test accuracy: 0.399
F1 Score (Weighted): 0.38835241611867277
epoch: 120 Test accuracy: 0.216
F1 Score (Weighted): 0.22181026371092075
epoch: 130 Test accuracy: 0.228
F1 Score (Weighted): 0.240387065636

In [209]:
# Predict on the test set
# This might involve looping through X_test_scaled and using your model's predict method
predictions_rings_sigmoid = np.argmax(np.array([mlp_rings_sigmoid_2.propagate_forward(x.reshape(-1, 1))[0] for x in X_test_rings]), axis=1)

In [210]:
# Calculate accuracy or other metrics
accuracy_rings = np.mean(predictions_rings_sigmoid == y_test_rings)
print(f'Test accuracy: {accuracy_rings}')


Test accuracy: 0.7565


In [211]:
# Calculate F1 Score
f1_weighted_rings = f1_score(y_test_rings, predictions_rings_sigmoid, average='weighted')

print(f"F1 Score (Weighted): {f1_weighted_rings}")

F1 Score (Weighted): 0.7552693634940021


In [212]:
# Predict on the test set
# This might involve looping through X_test_scaled and using your model's predict method
predictions_rings_relu = np.argmax(np.array([mlp_rings_relu_2.propagate_forward(x.reshape(-1, 1))[0] for x in X_test_rings]), axis=1)

In [213]:
# Calculate accuracy or other metrics
accuracy_rings = np.mean(predictions_rings_relu == y_test_rings)
print(f'Test accuracy: {accuracy_rings}')


Test accuracy: 0.054


In [214]:
# Calculate F1 Score
f1_weighted_rings = f1_score(y_test_rings, predictions_rings_relu, average='weighted')

print(f"F1 Score (Weighted): {f1_weighted_rings}")

F1 Score (Weighted): 0.005533206831119544
