# Defining MLP

In [302]:
import os
os.chdir('/Users/mikolajmroz/Developer/Computational_Intelligence_Methods')
print(os.getcwd())

/Users/mikolajmroz/Developer/Computational_Intelligence_Methods


In [303]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import f1_score

In [304]:
def relu(x):
    return np.maximum(0, x)

In [305]:
def relu_derivative(x):
    return np.where(x > 0, 1, 0)

In [309]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [310]:
def sigmoid_derivative(x):
    # Corrected sigmoid derivative to properly compute the derivative
    return sigmoid(x) * (1 - sigmoid(x))

In [311]:
def mse(predictions, targets):
    return np.mean((predictions - targets) ** 2)

In [312]:
def softmax(x):
    exp_x = np.exp(x - np.max(x))  # Subtract max for numerical stability
    return exp_x / exp_x.sum(axis=0, keepdims=True)

In [313]:
def cross_entropy(softmax_output, y_true):
    # Assuming y_true is one-hot encoded
    m = y_true.shape[1]  # Number of examples
    log_likelihood = -np.log(softmax_output[y_true.argmax(axis=0), range(m)])
    loss = np.sum(log_likelihood) / m
    return loss

In [314]:
def cross_entropy_derivative(softmax_output, y_true):

    corrected_softmax_output = softmax_output - y_true
    
    return corrected_softmax_output
        


In [315]:
def softmax_derivative(softmax_output):
    # For softmax combined with cross-entropy loss, the derivative simplifies
    # the gradient calculation in backpropagation, directly using output error.
    return softmax_output

In [393]:
class MLP:
    def __init__(self, sizes, activation_fn=relu, activation_fn_derivative=relu_derivative):
        self.layer_sizes = sizes
        self.activation_fn = activation_fn
        self.layer_weights = [np.random.randn(y, x) * np.sqrt(2. / x) for x, y in zip(sizes[:-1], sizes[1:])]
        self.layer_biases = [np.zeros((y, 1)) for y in sizes[1:]]
        self.activation_fn_derivative = activation_fn_derivative

    def display_weights_biases(self):
        print("Final Weights and Biases:")
        for layer_index, (weights, biases) in enumerate(zip(self.layer_weights, self.layer_biases)):
            print(f"Layer {layer_index + 1} Weights:\n{weights}")
            print(f"Layer {layer_index + 1} Biases:\n{biases}")

    def propagate_forward(self, input_activation):
        activations = [input_activation]
        for biases, weights in zip(self.layer_biases, self.layer_weights[:-1]):
            input_activation = self.activation_fn(np.dot(weights, input_activation) + biases)
            activations.append(input_activation)
        final_input = np.dot(self.layer_weights[-1], input_activation) + self.layer_biases[-1]
        output_activation = softmax(final_input)
        activations.append(output_activation)
        # change
        return output_activation, activations

    def backward_propagation(self, input_val, true_val):
        weight_gradients = [np.zeros(weight.shape) for weight in self.layer_weights]
        bias_gradients = [np.zeros(bias.shape) for bias in self.layer_biases]
        final_act, activations = self.propagate_forward(input_val)
        error = cross_entropy_derivative(final_act, true_val)
        # change
        bias_gradients[-1] = error
        weight_gradients[-1] = np.dot(error, activations[-2].T)
        for l in range(2, len(self.layer_sizes)):
            z = np.dot(self.layer_weights[-l+1].T, error)
            error = self.activation_fn_derivative(z) * z
            bias_gradients[-l] = error
            weight_gradients[-l] = np.dot(error, activations[-l-1].T)
        return weight_gradients, bias_gradients
    
    def update_batch(self, batch, learn_rate, regularization, total_size, optimization_method, beta, epsilon=1e-8):
        gradient_w = [np.zeros(weight.shape) for weight in self.layer_weights]
        gradient_b = [np.zeros(bias.shape) for bias in self.layer_biases]
        
        for input_val, true_val in batch:
            delta_gradient_w, delta_gradient_b = self.backward_propagation(input_val, true_val)
            gradient_w = [w + dw for w, dw in zip(gradient_w, delta_gradient_w)]
            gradient_b = [b + db for b, db in zip(gradient_b, delta_gradient_b)]

        # Update rule for weights and biases based on the optimization method
        if optimization_method == 'momentum':
            # Momentum initialization
            if not hasattr(self, 'velocity_weights'):
                self.velocity_weights = [np.zeros_like(w) for w in self.layer_weights]
                self.velocity_biases = [np.zeros_like(b) for b in self.layer_biases]

            # Update velocities
            self.velocity_weights = [beta * vw + (1 - beta) * gw / len(batch) for vw, gw in zip(self.velocity_weights, gradient_w)]
            self.velocity_biases = [beta * vb + (1 - beta) * gb / len(batch) for vb, gb in zip(self.velocity_biases, gradient_b)]
            
            # Update weights and biases
            self.layer_weights = [(1 - learn_rate * (regularization / total_size)) * w - learn_rate * vw
                                  for w, vw in zip(self.layer_weights, self.velocity_weights)]
            self.layer_biases = [b - learn_rate * vb for b, vb in zip(self.layer_biases, self.velocity_biases)]
        elif optimization_method == 'rmsprop':
            # RMSprop initialization
            if not hasattr(self, 'squared_gradients_weights'):
                self.squared_gradients_weights = [np.zeros_like(w) for w in self.layer_weights]
                self.squared_gradients_biases = [np.zeros_like(b) for b in self.layer_biases]

            # Update squared gradients
            self.squared_gradients_weights = [beta * sgw + (1 - beta) * (gw**2) / len(batch)
                                              for sgw, gw in zip(self.squared_gradients_weights, gradient_w)]
            self.squared_gradients_biases = [beta * sgb + (1 - beta) * (gb**2) / len(batch)
                                             for sgb, gb in zip(self.squared_gradients_biases, gradient_b)]
            
            # Update weights and biases
            self.layer_weights = [(1 - learn_rate * (regularization / total_size)) * w - 
                                  learn_rate * gw / (np.sqrt(sgw) + epsilon)
                                  for w, sgw, gw in zip(self.layer_weights, self.squared_gradients_weights, gradient_w)]
            self.layer_biases = [b - learn_rate * gb / (np.sqrt(sgb) + epsilon)
                                 for b, sgb, gb in zip(self.layer_biases, self.squared_gradients_biases, gradient_b)]

    def train(self, training_data, epochs, learn_rate, batch_size, regularization=0.0, optimization_method='rmsprop', beta=0.9, epsilon=1e-8, visual_interval=10, X_val=None, y_val=None, target = None,adaptive_learn_rate = True, decay_rate=0.1, decay_step=100):
        n = len(training_data)
        
        # Determine mini-batch size based on whether the batch_size_input is a percentage or fixed value
        if isinstance(batch_size, float):  # If batch_size_input is a float, treat it as a percentage
            batch_size = max(1, min(n, int(n * batch_size / 100)))
        elif isinstance(batch_size, int):  # If batch_size_input is an integer, treat it as a fixed size
            batch_size = max(1, min(n, batch_size))
        else:  # Raise an error if batch_size_input is neither float nor int
            raise ValueError("batch_size_input must be an integer (fixed size) or a float (percentage of dataset)")
        
        
        for epoch in range(epochs):
            np.random.shuffle(training_data)
            mini_batches = [training_data[k:k + batch_size] for k in range(0, n, batch_size)]
    
            for mini_batch in mini_batches:
                self.update_batch(mini_batch, learn_rate, regularization, n, optimization_method, beta, epsilon)
            if adaptive_learn_rate:
                # Decay the learning rate every decay_step epochs
                if epoch % decay_step == 0 and epoch > 0:
                    learn_rate *= (1. / (1. + decay_rate * epoch))
    
            if epoch % visual_interval == 0:
                predictions = np.argmax(np.array([self.propagate_forward(x.reshape(-1, 1))[0] for x in X_val]), axis=1)
                accuracy = np.mean(predictions == y_val)
                print(f'epoch: {epoch}', f'Test accuracy: {accuracy}')
                f1_weighted = f1_score(y_val, predictions, average='weighted')
                print(f"F1 Score (Weighted): {f1_weighted}")
                
                if f1_weighted > target:
                    break



In [350]:
class DataScaler:
    def __init__(self, method="standardization"):
        self.method = method
        self.min = None
        self.max = None
        self.mean = None
        self.std = None

    def fit_transform(self, data):
        if self.method == "min_max":
            return self.fit_transform_min_max(data)
        elif self.method == "standardization":
            return self.fit_transform_standardization(data)
        else:
            raise ValueError("Unsupported scaling method")

    def transform(self, data):
        if self.method == "min_max":
            return self.transform_min_max(data)
        elif self.method == "standardization":
            return self.transform_standardization(data)
        else:
            raise ValueError("Unsupported scaling method")

    def inverse_transform(self, data):
        if self.method == "min_max":
            return self.inverse_transform_min_max(data)
        elif self.method == "standardization":
            return self.inverse_transform_standardization(data)
        else:
            raise ValueError("Unsupported scaling method")

    def fit_transform_min_max(self, data):
        self.min = np.min(data, axis=0)
        self.max = np.max(data, axis=0)
        return (data - self.min) / (self.max - self.min)

    def transform_min_max(self, data):
        return (data - self.min) / (self.max - self.min)

    def inverse_transform_min_max(self, data):
        return data * (self.max - self.min) + self.min

    def fit_transform_standardization(self, data):
        self.mean = np.mean(data, axis=0)
        self.std = np.std(data, axis=0)
        return (data - self.mean) / self.std

    def transform_standardization(self, data):
        return (data - self.mean) / self.std

    def inverse_transform_standardization(self, data):
        return data * self.std + self.mean

In [351]:
def plot_mse(mse_history):
    plt.plot(mse_history)
    plt.title('MSE Over Epochs')
    plt.xlabel('Epoch')
    plt.ylabel('Mean Squared Error')
    plt.show()

# Loading data

In [352]:
df_train_rings3_regular = pd.read_csv('./data/classification/rings3-regular-training.csv')
df_test_rings3_regular = pd.read_csv('./data/classification/rings3-regular-test.csv')

In [353]:
df_train_easy = pd.read_csv('./data/classification/easy-training.csv')
df_test_easy = pd.read_csv('./data/classification/easy-test.csv')

In [354]:
df_train_xor3 = pd.read_csv('./data/classification/xor3-training.csv')
df_test_xor3 = pd.read_csv('./data/classification/xor3-test.csv')

### rings 3 regular dataset

In [394]:
scaler_X = DataScaler("standardization")

In [395]:
# Scale features
X1_train_rings = df_train_rings3_regular[['x']].values.reshape(-1, 1)
X1_test_rings = df_test_rings3_regular[['x']].values.reshape(-1, 1)


In [396]:
X2_train_rings = df_train_rings3_regular[['y']].values.reshape(-1, 1)
X2_test_rings = df_test_rings3_regular[['y']].values.reshape(-1, 1)

In [397]:
X_train_rings = np.hstack((X1_train_rings, X2_train_rings))
X_test_rings = np.hstack((X1_test_rings, X2_test_rings))

In [398]:
X_train_rings_scaled = np.hstack((scaler_X.fit_transform(X1_train_rings), scaler_X.fit_transform(X2_train_rings)))
X_test_rings_scaled = np.hstack((scaler_X.transform(X1_test_rings), scaler_X.transform(X2_test_rings)))

In [399]:
y_train_rings = df_train_rings3_regular['c'].values.reshape(-1, 1)
y_test_rings = df_test_rings3_regular['c'].values.reshape(-1, 1)

In [400]:
# Encode the 'c' column into one-hot vectors for the training and test datasets
encoder = OneHotEncoder(sparse_output=False)
y_train_encoded_rings = encoder.fit_transform(y_train_rings)
y_test_encoded_rings = encoder.transform(y_test_rings)

In [401]:
num_classes_rings = y_train_encoded_rings.shape[1] 
num_classes_rings

3

In [402]:
training_data_rings = [
    (X_train_rings[i].reshape(-1, 1), y_train_encoded_rings[i].reshape(-1, 1))
    for i in range(len(X_train_rings))
]

In [None]:
import warnings

#suppress warnings
warnings.filterwarnings('ignore')



mlp_rings = MLP(sizes=[2, 5, 5, 3], activation_fn = sigmoid, activation_fn_derivative = sigmoid_derivative)  # Example layer setup

# Train the MLP using your training data

mlp_rings.train(training_data=training_data_rings, epochs=10000, learn_rate=0.1, batch_size=24, X_val=X_test_rings, y_val=y_test_rings, visual_interval=10, target = 0.75, decay_rate=0.01, adaptive_learn_rate=False)

In [271]:
# Predict on the test set
# This might involve looping through X_test_scaled and using your model's predict method
predictions_rings = np.argmax(np.array([mlp_rings.propagate_forward(x.reshape(-1, 1))[0] for x in X_test_rings]), axis=1)

In [272]:
# Calculate accuracy or other metrics
accuracy_rings = np.mean(predictions_rings == y_test_rings)
print(f'Test accuracy: {accuracy_rings}')


Test accuracy: 0.628


In [273]:
# Calculate F1 Score
f1_weighted_rings = f1_score(y_test_rings, predictions_rings, average='weighted')

print(f"F1 Score (Weighted): {f1_weighted_rings}")

F1 Score (Weighted): 0.631703204250066


### easy dataset

In [372]:
# Scale features
X1_train_easy = df_train_easy[['x']].values.reshape(-1, 1)
X1_test_easy = df_test_easy[['x']].values.reshape(-1, 1)


In [373]:
X2_train_easy = df_train_easy[['y']].values.reshape(-1, 1)
X2_test_easy = df_test_easy[['y']].values.reshape(-1, 1)

In [374]:
X_train_easy= np.hstack((X1_train_easy, X2_train_easy))
X_test_easy = np.hstack((X1_test_easy, X2_test_easy))

In [375]:
y_train_easy = df_train_easy['c'].values.reshape(-1, 1)
y_test_easy = df_test_easy['c'].values.reshape(-1, 1)

In [376]:
# Encode the 'c' column into one-hot vectors for the training and test datasets
encoder = OneHotEncoder(sparse_output=False)
y_train_encoded_easy = encoder.fit_transform(y_train_easy)
y_test_encoded_easy = encoder.transform(y_test_easy)

In [377]:
num_classes_easy = y_train_encoded_easy.shape[1] 
num_classes_easy

2

In [378]:
training_data_easy = [
    (X_train_easy[i].reshape(-1, 1), y_train_encoded_easy[i].reshape(-1, 1))
    for i in range(len(X_train_easy))
]

In [382]:
mlp_easy = MLP(sizes=[2, 2, 2])  

# Train the MLP using your training data
mlp_easy.train(training_data=training_data_easy, epochs=100, learn_rate=0.01, batch_size=20, X_val=X_test_easy, y_val=y_test_easy, visual_interval=10, target = 0.99, decay_rate=0.001, adaptive_learn_rate=False)

epoch: 0 Test accuracy: 0.792
F1 Score (Weighted): 0.7850488078726637
epoch: 10 Test accuracy: 0.952
F1 Score (Weighted): 0.9519631147540983
epoch: 20 Test accuracy: 0.99
F1 Score (Weighted): 0.9899981994166109
epoch: 30 Test accuracy: 0.99
F1 Score (Weighted): 0.9899981994166109
epoch: 40 Test accuracy: 0.992
F1 Score (Weighted): 0.991998975737789


In [391]:
mlp_easy2 = MLP(sizes=[2, 2, 2], activation_fn=sigmoid, activation_fn_derivative=sigmoid_derivative)  # Example layer setup

# Train the MLP using your training data
mlp_easy2.train(training_data=training_data_easy, epochs=100, learn_rate=0.01, batch_size=20, X_val=X_test_easy, y_val=y_test_easy, visual_interval=10, target = 0.99, decay_rate=0.001, adaptive_learn_rate=False)

epoch: 0 Test accuracy: 0.914
F1 Score (Weighted): 0.9139845149828545
epoch: 10 Test accuracy: 0.956
F1 Score (Weighted): 0.9559302135567695


  return 1 / (1 + np.exp(-x))
  return 1 / (1 + np.exp(-x))
  return 1 / (1 + np.exp(-x))


epoch: 20 Test accuracy: 0.962
F1 Score (Weighted): 0.9619566409768793
epoch: 30 Test accuracy: 0.97
F1 Score (Weighted): 0.9699801904121593


  return 1 / (1 + np.exp(-x))
  return 1 / (1 + np.exp(-x))


epoch: 40 Test accuracy: 0.968
F1 Score (Weighted): 0.9679754098360657
epoch: 50 Test accuracy: 0.97
F1 Score (Weighted): 0.97000036000144


  return 1 / (1 + np.exp(-x))
  return 1 / (1 + np.exp(-x))


epoch: 60 Test accuracy: 0.986
F1 Score (Weighted): 0.9859956859119814
epoch: 70 Test accuracy: 0.98
F1 Score (Weighted): 0.9799887912123104


  return 1 / (1 + np.exp(-x))
  return 1 / (1 + np.exp(-x))


epoch: 80 Test accuracy: 0.978
F1 Score (Weighted): 0.9779932226716437
epoch: 90 Test accuracy: 0.988
F1 Score (Weighted): 0.987997118847539


  return 1 / (1 + np.exp(-x))


In [383]:
# Predict on the test set
# This might involve looping through X_test_scaled and using your model's predict method
predictions_easy = np.argmax(np.array([mlp_easy.propagate_forward(x.reshape(-1, 1))[0] for x in X_test_easy]), axis=1)

In [384]:

# Calculate accuracy or other metrics
accuracy_easy = np.mean(predictions_easy == y_test_easy)
print(f'Test accuracy: {accuracy_easy}')


Test accuracy: 0.992


In [385]:
# Calculate F1 Score
f1_weighted_easy = f1_score(y_test_easy, predictions_easy, average='weighted')

print(f"F1 Score (Weighted): {f1_weighted_easy}")

F1 Score (Weighted): 0.991998975737789


### xor3 dataset

In [248]:
# Scale features
X1_train_xor3 = df_train_xor3[['x']].values.reshape(-1, 1)
X1_test_xor3 = df_test_xor3[['x']].values.reshape(-1, 1)


In [249]:
X2_train_xor3 = df_train_xor3[['y']].values.reshape(-1, 1)
X2_test_xor3 = df_test_xor3[['y']].values.reshape(-1, 1)

In [250]:
X_train_xor3= np.hstack((X1_train_xor3, X2_train_xor3))
X_test_xor3 = np.hstack((X1_test_xor3, X2_test_xor3))

In [251]:
y_train_xor3 = df_train_xor3['c'].values.reshape(-1, 1)
y_test_xor3 = df_test_xor3['c'].values.reshape(-1, 1)

In [252]:
# Encode the 'c' column into one-hot vectors for the training and test datasets
encoder = OneHotEncoder(sparse_output=False)
y_train_encoded_xor3 = encoder.fit_transform(y_train_xor3)
y_test_encoded_xor3 = encoder.transform(y_test_xor3)

In [253]:
num_classes_xor3 = y_train_encoded_xor3.shape[1] 
num_classes_xor3

2

In [254]:
training_data_xor3 = [
    (X_train_xor3[i].reshape(-1, 1), y_train_encoded_xor3[i].reshape(-1, 1))
    for i in range(len(X_train_xor3))
]

In [255]:
# Initialize the MLP with the proper layer sizes.
# For example, with 2 input features, a hidden layer with 10 neurons, and output layer matching the number of classes
# num_classes_rings = y_train_encoded_rings.shape[1]  # Assuming y_train_encoded is one-hot encoded
mlp_xor3 = MLP(sizes=[2, 12, 2])  # Example layer setup

# Train the MLP using your training data
# This step will vary depending on the exact implementation of your `train` method
# For example:
mlp_xor3.train(training_data=training_data_xor3, epochs=100000, learn_rate=0.01, batch_size=20, X_val=X_test_xor3, y_val=y_test_xor3, visual_interval=100, target = 0.97)

epoch: 0 Test accuracy: 0.548
F1 Score (Weighted): 0.5476745547928767
epoch: 100 Test accuracy: 0.584
F1 Score (Weighted): 0.48200701754385966
epoch: 200 Test accuracy: 0.56
F1 Score (Weighted): 0.44939588433798716
epoch: 300 Test accuracy: 0.642
F1 Score (Weighted): 0.6044149259264541
epoch: 400 Test accuracy: 0.68
F1 Score (Weighted): 0.6613734692776609
epoch: 500 Test accuracy: 0.686
F1 Score (Weighted): 0.670225873201643
epoch: 600 Test accuracy: 0.686
F1 Score (Weighted): 0.670225873201643
epoch: 700 Test accuracy: 0.686
F1 Score (Weighted): 0.670225873201643
epoch: 800 Test accuracy: 0.686
F1 Score (Weighted): 0.670225873201643
epoch: 900 Test accuracy: 0.686
F1 Score (Weighted): 0.670225873201643
epoch: 1000 Test accuracy: 0.686
F1 Score (Weighted): 0.670225873201643
epoch: 1100 Test accuracy: 0.686
F1 Score (Weighted): 0.670225873201643
epoch: 1200 Test accuracy: 0.686
F1 Score (Weighted): 0.670225873201643
epoch: 1300 Test accuracy: 0.686
F1 Score (Weighted): 0.670225873201643

KeyboardInterrupt: 

In [256]:
# Predict on the test set
# This might involve looping through X_test_scaled and using your model's predict method
predictions_xor3 = np.argmax(np.array([mlp_xor3.propagate_forward(x.reshape(-1, 1))[0] for x in X_test_xor3]), axis=1)

In [257]:

# Calculate accuracy or other metrics
accuracy_xor3 = np.mean(predictions_xor3 == y_test_xor3)
print(f'Test accuracy: {accuracy_xor3}')


Test accuracy: 0.686


In [258]:
# Calculate F1 Score
f1_weighted_xor3 = f1_score(y_test_xor3, predictions_xor3, average='weighted')

print(f"F1 Score (Weighted): {f1_weighted_xor3}")

F1 Score (Weighted): 0.670225873201643
