# Defining MLP

In [1]:
import os
# Switch to the project root so the relative './data/...' paths used by the
# loading cells resolve. NOTE(review): this is a machine-specific absolute
# path; it must be adjusted when the notebook is run elsewhere.
os.chdir('/Users/mikolajmroz/Developer/Computational_Intelligence_Methods')
# Echo the directory that is now current as a quick sanity check.
print(os.getcwd())

/Users/mikolajmroz/Developer/Computational_Intelligence_Methods


In [2]:
import numpy as np
import pandas as pd 
import matplotlib as plt

In [3]:
def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` element-wise.

    The value is computed from ``exp(-|x|)``, which lies in ``(0, 1]`` and
    therefore never overflows. The naive form evaluates ``exp(-x)``, which
    overflows (and emits a RuntimeWarning) for large negative ``x``.

    Args:
        x: A scalar or array-like of numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an ndarray with the same
        shape as ``x``.
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))
    # For x >= 0 this is the textbook formula; for x < 0 the algebraically
    # equal form exp(x) / (1 + exp(x)) is used so the exponent stays <= 0.
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d result back into a NumPy scalar and
    # returns n-d arrays unchanged.
    return result[()]

In [4]:
def sigmoid_derivative(x):
    """Return the derivative of the sigmoid at ``x``: ``s(x) * (1 - s(x))``."""
    # Evaluate the sigmoid once and reuse the value for both factors.
    s = sigmoid(x)
    return s * (1 - s)

In [5]:
def mse(predictions, targets):
    """Return the mean of the squared differences between the two inputs."""
    residuals = predictions - targets
    return np.mean(np.square(residuals))

In [6]:
class MLP:
    """Fully connected feed-forward network trained with gradient descent.

    Hidden layers apply the module-level ``sigmoid``; the last layer is
    linear. Inputs and activations are handled as column vectors, i.e. an
    input sample has shape ``(layer_sizes[0], 1)``.
    """

    def __init__(self, layer_sizes, function='sigmoid'):
        """Create randomly initialised weights and biases.

        Args:
            layer_sizes: Number of units per layer, input layer first.
            function: Accepted for interface compatibility; it is not read,
                hidden layers always use the sigmoid.
        """
        self.layer_sizes = layer_sizes
        # Weights are drawn from N(0, 1) and scaled by sqrt(2 / fan_in).
        self.weights = [np.random.randn(y, x) * np.sqrt(2. / x)
                        for x, y in zip(layer_sizes[:-1], layer_sizes[1:])]
        self.biases = [np.random.randn(y, 1) for y in layer_sizes[1:]]
        # (epoch, [mean |w| of each layer]) snapshots appended by train()
        # and consumed by plot_weights().
        self.weight_history = []

        print(self.weights)

    def print_final_weights_and_biases(self):
        """Print the current weight matrix and bias vector of every layer."""
        print("Final Weights and Biases:")
        for i, (w, b) in enumerate(zip(self.weights, self.biases)):
            print(f"Layer {i + 1} Weights:\n{w}")
            print(f"Layer {i + 1} Biases:\n{b}")

    def feedforward(self, a):
        """Propagate the column vector ``a`` through the network.

        Returns:
            A tuple ``(output, activations)`` where ``activations`` is the
            list of every layer's activation, starting with the input and
            ending with ``output``.
        """
        activations = [a]  # Stores all activations
        for b, w in zip(self.biases[:-1], self.weights[:-1]):
            a = sigmoid(np.dot(w, a) + b)
            activations.append(a)
        # Linear activation for the last layer
        a = np.dot(self.weights[-1], a) + self.biases[-1]
        activations.append(a)
        return activations[-1], activations

    def backprop(self, x, y):
        """Return the gradients of the cost for one sample ``(x, y)``.

        Returns:
            A tuple ``(nabla_w, nabla_b)`` of lists shaped like
            ``self.weights`` and ``self.biases``.
        """
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        final_output, activations = self.feedforward(x)

        # A 1-D target would broadcast against the column-vector output and
        # produce a square delta; align the shapes when the sizes agree.
        y = np.asarray(y)
        if y.size == final_output.size:
            y = y.reshape(final_output.shape)

        # Output layer error (the output layer is linear, so no activation
        # derivative is applied here).
        delta = self.cost_derivative(final_output, y)
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].T)

        # Backpropagate the error through the sigmoid hidden layers.
        for l in range(2, len(self.layer_sizes)):
            # activations[-l] is sigmoid(z) of this layer, so the sigmoid
            # derivative is a * (1 - a); no second forward pass is needed.
            hidden = activations[-l]
            sp = hidden * (1 - hidden)
            delta = np.dot(self.weights[-l + 1].T, delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l - 1].T)

        return nabla_w, nabla_b

    def update_mini_batch(self, mini_batch, learning_rate, lambda_, n):
        """Apply one gradient-descent step computed from ``mini_batch``.

        Args:
            mini_batch: Sequence of ``(x, y)`` samples.
            learning_rate: Step size.
            lambda_: L2 regularisation strength (applied to weights only).
            n: Size of the full training set, used to scale ``lambda_``.
        """
        if len(mini_batch) == 0:
            # Nothing to average over; avoids a division by zero below.
            return

        nabla_w = [np.zeros(w.shape) for w in self.weights]
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        for x, y in mini_batch:
            delta_nabla_w, delta_nabla_b = self.backprop(x, y)
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]

        # Update weights with L2 regularization
        step = learning_rate / len(mini_batch)
        decay = 1 - learning_rate * (lambda_ / n)
        self.weights = [decay * w - step * nw
                        for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b - step * nb for b, nb in zip(self.biases, nabla_b)]

    def train(self, training_data, epochs, learning_rate, batch_size, lambda_=0.0, update_method='batch',
              plot_interval=None):
        """Train the network.

        Args:
            training_data: Mutable sequence of ``(x, y)`` samples. It is
                shuffled in place at the start of every epoch.
            epochs: Number of passes over ``training_data``.
            learning_rate: Initial step size; it is decayed between epochs.
            batch_size: Mini-batch size for ``update_method='batch'``.
            lambda_: L2 regularisation strength.
            update_method: ``'batch'`` for mini-batch updates or ``'epoch'``
                for one update per epoch over the whole data set.
            plot_interval: If truthy, call ``plot_weights`` every
                ``plot_interval`` epochs.

        Raises:
            ValueError: If ``update_method`` is not ``'batch'`` or ``'epoch'``.
        """
        if update_method not in ('batch', 'epoch'):
            # Previously an unknown method silently skipped every update.
            raise ValueError(
                f"Unsupported update_method {update_method!r}; expected 'batch' or 'epoch'"
            )

        n = len(training_data)
        learning_rate_init = learning_rate
        for j in range(epochs):
            # Snapshot first so plot_weights() always has data to draw.
            self._record_weights()

            # Plot weights at the specified interval
            if plot_interval and j % plot_interval == 0:
                print(f"Epoch {j}:")
                self.plot_weights()

            np.random.shuffle(training_data)
            if update_method == 'batch':
                mini_batches = [training_data[k:k + batch_size] for k in range(0, n, batch_size)]
                for mini_batch in mini_batches:
                    self.update_mini_batch(mini_batch, learning_rate, lambda_, n)
            else:
                self.update_mini_batch(training_data, learning_rate, lambda_, n)
            # Learning rate schedule
            learning_rate = learning_rate_init / (1 + 0.01 * j)

    def cost_derivative(self, output_activations, y):
        """Return ``output_activations - y``."""
        return (output_activations - y)

    def _record_weights(self):
        """Append the mean absolute weight of every layer to the history."""
        # The index counts snapshots across all train() calls so repeated
        # training runs extend the curve instead of overlapping it.
        epoch = len(self.weight_history)
        averages = [float(np.mean(np.abs(w))) for w in self.weights]
        self.weight_history.append((epoch, averages))

    def plot_weights(self):
        """Plot the recorded mean absolute weight of each layer per epoch."""
        # Imported locally because the notebook binds ``plt`` to the
        # top-level ``matplotlib`` package, which has no plotting functions.
        import matplotlib.pyplot as plt

        if not self.weight_history:
            print("No weight history recorded yet; call train() first.")
            return

        # Plotting the average absolute weight values
        epochs, weights = zip(*self.weight_history)
        for layer_idx, layer_weights in enumerate(zip(*weights)):
            plt.plot(epochs, layer_weights, label=f'Layer {layer_idx + 1}')

        plt.xlabel('Epoch')
        plt.ylabel('Average Absolute Weight')
        plt.title('Weight Change Over Time')
        plt.legend()
        plt.show()

In [7]:
class DataScaler:
    """Column-wise scaler supporting min-max scaling and standardization.

    Statistics are computed along axis 0 by the ``fit_transform*`` methods
    and reused by the ``transform*`` / ``inverse_transform*`` methods.
    Columns whose spread is zero (constant columns) are scaled by 1 instead
    of 0, so they map to 0 rather than to NaN/inf.
    """

    _METHODS = ("min_max", "standardization")

    def __init__(self, method="standardization"):
        """Store the scaling method: ``"min_max"`` or ``"standardization"``."""
        self.method = method
        self.min = None
        self.max = None
        self.mean = None
        self.std = None

    def _dispatch(self, operation, data):
        """Call ``<operation>_<method>`` for the configured method."""
        if self.method not in self._METHODS:
            raise ValueError("Unsupported scaling method")
        return getattr(self, f"{operation}_{self.method}")(data)

    @staticmethod
    def _safe_scale(scale):
        """Return ``scale`` with zeros replaced by 1 to avoid dividing by 0."""
        scale = np.asarray(scale)
        return np.where(scale == 0, 1, scale)

    @staticmethod
    def _require_fitted(*statistics):
        """Raise if any of the given fitted statistics is still ``None``."""
        if any(stat is None for stat in statistics):
            raise RuntimeError(
                "DataScaler has not been fitted; call fit_transform() first"
            )

    def fit_transform(self, data):
        """Fit the configured method on ``data`` and return the scaled data.

        Raises:
            ValueError: If ``self.method`` is not a supported method.
        """
        return self._dispatch("fit_transform", data)

    def transform(self, data):
        """Scale ``data`` with the statistics of the last fit.

        Raises:
            ValueError: If ``self.method`` is not a supported method.
            RuntimeError: If the scaler has not been fitted.
        """
        return self._dispatch("transform", data)

    def inverse_transform(self, data):
        """Map scaled ``data`` back to the original scale.

        Raises:
            ValueError: If ``self.method`` is not a supported method.
            RuntimeError: If the scaler has not been fitted.
        """
        return self._dispatch("inverse_transform", data)

    def fit_transform_min_max(self, data):
        """Record per-column min/max of ``data`` and return it min-max scaled."""
        self.min = np.min(data, axis=0)
        self.max = np.max(data, axis=0)
        return self.transform_min_max(data)

    def transform_min_max(self, data):
        """Return ``(data - min) / (max - min)`` using the fitted min/max."""
        self._require_fitted(self.min, self.max)
        return (data - self.min) / self._safe_scale(self.max - self.min)

    def inverse_transform_min_max(self, data):
        """Undo ``transform_min_max``."""
        self._require_fitted(self.min, self.max)
        return data * self._safe_scale(self.max - self.min) + self.min

    def fit_transform_standardization(self, data):
        """Record per-column mean/std of ``data`` and return it standardized."""
        self.mean = np.mean(data, axis=0)
        self.std = np.std(data, axis=0)
        return self.transform_standardization(data)

    def transform_standardization(self, data):
        """Return ``(data - mean) / std`` using the fitted mean/std."""
        self._require_fitted(self.mean, self.std)
        return (data - self.mean) / self._safe_scale(self.std)

    def inverse_transform_standardization(self, data):
        """Undo ``transform_standardization``."""
        self._require_fitted(self.mean, self.std)
        return data * self._safe_scale(self.std) + self.mean


# Loading data

In [8]:
# Load the training and test splits of the square-simple regression set.
df_train_square_simple, df_test_square_simple = (
    pd.read_csv('./data/regression/square-simple-training.csv'),
    pd.read_csv('./data/regression/square-simple-test.csv'),
)

In [9]:
# Load the training and test splits of the steps-large regression set.
df_train_steps_large, df_test_steps_large = (
    pd.read_csv('./data/regression/steps-large-training.csv'),
    pd.read_csv('./data/regression/steps-large-test.csv'),
)

In [10]:
# Turn the 'x' (input) and 'y' (target) columns into (n, 1) arrays.
X_train_square_simple = df_train_square_simple['x'].to_numpy().reshape(-1, 1)
y_train_square_simple = df_train_square_simple['y'].to_numpy().reshape(-1, 1)

In [11]:
# Turn the 'x' (input) and 'y' (target) columns into (n, 1) arrays.
X_test_square_simple = df_test_square_simple['x'].to_numpy().reshape(-1, 1)
y_test_square_simple = df_test_square_simple['y'].to_numpy().reshape(-1, 1)

In [12]:
# Turn the 'x' (input) and 'y' (target) columns into (n, 1) arrays.
X_train_steps_large = df_train_steps_large['x'].to_numpy().reshape(-1, 1)
y_train_steps_large = df_train_steps_large['y'].to_numpy().reshape(-1, 1)

In [13]:
# Turn the 'x' (input) and 'y' (target) columns into (n, 1) arrays.
X_test_steps_large = df_test_steps_large['x'].to_numpy().reshape(-1, 1)
y_test_steps_large = df_test_steps_large['y'].to_numpy().reshape(-1, 1)

### square-simple dataset

In [14]:
# One standardizing scaler for the inputs and a separate one for the targets.
scaler_X, scaler_y = (DataScaler(method="standardization") for _ in range(2))

# Fit each scaler on the square-simple training data and keep the scaled copy.
X_train_scaled = scaler_X.fit_transform(X_train_square_simple)
y_train_scaled = scaler_y.fit_transform(y_train_square_simple)


In [15]:
# (input column vector, target) pairs built from the unscaled training arrays.
training_data = [
    (np.reshape(features, (-1, 1)), target)
    for features, target in zip(X_train_square_simple, y_train_square_simple)
]

In [16]:
# Network with 1 input, two hidden layers of 5 units each, and 1 output.
mlp_square_1_5 = MLP(layer_sizes=[1, 5, 5, 1])

[array([[-0.30928401],
       [-1.03162708],
       [ 0.22585629],
       [-1.93205162],
       [ 0.0724469 ]]), array([[ 0.14076145,  0.53548206,  0.33485327, -0.19264993,  0.55876384],
       [ 1.31150638, -0.13294195,  0.76188053, -0.46474823,  0.97924198],
       [-0.16537783, -0.78102817,  0.56696817, -0.69204738, -0.2596795 ],
       [-0.25947767, -0.1056244 , -0.21452221, -0.41172364, -0.86687076],
       [ 0.04183953,  0.18273437,  0.5208909 ,  0.08828803,  0.81789013]]), array([[-0.47307463,  0.40560605,  0.45939493,  0.1335294 ,  0.32059465]])]


In [17]:
# Pair each scaled input, reshaped to a column vector, with its scaled target.
training_data_scaled = [
    (np.reshape(features, (-1, 1)), target)
    for features, target in zip(X_train_scaled, y_train_scaled)
]

In [18]:
# Mini-batch training on the scaled square-simple data.
mlp_square_1_5.train(
    training_data=training_data_scaled,
    epochs=10000,
    learning_rate=1,
    batch_size=10,
)

#### testing

In [19]:
# Scale the test inputs with transform() (not fit_transform()) so they reuse
# the statistics that scaler_X computed on the training inputs.
X_test_scaled = scaler_X.transform(X_test_square_simple)

In [20]:
# Feed every scaled test input through the network as a column vector and
# keep only the final output (the first element of feedforward's result).
predictions_scaled = np.array([
    mlp_square_1_5.feedforward(np.reshape(sample, (-1, 1)))[0]
    for sample in X_test_scaled
])

In [21]:
# Map the network outputs back to the original target scale.
predictions = scaler_y.inverse_transform(predictions_scaled.reshape(-1, 1))

# Show each prediction next to its true value, then report the test MSE.
for predicted, actual in zip(predictions, y_test_square_simple):
    print("predicted value: ", predicted, "actual value: ", actual)
mse_score = mse(predictions, y_test_square_simple)

print(f"MSE Score: {mse_score}")

predicted value:  [-41.69411424] actual value:  [-43.4869178]
predicted value:  [9.03814764] actual value:  [3.7528081]
predicted value:  [-126.41480027] actual value:  [-128.61072132]
predicted value:  [4.94585653] actual value:  [5.67182845]
predicted value:  [-29.18525623] actual value:  [-27.46916942]
predicted value:  [37.33228161] actual value:  [36.53905322]
predicted value:  [-56.0651424] actual value:  [-56.38035958]
predicted value:  [164.70683239] actual value:  [163.71667642]
predicted value:  [-112.22197307] actual value:  [-109.92253645]
predicted value:  [-42.12400405] actual value:  [-43.87280887]
predicted value:  [-38.55467583] actual value:  [-36.76289151]
predicted value:  [-119.49625384] actual value:  [-117.96377078]
predicted value:  [4.49553958] actual value:  [5.24004298]
predicted value:  [-127.98368752] actual value:  [-128.71571315]
predicted value:  [-62.53881745] actual value:  [-61.08193105]
predicted value:  [2.1039756] actual value:  [2.94504249]
predic

In [22]:
# Now we can see that the MLP is able to learn and gives us an MSE below 4.

### steps-large dataset

In [23]:
# Network with 1 input, hidden layers of 10, 5 and 5 units, and 1 output.
mlp_steps = MLP(layer_sizes=[1, 10, 5, 5, 1])

[array([[-1.79247161e+00],
       [ 2.04006677e+00],
       [ 2.40143855e-03],
       [ 7.33167002e-01],
       [-2.89432788e+00],
       [ 1.23374425e+00],
       [-3.46886882e-01],
       [ 1.59660700e+00],
       [ 1.19682664e+00],
       [-3.27430547e-01]]), array([[ 0.11736947,  0.15110849,  0.15908108,  0.52453264,  0.04734362,
         0.5207038 ,  0.43264569, -0.01873716, -0.0101868 ,  0.39511559],
       [ 0.16773907,  0.28280446, -0.07712897,  0.83825282,  0.65601633,
        -0.28309343, -0.43728905, -0.21323026, -0.67085444, -0.15657081],
       [ 0.59429469, -0.31465926, -0.41152953, -0.01929895, -0.15080366,
        -0.50873545,  0.52438177,  0.32906505,  0.51540403, -0.15620252],
       [ 0.47637948,  0.19261327, -0.38696045,  0.43862943, -0.13888629,
        -1.02148455, -0.503669  ,  0.31877931,  0.18163016, -0.61430501],
       [-0.30042575,  0.25629724,  0.80249395, -0.44277365, -0.37774083,
        -0.34882995,  0.16459672, -0.51188016,  0.00343354,  0.1025923 ]]), 

In [24]:
# Fit and transform the steps-large training data.
# NOTE: scaler_X / scaler_y are the same objects used for square-simple;
# refitting overwrites their statistics, so from here on they can only
# (inverse-)transform steps-large data.
X_train_scaled2 = scaler_X.fit_transform(X_train_steps_large)
y_train_scaled2 = scaler_y.fit_transform(y_train_steps_large)


In [25]:
# Pair each scaled steps-large input, reshaped to a column vector, with its
# scaled target.
training_data_scaled2 = [
    (np.reshape(features, (-1, 1)), target)
    for features, target in zip(X_train_scaled2, y_train_scaled2)
]

In [None]:
# Train the steps-large network on the steps-large training pairs
# (training_data_scaled2). The previous argument, training_data_scaled, holds
# the square-simple data, so the model was fitted to the wrong dataset.
mlp_steps.train(
    training_data_scaled2, epochs=10000, learning_rate=0.1, batch_size=10
)

#### testing

In [None]:
# Scale the steps-large test inputs with transform() so they reuse the
# statistics that scaler_X computed on the steps-large training inputs.
X_test_scaled2 = scaler_X.transform(X_test_steps_large)

In [None]:
# Feed every scaled test input through the network as a column vector and
# keep only the final output (the first element of feedforward's result).
predictions_scaled2 = np.array([
    mlp_steps.feedforward(np.reshape(sample, (-1, 1)))[0]
    for sample in X_test_scaled2
])

In [None]:
# Map the network outputs back to the original target scale.
predictions2 = scaler_y.inverse_transform(predictions_scaled2.reshape(-1, 1))

# Show each prediction next to its true value, then report the test MSE.
for predicted, actual in zip(predictions2, y_test_steps_large):
    print("predicted value: ", predicted, "actual value: ", actual)
mse_score2 = mse(predictions2, y_test_steps_large)

print(f"MSE Score: {mse_score2}")

### multimodal-large dataset