# Defining MLP

In [2]:
# Switch to the project root so the relative './data/...' paths used by the
# loading cells resolve correctly.
import os
# NOTE(review): absolute, machine-specific path -- this cell fails on any other
# machine; consider making the project directory configurable.
os.chdir('/Users/mikolajmroz/Developer/Computational_Intelligence_Methods')
print(os.getcwd())  # echo the working directory to confirm the change

/Users/mikolajmroz/Developer/Computational_Intelligence_Methods


In [3]:
# TODO
# add plots of the data
# try choosing the weights by hand based on the visualization
# ipywidgets
# networkx
# einops
# nnanddl.com
# m.nielsen

In [4]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt

In [5]:
def sigmoid(x):
    """Numerically stable logistic function ``1 / (1 + exp(-x))``.

    Parameters
    ----------
    x : scalar or array-like
        Input value(s).

    Returns
    -------
    numpy scalar or ndarray
        Element-wise sigmoid of ``x``, with the same shape as ``x``.

    Notes
    -----
    The naive formula evaluates ``exp(-x)``, which overflows (RuntimeWarning and
    an intermediate ``inf``) for large negative ``x``. Only ``exp(-|x|)`` is
    evaluated here, so the exponential never exceeds 1.
    """
    x = np.asarray(x, dtype=float)
    decay = np.exp(-np.abs(x))  # may underflow to 0, but can never overflow
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- algebraically the same value.
    result = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    return result[()]  # unwrap 0-d arrays so a scalar input yields a scalar

In [6]:
def sigmoid_derivative(x):
    """Derivative of the logistic function at ``x``: ``s * (1 - s)`` with ``s = sigmoid(x)``."""
    s = sigmoid(x)
    return s * (1 - s)

In [7]:
def mse(predictions, targets):
    """Mean squared error: the average of the element-wise squared differences."""
    residuals = predictions - targets
    return np.mean(residuals * residuals)

In [8]:
class MLP:
    """Fully connected feed-forward network trained with mini-batch gradient descent.

    Hidden layers use the activation selected by ``function``; the output layer
    is linear. Back-propagation starts from the gradient ``output - target``
    (see ``cost_derivative``).

    ``weights[i]`` has shape ``(layer_sizes[i + 1], layer_sizes[i])`` and
    ``biases[i]`` has shape ``(layer_sizes[i + 1], 1)``; both are plain lists of
    arrays and may be overwritten from outside the class.
    """

    def __init__(self, layer_sizes, function='sigmoid'):
        """Create a network with randomly initialised parameters.

        Parameters
        ----------
        layer_sizes : sequence of int
            Number of units per layer, input layer first, output layer last.
        function : str, optional
            Hidden-layer activation: ``'sigmoid'`` (default), ``'tanh'`` or ``'relu'``.

        Raises
        ------
        ValueError
            If ``function`` is not one of the supported names.
        """
        # Activation and derivative are stored together so that backprop always
        # differentiates the function that feedforward actually applies.
        self.function, self.function_derivative = self._resolve_activation(function)

        self.layer_sizes = layer_sizes
        # Weights ~ N(0, 1) scaled by sqrt(1 / (fan_in + fan_out)); biases ~ N(0, 1).
        self.weights = [np.random.randn(y, x) * np.sqrt(1. / (x + y))
                        for x, y in zip(layer_sizes[:-1], layer_sizes[1:])]
        self.biases = [np.random.randn(y, 1) for y in layer_sizes[1:]]

        print(self.weights)

    @staticmethod
    def _tanh_derivative(z):
        """Derivative of tanh: ``1 - tanh(z) ** 2``."""
        return 1.0 - np.tanh(z) ** 2

    @staticmethod
    def _relu(z):
        """Rectified linear unit: ``max(z, 0)`` element-wise."""
        return np.maximum(z, 0.0)

    @staticmethod
    def _relu_derivative(z):
        """Derivative of ReLU: 1 where ``z > 0``, otherwise 0."""
        return (np.asarray(z) > 0).astype(float)

    @staticmethod
    def _resolve_activation(function):
        """Map an activation name to an ``(activation, derivative)`` pair."""
        if function == 'sigmoid':
            return sigmoid, sigmoid_derivative
        if function == 'tanh':
            return np.tanh, MLP._tanh_derivative
        if function == 'relu':
            return MLP._relu, MLP._relu_derivative
        raise ValueError(
            f"Unsupported activation {function!r}; expected 'sigmoid', 'tanh' or 'relu'"
        )

    def print_final_weights_and_biases(self):
        """Print the current weight matrix and bias vector of every layer."""
        print("Final Weights and Biases:")
        for i, (w, b) in enumerate(zip(self.weights, self.biases)):
            print(f"Layer {i + 1} Weights:\n{w}")
            print(f"Layer {i + 1} Biases:\n{b}")

    def plot_weights(self):
        """Draw one heat map per layer showing the current weight matrix."""
        n_layers = len(self.weights)
        fig, axes = plt.subplots(1, n_layers, figsize=(4 * n_layers, 3), squeeze=False)
        for i, (ax, w) in enumerate(zip(axes[0], self.weights)):
            image = ax.imshow(np.asarray(w, dtype=float), aspect='auto')
            ax.set_title(f"Layer {i + 1} weights")
            ax.set_xlabel("input unit")
            ax.set_ylabel("output unit")
            fig.colorbar(image, ax=ax)
        fig.tight_layout()
        plt.show()

    def _forward(self, a):
        """Run a forward pass and return ``(zs, activations)``.

        ``zs`` holds the pre-activation of every layer; ``activations`` starts
        with the input and ends with the (linear) network output.
        """
        activations = [a]
        zs = []
        for b, w in zip(self.biases[:-1], self.weights[:-1]):
            z = np.dot(w, a) + b
            zs.append(z)
            a = self.function(z)
            activations.append(a)
        # Linear activation for the last layer
        z = np.dot(self.weights[-1], a) + self.biases[-1]
        zs.append(z)
        activations.append(z)
        return zs, activations

    def feedforward(self, a):
        """Propagate input ``a`` through the network.

        Returns
        -------
        tuple
            ``(output, activations)`` where ``activations`` is the list of all
            layer activations, input first and output last.
        """
        _, activations = self._forward(a)
        return activations[-1], activations

    def backprop(self, x, y):
        """Compute the cost gradient for one sample.

        Returns
        -------
        tuple
            ``(nabla_w, nabla_b)``: lists of arrays shaped like ``self.weights``
            and ``self.biases``.
        """
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        # A single forward pass yields both activations and pre-activations.
        zs, activations = self._forward(x)

        # Output layer error (the output layer is linear, so no derivative factor)
        delta = self.cost_derivative(activations[-1], y)
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].T)

        # Backpropagate the error through the hidden layers, last to first
        for l in range(2, len(self.layer_sizes)):
            sp = self.function_derivative(zs[-l])
            delta = np.dot(self.weights[-l + 1].T, delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l - 1].T)

        return nabla_w, nabla_b

    def update_mini_batch(self, mini_batch, learning_rate, lambda_, n):
        """Apply one gradient-descent step using the summed gradients of ``mini_batch``.

        Parameters
        ----------
        mini_batch : sequence of (x, y) pairs
            Samples whose gradients are accumulated for this step.
        learning_rate : float
            Step size; the summed gradient is divided by ``len(mini_batch)``.
        lambda_ : float
            L2 regularisation strength (applied to weights only).
        n : int
            Size of the full training set, used to scale the weight decay.
        """
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        for x, y in mini_batch:
            delta_nabla_w, delta_nabla_b = self.backprop(x, y)
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]

        # Update weights with L2 regularization
        self.weights = [(1 - learning_rate * (lambda_ / n)) * w - (learning_rate / len(mini_batch)) * nw
                        for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b - (learning_rate / len(mini_batch)) * nb for b, nb in zip(self.biases, nabla_b)]

    def train(self, training_data, epochs, learning_rate, batch_size, lambda_=0.0, update_method='batch',
              plot_interval=None):
        """Train the network with (mini-batch) gradient descent.

        Parameters
        ----------
        training_data : iterable of (x, y) pairs
            Training samples; the caller's container is left untouched.
        epochs : int
            Number of passes over the data.
        learning_rate : float
            Initial step size; it is decayed after each epoch ``j`` to
            ``learning_rate / (1 + 0.01 * j)``.
        batch_size : int
            Mini-batch size, used when ``update_method == 'batch'``.
        lambda_ : float, optional
            L2 regularisation strength.
        update_method : {'batch', 'epoch'}, optional
            ``'batch'`` updates once per mini-batch, ``'epoch'`` once per epoch
            using all samples.
        plot_interval : int or None, optional
            If set, the weights are plotted every ``plot_interval`` epochs.

        Raises
        ------
        ValueError
            If ``update_method`` is unknown or ``batch_size`` is not positive in
            ``'batch'`` mode.
        """
        if update_method not in ('batch', 'epoch'):
            raise ValueError(
                f"Unknown update_method {update_method!r}; expected 'batch' or 'epoch'"
            )
        if update_method == 'batch' and batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        # Shuffle a private copy so the caller's data keeps its order.
        training_data = list(training_data)
        n = len(training_data)
        learning_rate_init = learning_rate
        for j in range(epochs):
            # Plot weights at the specified interval
            if plot_interval and j % plot_interval == 0:
                print(f"Epoch {j}:")
                self.plot_weights()

            np.random.shuffle(training_data)
            if update_method == 'batch':
                mini_batches = [training_data[k:k + batch_size] for k in range(0, n, batch_size)]
                for mini_batch in mini_batches:
                    self.update_mini_batch(mini_batch, learning_rate, lambda_, n)
            else:  # 'epoch'
                self.update_mini_batch(training_data, learning_rate, lambda_, n)
            # Learning rate schedule
            learning_rate = learning_rate_init / (1 + 0.01 * j)

    def cost_derivative(self, output_activations, y):
        """Gradient of the cost with respect to the network output: ``output - target``."""
        return (output_activations - y)

# Loading data

In [9]:
# Read the square-simple regression training and test CSV files
# (paths are relative to the current working directory).
df_train_square_simple = pd.read_csv('./data/regression/square-simple-training.csv')
df_test_square_simple = pd.read_csv('./data/regression/square-simple-test.csv')

In [10]:
# Read the steps-large regression training and test CSV files
# (paths are relative to the current working directory).
df_train_steps_large = pd.read_csv('./data/regression/steps-large-training.csv')
df_test_steps_large = pd.read_csv('./data/regression/steps-large-test.csv')

In [11]:
# Pull the 'x' and 'y' columns out as (n_samples, 1) column arrays.
X_train_square_simple, y_train_square_simple = (
    df_train_square_simple[column].values.reshape(-1, 1) for column in ('x', 'y')
)

In [12]:
# Pull the 'x' and 'y' columns out as (n_samples, 1) column arrays.
X_test_square_simple, y_test_square_simple = (
    df_test_square_simple[column].values.reshape(-1, 1) for column in ('x', 'y')
)

In [13]:
# Pull the 'x' and 'y' columns out as (n_samples, 1) column arrays.
X_train_steps_large, y_train_steps_large = (
    df_train_steps_large[column].values.reshape(-1, 1) for column in ('x', 'y')
)

In [14]:
# Pull the 'x' and 'y' columns out as (n_samples, 1) column arrays.
X_test_steps_large, y_test_steps_large = (
    df_test_steps_large[column].values.reshape(-1, 1) for column in ('x', 'y')
)

### square-simple dataset

In [15]:
# Candidate layer layouts, each listed as [input size, hidden sizes..., output size].
# NOTE(review): not referenced anywhere in the visible part of the notebook --
# confirm it is used further down or remove it.
architectures = [
    [1, 5, 1],  # 1 hidden layer, 5 neurons
    [1, 10, 1], # 1 hidden layer, 10 neurons
    [1, 5, 5, 1] # 2 hidden layers, 5 neurons each
]

#### testing

In [16]:
# 1 input, one hidden layer of 5 units, 1 output; the constructor prints the
# randomly initialised weights.
mlp_square_1_5 = MLP([1, 5, 1])

[array([[-0.4926634 ],
       [ 0.48363244],
       [-0.15798159],
       [ 0.52593425],
       [-0.58679239]]), array([[ 0.19955095,  0.19189285,  0.24128465, -0.31362163, -0.05226099]])]


In [17]:
def update_model_and_calculate_mse(weight1_1, weight1_2, weight1_3, weight1_4, weight1_5,  weight2_1, weight2_2, weight2_3, weight2_4, weight2_5, bias1_1, bias1_2, bias1_3, bias1_4, bias1_5, bias2_1):
    """Load the given parameters into ``mlp_square_1_5`` and score it on the square-simple test set.

    ``weight1_*`` / ``bias1_*`` are the five hidden-unit parameters and
    ``weight2_*`` / ``bias2_1`` the output-unit parameters. The function
    overwrites the model's weights and biases, plots actual versus predicted
    test values and prints the mean squared error.
    """
    hidden_weights = np.array([[weight1_1], [weight1_2], [weight1_3], [weight1_4], [weight1_5]])
    output_weights = np.array([[weight2_1, weight2_2, weight2_3, weight2_4, weight2_5]])
    hidden_biases = np.array([[bias1_1], [bias1_2], [bias1_3], [bias1_4], [bias1_5]])
    output_bias = np.array([[bias2_1]])
    mlp_square_1_5.weights = [hidden_weights, output_weights]
    mlp_square_1_5.biases = [hidden_biases, output_bias]

    # Evaluate the network one test sample at a time, then stack into a column.
    per_sample_outputs = []
    for sample in X_test_square_simple:
        output, _ = mlp_square_1_5.feedforward(sample.reshape(-1, 1))
        per_sample_outputs.append(output)
    predictions = np.array(per_sample_outputs).reshape(-1, 1)

    mse_score = mse(predictions, y_test_square_simple)

    # Visual comparison of targets and predictions
    plt.scatter(X_test_square_simple, y_test_square_simple, label='Actual')
    plt.scatter(X_test_square_simple, predictions, label='Predicted')
    plt.legend()
    plt.show()

    print(f"MSE Score: {mse_score}")

In [18]:
import ipywidgets as widgets
from IPython.display import display

# One slider per network parameter of the [1, 5, 1] model. The initial values
# are the same numbers that later cells assign directly to mlp_square_1_5.
# Hidden-layer weights (weight1_*) and output-layer weights (weight2_*):
weight1_1_slider = widgets.FloatSlider(min=-200, max=200, step=0.1, value=4.5, description='Weight 1_1:')
weight1_2_slider = widgets.FloatSlider(min=-200, max=200, step=0.1, value=-5, description='Weight 1_2:')
weight1_3_slider = widgets.FloatSlider(min=-200, max=200, step=0.1, value=-7.5, description='Weight 1_3:')
weight1_4_slider = widgets.FloatSlider(min=-200, max=200, step=0.1, value=9.5, description='Weight 1_4:')
weight1_5_slider = widgets.FloatSlider(min=-200, max=200, step=0.1, value=6.5, description='Weight 1_5:')
weight2_1_slider = widgets.FloatSlider(min=-200, max=200, step=0.1, value=120, description='Weight 2_1:')
weight2_2_slider = widgets.FloatSlider(min=-200, max=200, step=0.1, value=93, description='Weight 2_2:')
weight2_3_slider = widgets.FloatSlider(min=-200, max=200, step=0.1, value=114, description='Weight 2_3:')
weight2_4_slider = widgets.FloatSlider(min=-200, max=200, step=0.1, value=130.8, description='Weight 2_4:')
weight2_5_slider = widgets.FloatSlider(min=-200, max=200, step=0.1, value=131.9, description='Weight 2_5:')

# Hidden-layer biases (bias1_*) and the single output bias (bias2_1):
bias1_1_slider = widgets.FloatSlider(min=-200, max=200, step=0.1, value=-3.5, description='Bias 1_1:')
bias1_2_slider = widgets.FloatSlider(min=-200, max=200, step=0.1, value=-3.5, description='Bias 1_2:')
bias1_3_slider = widgets.FloatSlider(min=-200, max=200, step=0.1, value=-9.3, description='Bias 1_3:')
bias1_4_slider = widgets.FloatSlider(min=-200, max=200, step=0.1, value=-17.5, description='Bias 1_4:')
bias1_5_slider = widgets.FloatSlider(min=-200, max=200, step=0.1, value=-9.1, description='Bias 1_5:')
bias2_1_slider = widgets.FloatSlider(min=-200, max=200, step=0.1, value=-133, description='Bias 2_1:')

# Stack all sliders vertically and bind each one to the keyword argument of the
# same name in update_model_and_calculate_mse.
ui = widgets.VBox([weight1_1_slider, weight1_2_slider, weight1_3_slider, weight1_4_slider, weight1_5_slider, weight2_1_slider, weight2_2_slider, weight2_3_slider,
                    weight2_4_slider, weight2_5_slider, bias1_1_slider, bias1_2_slider,bias1_3_slider, bias1_4_slider, bias1_5_slider, bias2_1_slider])
out = widgets.interactive_output(update_model_and_calculate_mse, {
    'weight1_1': weight1_1_slider, 'weight1_2': weight1_2_slider, 'weight1_3': weight1_3_slider, 
    'weight1_4': weight1_4_slider, 'weight1_5': weight1_5_slider, 'weight2_1': weight2_1_slider, 
    'weight2_2': weight2_2_slider, 'weight2_3': weight2_3_slider, 'weight2_4': weight2_4_slider, 
    'weight2_5': weight2_5_slider, 'bias1_1': bias1_1_slider, 'bias1_2': bias1_2_slider,
     'bias1_3': bias1_3_slider, 'bias1_4': bias1_4_slider, 'bias1_5': bias1_5_slider, 'bias2_1': bias2_1_slider
})

# Show the controls together with the callback's plot and printed MSE.
display(ui, out)


VBox(children=(FloatSlider(value=4.5, description='Weight 1_1:', max=200.0, min=-200.0), FloatSlider(value=-5.…

Output()

# ![title](./weights_1.png)

In [19]:
# Hard-code the hand-tuned weights (identical to the slider defaults above) so
# the result does not depend on widget state: 5x1 hidden weights, 1x5 output weights.
mlp_square_1_5.weights = [np.array([[4.5 ],
 [-5],
 [-7.5],
 [9.5],
 [ 6.5]]),
                        np.array([[ 120,93,114,130.8,131.9 ]])]

In [20]:
# Hand-tuned biases (identical to the slider defaults above): 5x1 hidden biases, 1x1 output bias.
mlp_square_1_5.biases = [np.array([[-3.5 ],[ -3.5], [-9.3],[-17.5 ],[-9.1]]), 
                       np.array([[-133]])]

In [21]:
# Run the network on every test input, one sample (as a column) at a time,
# and stack the outputs into a column laid out like y_test_square_simple.
predictions = np.array(
    [mlp_square_1_5.feedforward(sample.reshape(-1, 1))[0] for sample in X_test_square_simple]
).reshape(-1, 1)

# Show each prediction next to its target value.
for i, predicted in enumerate(predictions):
    print(predicted, y_test_square_simple[i])

mse_score = mse(predictions, y_test_square_simple)
mlp_square_1_5.print_final_weights_and_biases()
print(f"MSE Score: {mse_score}")

[-44.08085155] [-43.4869178]
[6.09369869] [3.7528081]
[-125.4578745] [-128.61072132]
[5.85938698] [5.67182845]
[-24.93012349] [-27.46916942]
[37.69076082] [36.53905322]
[-56.03180735] [-56.38035958]
[163.42415125] [163.71667642]
[-109.64648915] [-109.92253645]
[-44.4455946] [-43.87280887]
[-33.19326025] [-36.76289151]
[-117.42728281] [-117.96377078]
[5.43328322] [5.24004298]
[-126.04972326] [-128.71571315]
[-55.81149708] [-61.08193105]
[3.18074845] [2.94504249]
[-65.02151605] [-66.12375418]
[44.59374871] [45.94659944]
[-36.31217265] [-35.44343445]
[-54.95751323] [-60.21294992]
[151.10769622] [152.56039396]
[-111.93229028] [-113.61118977]
[-94.18945917] [-96.92892366]
[-69.4997245] [-70.87526302]
[-115.66872322] [-117.45634919]
[-60.85955171] [-61.6352831]
[-126.12561697] [-129.40068949]
[-115.87799078] [-116.2889997]
[-22.90707511] [-22.4051127]
[-64.14420423] [-65.18253908]
[-27.18471134] [-30.00706095]
[-126.63598479] [-129.69254744]
[-126.67372296] [-129.76750355]
[-124.16472706] [-

### steps-large dataset

In [22]:
# 1 input, one hidden layer of 5 units, 1 output; the constructor prints the
# randomly initialised weights.
mlp_steps_1_5 = MLP([1, 5, 1])

[array([[-0.0262937 ],
       [-0.80158368],
       [-0.34337898],
       [-0.81051365],
       [-0.35161635]]), array([[ 0.11244529,  0.26520627, -0.8873419 ,  0.17926583, -0.3877005 ]])]


In [23]:
def update_model_and_calculate_mse(weight1_1, weight1_2, weight1_3, weight1_4, weight1_5,  weight2_1, weight2_2, weight2_3, weight2_4, weight2_5, bias1_1, bias1_2, bias1_3, bias1_4, bias1_5, bias2_1):
    """Load the given parameters into ``mlp_steps_1_5`` and score it on the steps-large test set.

    ``weight1_*`` / ``bias1_*`` are the five hidden-unit parameters and
    ``weight2_*`` / ``bias2_1`` the output-unit parameters. The function
    overwrites the model's weights and biases, plots actual versus predicted
    test values and prints the mean squared error.

    NOTE(review): this rebinds the name of the square-simple callback defined
    earlier in the notebook; a dataset-specific name would avoid the shadowing.
    """
    hidden_weights = np.array([[weight1_1], [weight1_2], [weight1_3], [weight1_4], [weight1_5]])
    output_weights = np.array([[weight2_1, weight2_2, weight2_3, weight2_4, weight2_5]])
    hidden_biases = np.array([[bias1_1], [bias1_2], [bias1_3], [bias1_4], [bias1_5]])
    output_bias = np.array([[bias2_1]])
    mlp_steps_1_5.weights = [hidden_weights, output_weights]
    mlp_steps_1_5.biases = [hidden_biases, output_bias]

    # Evaluate the network one test sample at a time, then stack into a column.
    per_sample_outputs = []
    for sample in X_test_steps_large:
        output, _ = mlp_steps_1_5.feedforward(sample.reshape(-1, 1))
        per_sample_outputs.append(output)
    step_predictions = np.array(per_sample_outputs).reshape(-1, 1)

    step_mse = mse(step_predictions, y_test_steps_large)

    # Visual comparison of targets and predictions
    plt.scatter(X_test_steps_large, y_test_steps_large, label='Actual')
    plt.scatter(X_test_steps_large, step_predictions, label='Predicted')
    plt.legend()
    plt.show()

    print(f"MSE Score: {step_mse}")

In [24]:
import ipywidgets as widgets
from IPython.display import display

# One slider per network parameter of the [1, 5, 1] steps-large model. The
# initial values are the same numbers that later cells assign directly to
# mlp_steps_1_5.
# Hidden-layer weights (weight1_*) and output-layer weights (weight2_*):
weight1_1_slider2 = widgets.FloatSlider(min=-350, max=350, step=0.1, value=268, description='Weight 1_1:')
weight1_2_slider2 = widgets.FloatSlider(min=-350, max=350, step=0.1, value=-1, description='Weight 1_2:')
weight1_3_slider2 = widgets.FloatSlider(min=-350, max=350, step=0.1, value=-2, description='Weight 1_3:')
weight1_4_slider2 = widgets.FloatSlider(min=-350, max=350, step=0.1, value=220, description='Weight 1_4:')
weight1_5_slider2 = widgets.FloatSlider(min=-350, max=350, step=0.1, value=330, description='Weight 1_5:')
weight2_1_slider2 = widgets.FloatSlider(min=-350, max=350, step=0.1, value=80, description='Weight 2_1:')
weight2_2_slider2 = widgets.FloatSlider(min=-350, max=350, step=0.1, value=2, description='Weight 2_2:')
weight2_3_slider2 = widgets.FloatSlider(min=-350, max=350, step=0.1, value=35, description='Weight 2_3:')
weight2_4_slider2 = widgets.FloatSlider(min=-350, max=350, step=0.1, value=81, description='Weight 2_4:')
weight2_5_slider2 = widgets.FloatSlider(min=-350, max=350, step=0.1, value=81, description='Weight 2_5:')

# Hidden-layer biases (bias1_*) and the single output bias (bias2_1):
bias1_1_slider2 = widgets.FloatSlider(min=-350, max=350, step=0.1, value=133, description='Bias 1_1:')
bias1_2_slider2 = widgets.FloatSlider(min=-350, max=350, step=0.1, value=0, description='Bias 1_2:')
bias1_3_slider2 = widgets.FloatSlider(min=-350, max=350, step=0.1, value=8, description='Bias 1_3:')
bias1_4_slider2 = widgets.FloatSlider(min=-350, max=350, step=0.1, value=-329, description='Bias 1_4:')
bias1_5_slider2 = widgets.FloatSlider(min=-350, max=350, step=0.1, value=-164, description='Bias 1_5:')
bias2_1_slider2 = widgets.FloatSlider(min=-350, max=350, step=0.1, value=-117, description='Bias 2_1:')

# Stack all sliders vertically and bind each one to the keyword argument of the
# same name in update_model_and_calculate_mse (at this point the name refers to
# the steps-large version defined in the preceding cell).
ui2 = widgets.VBox([weight1_1_slider2, weight1_2_slider2, weight1_3_slider2, weight1_4_slider2, weight1_5_slider2, weight2_1_slider2, weight2_2_slider2, weight2_3_slider2,
                    weight2_4_slider2, weight2_5_slider2, bias1_1_slider2, bias1_2_slider2,bias1_3_slider2, bias1_4_slider2, bias1_5_slider2, bias2_1_slider2])
out2 = widgets.interactive_output(update_model_and_calculate_mse, {
    'weight1_1': weight1_1_slider2, 'weight1_2': weight1_2_slider2, 'weight1_3': weight1_3_slider2, 
    'weight1_4': weight1_4_slider2, 'weight1_5': weight1_5_slider2, 'weight2_1': weight2_1_slider2, 
    'weight2_2': weight2_2_slider2, 'weight2_3': weight2_3_slider2, 'weight2_4': weight2_4_slider2, 
    'weight2_5': weight2_5_slider2, 'bias1_1': bias1_1_slider2, 'bias1_2': bias1_2_slider2,
     'bias1_3': bias1_3_slider2, 'bias1_4': bias1_4_slider2, 'bias1_5': bias1_5_slider2, 'bias2_1': bias2_1_slider2
})

# Show the controls together with the callback's plot and printed MSE.
display(ui2, out2)


VBox(children=(FloatSlider(value=268.0, description='Weight 1_1:', max=350.0, min=-350.0), FloatSlider(value=-…

Output()

# ![title](./weights_2.png)

#### testing

In [25]:
# Fresh [1, 5, 1] model replacing the one driven by the sliders; its random
# parameters are overwritten with hard-coded values in the next cells.
mlp_steps_1_5 = MLP([1, 5, 1])

[array([[-0.32034403],
       [ 0.30144855],
       [ 0.44165741],
       [-0.22246852],
       [ 0.63889537]]), array([[ 0.01825946,  0.24727544,  0.61627315, -0.17863841,  0.01948348]])]


In [26]:
# Hard-code the hand-tuned weights (identical to the slider defaults above) so
# the result does not depend on widget state: 5x1 hidden weights, 1x5 output weights.
# All literals are integers, so these arrays get an integer dtype.
mlp_steps_1_5.weights = [np.array([[268 ],
 [-1],
 [-2],
 [220],
 [330]]),
                        np.array([[ 80,2,35,81,81 ]])]

In [27]:
# Hand-tuned biases (identical to the slider defaults above): 5x1 hidden biases, 1x1 output bias.
mlp_steps_1_5.biases = [np.array([[133 ],[ 0], [8],[-329],[-164]]), 
                       np.array([[-117]])]

In [28]:

# Run the network on every test input, one sample (as a column) at a time,
# and stack the outputs into a column laid out like y_test_steps_large.
predictions2 = np.array(
    [mlp_steps_1_5.feedforward(sample.reshape(-1, 1))[0] for sample in X_test_steps_large]
).reshape(-1, 1)

# Show each prediction next to its target value.
for i, predicted in enumerate(predictions2):
    print(predicted, y_test_steps_large[i])

mse_score2 = mse(predictions2, y_test_steps_large)
mlp_steps_1_5.print_final_weights_and_biases()
print(f"MSE Score: {mse_score2}")

[159.9539419] [160]
[-80.7100967] [-80]
[-80.67806932] [-80]
[79.24356359] [80]
[-80.38767339] [-80]
[79.36104278] [80]
[-0.7826168] [0]
[-80.38677205] [-80]
[-80.64388806] [-80]
[-80.61539697] [-80]
[-80.39572061] [-80]
[-80.40351471] [-80]
[79.68199083] [80]
[160.0145535] [160]
[-1.24238309] [0]
[-1.05642578] [0]
[-80.58072157] [-80]
[79.45259488] [80]
[159.68609923] [160]
[-80.42238189] [-80]
[159.91212457] [160]
[-1.25111297] [0]
[-80.7268688] [-80]
[159.72550196] [160]
[159.97698654] [160]
[-80.53952061] [-80]
[-80.5086482] [-80]
[79.47748707] [80]
[-1.25234926] [0]
[79.19311538] [80]
[159.66481986] [160]
[-80.41785094] [-80]
[79.2152488] [80]
[79.46549141] [80]
[-0.86831881] [0]
[79.49849498] [80]
[-80.38161263] [-80]
[-0.99469202] [0]
[79.34570415] [80]
[159.92696888] [160]
[-80.62725022] [-80]
[79.70126452] [80]
[159.81749817] [160]
[-0.96594095] [0]
[159.98919714] [160]
[-1.07399599] [0]
[79.59951907] [80]
[-80.53154774] [-80]
[159.93876826] [160]
[79.41912285] [80]
[-80.43530