# Defining MLP

In [1]:
import os
# Switch to the project directory so the relative './data/...' paths used by
# the data-loading cells resolve.
# Raw string literal: the path contains a backslash followed by 'D', which is
# an invalid escape sequence in a regular string literal. The resulting path
# is unchanged.
# NOTE(review): this absolute path is specific to one machine.
os.chdir(r'C:/Users/Mikol/OneDrive\Documents - one drive/PW/6 SEM/Metody Inteligencji obliczeniowej/Computational_Intelligence_Methods')
print(os.getcwd())

C:\Users\Mikol\OneDrive\Documents - one drive\PW\6 SEM\Metody Inteligencji obliczeniowej\Computational_Intelligence_Methods


In [55]:
# to do
# add plots of the data
# try choosing the weights by hand based on the visualization
# ipywidgets
# networkx
# einops
# nnanddl.com
# m.nielsen

In [20]:
import numpy as np
import pandas as pd 

In [21]:
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``.

    Works element-wise on scalars and NumPy arrays. The value is computed
    from ``exp(-|x|)``, which never exceeds 1, so large-magnitude inputs do
    not overflow inside ``np.exp``.
    """
    # For x >= 0 this is 1 / (1 + exp(-x)); for x < 0 it is the algebraically
    # equal exp(x) / (1 + exp(x)). Both share the denominator 1 + exp(-|x|).
    decay = np.exp(-np.abs(x))
    numerator = np.where(np.greater_equal(x, 0), 1.0, decay)
    return numerator / (1.0 + decay)

In [22]:
def sigmoid_derivative(x):
    """Return the derivative of the logistic sigmoid evaluated at ``x``.

    Uses the identity ``s'(x) = s(x) * (1 - s(x))`` where ``s`` is ``sigmoid``.
    """
    activation = sigmoid(x)
    return activation * (1 - activation)

In [23]:
def mse(predictions, targets):
    """Return the mean of the squared element-wise differences.

    ``predictions`` and ``targets`` are subtracted with NumPy broadcasting,
    so they should have matching shapes.
    """
    residuals = predictions - targets
    return np.mean(np.square(residuals))

In [58]:
class MLP:
    """Fully connected feed-forward network trained by gradient descent.

    Hidden layers apply the activation selected by ``function``; the output
    layer is linear. The gradient used for training is the one returned by
    ``cost_derivative`` (``output - target``).

    Attributes are plain lists indexed by layer: ``weights[i]`` has shape
    ``(layer_sizes[i + 1], layer_sizes[i])`` and ``biases[i]`` has shape
    ``(layer_sizes[i + 1], 1)``.
    """

    def __init__(self, layer_sizes, function='sigmoid'):
        """Create a network with randomly initialised weights and biases.

        Args:
            layer_sizes: Sequence of layer widths, input layer first and
                output layer last.
            function: Name of the hidden-layer activation. Only ``'sigmoid'``
                is implemented; any other name raises ``ValueError`` the
                first time a hidden layer is evaluated.
        """
        self.layer_sizes = layer_sizes
        # Remember the requested activation so feedforward/backprop can use it.
        self.function = function
        # Weights are drawn from a standard normal scaled by sqrt(2 / fan_in).
        self.weights = [np.random.randn(y, x) * np.sqrt(2. / x)
                        for x, y in zip(layer_sizes[:-1], layer_sizes[1:])]
        self.biases = [np.random.randn(y, 1) for y in layer_sizes[1:]]

        print(self.weights)

    def _activate(self, z):
        """Apply the configured hidden-layer activation to ``z``."""
        if self.function == 'sigmoid':
            return sigmoid(z)
        raise ValueError(f"Unsupported activation function: {self.function!r}")

    def _activate_derivative(self, z):
        """Return the derivative of the configured activation at ``z``."""
        if self.function == 'sigmoid':
            return sigmoid_derivative(z)
        raise ValueError(f"Unsupported activation function: {self.function!r}")

    def _forward_pass(self, a):
        """Propagate ``a`` through the network.

        Returns:
            ``(zs, activations)`` where ``zs[i]`` is the weighted input of
            layer ``i + 1`` and ``activations`` starts with the input itself
            followed by the output of every layer.
        """
        zs = []
        activations = [a]
        last = len(self.weights) - 1
        for index, (w, b) in enumerate(zip(self.weights, self.biases)):
            z = np.dot(w, a) + b
            zs.append(z)
            # The output layer is linear; hidden layers use the activation.
            a = z if index == last else self._activate(z)
            activations.append(a)
        return zs, activations

    def print_final_weights_and_biases(self):
        """Print the weight matrix and bias vector of every layer."""
        print("Final Weights and Biases:")
        for i, (w, b) in enumerate(zip(self.weights, self.biases)):
            print(f"Layer {i + 1} Weights:\n{w}")
            print(f"Layer {i + 1} Biases:\n{b}")

    def plot_weights(self):
        """Print a short per-layer summary of the current weights.

        Called by ``train`` every ``plot_interval`` epochs. This is a text
        report (shape, min, max, mean) rather than a graphical plot.
        """
        for i, w in enumerate(self.weights):
            print(f"Layer {i + 1} weights: shape={w.shape}, "
                  f"min={w.min():.4f}, max={w.max():.4f}, mean={w.mean():.4f}")

    def feedforward(self, a):
        """Compute the network output for input ``a``.

        Args:
            a: Input array whose first dimension equals ``layer_sizes[0]``
                (the notebook passes column vectors).

        Returns:
            ``(output, activations)``: the final-layer output and the list of
            all layer activations, starting with the input.
        """
        _, activations = self._forward_pass(a)
        return activations[-1], activations  # Return final activation and all activations

    def backprop(self, x, y):
        """Compute the cost gradient for a single ``(x, y)`` sample.

        Returns:
            ``(nabla_w, nabla_b)``: lists of arrays shaped like
            ``self.weights`` and ``self.biases``.
        """
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        # A single forward pass yields both the weighted inputs and activations.
        zs, activations = self._forward_pass(x)

        # Output layer error (the output layer is linear, so no derivative factor)
        delta = self.cost_derivative(activations[-1], y)
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].T)

        # Backpropagate the error through the hidden layers, last to first
        for l in range(2, len(self.layer_sizes)):
            sp = self._activate_derivative(zs[-l])
            delta = np.dot(self.weights[-l + 1].T, delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l - 1].T)

        return nabla_w, nabla_b

    def update_mini_batch(self, mini_batch, learning_rate, lambda_, n):
        """Apply one gradient-descent step using the samples in ``mini_batch``.

        Args:
            mini_batch: Sequence of ``(x, y)`` pairs.
            learning_rate: Step size.
            lambda_: L2 regularisation strength (applied to weights only).
            n: Size of the full training set, used to scale ``lambda_``.

        An empty ``mini_batch`` leaves the parameters unchanged.
        """
        batch_len = len(mini_batch)
        if batch_len == 0:
            # Nothing to average over; avoids a division by zero below.
            return

        nabla_w = [np.zeros(w.shape) for w in self.weights]
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        for x, y in mini_batch:
            delta_nabla_w, delta_nabla_b = self.backprop(x, y)
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]

        # Update weights with L2 regularization
        step = learning_rate / batch_len
        self.weights = [(1 - learning_rate * (lambda_ / n)) * w - step * nw
                        for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b - step * nb for b, nb in zip(self.biases, nabla_b)]

    def train(self, training_data, epochs, learning_rate, batch_size, lambda_=0.0, update_method='batch',
              plot_interval=None):
        """Train the network for ``epochs`` passes over ``training_data``.

        Args:
            training_data: Sequence of ``(x, y)`` pairs. It is copied, so the
                caller's sequence is not reordered by the per-epoch shuffle.
            epochs: Number of passes over the data.
            learning_rate: Initial step size; after epoch ``j`` it becomes
                ``learning_rate / (1 + 0.01 * j)``.
            batch_size: Mini-batch size used when ``update_method='batch'``.
            lambda_: L2 regularisation strength.
            update_method: ``'batch'`` for one update per mini-batch, or
                ``'epoch'`` for a single update over the whole data set.
            plot_interval: If truthy, print a weight summary every
                ``plot_interval`` epochs.

        Raises:
            ValueError: If ``update_method`` is not ``'batch'`` or ``'epoch'``.
        """
        if update_method not in ('batch', 'epoch'):
            raise ValueError(
                f"update_method must be 'batch' or 'epoch', got {update_method!r}")

        # Work on a private copy so shuffling does not reorder the caller's data.
        training_data = list(training_data)
        n = len(training_data)
        learning_rate_init = learning_rate
        for j in range(epochs):
            # Report weights at the specified interval
            if plot_interval and j % plot_interval == 0:
                print(f"Epoch {j}:")
                self.plot_weights()

            np.random.shuffle(training_data)
            if update_method == 'batch':
                for k in range(0, n, batch_size):
                    self.update_mini_batch(training_data[k:k + batch_size], learning_rate, lambda_, n)
            else:
                self.update_mini_batch(training_data, learning_rate, lambda_, n)
            # Learning rate schedule
            learning_rate = learning_rate_init / (1 + 0.01 * j)

    def cost_derivative(self, output_activations, y):
        """Return the derivative of the cost with respect to the output."""
        return (output_activations - y)

# Loading data

In [25]:
# Load the training and test splits of the square-simple regression dataset.
# The paths are relative to the current working directory.
df_train_square_simple = pd.read_csv('./data/regression/square-simple-training.csv')
df_test_square_simple = pd.read_csv('./data/regression/square-simple-test.csv')

In [26]:
# Load the training and test splits of the steps-large regression dataset.
# The paths are relative to the current working directory.
df_train_steps_large = pd.read_csv('./data/regression/steps-large-training.csv')
df_test_steps_large = pd.read_csv('./data/regression/steps-large-test.csv')

In [27]:
# square-simple training inputs and targets as (n_samples, 1) column arrays.
X_train_square_simple = df_train_square_simple['x'].to_numpy().reshape(-1, 1)
y_train_square_simple = df_train_square_simple['y'].to_numpy().reshape(-1, 1)

In [28]:
# square-simple test inputs and targets as (n_samples, 1) column arrays.
X_test_square_simple = df_test_square_simple['x'].to_numpy().reshape(-1, 1)
y_test_square_simple = df_test_square_simple['y'].to_numpy().reshape(-1, 1)

In [29]:
# steps-large training inputs and targets as (n_samples, 1) column arrays.
X_train_steps_large = df_train_steps_large['x'].to_numpy().reshape(-1, 1)
y_train_steps_large = df_train_steps_large['y'].to_numpy().reshape(-1, 1)

In [30]:
# steps-large test inputs and targets as (n_samples, 1) column arrays.
X_test_steps_large = df_test_steps_large['x'].to_numpy().reshape(-1, 1)
y_test_steps_large = df_test_steps_large['y'].to_numpy().reshape(-1, 1)

### square-simple dataset

In [31]:
# Pair every square-simple training input (reshaped to a column vector) with
# its target, in the (x, y) form that MLP.train iterates over.
training_data = [
    (sample.reshape(-1, 1), target)
    for sample, target in zip(X_train_square_simple, y_train_square_simple)
]

In [32]:
# Build a network with layer sizes 1-5-5-1 (two hidden layers of five units).
# The constructor prints the randomly initialised weights.
mlp_square_1_5 = MLP([1, 5, 5, 1])

[array([[-0.3178034 ],
       [-1.35860276],
       [-0.7973346 ],
       [ 2.51707077],
       [-3.48563842]]), array([[-0.39603628,  0.00842222, -1.07192039,  0.7687568 , -0.702686  ],
       [ 0.04410559, -0.05779429,  0.52977275, -0.05849673, -1.1478853 ],
       [ 0.93462841,  0.35455858, -0.65610594,  1.13065673, -1.33148223],
       [-0.76578203, -1.14361308,  0.49974988,  0.47948689, -0.3011706 ],
       [-0.2290973 , -0.51719187,  0.46233845,  0.637503  , -0.43814228]]), array([[-0.72595545, -0.75349239, -0.04695572,  0.05739594,  0.23746124]])]


In [33]:
# Candidate layer-size lists (input size, hidden sizes..., output size).
# NOTE(review): not referenced again in the visible part of the notebook.
architectures = [
    [1, 5, 1],  # 1 hidden layer, 5 neurons
    [1, 10, 1], # 1 hidden layer, 10 neurons
    [1, 5, 5, 1] # 2 hidden layers, 5 neurons each
]

#### testing

In [34]:
# Replace the random initial weights with fixed values. The three matrices
# have shapes (5, 1), (5, 5) and (1, 5), matching the 1-5-5-1 layout.
# NOTE(review): presumably taken from an earlier training run — confirm.
mlp_square_1_5.weights = [np.array([[-2.54942087],
                                  [-7.34844221],
                                  [5.66564003],
                                  [9.53355062],
                                  [-7.01508122]]),
                        np.array([[-0.17622762, 6.46607767, -1.73403919, -9.8232896 , 5.05751039],
                                  [15.41458997, -3.11079245, 2.84169739,3.3700297 , -2.49115115],
                                  [-16.41961746,2.87882694,-5.95882206, -6.04112884, 4.5638308],
                                  [1.23799655, 5.96920534, -3.85447961, -8.07376189,5.18789661],
                                  [18.66770321, -5.52870688,6.06619413,4.82976879,-5.13622883]]),
                        np.array([[-57.2694563 , 83.65861736, -55.64452185, -57.08448128, 91.94495515]])]

In [35]:
# Replace the random initial biases with fixed column vectors of shapes
# (5, 1), (5, 1) and (1, 1), one per non-input layer.
mlp_square_1_5.biases = [np.array([[ -3.12784542],[ 12.21773855], [ -2.29184127], [-18.85043553],[7.82582976]]), 
                       np.array([[ 2.29814006], [ 2.90077046],[-1.00267574], [ 0.83503552], [-0.45035457]]), 
                       np.array([[29.47562796]])]

In [51]:
# Run the network on every square-simple test input; feedforward returns
# (output, activations), so keep only the output.
raw_outputs = [
    mlp_square_1_5.feedforward(sample.reshape(-1, 1))[0]
    for sample in X_test_square_simple
]

# Collapse the per-sample outputs into one column, matching y_test's shape.
predictions = np.array(raw_outputs).reshape(-1, 1)

# Show each prediction next to its expected value.
for predicted, expected in zip(predictions, y_test_square_simple):
    print(predicted, expected)

# Score the predictions and report the parameters that produced them.
mse_score = mse(predictions, y_test_square_simple)

mlp_square_1_5.print_final_weights_and_biases()

print(f"MSE Score: {mse_score}")

[-43.26893623] [-43.4869178]
[1.74119844] [3.7528081]
[-127.61734773] [-128.61072132]
[5.41472804] [5.67182845]
[-27.13204899] [-27.46916942]
[36.58820409] [36.53905322]
[-55.56152496] [-56.38035958]
[162.914084] [163.71667642]
[-112.32091666] [-109.92253645]
[-43.6426712] [-43.87280887]
[-36.7480211] [-36.76289151]
[-119.46454895] [-117.96377078]
[4.98475999] [5.24004298]
[-127.26991658] [-128.71571315]
[-62.05060959] [-61.08193105]
[2.70645676] [2.94504249]
[-64.90283126] [-66.12375418]
[48.22632553] [45.94659944]
[-35.39413282] [-35.44343445]
[-61.19108615] [-60.21294992]
[151.41307231] [152.56039396]
[-114.44337346] [-113.61118977]
[-96.43017319] [-96.92892366]
[-69.6479941] [-70.87526302]
[-118.39191907] [-117.45634919]
[-60.55551575] [-61.6352831]
[-128.05524753] [-129.40068949]
[-118.07743236] [-116.2889997]
[-22.61677483] [-22.4051127]
[-63.98201014] [-65.18253908]
[-29.72086709] [-30.00706095]
[-127.96757772] [-129.69254744]
[-128.02697226] [-129.76750355]
[-126.61810339] [-12

### steps-large dataset

In [47]:
# to do
# rename the model to mlp_steps_large_1_5

In [37]:
# Pair every steps-large training input (reshaped to a column vector) with
# its target, in the (x, y) form that MLP.train iterates over.
training_data2 = [
    (sample.reshape(-1, 1), target)
    for sample, target in zip(X_train_steps_large, y_train_steps_large)
]

In [38]:
# Second 1-5-5-1 network; despite the "square" in its name it is evaluated
# on the steps-large data in the cells that follow.
mlp_square_1_5_2 = MLP([1, 5, 5, 1])

[array([[ 1.60753003],
       [-1.43086049],
       [ 0.10372784],
       [-0.62329841],
       [ 2.28458598]]), array([[ 0.6275887 ,  0.4226137 ,  1.05106996, -0.33262062,  0.17416605],
       [ 1.45077147, -0.17089911, -0.2106906 ,  0.15254964,  1.07661801],
       [ 0.28463488, -0.04115922,  0.19126677, -1.05308009, -0.79096039],
       [ 0.44392627,  0.93142254,  0.82635424,  0.69077428,  0.20477678],
       [-0.12361262, -0.39711504, -0.48824269,  0.50718232, -0.51692142]]), array([[-0.28844713, -0.28106246,  0.67929255, -0.05915209, -0.07661509]])]


#### testing

In [39]:
# Replace the random initial weights with fixed values. The three matrices
# have shapes (5, 1), (5, 5) and (1, 5), matching the 1-5-5-1 layout.
# NOTE(review): presumably taken from an earlier training run — confirm.
mlp_square_1_5_2.weights = [np.array([[  69.5633445 ],
 [  89.18107156],
 [-112.87997316],
 [ -78.51056804],
 [-140.53959512]]),
                        np.array([[2.04677598, 3.51798345, -2.0474786, -3.25657759, -24.34403553],
    [-1.76993074, -2.15143162, 19.48018621, 16.47617387, -4.2941155],
    [12.16542258, 15.26459722, -1.12287186, -4.10102815, -5.0054569],
    [12.87472895, 16.42850268, -1.96435588, -3.13059337, -3.94471487],
    [2.1532062, 1.6002161, -11.70084567, 4.12314203, -2.83026925]]),
                        np.array([[ 67.83489868, -55.74155719,  38.7741544,   40.03413086, 37.66999107]])]

In [40]:
# Replace the random initial biases with fixed column vectors of shapes
# (5, 1), (5, 1) and (1, 1), one per non-input layer.
mlp_square_1_5_2.biases = [np.array([[-104.81690917], [-133.95385315], [-57.1567971] , [-39.12040927], [70.41873286]]), 
                       np.array([[ 13.1336255] , [-10.2443378] ,[-12.4314918] ,[-15.3197913] ,[3.43063974]]), 
                       np.array([[-24.33935921]])]

In [41]:
# mlp_square_1_5_2.train(training_data2, epochs=1000, learning_rate=0.01, batch_size=10)

In [53]:

# Run the network on every steps-large test input; feedforward returns
# (output, activations), so keep only the output.
raw_outputs2 = [
    mlp_square_1_5_2.feedforward(sample.reshape(-1, 1))[0]
    for sample in X_test_steps_large
]

# Collapse the per-sample outputs into one column, matching y_test's shape.
predictions2 = np.array(raw_outputs2).reshape(-1, 1)

# Show each prediction next to its expected value.
for predicted, expected in zip(predictions2, y_test_steps_large):
    print(predicted, expected)

# Score the predictions and report the parameters that produced them.
mse_score2 = mse(predictions2, y_test_steps_large)

mlp_square_1_5_2.print_final_weights_and_biases()
print(f"MSE Score: {mse_score2}")

[159.94517618] [160]
[-80.04583449] [-80]
[-80.04580659] [-80]
[79.98259365] [80]
[-80.04580645] [-80]
[79.98256736] [80]
[0.74794104] [0]
[-80.04580645] [-80]
[-80.04580645] [-80]
[-80.04580645] [-80]
[-80.04580645] [-80]
[-80.04580645] [-80]
[79.77995393] [80]
[159.91756192] [160]
[-0.01098724] [0]
[-0.01340697] [0]
[-80.04580645] [-80]
[79.98256736] [80]
[159.94517624] [160]
[-80.04580645] [-80]
[159.94517624] [160]
[0.27195281] [0]
[-80.04601816] [-80]
[159.94517624] [160]
[159.94517593] [160]
[-80.04580645] [-80]
[-80.04580645] [-80]
[79.98256736] [80]
[0.02055484] [0]
[79.98746911] [80]
[159.94517624] [160]
[-80.04580645] [-80]
[79.98307071] [80]
[79.98256736] [80]
[-0.01340583] [0]
[79.98256736] [80]
[-80.04580645] [-80]
[-0.01340697] [0]
[79.98256736] [80]
[159.94517623] [160]
[-80.04580645] [-80]
[79.97074252] [80]
[159.94517624] [160]
[-0.01340697] [0]
[159.94517547] [160]
[-0.01340697] [0]
[79.98256736] [80]
[-80.04580645] [-80]
[159.94517622] [160]
[79.98256736] [80]
[-80.0