# Defining MLP

In [1]:
import os
# NOTE(review): machine-specific absolute path — this cell fails on any other
# computer. The relative data paths used by the loading cells below are resolved
# against whatever directory is set here.
os.chdir('/Users/mikolajmroz/Developer/Computational_Intelligence_Methods')
print(os.getcwd())

/Users/mikolajmroz/Developer/Computational_Intelligence_Methods


In [2]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt

In [3]:
def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)), element-wise.

    The exponential is only ever evaluated on ``-|x|``, so it cannot overflow:
    large negative inputs yield 0.0 and large positive inputs yield 1.0 without
    triggering NumPy overflow warnings.

    Args:
        x: Scalar or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an array shaped like ``x``.
    """
    x = np.asarray(x)
    if not np.issubdtype(x.dtype, np.floating):
        x = x.astype(float)

    decay = np.exp(-np.abs(x))  # always in (0, 1]
    # For x >= 0 use 1 / (1 + e^-x); for x < 0 use the equivalent e^x / (1 + e^x).
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d array back into a scalar and leaves
    # higher-dimensional arrays untouched.
    return result[()]

In [4]:
def sigmoid_derivative(x):
    """Return the derivative of the sigmoid at ``x``: s(x) * (1 - s(x))."""
    activation = sigmoid(x)
    return activation * (1 - activation)

In [5]:
def mse(predictions, targets):
    """Return the mean of the squared differences between predictions and targets.

    Both arguments are combined with ordinary NumPy broadcasting, so they
    should have matching shapes.
    """
    residuals = predictions - targets
    return np.mean(np.square(residuals))

In [6]:
class MLP:
    """Fully connected feed-forward network for regression.

    Hidden layers use the sigmoid activation and the output layer is linear.
    Weight matrices are stored with shape ``(next_layer, prev_layer)`` and
    biases with shape ``(neurons, 1)``; training processes one sample at a
    time as a column vector.
    """

    def __init__(self, sizes, activation_fn='sigmoid'):
        """Create a randomly initialised network.

        Args:
            sizes: Neuron counts per layer, input layer first
                (e.g. ``[1, 6, 1]``).
            activation_fn: Accepted for interface compatibility only; it is
                not read anywhere and hidden layers always use sigmoid.
        """
        self.layer_sizes = sizes

        # Glorot/Xavier-style scaling: std = sqrt(2 / (fan_in + fan_out)).
        # (He initialization would divide by fan_in only.)  Biases start at zero.
        self.layer_weights = [
            np.random.randn(next_layer, prev_layer) * np.sqrt(2.0 / (prev_layer + next_layer))
            for prev_layer, next_layer in zip(sizes[:-1], sizes[1:])
        ]
        self.layer_biases = [np.zeros((neurons, 1)) for neurons in sizes[1:]]

    def display_weights_biases(self):
        """Print the current weights and biases of every layer."""
        print("Final Weights and Biases:")
        for layer_index, (weights, biases) in enumerate(zip(self.layer_weights, self.layer_biases)):
            print(f"Layer {layer_index + 1} Weights:\n{weights}")
            print(f"Layer {layer_index + 1} Biases:\n{biases}")

    def propagate_forward(self, input_activation):
        """Run a forward pass.

        Args:
            input_activation: Input column vector.

        Returns:
            Tuple ``(output, activations)`` where ``activations`` lists the
            input, each hidden layer's sigmoid output and the final linear
            output, in that order.
        """
        activations = [input_activation]
        for biases, weights in zip(self.layer_biases[:-1], self.layer_weights[:-1]):
            input_activation = sigmoid(np.dot(weights, input_activation) + biases)
            activations.append(input_activation)
        # The output layer is linear (no squashing) because this is regression.
        input_activation = np.dot(self.layer_weights[-1], input_activation) + self.layer_biases[-1]
        activations.append(input_activation)
        return activations[-1], activations

    def backward_propagation(self, input_val, true_val):
        """Compute the gradients of the squared error for a single sample.

        Args:
            input_val: Input column vector.
            true_val: Target; it must broadcast against the network output.

        Returns:
            Tuple ``(weight_gradients, bias_gradients)`` with one array per
            layer, shaped like the corresponding weights and biases.
        """
        weight_gradients = [np.zeros(weight.shape) for weight in self.layer_weights]
        bias_gradients = [np.zeros(bias.shape) for bias in self.layer_biases]
        final_act, acts = self.propagate_forward(input_val)

        # Output layer is linear, so its error is the cost derivative itself.
        error = self.cost_derivative(final_act, true_val)
        bias_gradients[-1] = error
        weight_gradients[-1] = np.dot(error, acts[-2].T)

        # Walk backwards through the hidden layers.
        for l in range(2, len(self.layer_sizes)):
            # acts[-l] already holds sigmoid(z) for this layer, so the sigmoid
            # derivative s * (1 - s) needs no second forward computation.
            hidden_act = acts[-l]
            sigmoid_grad = hidden_act * (1 - hidden_act)
            error = np.dot(self.layer_weights[-l + 1].T, error) * sigmoid_grad
            bias_gradients[-l] = error
            weight_gradients[-l] = np.dot(error, acts[-l - 1].T)

        return weight_gradients, bias_gradients

    def update_batch(self, batch, learn_rate, regularization, total_size,
                     optimization_method, beta, epsilon=None):
        """Apply one optimizer step using the summed gradients of ``batch``.

        Args:
            batch: Iterable of ``(input, target)`` pairs.
            learn_rate: Step size.
            regularization: L2 coefficient; weights are decayed by
                ``1 - learn_rate * regularization / total_size``.
            total_size: Number of samples in the whole training set.
            optimization_method: ``'momentum'`` or ``'rmsprop'``.
            beta: Decay factor of the running average kept by the optimizer.
            epsilon: Stabiliser added to the RMSProp denominator;
                ``None`` means ``1e-8``.

        Raises:
            ValueError: If ``optimization_method`` is not supported.
        """
        if optimization_method not in ('momentum', 'rmsprop'):
            # Previously an unknown name silently skipped the update.
            raise ValueError(
                f"Unsupported optimization_method: {optimization_method!r} "
                "(expected 'momentum' or 'rmsprop')"
            )
        if epsilon is None:
            epsilon = 1e-8

        gradient_w = [np.zeros(weight.shape) for weight in self.layer_weights]
        gradient_b = [np.zeros(bias.shape) for bias in self.layer_biases]
        for input_val, true_val in batch:
            delta_gradient_w, delta_gradient_b = self.backward_propagation(input_val, true_val)
            gradient_w = [w + dw for w, dw in zip(gradient_w, delta_gradient_w)]
            gradient_b = [b + db for b, db in zip(gradient_b, delta_gradient_b)]

        weight_decay = 1 - learn_rate * (regularization / total_size)

        if optimization_method == 'momentum':
            # Optimizer state is created lazily and kept across train() calls.
            if not hasattr(self, 'velocity_weights'):
                self.velocity_weights = [np.zeros_like(w) for w in self.layer_weights]
                self.velocity_biases = [np.zeros_like(b) for b in self.layer_biases]

            self.velocity_weights = [beta * vw + (1 - beta) * gw
                                     for vw, gw in zip(self.velocity_weights, gradient_w)]
            self.velocity_biases = [beta * vb + (1 - beta) * gb
                                    for vb, gb in zip(self.velocity_biases, gradient_b)]
            self.layer_weights = [weight_decay * w - learn_rate * vw
                                  for w, vw in zip(self.layer_weights, self.velocity_weights)]
            self.layer_biases = [b - learn_rate * vb
                                 for b, vb in zip(self.layer_biases, self.velocity_biases)]
        else:  # 'rmsprop'
            if not hasattr(self, 'squared_gradients_weights'):
                self.squared_gradients_weights = [np.zeros_like(w) for w in self.layer_weights]
                self.squared_gradients_biases = [np.zeros_like(b) for b in self.layer_biases]

            self.squared_gradients_weights = [beta * sgw + (1 - beta) * (gw**2)
                                              for sgw, gw in zip(self.squared_gradients_weights, gradient_w)]
            self.squared_gradients_biases = [beta * sgb + (1 - beta) * (gb**2)
                                             for sgb, gb in zip(self.squared_gradients_biases, gradient_b)]
            self.layer_weights = [weight_decay * w - (learn_rate / (np.sqrt(sgw) + epsilon)) * gw
                                  for w, sgw, gw in zip(self.layer_weights, self.squared_gradients_weights, gradient_w)]
            self.layer_biases = [b - (learn_rate / (np.sqrt(sgb) + epsilon)) * gb
                                 for b, sgb, gb in zip(self.layer_biases, self.squared_gradients_biases, gradient_b)]

    def _validation_mse(self, X_val_scaled, y_val, scaler_y):
        """Return the MSE between the network's predictions and ``y_val``.

        Predictions are passed through ``scaler_y.inverse_transform`` first
        when a scaler is supplied, so the error is measured in y_val's units.
        """
        predictions = np.array(
            [self.propagate_forward(x.reshape(-1, 1))[0] for x in X_val_scaled]
        ).reshape(-1, 1)
        if scaler_y is not None:
            predictions = scaler_y.inverse_transform(predictions)
        return mse(predictions, y_val)

    def train(self, training_set, epoch_count, learn_rate, batch_size_input, regularization=0.0,
              visual_interval=100, optimization_method='momentum', beta=0.9, epsilon=1e-8,
              X_val_scaled=None, y_val_scaled=None, X_val=None, y_val=None,
              scaler_X=None, scaler_y=None, mse_limit=None):
        """Train with mini-batches and return the recorded validation MSEs.

        Args:
            training_set: Sequence of ``(input, target)`` pairs. It is copied
                before shuffling, so the caller's sequence keeps its order.
            epoch_count: Maximum number of epochs.
            learn_rate: Initial learning rate; after every epoch it becomes
                ``initial / (1 + 0.01 * epoch)``.
            batch_size_input: ``int`` for a fixed batch size, ``float`` for a
                percentage of the training set.
            regularization: L2 coefficient forwarded to ``update_batch``.
            visual_interval: Validation MSE is computed and printed every
                this many epochs (and on the last epoch); must be non-zero.
            optimization_method: ``'momentum'`` or ``'rmsprop'``.
            beta: Optimizer decay factor.
            epsilon: RMSProp stabiliser.
            X_val_scaled: Scaled validation inputs. Validation is skipped
                when this or ``y_val`` is ``None``.
            y_val: Validation targets in their original (unscaled) units.
            scaler_y: Scaler whose ``inverse_transform`` maps predictions back
                to the units of ``y_val``; if ``None`` predictions are
                compared to ``y_val`` as they are.
            mse_limit: Stop early once the validation MSE drops below this.
            y_val_scaled, X_val, scaler_X: Accepted but not used.

        Returns:
            List of validation MSE values, one per evaluation (empty when no
            validation data was given).

        Raises:
            ValueError: If ``batch_size_input`` is neither int nor float.
        """
        samples = list(training_set)
        data_size = len(samples)
        rate_init = learn_rate
        mse_history = []

        if isinstance(batch_size_input, float):  # percentage of the dataset
            mini_batch_size = max(1, min(data_size, int(data_size * batch_size_input / 100)))
        elif isinstance(batch_size_input, int):  # fixed number of samples
            mini_batch_size = max(1, min(data_size, batch_size_input))
        else:
            raise ValueError("batch_size_input must be an integer (fixed size) or a float (percentage of dataset)")

        can_validate = X_val_scaled is not None and y_val is not None

        for epoch in range(epoch_count):
            np.random.shuffle(samples)
            for start in range(0, data_size, mini_batch_size):
                mini_batch = samples[start:start + mini_batch_size]
                self.update_batch(mini_batch, learn_rate, regularization, data_size,
                                  optimization_method, beta, epsilon)

            learn_rate = rate_init / (1 + 0.01 * epoch)

            is_report_epoch = can_validate and (
                epoch % visual_interval == 0 or epoch == epoch_count - 1
            )
            if is_report_epoch:
                current_mse = self._validation_mse(X_val_scaled, y_val, scaler_y)
                mse_history.append(current_mse)
                print(f'Epoch {epoch}, MSE: {current_mse}')

                if mse_limit is not None and current_mse < mse_limit:
                    break

        return mse_history

    def cost_derivative(self, output_acts, true_val):
        """Return the derivative of the squared-error cost w.r.t. the output."""
        return (output_acts - true_val)

    
    
    
    # def visualize_weights(self):
    #     # Visualize how the weights change over time during training
    #     epochs, layer_weights = zip(*self.weight_history)
    #     for layer_index, weights in enumerate(zip(*layer_weights)):
    #         plt.plot(epochs, [np.mean(np.abs(weight)) for weight in weights], label=f'Layer {layer_index + 1}')
        
    #     plt.xlabel('Epoch')
    #     plt.ylabel('Average Absolute Weight')
    #     plt.title('Weight Change Over Time')
    #     plt.legend()
    #     plt.show()


In [7]:
class DataScaler:
    """Column-wise scaler offering min-max normalisation and standardization.

    Statistics are computed along axis 0 by the ``fit_transform*`` methods and
    reused by the ``transform*`` / ``inverse_transform*`` methods. Columns with
    zero spread (constant values) are scaled with a divisor of 1, so they map
    to 0 instead of producing NaN/inf from a division by zero.
    """

    _METHODS = ("min_max", "standardization")

    def __init__(self, method="standardization"):
        """Create an unfitted scaler.

        Args:
            method: ``"min_max"`` or ``"standardization"``. The value is
                checked when a scaling method is called, not here.
        """
        self.method = method
        self.min = None
        self.max = None
        self.mean = None
        self.std = None

    def _dispatch(self, operation, data):
        """Call ``<operation>_<method>`` for the configured scaling method."""
        if self.method not in self._METHODS:
            raise ValueError("Unsupported scaling method")
        return getattr(self, f"{operation}_{self.method}")(data)

    @staticmethod
    def _nonzero(scale):
        """Return ``scale`` with exact zeros replaced by 1 (safe divisor)."""
        return np.where(scale == 0, 1.0, scale)

    @staticmethod
    def _require_fitted(*stats):
        """Raise a clear error when the needed statistics are still unset."""
        if any(stat is None for stat in stats):
            raise RuntimeError("DataScaler must be fitted with fit_transform() before use")

    def fit_transform(self, data):
        """Compute the statistics from ``data`` and return it scaled.

        Raises:
            ValueError: If the configured method is not supported.
        """
        return self._dispatch("fit_transform", data)

    def transform(self, data):
        """Scale ``data`` using previously fitted statistics.

        Raises:
            ValueError: If the configured method is not supported.
            RuntimeError: If the scaler has not been fitted.
        """
        return self._dispatch("transform", data)

    def inverse_transform(self, data):
        """Map scaled ``data`` back to the original units.

        Raises:
            ValueError: If the configured method is not supported.
            RuntimeError: If the scaler has not been fitted.
        """
        return self._dispatch("inverse_transform", data)

    def fit_transform_min_max(self, data):
        """Fit per-column min/max on ``data`` and return it scaled to [0, 1]."""
        self.min = np.min(data, axis=0)
        self.max = np.max(data, axis=0)
        return self.transform_min_max(data)

    def transform_min_max(self, data):
        """Return ``(data - min) / (max - min)`` with the fitted min/max."""
        self._require_fitted(self.min, self.max)
        return (data - self.min) / self._nonzero(self.max - self.min)

    def inverse_transform_min_max(self, data):
        """Undo ``transform_min_max``."""
        self._require_fitted(self.min, self.max)
        return data * self._nonzero(self.max - self.min) + self.min

    def fit_transform_standardization(self, data):
        """Fit per-column mean/std on ``data`` and return it standardized."""
        self.mean = np.mean(data, axis=0)
        self.std = np.std(data, axis=0)
        return self.transform_standardization(data)

    def transform_standardization(self, data):
        """Return ``(data - mean) / std`` with the fitted mean/std."""
        self._require_fitted(self.mean, self.std)
        return (data - self.mean) / self._nonzero(self.std)

    def inverse_transform_standardization(self, data):
        """Undo ``transform_standardization``."""
        self._require_fitted(self.mean, self.std)
        return data * self._nonzero(self.std) + self.mean

In [8]:
def plot_mse(mse_history):
    """Draw the recorded MSE values as a line chart and show the figure.

    The values are plotted against their position in ``mse_history``.
    """
    plt.plot(mse_history)
    # Axis labels first, then the title; the calls are independent of each other.
    plt.xlabel('Epoch')
    plt.ylabel('Mean Squared Error')
    plt.title('MSE Over Epochs')
    plt.show()

# Loading data

In [9]:
# Load the square-large training and test splits (paths are relative to the
# current working directory).
df_train_square_large = pd.read_csv('./data/regression/square-large-training.csv')
df_test_square_large = pd.read_csv('./data/regression/square-large-test.csv')

In [10]:
# Load the steps-large training and test splits.
df_train_steps_large = pd.read_csv('./data/regression/steps-large-training.csv')
df_test_steps_large = pd.read_csv('./data/regression/steps-large-test.csv')

In [11]:
# Load the multimodal-large training and test splits.
df_train_multimodal_large = pd.read_csv('./data/regression/multimodal-large-training.csv')
df_test_multimodal_large = pd.read_csv('./data/regression/multimodal-large-test.csv')

In [12]:
# Pull the 'x' and 'y' columns out as (n_samples, 1) column arrays.
X_train_square_large = df_train_square_large['x'].values.reshape(-1, 1)
y_train_square_large = df_train_square_large['y'].values.reshape(-1, 1)

In [13]:
# Same extraction for the square-large test split.
X_test_square_large = df_test_square_large['x'].values.reshape(-1, 1)
y_test_square_large = df_test_square_large['y'].values.reshape(-1, 1)

In [14]:
# steps-large training split as (n_samples, 1) column arrays.
X_train_steps_large = df_train_steps_large['x'].values.reshape(-1, 1)
y_train_steps_large = df_train_steps_large['y'].values.reshape(-1, 1)

In [15]:
# steps-large test split as (n_samples, 1) column arrays.
X_test_steps_large = df_test_steps_large['x'].values.reshape(-1, 1)
y_test_steps_large = df_test_steps_large['y'].values.reshape(-1, 1)

In [16]:
# multimodal-large training split as (n_samples, 1) column arrays.
X_train_multimodal_large = df_train_multimodal_large['x'].values.reshape(-1, 1)
y_train_multimodal_large = df_train_multimodal_large['y'].values.reshape(-1, 1)

In [17]:
# multimodal-large test split as (n_samples, 1) column arrays.
X_test_multimodal_large = df_test_multimodal_large['x'].values.reshape(-1, 1)
y_test_multimodal_large = df_test_multimodal_large['y'].values.reshape(-1, 1)

### square-large dataset

In [40]:
# Separate scalers for inputs and targets; scaler_y is later used to map the
# network's outputs back to the original y units.
scaler_X = DataScaler("standardization")
scaler_y = DataScaler("standardization")

In [41]:
# Fit the scalers on the training split only and scale it
X_train_scaled = scaler_X.fit_transform(X_train_square_large)
y_train_scaled = scaler_y.fit_transform(y_train_square_large)


In [42]:
# Scale the test data with the statistics fitted on the training split
# (transform only, no re-fitting)
X_test_scaled = scaler_X.transform(X_test_square_large)
y_test_scaled = scaler_y.transform(y_test_square_large)

In [43]:
# NOTE(review): built from the unscaled arrays and never read in the visible
# notebook — training below uses training_data_scaled. Likely removable.
training_data = [(x.reshape(-1, 1), y) for x, y in zip(X_train_square_large, y_train_square_large)]

In [44]:
# 1 input, one hidden layer of 6 neurons, 1 output.
# NOTE(review): the variable name suggests 5 hidden neurons — confirm which is intended.
mlp_square_1_5 = MLP([1, 6, 1] )

In [45]:
# Pair every scaled input (as a column vector) with its scaled target — the
# (input, target) format MLP.train iterates over.
training_data_scaled = [
    (x.reshape(-1, 1), y) for x, y in zip(X_train_scaled, y_train_scaled)
]

In [46]:
# RMSProp training with mini-batches of 10; stops early once the MSE measured in
# original y units drops below mse_limit.
# NOTE(review): the test split is passed as the validation data, so the early-stopping
# criterion is tuned on the same data that is later reported as the test score.
mlp_square_1_5.train(
    training_data_scaled, epoch_count=100000, learn_rate=0.01, batch_size_input=10,
    optimization_method='rmsprop', beta=0.9, epsilon=1e-8, 
    X_val_scaled = X_test_scaled, y_val_scaled = y_test_scaled, X_val = X_test_square_large, y_val = y_test_square_large, scaler_y = scaler_y,
    mse_limit = 1
)

Epoch 0, MSE: 12511.82069943605
Epoch 100, MSE: 64.05912080639389
Epoch 200, MSE: 30.359719250086457
Epoch 300, MSE: 23.630608640812977
Epoch 400, MSE: 22.96054721148793
Epoch 500, MSE: 21.22253257778965
Epoch 600, MSE: 20.458765413373627
Epoch 700, MSE: 18.809284532618598
Epoch 800, MSE: 18.539251565602783
Epoch 900, MSE: 17.614731405595453
Epoch 1000, MSE: 17.01039762381977
Epoch 1100, MSE: 15.78937238871312
Epoch 1200, MSE: 14.596001264539261
Epoch 1300, MSE: 14.015458841216372
Epoch 1400, MSE: 13.22890993818409
Epoch 1500, MSE: 12.455293443780022
Epoch 1600, MSE: 12.875151384616304
Epoch 1700, MSE: 12.29359554837417
Epoch 1800, MSE: 11.974822347964881
Epoch 1900, MSE: 11.39754511433841
Epoch 2000, MSE: 10.721484318950308
Epoch 2100, MSE: 11.524914676225066
Epoch 2200, MSE: 10.240878895108183
Epoch 2300, MSE: 10.448946973301052
Epoch 2400, MSE: 9.811744997917879
Epoch 2500, MSE: 9.370001441148963
Epoch 2600, MSE: 9.804349443226222
Epoch 2700, MSE: 9.725980403376052
Epoch 2800, MSE: 

In [47]:
# Second call on the same instance: training continues from the current weights
# (and the RMSProp running averages kept on the object), while the learning-rate
# schedule restarts from learn_rate.
mlp_square_1_5.train(
    training_data_scaled, epoch_count=100000, learn_rate=0.01, batch_size_input=10,
    optimization_method='rmsprop', beta=0.9, epsilon=1e-8, 
    X_val_scaled = X_test_scaled, y_val_scaled = y_test_scaled, X_val = X_test_square_large, y_val = y_test_square_large, scaler_y = scaler_y,
    mse_limit = 1
)

Epoch 0, MSE: 44.67832051624681
Epoch 100, MSE: 0.7565299005584123


#### testing

In [48]:
# Generate predictions on the scaled test data, one sample at a time;
# propagate_forward returns (output, activations), so [0] keeps only the output
predictions_scaled = np.array(
    [mlp_square_1_5.propagate_forward(x.reshape(-1, 1))[0] for x in X_test_scaled]
)

In [49]:
# Map the network's scaled outputs back to the original target units
predictions = scaler_y.inverse_transform(predictions_scaled.reshape(-1, 1))

# Show every prediction next to its ground-truth value, then compute the MSE
for i, predicted in enumerate(predictions):
    print("predicted value: ",  predicted, "actual value: ", y_test_square_large[i])
mse_score = mse(predictions, y_test_square_large)

print(f"MSE Score: {mse_score}")

predicted value:  [38.49427634] actual value:  [38.0481491]
predicted value:  [-115.72082742] actual value:  [-116.30927998]
predicted value:  [-22.53286383] actual value:  [-22.98888333]
predicted value:  [164.10941715] actual value:  [163.49243459]
predicted value:  [-50.29650107] actual value:  [-50.79309017]
predicted value:  [26.6054464] actual value:  [26.14905307]
predicted value:  [-95.72393709] actual value:  [-95.99910634]
predicted value:  [-1.2115722] actual value:  [-1.67979449]
predicted value:  [12.47332111] actual value:  [11.84130953]
predicted value:  [-36.14264237] actual value:  [-36.5786339]
predicted value:  [-49.44451707] actual value:  [-49.85187483]
predicted value:  [-38.27769511] actual value:  [-38.70966897]
predicted value:  [-89.02209665] actual value:  [-89.65420844]
predicted value:  [151.78750905] actual value:  [151.89903669]
predicted value:  [-85.89259297] actual value:  [-86.1941224]
predicted value:  [171.59388175] actual value:  [172.6273849]
pred

### multimodal-large dataset

In [58]:
# Fresh scalers for the multimodal dataset, this time using min-max scaling
# (these rebind scaler_X / scaler_y from the previous section)
scaler_X = DataScaler(method="min_max")
scaler_y = DataScaler(method="min_max")

In [59]:
# Fit the scalers on the training split only and scale it
X_train_scaled = scaler_X.fit_transform(X_train_multimodal_large)
y_train_scaled = scaler_y.fit_transform(y_train_multimodal_large)


In [60]:
# Scale the test data with the min/max fitted on the training split
# (transform only, no re-fitting)
X_test_scaled = scaler_X.transform(X_test_multimodal_large)
y_test_scaled = scaler_y.transform(y_test_multimodal_large)

In [61]:
# NOTE(review): built from the unscaled arrays and never read in the visible
# notebook — training below uses training_data_scaled. Likely removable.
training_data = [(x.reshape(-1, 1), y) for x, y in zip(X_train_multimodal_large, y_train_multimodal_large)]

In [62]:
# 1 input, hidden layers of 64, 32 and 32 neurons, 1 output.
# NOTE(review): the name mlp_square_1_5 is reused here for the multimodal model.
mlp_square_1_5 = MLP([1, 64, 32, 32, 1] )

In [63]:
# Pair every scaled input (as a column vector) with its scaled target
training_data_scaled = [
    (x.reshape(-1, 1), y) for x, y in zip(X_train_scaled, y_train_scaled)
]

In [64]:
# RMSProp training; MSE (in original y units) is evaluated every 10 epochs and
# training stops once it drops below 9.
# NOTE(review): the test split doubles as the validation data for early stopping.
mlp_square_1_5.train(
    training_data_scaled, epoch_count=100000, learn_rate=0.01, batch_size_input=10,
    optimization_method='rmsprop', beta=0.9, epsilon=1e-8, 
    X_val_scaled = X_test_scaled, y_val_scaled = y_test_scaled, X_val = X_test_multimodal_large, y_val = y_test_multimodal_large, scaler_y = scaler_y,
    mse_limit = 9, visual_interval = 10
)

Epoch 0, MSE: 2571.1971656482024
Epoch 10, MSE: 348.41584548405484
Epoch 20, MSE: 263.2458292892844
Epoch 30, MSE: 409.9253375023777
Epoch 40, MSE: 38.93305934514824
Epoch 50, MSE: 16.587640718544957
Epoch 60, MSE: 28.80631621035437
Epoch 70, MSE: 96.12693360612931
Epoch 80, MSE: 53.86544386635945
Epoch 90, MSE: 10.879003919396458
Epoch 100, MSE: 13.706533099237326
Epoch 110, MSE: 32.58020158254048
Epoch 120, MSE: 12.455829040130626
Epoch 130, MSE: 14.884080465540016
Epoch 140, MSE: 9.285608209270947
Epoch 150, MSE: 10.671187652705187
Epoch 160, MSE: 20.159966162393573
Epoch 170, MSE: 5.853932407854354


#### testing

In [65]:
# Generate predictions on the scaled test data, one sample at a time;
# [0] keeps only the network output from propagate_forward's (output, activations)
predictions_scaled = np.array(
    [mlp_square_1_5.propagate_forward(x.reshape(-1, 1))[0] for x in X_test_scaled]
)

In [67]:
# Map the network's scaled outputs back to the original target units
predictions = scaler_y.inverse_transform(predictions_scaled.reshape(-1, 1))

# Show every prediction next to its ground-truth value, then compute the MSE
for i, predicted in enumerate(predictions):
    print("predicted value: ",  predicted, "actual value: ", y_test_multimodal_large[i])
mse_score = mse(predictions, y_test_multimodal_large)

print(f"MSE Score: {mse_score}")

predicted value:  [-97.52083727] actual value:  [-98.2081664]
predicted value:  [-54.25551153] actual value:  [-55.28389066]
predicted value:  [99.65196547] actual value:  [100.29999676]
predicted value:  [4.09228165] actual value:  [2.7206294]
predicted value:  [-73.86446255] actual value:  [-75.99163605]
predicted value:  [37.69087725] actual value:  [36.4875116]
predicted value:  [28.26103944] actual value:  [32.9071937]
predicted value:  [51.89040415] actual value:  [54.59378752]
predicted value:  [69.45158391] actual value:  [75.88595471]
predicted value:  [16.72811997] actual value:  [15.93020098]
predicted value:  [32.52955311] actual value:  [36.41048972]
predicted value:  [100.20211309] actual value:  [100.17958374]
predicted value:  [-95.59186227] actual value:  [-96.19000955]
predicted value:  [99.29064866] actual value:  [100.29859577]
predicted value:  [-63.8971626] actual value:  [-64.55778832]
predicted value:  [69.35791887] actual value:  [73.08482928]
predicted value: 

### steps-large dataset

In [101]:
# Fresh standardization scalers for the steps-large dataset
# (these rebind scaler_X / scaler_y from the previous section)
scaler_X = DataScaler(method="standardization")
scaler_y = DataScaler(method="standardization")


In [102]:
# Fit the scalers on the training split only and scale it
X_train_scaled = scaler_X.fit_transform(X_train_steps_large)
y_train_scaled = scaler_y.fit_transform(y_train_steps_large)


In [103]:
# Scale the test data with the mean/std fitted on the training split
# (transform only, no re-fitting)
X_test_scaled = scaler_X.transform(X_test_steps_large)
y_test_scaled = scaler_y.transform(y_test_steps_large)

In [104]:
# NOTE(review): built from the unscaled arrays and never read in the visible
# notebook — training below uses training_data_scaled. Likely removable.
training_data = [(x.reshape(-1, 1), y) for x, y in zip(X_train_steps_large, y_train_steps_large)]

In [105]:
# 1 input, two hidden layers of 64 neurons, 1 output.
# NOTE(review): the name mlp_square_1_5 is reused here for the steps model.
mlp_square_1_5 = MLP([1, 64, 64, 1] )

In [106]:
# Pair every scaled input (as a column vector) with its scaled target
training_data_scaled = [
    (x.reshape(-1, 1), y) for x, y in zip(X_train_scaled, y_train_scaled)
]

In [107]:
# RMSProp training; MSE (in original y units) is evaluated every 10 epochs and
# training stops once it drops below 3.
# NOTE(review): the test split doubles as the validation data for early stopping.
mlp_square_1_5.train(
    training_data_scaled, epoch_count=100000, learn_rate=0.01, batch_size_input=10,
    optimization_method='rmsprop', beta=0.9, epsilon=1e-8, 
    X_val_scaled = X_test_scaled, y_val_scaled = y_test_scaled, X_val = X_test_steps_large, y_val = y_test_steps_large, scaler_y = scaler_y,
    mse_limit = 3, visual_interval = 10
)

Epoch 0, MSE: 389.38104398917005
Epoch 10, MSE: 131.4631568409038
Epoch 20, MSE: 78.73668777124136
Epoch 30, MSE: 59.69330932649057
Epoch 40, MSE: 49.47363285603985
Epoch 50, MSE: 26.439567845801832
Epoch 60, MSE: 100.19287454377563
Epoch 70, MSE: 79.30133793804217
Epoch 80, MSE: 70.41169262209822
Epoch 90, MSE: 72.40865362621943
Epoch 100, MSE: 31.768227223722842
Epoch 110, MSE: 49.00744649140912
Epoch 120, MSE: 45.408671237979505
Epoch 130, MSE: 30.925341013682797
Epoch 140, MSE: 24.315648547748186
Epoch 150, MSE: 27.542804988015313
Epoch 160, MSE: 21.066193000285235
Epoch 170, MSE: 18.742684019331236
Epoch 180, MSE: 39.33370989471286
Epoch 190, MSE: 44.825338790023665
Epoch 200, MSE: 17.257452079863306
Epoch 210, MSE: 31.82506431917291
Epoch 220, MSE: 19.090811219310417
Epoch 230, MSE: 47.53458422379407
Epoch 240, MSE: 26.881143084053992
Epoch 250, MSE: 25.191847693130534
Epoch 260, MSE: 19.264624280995346
Epoch 270, MSE: 14.402354790996998
Epoch 280, MSE: 15.67970375371662
Epoch 29

KeyboardInterrupt: 

#### testing

In [65]:
# Generate predictions on the scaled test data, one sample at a time;
# [0] keeps only the network output from propagate_forward's (output, activations)
predictions_scaled = np.array(
    [mlp_square_1_5.propagate_forward(x.reshape(-1, 1))[0] for x in X_test_scaled]
)

In [67]:
# Map the network's scaled outputs back to the original target units
predictions = scaler_y.inverse_transform(predictions_scaled.reshape(-1, 1))

# Show every prediction next to its ground-truth value, then compute the MSE
for i, predicted in enumerate(predictions):
    print("predicted value: ",  predicted, "actual value: ", y_test_steps_large[i])
mse_score = mse(predictions, y_test_steps_large)

print(f"MSE Score: {mse_score}")

predicted value:  [-97.52083727] actual value:  [-98.2081664]
predicted value:  [-54.25551153] actual value:  [-55.28389066]
predicted value:  [99.65196547] actual value:  [100.29999676]
predicted value:  [4.09228165] actual value:  [2.7206294]
predicted value:  [-73.86446255] actual value:  [-75.99163605]
predicted value:  [37.69087725] actual value:  [36.4875116]
predicted value:  [28.26103944] actual value:  [32.9071937]
predicted value:  [51.89040415] actual value:  [54.59378752]
predicted value:  [69.45158391] actual value:  [75.88595471]
predicted value:  [16.72811997] actual value:  [15.93020098]
predicted value:  [32.52955311] actual value:  [36.41048972]
predicted value:  [100.20211309] actual value:  [100.17958374]
predicted value:  [-95.59186227] actual value:  [-96.19000955]
predicted value:  [99.29064866] actual value:  [100.29859577]
predicted value:  [-63.8971626] actual value:  [-64.55778832]
predicted value:  [69.35791887] actual value:  [73.08482928]
predicted value: 

In [574]:
# Reload the steps-large test split.
# NOTE(review): the arrays below are named *_small although they are read from
# steps-large-test.csv — confirm which dataset this section is meant to use.
df_test_steps_large = pd.read_csv("data/regression/steps-large-test.csv")

X_test_steps_small = df_test_steps_large["x"].values.reshape(-1, 1)
y_test_steps_small = df_test_steps_large["y"].values.reshape(-1, 1)

In [575]:
# 1 input, two hidden layers of 5 neurons, 1 output
mlp_steps = MLP([1, 5, 5, 1])

[array([[ 0.22499592],
       [ 0.66393297],
       [ 0.81579735],
       [-0.93381363],
       [-1.56887485]]), array([[ 0.48408992, -0.90893581,  0.74451279,  0.26491836, -0.60762049],
       [ 0.21036234,  0.76803741,  0.00444413,  0.28424456, -0.40550138],
       [ 0.40071939, -0.39049337,  0.0533959 ,  0.28357303, -0.51281637],
       [-0.09733522,  0.20233363, -0.15237367, -0.91242117, -0.4788253 ],
       [ 0.66472232,  0.17079079,  0.40099151,  0.12026128,  0.05577142]]), array([[ 0.33198014, -0.44144792, -0.19640651, -0.21308176,  0.46809915]])]


In [576]:
# Fit and transform the training data
# NOTE(review): X_train_steps_small / y_train_steps_small are not defined anywhere in
# the visible notebook. Also, fit_transform re-fits the shared scaler_X / scaler_y,
# replacing the statistics fitted in the steps-large section above.
X_train_scaled2 = scaler_X.fit_transform(X_train_steps_small)
y_train_scaled2 = scaler_y.fit_transform(y_train_steps_small)


In [577]:
# Pair every scaled input (as a column vector) with its scaled target
training_data_scaled2 = [
    (x.reshape(-1, 1), y) for x, y in zip(X_train_scaled2, y_train_scaled2)
]

In [578]:
# NOTE(review): epochs / learning_rate / batch_size are not parameters of MLP.train
# as defined in this notebook (it takes epoch_count, learn_rate, batch_size_input),
# so this call raises TypeError against the current class. The cell appears to
# predate a rename — confirm the intended arguments before re-running.
mlp_steps.train(
    training_data_scaled2, epochs=5000, learning_rate=0.1, batch_size=10
)

#### testing

In [579]:
# Scale the test data with the statistics currently held by scaler_X
# (transform only, no re-fitting)
X_test_scaled2 = scaler_X.transform(X_test_steps_small)

In [580]:
# Generate predictions on the scaled test data.
# MLP exposes its forward pass as propagate_forward (there is no feedforward
# method); it returns (output, activations), so [0] keeps only the output.
predictions_scaled2 = np.array(
    [mlp_steps.propagate_forward(x.reshape(-1, 1))[0] for x in X_test_scaled2]
)

In [581]:
# Map the network's scaled outputs back to the original target units
predictions2 = scaler_y.inverse_transform(predictions_scaled2.reshape(-1, 1))

# Show every prediction next to its ground-truth value, then compute the MSE
for i, predicted in enumerate(predictions2):
    print("predicted value: ",  predicted, "actual value: ", y_test_steps_small[i])
mse_score2 = mse(predictions2, y_test_steps_small)

print(f"MSE Score: {mse_score2}")

predicted value:  [-75.68118152] actual value:  [-80]
predicted value:  [92.72419888] actual value:  [80]
predicted value:  [-88.27325684] actual value:  [-80]
predicted value:  [-70.89335599] actual value:  [-80]
predicted value:  [-78.9464397] actual value:  [-80]
predicted value:  [18.76146075] actual value:  [0]
predicted value:  [21.79168549] actual value:  [0]
predicted value:  [-87.52498457] actual value:  [-80]
predicted value:  [-74.5456806] actual value:  [-80]
predicted value:  [25.60761617] actual value:  [0]
predicted value:  [93.10415354] actual value:  [80]
predicted value:  [62.59932508] actual value:  [80]
predicted value:  [96.49685706] actual value:  [80]
predicted value:  [-75.53582894] actual value:  [-80]
predicted value:  [46.21620138] actual value:  [80]
predicted value:  [78.11865653] actual value:  [80]
predicted value:  [-18.89331856] actual value:  [0]
predicted value:  [146.8935984] actual value:  [160]
predicted value:  [-81.84228729] actual value:  [-80]


In [524]:
# no optimal MSE was reached :((((