In [121]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.preprocessing import MinMaxScaler

In [122]:
class NeuralNetwork:
    def __init__(self, layers, activation, weights_initialize=None):
        """
        :param layers: A list specifying the number of neurons in each layer.
        :param activation: The activation function to use in the hidden layers.
        :param weights_initialize: Type of weight initialization (e.g., He, Xavier). If None -> random from uniform distribuiton U([0,1])
        """

        self.layers = layers
        self.weights = []
        self.bias = []
   
        for i in range(len(layers)-1):
            if weights_initialize == "Xavier":
                std = np.sqrt(2 / (layers[i] + layers[i+1]))
                self.weights.append(np.random.randn(layers[i], layers[i+1]) * std)
            elif weights_initialize == "He":
                std = np.sqrt(2 / layers[i])
                self.weights.append(np.random.randn(layers[i], layers[i+1]) * std)
            else:
                self.weights.append(np.random.uniform(0, 1, size=(layers[i], layers[i+1])))

        self.bias = [np.random.uniform(-0.5, 0.5, size=(layers[i+1],)) for i in range(len(layers)-1)]

        activation_functions = {
            "sigmoid": self.sigmoid,
            "tanh": self.tanh,
           
        }

        activation_functions_derivatives = {
            "sigmoid": self.sigmoid_derivative,
            "tanh": self.tanh_derivative,

        }

        self.activation_function = activation_functions.get(activation)
        self.activation_function_derivative=activation_functions_derivatives.get(activation)

    #activation functions
    def sigmoid(self, x): return 1 / (1 + np.exp(-x))
    def sigmoid_derivative(self, x): return x * (1 - x)
    
    def tanh(self, x): return np.tanh(x)
    def tanh_derivative(self, x): return 1-x**2
    
   
    
    def forward(self, X):
        self.a = [X]
        self.z = []
        for i in range(len(self.weights)-1):
            z = np.dot(self.a[-1], self.weights[i]) + self.bias[i]
            self.z.append(z)
            a = self.activation_function(z)
            self.a.append(a)

        z = np.dot(self.a[-1], self.weights[-1]) + self.bias[-1]
        self.z.append(z)
        self.a.append(z)
        return self.a[-1]

    def backpropagate(self, X, y, learning_rate):
        m = X.shape[0]
        delta = (self.forward(X) - y)/m 
        
        for i in reversed(range(len(self.weights))):
            delta_weights = np.dot(self.a[i].T, delta)
            delta_bias = np.sum(delta, axis=0)
            
            self.weights[i] -= learning_rate * delta_weights
            self.bias[i] -= learning_rate * delta_bias
            if i > 0:
                delta = np.dot(delta, self.weights[i].T) * self.activation_function_derivative(self.a[i])


    def train(self, X_train, y_train, learning_rate, epochs, batch_size=32):
        train_losses = []
       
        for epoch in range(epochs):
            if batch_size is None: 
                self.forward(X_train)
                self.backpropagate(X_train, y_train, learning_rate)
            else:
                permutation = np.random.permutation(X_train.shape[0]) #mini-batch
                for i in range(0, X_train.shape[0], batch_size):
                    indices = permutation[i:i+batch_size]
                    X_batch = X_train[indices]
                    y_batch = y_train[indices]
                    self.forward(X_batch)
                    self.backpropagate(X_batch, y_batch, learning_rate)
            

            train_loss = self.MSE(X_train, y_train)
            train_losses.append(train_loss)
   

        
            if epoch % 100 == 0:
                print(f"Epoch {epoch}, Training Loss: {train_loss:.6f}")
        

    def predict(self, X):
        return self.forward(X)
    
    def MSE(self, X, Y):
        return np.mean((self.predict(X) - Y) ** 2)
    
    

In [123]:
#Load data
train_df = pd.read_csv('data/square-simple-training.csv')
test_df = pd.read_csv('data/square-simple-test.csv')

X_train, Y_train=train_df['x'].to_numpy().reshape(-1,1), train_df['y'].to_numpy().reshape(-1,1)
X_test, Y_test=test_df['x'].to_numpy().reshape(-1,1), test_df['y'].to_numpy().reshape(-1,1)



In [124]:
#  normalize data
x_scaler = MinMaxScaler()
y_scaler = MinMaxScaler()

X_train_normalized = x_scaler.fit_transform(X_train.reshape(-1, 1))
Y_train_normalized = y_scaler.fit_transform(Y_train.reshape(-1, 1))

X_test_normalized = x_scaler.transform(X_test.reshape(-1,1))
Y_test_normalized = y_scaler.transform(Y_test.reshape(-1,1))

In [125]:
nn = NeuralNetwork(layers=[1,5,5,1], activation='sigmoid', weights_initialize='Xavier')
nn.train(X_train_normalized, Y_train_normalized, learning_rate=0.2, epochs=10000)

Epoch 0, Training Loss: 0.093483
Epoch 100, Training Loss: 0.091561
Epoch 200, Training Loss: 0.086206
Epoch 300, Training Loss: 0.128454
Epoch 400, Training Loss: 0.079391
Epoch 500, Training Loss: 0.084157
Epoch 600, Training Loss: 0.090482
Epoch 700, Training Loss: 0.077022
Epoch 800, Training Loss: 0.092402
Epoch 900, Training Loss: 0.072608
Epoch 1000, Training Loss: 0.069576
Epoch 1100, Training Loss: 0.067901
Epoch 1200, Training Loss: 0.079148
Epoch 1300, Training Loss: 0.066874
Epoch 1400, Training Loss: 0.065604
Epoch 1500, Training Loss: 0.065881
Epoch 1600, Training Loss: 0.064656
Epoch 1700, Training Loss: 0.063570
Epoch 1800, Training Loss: 0.070645
Epoch 1900, Training Loss: 0.064815
Epoch 2000, Training Loss: 0.074552
Epoch 2100, Training Loss: 0.063679
Epoch 2200, Training Loss: 0.063415
Epoch 2300, Training Loss: 0.077514
Epoch 2400, Training Loss: 0.077848
Epoch 2500, Training Loss: 0.067930
Epoch 2600, Training Loss: 0.052179
Epoch 2700, Training Loss: 0.063155
Epoc

In [126]:
Y_pred_denorm=y_scaler.inverse_transform(nn.predict(X_test_normalized))
mse = np.mean((Y_pred_denorm - Y_test) ** 2)
print(f'Mean Squared Error (MSE): {mse}')

Mean Squared Error (MSE): 4.569081047598956
