# From a perceptron to a simple Feedforward neural network (FFNN)

Check the following code. Play with the hyperparameters (parameters manually set before training).

# 1- Perceptron

Can it learn the function x = x + 1?

In [1]:
# Load numpy library for efficient handle of vector operations
import numpy as np
np.random.seed(0) # check why we (sometimes) do this

In [2]:
# Define the training data
X_train = np.array([0, 1, 4]) # x (input 0, 1, 4)
Y_train = np.array([1, 2, 5]) # x + 1 (shoul return 1, 2 and 5)

# Initialize weights and bias
w = 0.5
b = 0.5

# Set the learning rate
learning_rate = 0.1

# Number of epochs
epochs = 10

In [3]:
# Training loop
def training_loop(X_train, Y_train, epochs, learning_rate, w, b):
 for epoch in range(epochs):
    # zip: combines two sequences into pairs
    for x, y in zip(X_train, Y_train):
        # Forward pass
        y_pred = w * x + b

        # Compute error
        error = y - y_pred

        # Update weights and bias
        w += learning_rate * error * x
        b += learning_rate * error

 return w, b

In [4]:
# Testing
w, b = training_loop(X_train, Y_train, epochs, learning_rate, w, b)
x_test = 15
y_pred_test = w * x_test + b

print("Predicted output for x =", x_test, "is:", y_pred_test)

Predicted output for x = 15 is: 16.08576748219717


In [5]:
# If you want to play with a larger dataset
X_train = np.random.rand(100,1)
Y_train = [x[0]+1 for x in X_train]


In [6]:
# Second Training
w, b = training_loop(X_train, Y_train, epochs, learning_rate, w, b)

In [7]:
# Test
x_test = 15
y_pred_test = w * x_test + b

print("Predicted output for x =", x_test, "is:", y_pred_test)

Predicted output for x = 15 is: [16.00076173]


## 2- Simple Feedforward Neural Network

Can it learn the function x + y?

In [19]:
class SimpleFFNN:
    def __init__(self, layer_sizes):
        # layer_sizes example: [2, 5, 3, 1] where 2 is input size, 5 and 3 are hidden sizes, and 1 is output size
        self.layer_sizes = layer_sizes
        self.weights = []
        self.biases = []
        self.activations = []

        # Initialize weights and biases for each layer
        for i in range(len(layer_sizes) - 1):
            self.weights.append(np.random.randn(layer_sizes[i], layer_sizes[i + 1]))
            self.biases.append(np.random.randn(layer_sizes[i + 1]))
        print(self.weights)

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def sigmoid_derivative(self, x):
        return x * (1 - x)

    def forward(self, x):
        self.activations = [x]
        # Compute activations for each layer
        for i in range(len(self.weights)):
            if i == len(self.weights) - 1:  # Last layer (output)
                x = np.dot(x, self.weights[i]) + self.biases[i]  # No sigmoid
            else:  # Hidden layers
                x = self.sigmoid(np.dot(x, self.weights[i]) + self.biases[i])
            self.activations.append(x)
        return x

    def train(self, x, y, epochs, learning_rate):
        for epoch in range(epochs):
            for i in range(x.shape[0]):
                # Forward pass
                output = self.forward(x[i])

                # Calculate error
                errors = [y[i] - output]

                # Calculate errors for each layer
                for l in range(len(self.weights) - 1, 0, -1):
                    error = errors[0].dot(self.weights[l].T)
                    errors.insert(0, error)

                # Update weights and biases from output to input layer
                for j in range(len(self.weights)):
                    if j == len(self.weights) - 1:  # Last layer (output)
                        d_activations = errors[j]
                    else:  # Hidden layers
                        d_activations = errors[j] * self.sigmoid_derivative(self.activations[j + 1])
                    self.weights[j] += np.outer(self.activations[j], d_activations) * learning_rate
                    self.biases[j] += d_activations * learning_rate

            if epoch % 20 == 0:
                loss = np.mean(np.square(y - self.forward(x)))
                print(f'Epoch {epoch}, Loss {loss}')


## 2.1 Data and hyperparameters


In [20]:
# Data generation
np.random.seed(0)
x = np.random.rand(5000, 2)
y = np.sum(x, axis=1).reshape(-1, 1)

# Hyperparameters
epochs = 100
learning_rate = 0.01

## 2.2 One hidden layer

In [13]:
# Initialize and train the network
layer_sizes = [2, 5, 1] # input, hidden and output sizes
model = SimpleFFNN(layer_sizes)
model.train(x, y, epochs, learning_rate)

[array([[-0.72582032,  0.56347552, -0.43563209, -0.10455255, -2.32127283],
       [-0.21409437,  0.54340543, -0.5374541 , -0.38027534,  0.26547293]]), array([[9.15340020e-01],
       [7.68111362e-01],
       [1.01634801e-01],
       [8.09442102e-01],
       [4.55643468e-04]])]
Epoch 0, Loss 0.03137546109500572
Epoch 20, Loss 0.0003176648484288051
Epoch 40, Loss 0.0001492236641122185
Epoch 60, Loss 8.928305402009988e-05
Epoch 80, Loss 6.193299576284078e-05


In [16]:
# Test the model
test_input = np.array([[0.1, 0.2]])
predicted_sum = model.forward(test_input)
print(f"Test Input: {test_input}, Predicted Sum: {predicted_sum}")

Test Input: [[0.1 0.2]], Predicted Sum: [[0.30252436]]


## 2.3 More than one hidden layer

In [17]:
layer_sizes = [2, 5, 3, 1]  # Input layer, two hidden layers, output layer
model = SimpleFFNN(layer_sizes)
model.train(x, y, epochs=100, learning_rate=0.01)

[array([[-0.51881837, -0.78472602, -1.00429289, -0.07074956, -1.53699731],
       [-0.88115275, -0.90419757,  0.70402685,  1.19906028,  1.83767085]]), array([[ 0.39754008,  1.74940884,  1.72387824],
       [-1.32819827,  0.1908318 , -0.85504792],
       [ 0.14361984, -0.33082967,  0.13471032],
       [-0.59819425, -0.90361563,  0.84485094],
       [-0.59842614, -0.39716271, -0.19786197]]), array([[-0.56391728],
       [ 0.04603699],
       [-1.90700442]])]
Epoch 0, Loss 0.02261552875223575
Epoch 20, Loss 0.0007230334075234665
Epoch 40, Loss 0.0003807215902071204
Epoch 60, Loss 0.00022640420236578348
Epoch 80, Loss 0.00013607068607258846


In [18]:
# Testing
test_input = np.array([[0.1, 0.2]])
predicted_sum = model.forward(test_input)
print(f"Test Input: {test_input}, Predicted Sum: {predicted_sum}")


Test Input: [[0.1 0.2]], Predicted Sum: [[0.30206861]]
