# **FULLY CONNECTED NET FROM SCRATCH (MLP)**

This program implements a fully connected network (multilayer perceptron) using only elementary building blocks: the model itself is written with NumPy, with no deep-learning framework such as PyTorch.
The dataset is MNIST.
Last modification: 11/08/2025

In [None]:
#Open the dataset
import numpy as np

from tensorflow.keras.datasets import mnist
# Load the MNIST train/test splits through the Keras dataset helper.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Flatten every sample into a single row vector and divide by 255.0.
x_train = x_train.reshape(len(x_train), -1) / 255.0
x_test = x_test.reshape(len(x_test), -1) / 255.0

print("Train shape:", x_train.shape)
print("Test shape:", x_test.shape)

# One-hot encode the integer labels: label k selects row k of the identity
# matrix, i.e. a vector that is 1 at position k and 0 elsewhere.
num_classes = 10
y_train_onehot, y_test_onehot = (
    np.eye(num_classes)[labels] for labels in (y_train, y_test)
)

Train shape: (60000, 784)
Test shape: (10000, 784)


Now we define all the classes we need for our MLP.

In [3]:
class LayerDense:
  """Fully connected layer computing ``dot(inputs, weight) + bias``.

  Attributes:
    weight: array of shape (num_inputs, num_neurons), initialised from a
      standard normal distribution scaled by 0.1.
    bias: array of shape (1, num_neurons), initialised to zeros.
    dweight, dbias: gradients of the loss with respect to weight and bias,
      refreshed by every call to backward().
    dinputs: gradient with respect to the layer input (None until
      backward() has run).
  """

  def __init__(self, num_inputs, num_neurons):
    weight_shape = (num_inputs, num_neurons)
    bias_shape = (1, num_neurons)

    self.weight = 0.1 * np.random.randn(*weight_shape)
    self.bias = np.zeros(bias_shape)

    # Gradient buffers start at zero with the same shapes as the parameters.
    self.dweight = np.zeros_like(self.weight)
    self.dbias = np.zeros_like(self.bias)
    self.dinputs = None

  def forward(self, inputs):
    """Store ``inputs`` for the backward pass and compute ``self.output``."""
    self.inputs = inputs
    projected = np.dot(inputs, self.weight)
    self.output = projected + self.bias

  def backward(self, dvalues):
    """Compute parameter and input gradients from the upstream ``dvalues``."""
    # Parameter gradients: the bias gradient sums over the sample axis.
    self.dbias = np.sum(dvalues, axis=0, keepdims=True)
    self.dweight = np.dot(np.transpose(self.inputs), dvalues)
    # Gradient handed to the previous layer.
    self.dinputs = np.dot(dvalues, np.transpose(self.weight))

class ActivationReLU:
  """Rectified linear unit: element-wise ``max(0, x)``.

  Attributes:
    inputs: the array given to the last forward() call.
    output: the activation computed by the last forward() call.
    dinputs: gradient with respect to the inputs, set by backward().
  """

  def __init__(self):
    self.inputs = self.output = self.dinputs = None

  def forward(self, inputs):
    """Remember ``inputs`` and store the rectified values in ``self.output``."""
    self.inputs = inputs
    self.output = np.maximum(inputs, 0)

  def backward(self, dvalues):
    """Pass ``dvalues`` through where the input was positive, zero elsewhere."""
    blocked = self.inputs <= 0
    # Work on a copy so the caller's gradient array is left untouched.
    grad = dvalues.copy()
    grad[blocked] = 0
    self.dinputs = grad

class ActivationSoftmax:
  """Row-wise softmax: turns each row of scores into a probability vector.

  Attributes:
    inputs: the array given to the last forward() call.
    output: the softmax probabilities computed by the last forward() call;
      every row sums to 1.
  """

  def __init__(self):
    self.inputs = None
    self.output = None

  def forward(self, inputs):
    """Remember ``inputs`` and store the row-wise softmax in ``self.output``.

    Args:
      inputs: 2-D array of scores, one row per sample.
    """
    self.inputs = inputs
    # Softmax is invariant to adding a constant to a row, so subtracting the
    # row maximum leaves the result unchanged while keeping every exponent
    # <= 0. Without it, large scores overflow np.exp and yield inf/inf = NaN.
    shifted = inputs - np.max(inputs, axis=1, keepdims=True)
    exp_values = np.exp(shifted)
    self.output = exp_values / exp_values.sum(axis=1, keepdims=True)

class LossFunction:
  """Categorical cross-entropy between one-hot targets and probabilities.

  Attributes:
    loss: mean cross-entropy over the batch, set by forward().
    gradient: array with the shape of the predictions, set by backward().
    delta: lower bound applied to probabilities before taking the log.
  """

  def __init__(self):
    self.loss = None
    self.gradient = None
    self.delta = 1e-7

  def forward(self, y_true, y_pred):
    """Store the mean cross-entropy of the batch in ``self.loss``.

    Args:
      y_true: 2-D array of one-hot targets, one row per sample.
      y_pred: 2-D array of predicted probabilities with the same shape.
    """
    # Clip into [delta, 1] so log(0) never occurs. Clipping (rather than
    # adding delta to every entry) keeps log(p) <= 0, so the loss of a perfect
    # prediction is exactly 0 instead of slightly negative.
    clipped = np.clip(y_pred, self.delta, 1.0)
    sample_losses = np.sum(y_true * -np.log(clipped), axis=1)
    self.loss = np.mean(sample_losses)

  def backward(self, y_true, y_pred):
    """Store ``(y_pred - y_true) / batch_size`` in ``self.gradient``.

    When ``y_pred`` is the softmax of some scores, this expression is the
    gradient of the mean cross-entropy with respect to those scores (not with
    respect to ``y_pred``), so it can be fed directly to the layer that
    produced the scores.
    """
    self.gradient = (y_pred - y_true) / y_true.shape[0]

class Optimizer:
  """Plain gradient-descent optimizer with a fixed learning rate."""

  def __init__(self, learning_rate):
    self.learning_rate = learning_rate

  def update(self, layer):
    """Step ``layer.weight`` and ``layer.bias`` against their gradients.

    Each parameter ``p`` is updated in place as ``p -= learning_rate * dp``,
    where ``dp`` is read from the attribute named ``"d" + name``.
    """
    for name in ("weight", "bias"):
      param = getattr(layer, name)
      param -= self.learning_rate * getattr(layer, "d" + name)
      # Re-bind the attribute, mirroring what an augmented assignment does.
      setattr(layer, name, param)



class Mlp:
  """Multilayer perceptron classifier trained with mini-batch gradient descent.

  The network is a stack of ``LayerDense`` + ``ActivationReLU`` pairs (one per
  entry of ``hidden_layers``) followed by a ``LayerDense`` output layer and an
  ``ActivationSoftmax``.

  Args:
    numInputs: number of features of each input sample.
    hidden_layers: sequence with the width of each hidden layer; may be empty.
    learningRate: step size handed to the optimizer.
    batchSize: number of samples per mini-batch; must be positive.
    epochs: number of passes over the training data made by train().
    numOutputs: number of classes; must be greater than 1.

  Raises:
    ValueError: if ``numOutputs <= 1`` or ``batchSize <= 0``.
  """

  def __init__(self, numInputs, hidden_layers, learningRate, batchSize, epochs, numOutputs):
    if numOutputs <= 1:
      raise ValueError("The number of outputs must be greater than 1 for classification")
    if batchSize <= 0:
      raise ValueError("The batch size must be a positive integer")

    self.layers = []
    self.numInputs = numInputs
    self.hidden_layers = hidden_layers
    self.learningRate = learningRate
    self.batchSize = batchSize
    self.epochs = epochs
    self.loss = LossFunction()
    self.prediction = None
    self.optimizer = Optimizer(self.learningRate)
    self.firstForward = False

    # Track the width feeding the next layer in a local variable so that
    # self.numInputs keeps reporting the true input size of the network.
    fan_in = numInputs
    for width in self.hidden_layers:
      self.layers.append(LayerDense(fan_in, width))
      self.layers.append(ActivationReLU())
      fan_in = width

    self.layers.append(LayerDense(fan_in, numOutputs))
    self.layers.append(ActivationSoftmax())

  def forward(self, inputs):
    """Run ``inputs`` through every layer and store the result.

    The output of the last layer is kept in ``self.prediction``.
    """
    # Flag recording that at least one forward pass has happened.
    self.firstForward = True

    for layer in self.layers:
      layer.forward(inputs)
      inputs = layer.output

    self.prediction = inputs

  def backward(self, y_true):
    """Back-propagate the loss gradient for the latest forward pass.

    Args:
      y_true: one-hot targets matching the batch last given to forward().
    """
    # The loss gradient is (prediction - y_true) / batch_size, which is
    # already the gradient with respect to the softmax *inputs*.
    self.loss.backward(y_true, self.prediction)
    dvalues = self.loss.gradient

    # Hence the final softmax layer is skipped and propagation starts at the
    # output dense layer.
    for layer in reversed(self.layers[:-1]):
      if hasattr(layer, "backward"):
        layer.backward(dvalues)
        dvalues = layer.dinputs

  def train(self, x_train, y_train):
    """Fit the network, printing the mean loss after every epoch.

    Batches are taken in the order the samples are given; the last batch may
    be smaller than ``batchSize``.

    Args:
      x_train: 2-D array of samples, one row per sample.
      y_train: 2-D array of one-hot targets with one row per sample.

    Raises:
      ValueError: if the dataset is empty or the number of samples and
        targets differ.
    """
    num_samples = x_train.shape[0]
    if num_samples == 0:
      raise ValueError("Cannot train on an empty dataset")
    if y_train.shape[0] != num_samples:
      raise ValueError("x_train and y_train must contain the same number of samples")

    for epoch in range(self.epochs):
      epoch_loss = 0.0
      for start in range(0, num_samples, self.batchSize):
        end = min(start + self.batchSize, num_samples)
        x_batch = x_train[start:end]
        y_batch = y_train[start:end]

        # Forward pass; weight the batch mean by the batch size so the epoch
        # figure is a true per-sample mean even with a short last batch.
        self.forward(x_batch)
        self.loss.forward(y_batch, self.prediction)
        epoch_loss += self.loss.loss * (end - start)

        # Backward pass.
        self.backward(y_batch)

        # Only dense layers carry trainable parameters.
        for layer in self.layers:
          if isinstance(layer, LayerDense):
            self.optimizer.update(layer)

      epoch_loss /= num_samples
      print(f"Epoch {epoch+1}/{self.epochs}, Loss: {epoch_loss:.4f}")

  def predict(self, x_test):
    """Return the index of the most probable class for each row of ``x_test``.

    Note that this runs a forward pass and therefore overwrites
    ``self.prediction``.
    """
    self.forward(x_test)
    return np.argmax(self.prediction, axis=1)

  def evaluate(self, x_test, y_test):
    """Return the fraction of samples classified correctly.

    Args:
      x_test: 2-D array of samples, one row per sample.
      y_test: 2-D array of one-hot targets with one row per sample.
    """
    predictions = self.predict(x_test)
    y_true_index = np.argmax(y_test, axis=1)
    return np.mean(predictions == y_true_index)






Let's do the training to see if it works

In [4]:
# Build the network: 784 inputs, two hidden layers (128 and 64 units) and
# 10 output classes, trained for 10 epochs with batches of 64 samples.
mlp = Mlp(
    numInputs=784,
    hidden_layers=[128, 64],
    numOutputs=10,
    learningRate=0.01,
    batchSize=64,
    epochs=10,
)

# Fit on the training split, then report the fraction of test samples that
# are classified correctly.
mlp.train(x_train, y_train_onehot)
accuracy = mlp.evaluate(x_test, y_test_onehot)
print("Test accuracy:", accuracy)


Epoch 1/10, Loss: 1.0152
Epoch 2/10, Loss: 0.4143
Epoch 3/10, Loss: 0.3384
Epoch 4/10, Loss: 0.2998
Epoch 5/10, Loss: 0.2728
Epoch 6/10, Loss: 0.2515
Epoch 7/10, Loss: 0.2338
Epoch 8/10, Loss: 0.2188
Epoch 9/10, Loss: 0.2057
Epoch 10/10, Loss: 0.1942
Test accuracy: 0.9442
