Backpropagation para Display de 7 segmentos

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Activation function (sigmoid)
def sigmoid(x, derivative=False):
    """Logistic sigmoid, applied element-wise.

    Args:
        x: scalar or NumPy array of pre-activation values.
        derivative: when True, return the derivative of the sigmoid evaluated
            at ``x`` instead of the sigmoid itself.

    Returns:
        ``1 / (1 + exp(-x))``, or ``s * (1 - s)`` (with ``s`` the sigmoid of
        ``x``) when ``derivative`` is True.
    """
    activation = 1.0 / (np.exp(-x) + 1.0)
    # The derivative is expressed through the activation itself: s' = s(1 - s).
    return activation * (1 - activation) if derivative else activation

# Activation function (ReLU)
def relu(x, derivative=False):
    """Rectified linear unit, applied element-wise.

    Args:
        x: scalar or NumPy array of pre-activation values.
        derivative: when True, return the slope of ReLU at ``x`` instead of
            its value.

    Returns:
        ``max(0, x)``, or an integer 0/1 mask that is 1 where ``x > 0`` when
        ``derivative`` is True (the slope at exactly 0 is reported as 0).
    """
    if not derivative:
        return np.maximum(0, x)
    # Multiplying the boolean mask by 1 turns True/False into 1/0.
    return (x > 0) * 1

# Activation function (softmax)
def softmax(x, derivative=False):
    """Softmax over the last axis of ``x``.

    Args:
        x: array-like of scores. For a 2-D input each row is normalised
            independently; a 1-D input is treated as a single row.
        derivative: when True, return the element-wise derivative
            ``p * (1 - p)`` instead of the probabilities ``p``. This is the
            diagonal of the softmax Jacobian; the off-diagonal terms
            ``-p_i * p_j`` are not included, so the result has the same shape
            as ``x`` and can be used in element-wise products.

    Returns:
        Array with the same shape as ``x``: the probabilities, or their
        element-wise derivative when ``derivative`` is True.
    """
    x = np.asarray(x, dtype=float)
    # Subtracting the per-row maximum leaves the result unchanged but keeps
    # exp() from overflowing on large scores.
    shifted = x - np.max(x, axis=-1, keepdims=True)
    exps = np.exp(shifted)
    probs = exps / np.sum(exps, axis=-1, keepdims=True)
    if derivative:
        return probs * (1.0 - probs)
    return probs

# Mean squared error loss
def mse(y_true, y_pred, derivative=False):
    """Halved mean squared error between targets and predictions.

    Args:
        y_true: array of target values.
        y_pred: array of predicted values, broadcastable against ``y_true``.
        derivative: when True, return the element-wise error signal instead
            of the scalar loss.

    Returns:
        The mean over all elements of ``0.5 * (y_true - y_pred) ** 2``, or
        ``y_pred - y_true`` when ``derivative`` is True. The gradient is not
        divided by the number of elements.
    """
    if not derivative:
        residual = y_true - y_pred
        return np.mean(0.5 * residual ** 2)
    return y_pred - y_true

In [None]:
# --- Network configuration ---
# Layer sizes: 7 input neurons, 5 hidden neurons, 10 output neurons.
network_topology = [7, 5, 10]

m = 0.9                  # momentum
a = 0.01                 # learning rate
init_method = 'xavier'   # weight-initialisation scheme name
hidden_f = sigmoid       # activation function for the hidden layer
output_f = sigmoid       # activation function for the output layer
epochs = 1000            # maximum number of training epochs
batch_size = 0           # NOTE(review): not referenced anywhere else in the visible cells
e_threshold = 1e-5       # early-stopping threshold on the training error

# Inputs: one row per digit 0-9, one column per display segment (1 = lit).
# NOTE(review): the columns look like the conventional a-g segment order; confirm.
display = np.array([
    [1, 1, 1, 1, 1, 1, 0],  # 0
    [0, 1, 1, 0, 0, 0, 0],  # 1
    [1, 1, 0, 1, 1, 0, 1],  # 2
    [1, 1, 1, 1, 0, 0, 1],  # 3
    [0, 1, 1, 0, 0, 1, 1],  # 4
    [1, 0, 1, 1, 0, 1, 1],  # 5
    [1, 0, 1, 1, 1, 1, 1],  # 6
    [1, 1, 1, 0, 0, 0, 0],  # 7
    [1, 1, 1, 1, 1, 1, 1],  # 8
    [1, 1, 1, 1, 0, 1, 1],  # 9
])

# Expected outputs: one-hot rows, i.e. row k has a single 1 in column k.
# That is the identity matrix with one row per sample in `display`.
saida_esperada = np.eye(len(display), dtype=int)

In [None]:
# Backpropagation implementation
class Backpropagation:
    """Fully connected feed-forward network trained by full-batch gradient descent.

    Layer ``i`` maps ``topology[i]`` inputs to ``topology[i + 1]`` outputs using
    a weight matrix of shape ``(topology[i], topology[i + 1])`` and a bias row
    of shape ``(1, topology[i + 1])``.
    """

    def __init__(self,
                 topology,
                 learning_rate = 0.01,
                 momentum      = 0.9,
                 hidden_activation_func=sigmoid,
                 output_activation_func=sigmoid,
                 init_method='xavier',
                 seed=None):
        """Create the layers and initialise their parameters.

        Args:
            topology: sequence of layer sizes, input layer first.
            learning_rate: step size applied to every gradient.
            momentum: fraction of the previous update carried into the next
                one; ``0`` gives plain gradient descent.
            hidden_activation_func: callable ``f(x, derivative=False)`` used by
                every layer except the last.
            output_activation_func: callable with the same signature, used by
                the last layer.
            init_method: name of the weight-initialisation scheme. It is stored
                on the instance. NOTE(review): only one scheme is implemented,
                so every value currently selects the same initialisation.
            seed: optional seed. When given, the initial weights are drawn from
                a private ``np.random.RandomState(seed)`` so runs are
                repeatable; when ``None`` the global NumPy generator is used.
        """
        self.topology = topology
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.hidden_activation = hidden_activation_func
        self.output_activation = output_activation_func
        self.init_method = init_method

        self.error_function = mse

        # A private generator keeps seeded runs reproducible without touching
        # the global NumPy random state.
        rng = np.random if seed is None else np.random.RandomState(seed)

        # Number of weight layers (one fewer than the number of neuron layers).
        self.size = len(topology) - 1
        self.weights = []
        self.bias = []
        # Previous update of each parameter, used by the momentum term.
        self.velocity_w = []
        self.velocity_b = []

        for i in range(self.size):
            in_neurons = topology[i]
            out_neurons = topology[i + 1]
            # Uniform initialisation in [-sqrt(1/fan_in), +sqrt(1/fan_in)].
            limit = np.sqrt(1 / in_neurons)
            W = rng.uniform(-limit, limit, (in_neurons, out_neurons))
            b = np.zeros((1, out_neurons))
            self.weights.append(W)
            self.bias.append(b)
            self.velocity_w.append(np.zeros_like(W))
            self.velocity_b.append(np.zeros_like(b))


    def feedforward(self, inputs):
        """Run a forward pass and cache the values ``backprop`` needs.

        Args:
            inputs: array of shape ``(n_samples, topology[0])``; a single 1-D
                sample is promoted to one row.

        Returns:
            The activations of the last layer, shape ``(n_samples, topology[-1])``.
        """
        # Promote a 1-D sample to a row so the transposes in backprop work.
        inputs = np.atleast_2d(inputs)
        self.inputs = inputs
        self.netIns = []                      # pre-activation values per layer
        self.netOuts = []                     # activations per layer
        self.deltas = [None] * self.size      # filled in by backprop

        layer_input = inputs  # the first layer is fed with the network inputs

        for i in range(self.size):
            # Net input: weighted sum plus bias.
            netIn = np.dot(layer_input, self.weights[i]) + self.bias[i]
            self.netIns.append(netIn)

            # The last layer uses the output activation, all others the hidden one.
            if i == self.size - 1:
                netOut = self.output_activation(netIn)
            else:
                netOut = self.hidden_activation(netIn)

            self.netOuts.append(netOut)
            layer_input = netOut

        return self.netOuts[-1]


    def backprop(self, target, output, error_func):
        """Back-propagate the error of the last forward pass and update all layers.

        Must be called after ``feedforward``, whose cached inputs, net inputs
        and activations are read here.

        Args:
            target: expected outputs, same shape as ``output``.
            output: network outputs returned by the last ``feedforward`` call.
            error_func: callable ``f(target, output, derivative=True)``
                returning the derivative of the loss with respect to ``output``.
        """
        gradients = [None] * self.size

        # Pass 1: walk the layers from last to first and compute every
        # gradient with the weights that produced the forward pass. No weight
        # is modified here, so hidden deltas never see updated weights.
        for back_index in reversed(range(self.size)):
            if back_index == self.size - 1:
                # Output layer: loss derivative times activation derivative.
                d_activ = self.output_activation(self.netIns[back_index], derivative=True)
                d_error = error_func(target, output, derivative=True)
                delta = d_error * d_activ
            else:
                # Hidden layer: pull the next layer's delta back through its weights.
                next_index = back_index + 1
                d_activ = self.hidden_activation(self.netIns[back_index], derivative=True)
                delta = np.dot(self.deltas[next_index], self.weights[next_index].T) * d_activ

            self.deltas[back_index] = delta

            # The first layer is fed by the network inputs, the others by the
            # previous layer's activations.
            if back_index == 0:
                layer_input = self.inputs
            else:
                layer_input = self.netOuts[back_index - 1]

            gradient_mat = np.dot(layer_input.T, delta)
            bias_grad_mat = np.sum(delta, axis=0, keepdims=True)
            gradients[back_index] = (gradient_mat, bias_grad_mat)

        # Pass 2: apply the updates.
        for layer_idx, (gradient_mat, bias_grad_mat) in enumerate(gradients):
            self._gradient_descent(
                layer_idx=layer_idx,
                gradient_mat=gradient_mat,
                bias_gradient=bias_grad_mat
            )


    def _gradient_descent(self, layer_idx, gradient_mat, bias_gradient):
        """Apply one momentum gradient-descent step to a single layer.

        The step is ``v = momentum * v - learning_rate * gradient`` followed by
        ``parameter += v``; with ``momentum == 0`` this is plain gradient descent.

        Args:
            layer_idx: index of the layer to update.
            gradient_mat: gradient of the loss with respect to the layer weights.
            bias_gradient: gradient of the loss with respect to the layer bias.
        """
        self.velocity_w[layer_idx] = (self.momentum * self.velocity_w[layer_idx]
                                      - self.learning_rate * gradient_mat)
        self.velocity_b[layer_idx] = (self.momentum * self.velocity_b[layer_idx]
                                      - self.learning_rate * bias_gradient)
        self.weights[layer_idx] += self.velocity_w[layer_idx]
        self.bias[layer_idx] += self.velocity_b[layer_idx]

    def train(self, inputs, targets, epochs=10000, error_threshold=1e-3):
        """Train on the whole data set for up to ``epochs`` iterations.

        Args:
            inputs: array of shape ``(n_samples, topology[0])``.
            targets: array of shape ``(n_samples, topology[-1])``.
            epochs: maximum number of full-batch updates.
            error_threshold: training stops once the error is at or below this value.

        Returns:
            The error measured in the last epoch that ran (computed before that
            epoch's weight update), or ``None`` when ``epochs`` is 0.
        """
        error = None
        for epoch in range(epochs):
            # Forward pass
            outputs = self.feedforward(inputs)

            # Error of the current parameters
            error = self.error_function(targets, outputs)

            # Backward pass and parameter update
            self.backprop(targets, outputs, self.error_function)

            if epoch % 1000 == 0:
                print(f"Época {epoch} - Erro: {error:.6f}")
            if error <= error_threshold:
                break

        return error

In [None]:
# Build the network from the values set in the configuration cell.
nnet = Backpropagation(
    network_topology,
    learning_rate=a,
    momentum=m,
    hidden_activation_func=hidden_f,
    output_activation_func=output_f,
    init_method=init_method,
)

# Fit on the ten digit patterns; `error` receives whatever `train` returns.
error = nnet.train(display, saida_esperada, epochs=epochs, error_threshold=e_threshold)

print(f"\n\n{'='*40}\nTraining Display 7 Segmentos:\n{'='*40}\n")

# Evaluate each pattern on its own: feed it as a one-row batch and report the
# network output next to the expected one-hot row.
for i, (sample, expected) in enumerate(zip(display, saida_esperada)):
    output = nnet.feedforward(sample[np.newaxis, :])
    sample_error = mse(expected[np.newaxis, :], output)
    print(f"Testing Network:\n\tvetor de entrada : {sample}\n\tvetor de saída   : {output}\n\tsaída esperada   : {saida_esperada[i]}")
    print(f"\tNetwork error   : {sample_error:.3e}\n")

Época 0 - Erro: 0.115469


Training Display 7 Segmentos:

Testing Network:
	vetor de entrada : [1 1 1 1 1 1 0]
	vetor de saída   : [[0.10115417 0.11024169 0.10527132 0.10012655 0.10185235 0.09649601
  0.10176674 0.10254862 0.09396168 0.09615603]]
	saída esperada   : [1 0 0 0 0 0 0 0 0 0]
	Network error   : 4.499e-02

Testing Network:
	vetor de entrada : [0 1 1 0 0 0 0]
	vetor de saída   : [[0.12936969 0.14385287 0.11974921 0.11668467 0.13013149 0.11788968
  0.11505697 0.12798516 0.11391721 0.11467814]]
	saída esperada   : [0 1 0 0 0 0 0 0 0 0]
	Network error   : 4.321e-02

Testing Network:
	vetor de entrada : [1 1 0 1 1 0 1]
	vetor de saída   : [[0.09896585 0.1097547  0.11592469 0.09839992 0.10275146 0.10575156
  0.10792508 0.10670987 0.10113091 0.10172884]]
	saída esperada   : [0 0 1 0 0 0 0 0 0 0]
	Network error   : 4.392e-02

Testing Network:
	vetor de entrada : [1 1 1 1 0 0 1]
	vetor de saída   : [[0.09554758 0.10572832 0.10265417 0.09948844 0.09744279 0.0948044
  0.10019846 0.0999