In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# activation function (sigmoid)
def sigmoid(x, derivative=False):
    """Logistic sigmoid, applied element-wise.

    Args:
        x: scalar or NumPy array of pre-activations.
        derivative: when True, return the derivative of the sigmoid
            evaluated at ``x`` instead of the activation itself.

    Returns:
        ``1 / (1 + exp(-x))``, or ``s * (1 - s)`` (with ``s`` being that
        value) when ``derivative`` is True.
    """
    activation = 1.0 / (1.0 + np.exp(-x))
    return activation * (1 - activation) if derivative else activation

# activation function (ReLU)
def relu(x, derivative=False):
    """Rectified linear unit, applied element-wise.

    Args:
        x: scalar or NumPy array of pre-activations.
        derivative: when True, return the 0/1 mask of positive entries
            instead of the activation.

    Returns:
        ``max(0, x)`` element-wise, or an integer mask that is 1 where
        ``x > 0`` and 0 elsewhere (including at ``x == 0``).
    """
    if not derivative:
        return np.maximum(0, x)

    # Multiplying the boolean mask by 1 turns it into integers.
    return (x > 0) * 1

# activation function (softmax)
def softmax(x, derivative=False):
    """Row-wise softmax over a 2-D batch of pre-activations.

    Args:
        x: array of shape (n_samples, n_classes); normalisation runs
            along axis 1.
        derivative: when True, return the element-wise derivative
            ``d softmax_i / d x_i = p_i * (1 - p_i)`` instead of the
            probabilities. Only these diagonal terms of the softmax
            Jacobian are returned, which matches the element-wise
            convention of the other activation functions in this file;
            the cross terms ``-p_i * p_j`` are not represented.

    Returns:
        Array with the same shape as ``x``: probabilities whose rows sum
        to 1, or the diagonal derivative terms when ``derivative`` is True.
    """
    # Subtracting the row maximum keeps exp() from overflowing without
    # changing the result.
    exps = np.exp(x - np.max(x, axis=1, keepdims=True))
    probs = exps / np.sum(exps, axis=1, keepdims=True)
    if derivative:
        # Previously this branch returned `probs` itself, i.e. the
        # activation rather than a derivative.
        return probs * (1.0 - probs)
    return probs

# mean squared error function
def mse(y_true, y_pred, derivative=False):
    """Halved mean squared error between targets and predictions.

    Args:
        y_true: expected values.
        y_pred: predicted values, broadcast-compatible with ``y_true``.
        derivative: when True, return the element-wise residual
            ``y_pred - y_true`` instead of the scalar loss.

    Returns:
        ``mean(0.5 * (y_true - y_pred) ** 2)`` as a scalar, or the
        residual array when ``derivative`` is True.
    """
    if not derivative:
        half_squared = 0.5 * (y_true - y_pred) ** 2
        return np.mean(half_squared)
    return y_pred - y_true

In [None]:
# --- Network parameters ---
m = 0.9                       # momentum
a = 0.01                      # learning rate
init_method = 'xavier'        # weight-initialisation technique
hidden_f = sigmoid            # activation function for the hidden layer
output_f = sigmoid            # activation function for the output layer
epochs = 1000
batch_size = 0
e_threshold = 1e-5            # early-stopping criterion

# 2 input neurons, 3 in the hidden layer, 1 in the output layer
network_topology = [2, 3, 1]

# XOR gate (inputs), one row per sample
xor = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])

# expected XOR output, row-aligned with `xor`
saida_esperada = np.array([[0], [1], [1], [0]])

In [None]:
# Backpropagation implementation
class Backpropagation:
    """Fully connected feed-forward network trained by batch backpropagation.

    Every call to ``train`` processes the whole input batch per epoch:
    a forward pass, a loss evaluation and one weight update per layer
    using gradient descent with momentum.

    Attributes:
        weights: list with one (in_neurons, out_neurons) matrix per layer.
        bias: list with one (1, out_neurons) row vector per layer.
        netIns / netOuts: per-layer pre-activations and activations of the
            most recent ``feedforward`` call.
        deltas: per-layer local error terms of the most recent ``backprop``.
    """

    def __init__(self,
                 topology,
                 learning_rate,
                 momentum,
                 hidden_activation_func=sigmoid,
                 output_activation_func=sigmoid,
                 error_function=mse,
                 init_method='xavier',
                 seed=None):
        """Build the layers and initialise weights, biases and velocities.

        Args:
            topology: sequence with the number of neurons per layer,
                input layer first (e.g. ``[2, 3, 1]``).
            learning_rate: step size applied to every gradient.
            momentum: fraction of the previous update carried over to the
                next one; 0 gives plain gradient descent.
            hidden_activation_func: callable ``f(x, derivative=False)``
                used by every hidden layer.
            output_activation_func: callable with the same signature used
                by the last layer.
            error_function: callable ``f(y_true, y_pred, derivative=False)``.
            init_method: stored on the instance. Only the fan-in scaled
                uniform rule below is implemented, whatever value is given.
            seed: when not None, weights are drawn from a dedicated
                generator seeded with this value so runs are reproducible;
                when None the global ``np.random`` state is used.

        Raises:
            ValueError: if ``topology`` has fewer than two layers.
        """
        if len(topology) < 2:
            raise ValueError("topology needs at least an input and an output layer")

        self.topology = topology
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.hidden_activation = hidden_activation_func
        self.output_activation = output_activation_func

        # Honour the caller-supplied loss (it used to be hard-coded to mse).
        self.error_function = error_function
        self.init_method = init_method

        # A seed gives an isolated, reproducible generator; without one the
        # global NumPy state is used.
        rng = np.random if seed is None else np.random.default_rng(seed)

        # Basic initialisation
        self.size = len(topology) - 1  # number of weight layers
        self.weights = []
        self.bias = []
        # Running updates used by the momentum term, one per layer.
        self.weight_velocity = []
        self.bias_velocity = []
        for i in range(self.size):
            in_neurons = topology[i]
            out_neurons = topology[i + 1]
            # Uniform range scaled by the layer's fan-in.
            limit = np.sqrt(1 / in_neurons)
            W = rng.uniform(-limit, limit, (in_neurons, out_neurons))
            b = np.zeros((1, out_neurons))
            self.weights.append(W)
            self.bias.append(b)
            self.weight_velocity.append(np.zeros_like(W))
            self.bias_velocity.append(np.zeros_like(b))

    def feedforward(self, inputs):
        """Run a forward pass and cache the intermediate values.

        Args:
            inputs: array of shape (n_samples, topology[0]).

        Returns:
            The activations of the last layer.
        """
        self.inputs = inputs
        self.netIns = []
        self.deltas = [None] * self.size

        layer_input = inputs  # starts with the network inputs
        self.netOuts = []

        for i in range(self.size):
            # Compute the net input
            netIn = np.dot(layer_input, self.weights[i]) + self.bias[i]
            self.netIns.append(netIn)

            # Apply the activation function
            if i == self.size - 1:
                # Output layer
                netOut = self.output_activation(netIn)
            else:
                # Hidden layers
                netOut = self.hidden_activation(netIn)

            self.netOuts.append(netOut)
            layer_input = netOut

        return self.netOuts[-1]  # output of the last layer

    def backprop(self, target, output, error_func):
        """Compute every layer's gradient, then update all layers.

        Must be called after ``feedforward`` because it reads the cached
        inputs, pre-activations and activations.

        Args:
            target: expected outputs for the batch used in the forward pass.
            output: network outputs returned by that forward pass.
            error_func: loss callable supporting ``derivative=True``.
        """
        gradients = [None] * self.size

        # Walk the layers from the last one back to the first.
        for back_index in reversed(range(self.size)):
            if back_index == self.size - 1:
                # --- OUTPUT LAYER ---
                d_activ = self.output_activation(self.netIns[back_index], derivative=True)
                d_error = error_func(target, output, derivative=True)
                delta = d_error * d_activ  # local error (gradient)
            else:
                # --- HIDDEN LAYERS ---
                next_index = back_index + 1
                d_activ = self.hidden_activation(self.netIns[back_index], derivative=True)
                # Propagate the error through the weights that produced the
                # forward pass; updates are deferred until all deltas exist.
                delta = np.dot(self.deltas[next_index], self.weights[next_index].T) * d_activ

            # Keep the current delta for the layer before this one.
            self.deltas[back_index] = delta

            # The first layer is fed by the original network inputs.
            if back_index == 0:
                layer_input = self.inputs
            else:
                layer_input = self.netOuts[back_index - 1]

            gradients[back_index] = (
                np.dot(layer_input.T, delta),
                np.sum(delta, axis=0, keepdims=True),
            )

        # --- UPDATE WEIGHTS --- only after every gradient has been computed.
        for layer_idx, (gradient_mat, bias_grad_mat) in enumerate(gradients):
            self._gradient_descent(
                layer_idx=layer_idx,
                gradient_mat=gradient_mat,
                bias_gradient=bias_grad_mat
            )

    def _gradient_descent(self, layer_idx, gradient_mat, bias_gradient):
        """Apply one gradient-descent-with-momentum step to a single layer.

        The update is ``v = momentum * v - learning_rate * grad`` followed
        by ``w += v``; with ``momentum == 0`` this is plain gradient descent.
        """
        self.weight_velocity[layer_idx] = (
            self.momentum * self.weight_velocity[layer_idx]
            - self.learning_rate * gradient_mat
        )
        self.bias_velocity[layer_idx] = (
            self.momentum * self.bias_velocity[layer_idx]
            - self.learning_rate * bias_gradient
        )
        self.weights[layer_idx] += self.weight_velocity[layer_idx]
        self.bias[layer_idx] += self.bias_velocity[layer_idx]

    def train(self, inputs, targets, epochs=10000, error_threshold=1e-3):
        """Train on the full batch until ``epochs`` or the error threshold.

        Args:
            inputs: array of shape (n_samples, topology[0]).
            targets: expected outputs, row-aligned with ``inputs``.
            epochs: maximum number of passes over the batch.
            error_threshold: training stops after the first epoch whose
                error is at or below this value.

        Returns:
            The error measured in the last executed epoch (evaluated before
            that epoch's weight update), or None when ``epochs`` is 0.
        """
        error = None
        for epoch in range(epochs):
            # Feedforward
            outputs = self.feedforward(inputs)

            # Compute the error
            error = self.error_function(targets, outputs)

            # Backpropagation
            self.backprop(targets, outputs, self.error_function)

            if epoch % 1000 == 0:
                print(f"Época {epoch} - Erro: {error:.6f}")
            if error <= error_threshold:
                break

        return error

In [None]:
# Build the network from the configuration cell's parameters.
rede = Backpropagation(
    network_topology,
    a,
    m,
    hidden_activation_func=hidden_f,
    output_activation_func=output_f,
    init_method=init_method,
)

# Train on the four XOR samples as a single batch.
error = rede.train(xor, saida_esperada, epochs=epochs, error_threshold=e_threshold)

print(f"\n\n{'='*40}\nTraining XOR gate:\n{'='*40}\n")

# Evaluate each sample on its own and report its individual error.
for i in range(len(xor)):
    sample = xor[i]
    output = rede.feedforward(sample.reshape(1, -1))
    sample_error = mse(saida_esperada[i:i+1], output)
    print(f"Testing Network:\n\tvetor de entrada : {sample}\n\tvetor de saída   : {output}\n\tsaída esperada   : {saida_esperada[i]}")
    print(f"\tNetwork error    : {sample_error:.3e}\n")

Época 0 - Erro: 0.126440


Training XOR gate:

Testing Network:
	vetor de entrada : [0 0]
	vetor de saída   : [[0.4975045]]
	saída esperada   : [0]
	Network error    : 1.238e-01

Testing Network:
	vetor de entrada : [0 1]
	vetor de saída   : [[0.49628398]]
	saída esperada   : [1]
	Network error    : 1.269e-01

Testing Network:
	vetor de entrada : [1 0]
	vetor de saída   : [[0.50385124]]
	saída esperada   : [1]
	Network error    : 1.231e-01

Testing Network:
	vetor de entrada : [1 1]
	vetor de saída   : [[0.50267732]]
	saída esperada   : [0]
	Network error    : 1.263e-01



In [None]:
# noise test: shift every XOR input by a uniform offset in
# [-ruidoTeste, ruidoTeste)
ruidoTeste = 0.1
ruido = np.random.uniform(low=-ruidoTeste, high=ruidoTeste, size=xor.shape)
xor_ruido = np.add(xor, ruido)

print("Dados XOR originais:\n", xor)
print("\nDados XOR com ruído:\n", xor_ruido)

Dados XOR originais:
 [[0 0]
 [0 1]
 [1 0]
 [1 1]]

Dados XOR com ruído:
 [[-0.08231907 -0.04231923]
 [-0.0304607   1.04752482]
 [ 1.04379622  0.00460797]
 [ 0.92318684  0.91058128]]
