# Algoritmo para resolver problema XOR

Resolve o problema da porta XOR, como apresentado em Fausett (p. 294 a p. 300).

Exemplo da Seção 6.1.3.

In [1]:
import pandas as pd
import numpy as np
import math
import os
import random
from random import sample

In [2]:
# Data inputs
# Prompts for the dataset used to train the network and loads it from disk.
def data_input():
    """Ask the user which dataset to use and load it as a DataFrame.

    The menu is printed to stdout and the prompt is repeated until a valid
    option number is typed. The CSV file is read, without a header row, from
    the "Conjunto de dados" folder located in the current working directory.

    Returns
    -------
    pandas.DataFrame
        Contents of the selected CSV file (columns are numbered 0..k-1).
    """
    # Menu option -> CSV file name inside the dataset folder.
    datasets = {1: "ProblemXOR.csv"}

    directory = os.path.abspath(os.getcwd())

    print("Escolha o conjunto de dados para teste:\n")
    print("\t1 - XOR")

    escolha = 0
    while escolha not in datasets:
        try:
            escolha = int(input("\nDigite a sua opção: "))
        except ValueError:
            # Non-numeric text: report it and ask again.
            print("Valor inválido...")
            continue
        if escolha not in datasets:
            print("Valor inválido...")

    # os.path.join keeps the path portable; the previous hard-coded
    # backslashes only resolved on Windows and relied on invalid escapes.
    csv_path = os.path.join(directory, "Conjunto de dados", datasets[escolha])
    return pd.read_csv(csv_path, header=None)

In [3]:
def generate_random_values(size):
    '''Draw random values on a 0.01 grid from -0.50 up to 0.49.

    Returns a single float when size == 1, otherwise a list holding
    `size` independent draws (an empty list when size is 0).
    '''
    def draw():
        # Integer in [-50, 49] scaled down to hundredths.
        return random.randrange(-50, 50)/100

    if size == 1:
        return draw()
    return [draw() for _ in range(size)]

def generate_random_weights(nlin, ncol):
    '''Build an nlin x ncol weight matrix of random values.

    Entries are drawn as integers in [-50, 49] and scaled to hundredths,
    i.e. they lie on a 0.01 grid from -0.50 up to 0.49.
    '''
    shape = (nlin, ncol)
    hundredths = np.random.randint(low=-50, high=50, size=shape)
    return hundredths / 100

In [4]:
# Activation function and its derivative
# Bipolar sigmoid: f(x) = 2/(1 + exp(-x)) - 1, which equals tanh(x/2)

def activation_function(arg):
    '''Bipolar sigmoid of a single value; the result lies in [-1, 1].

    Evaluated as tanh(arg/2), which is algebraically identical to
    2/(1 + exp(-arg)) - 1 but does not overflow for very negative inputs.
    '''
    return math.tanh(arg / 2)

def derivative_activation_function(arg):
    '''Derivative of the bipolar sigmoid at a single value.

    Uses the identity f'(x) = 0.5 * (1 + f(x)) * (1 - f(x)).
    '''
    f = activation_function(arg)
    return 0.5*(1 + f)*(1 - f)


def activation_function_array(ndarray):
    '''Apply the bipolar sigmoid element-wise over a numpy array.

    Accepts any array-like (including empty arrays) and returns a float
    result of the same shape.
    '''
    return np.tanh(np.asarray(ndarray, dtype=float) / 2)

def derivative_activation_function_array(ndarray):
    '''Apply the bipolar sigmoid's slope function element-wise over a numpy array.

    Accepts any array-like (including empty arrays) and returns a float
    result of the same shape.
    '''
    f = activation_function_array(ndarray)
    return 0.5*(1 + f)*(1 - f)

In [5]:
##Algoritmo de treinamento


def training_algorithm(dataset, labels, p, tol, a, itmax):
    """Train a one-hidden-layer network with online backpropagation.

    Based on Fausett p.294. Weights are updated after every training pair;
    one pass over all pairs is one epoch. Training stops when the mean
    squared error accumulated over an epoch drops to `tol` or below, or
    after `itmax` epochs.

    Parameters
    ----------
    dataset : pandas.DataFrame
        One training pair per row. The last `labels` columns are the
        targets; the columns before them are the input attributes.
    labels : int
        Number of target columns (= number of output units).
    p : int
        Number of hidden units.
    tol : float
        Threshold on the per-epoch mean squared error.
    a : float
        Learning rate.
    itmax : int
        Maximum number of epochs.

    Returns
    -------
    v : numpy.ndarray, shape (p, n + 1)
        Input-to-hidden weights; column 0 multiplies the constant bias input.
    w : numpy.ndarray, shape (labels, p + 1)
        Hidden-to-output weights; column 0 multiplies the constant bias input.
    """
    n = len(dataset.columns) - labels  # number of input attributes

    s = dataset.iloc[:, 0:n].copy().values           # input patterns
    t = dataset.iloc[:, n:n + labels].copy().values  # target patterns

    m = labels                # number of output units
    s_n = len(dataset.index)  # number of training pairs (rows of the dataset)

    # Initialize weights and biases with small random values.
    v = generate_random_weights(p, n + 1)
    w = generate_random_weights(m, p + 1)

    # Start above any tolerance so the first epoch always runs.
    sqError = np.inf
    it = 0
    while sqError > tol and it < itmax:

        # Reset once per epoch so the stopping test sees the error of every
        # training pair, not only the last one processed.
        sqError = 0

        for tset in range(s_n):  # one full pass of this loop is one epoch

            # FEEDFORWARD

            # Input units receive the pattern; a leading 1 feeds the bias weight.
            xi = np.insert(s[tset], 0, 1)

            # Hidden units: weighted sum, then activation.
            z_inj = np.dot(v, xi)
            zj = activation_function_array(z_inj)
            zj = np.insert(zj, 0, 1)  # leading 1 feeds the output bias weight

            # Output units: weighted sum, then activation.
            y_ink = np.dot(w, zj)
            yk = activation_function_array(y_ink)

            # Error measured before this pattern's weight update.
            error = yk - t[tset]

            # BACKPROPAGATION OF ERROR

            # Output-layer error information term.
            dk = (t[tset] - yk)*derivative_activation_function_array(y_ink)

            # Output-layer weight and bias corrections.
            delta_wjk = a*np.outer(dk, zj)

            # Each hidden unit sums the deltas from the units above; the bias
            # column of w is skipped because no hidden unit feeds it.
            d_inj = np.dot(dk, w[:, 1:])

            # Hidden-layer error information term.
            dj = d_inj*derivative_activation_function_array(z_inj)

            # Hidden-layer weight and bias corrections.
            delta_vij = a*np.outer(dj, xi)

            # Both corrections were computed from the old weights; apply them now.
            w += delta_wjk
            v += delta_vij

            # Accumulate the squared error for the stopping condition.
            sqError += np.sum(error**2)

        it += 1
        if it == itmax:
            print("Número máximo de iterações atingido")
        sqError = sqError/s_n  # mean squared error over the epoch

    return v, w

In [6]:
def application_procedure(v, w, inputs):
    """Run the network forward over each input pattern.

    Parameters
    ----------
    v : weight matrix applied to each input pattern (after a leading 1 is
        inserted for the bias).
    w : weight matrix applied to the hidden activations (after a leading 1
        is inserted for the bias).
    inputs : indexable collection of input patterns; each pattern must
        provide a .copy() method (e.g. a list or a numpy array).

    Returns
    -------
    list
        One array of output activations per input pattern, in input order.
    """
    def layer_output(weights, signal):
        # Prepend the constant bias input, take the weighted sum, squash it.
        with_bias = np.insert(signal, 0, 1)
        return activation_function_array(np.dot(weights, with_bias))

    categorias = []
    for aset in range(len(inputs)):  # aset = application set
        pattern = inputs[aset].copy()
        hidden = layer_output(v, pattern)
        categorias.append(layer_output(w, hidden))

    return categorias

In [7]:
# Training configuration
labels = 1     # number of target columns in the dataset
p = 4          # Number of hidden units
a = 0.5        # Learning rate
tol = 1e-4     # error threshold that stops training
itmax = 10000  # maximum number of epochs

# Load the dataset (interactive prompt) and train the network.
dataset = data_input()
v, w = training_algorithm(dataset, labels, p, tol, a, itmax)

# Evaluate the trained network on the four XOR input patterns.
inputs = [[-1,-1], [-1,1], [1, -1], [1,1]]
expected_results = [0, 1, 1, 0]

# Round each output activation to the nearest integer.
cat = np.round(application_procedure(v, w, inputs))

print("expected results: ", expected_results)
print("results: ", cat)

Escolha o conjunto de dados para teste:

	1 - XOR

Digite a sua opção: 1
expected results:  [0, 1, 1, 0]
results:  [[0.]
 [1.]
 [1.]
 [0.]]
