In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

## Generating a XOR Neural Net

### Basic Structure

<img src='XORNeuralNetDiagram.png'/>

### Data for XOR

In [86]:
X_train = pd.DataFrame({
    'x1': [0, 0, 1, 1],
    'x2': [0, 1, 0, 1]
})
y_train = pd.DataFrame({
    'y': [0, 1, 1, 0]
})
XoriginalCol = X_train.columns
pd.concat([X_train, y_train], axis = 1)

Unnamed: 0,x1,x2,y
0,0,0,0
1,0,1,1
2,1,0,1
3,1,1,0


### Defining the activation function

In [87]:
def ReLU(x):
    return(np.maximum([np.zeros_like(x)],[x]))

In [88]:
def ReLU_der(x):
    x[x <= 0] = 0
    x[x > 0] = 1
    return x

In [89]:
def Identity(x):
    return([x])

### Defining the Forward Propagation function

In [90]:
def forwardPropagation(x, w, func):
    toActivate = np.matmul(x, w)
    activation = func(toActivate)
    return(activation)

In [91]:
#Arquitecture
j = [2,2,1]

w=[]
#Weights
len_w = [X_train.shape[1]+1, X_train.shape[1]+1, X_train.shape[1]+1]
for node in range(len(j)):
    w.append(np.random.normal(0, 1, size=(len_w[node],j[node])))

#adding bias
bias = pd.Series(np.ones_like(X_train['x1']), name = 'bias')
X_train_in = pd.concat([X_train, bias], axis = 1)

In [92]:
X_train_in

Unnamed: 0,x1,x2,bias
0,0,0,1
1,0,1,1
2,1,0,1
3,1,1,1


In [93]:
w

[array([[-0.3735404 , -0.44550593],
        [ 1.67399066,  1.69992907],
        [-0.35282506,  0.89975651]]), array([[-0.21110192, -0.41457579],
        [-0.54617198,  0.04404761],
        [ 1.11159994,  1.58866562]]), array([[1.51219425],
        [1.01351012],
        [0.50142959]])]

### Forward Propagation Algorithm

In [101]:
activation = X_train_in
act_func = [ReLU, ReLU, Identity]
act_log = [activation]
l=3
for layer in range(l):
    if layer >= l-1:
        print('Layer ', layer, '; Shape of inputs: ', activation.shape, sep = '')
        print('Layer ', layer, '; Shape of weight: ', w[layer].shape, sep = '')
        print('Activation function:', act_func[layer])
        activation = pd.DataFrame(forwardPropagation(np.array(activation), w[layer], act_func[layer])[0])
        activation_log = activation
        bias = pd.Series(np.ones(activation_log.shape[0]), name = 'bias')
        activation_log = pd.concat([activation_log, bias], axis = 1)
        act_log.append(activation_log)
        print('Layer ', layer, '; Shape of outputs: ', activation.shape, sep = '')
    else:
        print('Layer ', layer, '; Shape of inputs: ', activation.shape, sep = '')
        print('Layer ', layer, '; Shape of weight: ', w[layer].shape, sep = '')
        print('Activation function:', act_func[layer])
        activation = pd.DataFrame(forwardPropagation(np.array(activation), w[layer], act_func[layer])[0], columns = XoriginalCol)
        print('Layer ', layer, '; Shape of outputs: ', activation.shape, sep = '')
        bias = pd.Series(np.ones_like(activation.iloc[:,0]), name = 'bias')
        activation = pd.concat([activation, bias], axis = 1)
        activation_log = activation
        bias = pd.Series(np.ones(activation_log.shape[0]), name = 'bias')
        activation_log = pd.concat([activation_log, bias], axis = 1)
        act_log.append(activation_log)
        print('Layer ', layer, '; Shape of outputs + bias: ', activation.shape, sep = '')

Layer 0; Shape of inputs: (4, 3)
Layer 0; Shape of weight: (3, 2)
Activation function: <function ReLU at 0x11f7e2510>
Layer 0; Shape of outputs: (4, 2)
Layer 0; Shape of outputs + bias: (4, 3)
Layer 1; Shape of inputs: (4, 3)
Layer 1; Shape of weight: (3, 2)
Activation function: <function ReLU at 0x11f7e2510>
Layer 1; Shape of outputs: (4, 2)
Layer 1; Shape of outputs + bias: (4, 3)
Layer 2; Shape of inputs: (4, 3)
Layer 2; Shape of weight: (3, 1)
Activation function: <function Identity at 0x11f7e2a60>
Layer 2; Shape of outputs: (4, 1)


In [95]:
activation

Unnamed: 0,0
0,3.089556
1,1.672492
2,3.437618
3,1.809557


In [96]:
error = activation.iloc[:,0] - y_train.iloc[:,0]

In [97]:
error = pd.DataFrame(error)
error

Unnamed: 0,0
0,3.089556
1,0.672492
2,2.437618
3,1.809557


In [98]:
lr=0.01
l = 4
layer = 4
delta = error

prior_activation_w = act_log[layer-2].drop('bias', axis = 1)
prior_activation_b = act_log[layer-2]['bias']

update_w = np.matmul(np.array(prior_activation_w).T, np.array(delta))
update_b = np.matmul(np.array(prior_activation_b).T, np.array(delta))

print(update_w)
print(update_b)

print(w[2][0:len_w[2]-1] - lr*(update_w))
print(w[2][len_w[2]-1] - lr*(update_b))

#print('Weights of layer', layer, w[layer] - lr*np.matmul(prior_activation, delta))

[[ 4.0209609 ]
 [12.06465925]]
[[8.00922293]
 [8.00922293]]
[[1.47198464]
 [0.89286353]]
[[0.42133736]
 [0.42133736]]


In [99]:
w[2][len_w[2]-1]

array([0.50142959])

In [105]:
act_log[0]

Unnamed: 0,x1,x2,bias
0,0,0,1
1,0,1,1
2,1,0,1
3,1,1,1


In [76]:
w

[array([[-0.45178646,  0.33466487],
        [-0.05293319,  0.60469474],
        [-1.10395047,  0.55280368]]), array([[-0.01079013,  0.5004649 ],
        [ 0.61371013,  0.22921385],
        [-0.96514772, -1.17031305]]), array([[ 0.30469903],
        [-0.66990628],
        [ 0.10246307]])]

In [71]:
len_w

[3, 3, 3]