In [76]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

## Generating a XOR Neural Net

### Basic Structure

<img src='XORNeuralNetDiagram.png'/>

### Data for XOR

In [165]:
X_train = pd.DataFrame({
    'x1': [0, 0, 1, 1],
    'x2': [0, 1, 0, 1]
})
y_train = pd.DataFrame({
    'y': [0, 1, 1, 0]
})
XoriginalCol = X_train.columns
pd.concat([X_train, y_train], axis = 1)

Unnamed: 0,x1,x2,y
0,0,0,0
1,0,1,1
2,1,0,1
3,1,1,0


### Defining the activation function

In [122]:
def ReLU(x):
    return(np.maximum([np.zeros_like(x)],[x]))

In [306]:
def Identity(x):
    return([x])

### Defining the Forward Propagation function

In [240]:
def forwardPropagation(x, w, func):
    toActivate = np.matmul(x, w)
    activation = func(toActivate)
    return(activation)

In [313]:
#Arquitecture
j = [2,2,1]

w=[]
#Weights
len_w = [X_train.shape[1]+1, X_train.shape[1]+1, X_train.shape[1]]
for node in range(len(j)):
    w.append(np.random.normal(0, 1, size=(len_w[node],j[node])))

#adding bias
bias = pd.Series(np.ones_like(X_train['x1']), name = 'bias')
X_train_in = pd.concat([X_train, bias], axis = 1)

In [314]:
X_train_in

Unnamed: 0,x1,x2,bias
0,0,0,1
1,0,1,1
2,1,0,1
3,1,1,1


In [315]:
w

[array([[ 0.46340106,  1.10509881],
        [ 0.89323539, -0.6322855 ],
        [ 1.56199766,  1.47749094]]), array([[-1.88668892,  0.36552518],
        [ 0.19829845, -1.58008902],
        [-1.08478335,  1.65715283]]), array([[ 1.36335958],
        [-1.22865206]])]

### Forward Propagation Algorithm

In [388]:
activation = X_train_in
act_func = [ReLU, ReLU, Identity]
act_log = [activation]
l=3
for layer in range(l):
    if layer >= l-2:
        print('Layer ', layer, '; Shape of inputs: ', activation.shape, sep = '')
        print('Layer ', layer, '; Shape of weight: ', w[layer].shape, sep = '')
        print('Activation function:', act_func[layer])
        activation = pd.DataFrame(forwardPropagation(np.array(activation), w[layer], act_func[layer])[0])
        act_log.append(activation)
        print('Layer ', layer, '; Shape of outputs: ', activation.shape, sep = '')
    else:
        print('Layer ', layer, '; Shape of inputs: ', activation.shape, sep = '')
        print('Layer ', layer, '; Shape of weight: ', w[layer].shape, sep = '')
        print('Activation function:', act_func[layer])
        activation = pd.DataFrame(forwardPropagation(np.array(activation), w[layer], act_func[layer])[0], columns = XoriginalCol)
        print('Layer ', layer, '; Shape of outputs: ', activation.shape, sep = '')
        bias = pd.Series(np.ones_like(activation.iloc[:,0]), name = 'bias')
        activation = pd.concat([activation, bias], axis = 1)
        act_log.append(activation)
        print('Layer ', layer, '; Shape of outputs + bias: ', activation.shape, sep = '')

Layer 0; Shape of inputs: (4, 3)
Layer 0; Shape of weight: (3, 2)
Activation function: <function ReLU at 0x11756c6a8>
Layer 0; Shape of outputs: (4, 2)
Layer 0; Shape of outputs + bias: (4, 3)
Layer 1; Shape of inputs: (4, 3)
Layer 1; Shape of weight: (3, 2)
Activation function: <function ReLU at 0x11756c6a8>
Layer 1; Shape of outputs: (4, 2)
Layer 2; Shape of inputs: (4, 2)
Layer 2; Shape of weight: (2, 1)
Activation function: <function Identity at 0x1190d4c80>
Layer 2; Shape of outputs: (4, 1)


In [389]:
activation

Unnamed: 0,0
0,0.0
1,-1.497853
2,0.0
3,0.0


In [390]:
error = activation.iloc[:,0] - y_train.iloc[:,0]

In [391]:
error = pd.DataFrame(error)
error

Unnamed: 0,0
0,0.0
1,-2.497853
2,-1.0
3,0.0


In [399]:
lr=0.01
l = 3
layer = 2
delta = error
while layer >= 0:
    prior_activation = np.array(act_log[layer]).T
    if layer == l-1:
        delta = np.array(delta)
    else:
        print(delta.shape)
        print(w[layer].shape)
        print(act_log[layer+1].shape)
        delta = np.matmul(np.array(delta), w[layer].T) * act_log[layer+1] * (1-act_log[layer+1])
    
    print('Weights of layer', layer, w[layer] - lr*np.matmul(prior_activation, delta))
    layer -= 1

Weights of layer 2 [[ 1.36335958]
 [-1.19820067]]
(4, 1)
(3, 2)
(4, 2)


ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 2 is different from 1)

In [393]:
w

[array([[ 0.46340106,  1.10509881],
        [ 0.89323539, -0.6322855 ],
        [ 1.56199766,  1.47749094]]), array([[-1.88668892,  0.36552518],
        [ 0.19829845, -1.58008902],
        [-1.08478335,  1.65715283]]), array([[ 1.36335958],
        [-1.22865206]])]

In [394]:
act_log

[   x1  x2  bias
 0   0   0     1
 1   0   1     1
 2   1   0     1
 3   1   1     1,          x1        x2  bias
 0  1.561998  1.477491   1.0
 1  2.455233  0.845205   1.0
 2  2.025399  2.582590   1.0
 3  2.918634  1.950304   1.0,      0         1
 0  0.0  0.000000
 1  0.0  1.219103
 2  0.0  0.000000
 3  0.0  0.000000,           0
 0  0.000000
 1 -1.497853
 2  0.000000
 3  0.000000]