In [1]:
import numpy as np
from sklearn.datasets import load_iris

In [2]:
# Construct a list that contains the number of neurons of each layer in your network.
structure = input("Enter the number of neurons of each layer in your network").split(' ')
structure = [int(x) for x in structure]
print(structure)    

[4, 2, 2, 3]


In [9]:
# Construct a list that contains all the bias vectors of your network(_n means it has all vectors)
np.random.seed(123)
B_n = [np.random.randn(l, 1) for l in structure[1:]]
print("bias vector of each layer\n")
for B in B_n:
    print(B)
    print()

bias vector of each layer

[[-1.0856306 ]
 [ 0.99734545]]

[[ 0.2829785 ]
 [-1.50629471]]

[[-0.57860025]
 [ 1.65143654]
 [-2.42667924]]



In [10]:
# Construct a list that contains all the weight matrices of your network(_n means it has all matrices)
np.random.seed(123)
W_n = [np.random.randn(l, next_l) for l, next_l in zip(structure[:-1], structure[1:])]
print("weight matrix of each layer\n")
for W in W_n:
    print(W)
    print()

weight matrix of each layer

[[-1.0856306   0.99734545]
 [ 0.2829785  -1.50629471]
 [-0.57860025  1.65143654]
 [-2.42667924 -0.42891263]]

[[ 1.26593626 -0.8667404 ]
 [-0.67888615 -0.09470897]]

[[ 1.49138963 -0.638902   -0.44398196]
 [-0.43435128  2.20593008  2.18678609]]



##### Don't forget W_n & B_n is of list where a set of numpy.array are contained.

##### Load training and test datasets

In [11]:
load_iris().keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])

In [12]:
data = load_iris()['data']
target = load_iris()['target']

print(type(data), type(target))
print(data.shape, target.shape)
print(len(data), len(target))

<class 'numpy.ndarray'> <class 'numpy.ndarray'>
(150, 4) (150,)
150 150


In [13]:
# Just pick up 5 indexes out of dataset randomly
indexes = [0, 52, 108, 49, 142, 88]

data = data[indexes]
target = target[indexes]

print(data)
print(target)

[[5.1 3.5 1.4 0.2]
 [6.9 3.1 4.9 1.5]
 [6.7 2.5 5.8 1.8]
 [5.  3.3 1.4 0.2]
 [5.8 2.7 5.1 1.9]
 [5.6 3.  4.1 1.3]]
[0 1 2 0 2 1]


In [14]:
# Randomly shuffle the data
np.random.seed(123)
np.random.shuffle(data)

np.random.seed(123)
np.random.shuffle(target)

print(data)
print(target)   #2

[[6.9 3.1 4.9 1.5]
 [5.  3.3 1.4 0.2]
 [5.8 2.7 5.1 1.9]
 [5.1 3.5 1.4 0.2]
 [6.7 2.5 5.8 1.8]
 [5.6 3.  4.1 1.3]]
[1 0 2 0 2 1]


##### Forward Pass

In [15]:
# Provide sigmoid and sigmoid_derivative function
def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def sigmoid_derivative(x):
    return sigmoid(x) * (1.0 - sigmoid(x))

In [34]:
chosen = 5
X = data[chosen]
print(X)

y = np.array([0, 0, 0])
y[target[chosen]] = 1
y = y.reshape(-1, 1)
print(y)

[5.6 3.  4.1 1.3]
[[0]
 [1]
 [0]]


In [26]:
np.dot(W_n[0].T, X).reshape(-1,1) + B_n[0]

array([[-11.84317054],
       [  8.27689919]])

In [27]:
# Create two lists that contain pre and post activation vector of each layer, respectively
Z_n, A_n = [], []

# Forward pass layer by layer from L=0(First hidden layer) thru L=H(Output layer)
for i, (b, W) in enumerate(zip(B_n, W_n)):
    if i == 0:
        z = np.dot(np.array(W).T, X).reshape(-1, 1) + np.array(b)
    else:
        z = np.dot(np.array(W).T, a).reshape(-1, 1) + np.array(b)
    a = sigmoid(z)

    Z_n.append(z)
    A_n.append(a)

In [28]:
for z, a in zip(Z_n, A_n):
    print(np.round(z, 4))
    print(np.round(a, 4))
    print()

[[-11.8432]
 [  8.2769]]
[[0.    ]
 [0.9997]]

[[-0.3957]
 [-1.601 ]]
[[0.4023]
 [0.1678]]

[[-0.0515]
 [ 1.7646]
 [-2.2383]]
[[0.4871]
 [0.8538]
 [0.0964]]



##### Backpropagation 

In [31]:
# Initialize a list called e_Je_W_ns that will contain e_Je_W matrices for each layer in the network.
e_Je_W_ns = [np.zeros(W.shape) for W in W_n]    # (4, 2), (2, 2), (2, 3)

# Initialize a list called e_Je_B_ns that will contain e_Je_B vectors for each layer in the network.
e_Je_B_ns = [np.zeros(B.shape) for B in B_n]    # (2, 1), (2, 1), (3, 1)

In [36]:
y, A_n[2]

(array([[0],
        [1],
        [0]]),
 array([[0.48713827],
        [0.85378893],
        [0.0963659 ]]))

In [43]:
np.dot(W_n[2], sigmoid_derivative(Z_n[2]) * (A_n[2] - y))

array([[ 0.18944364],
       [-0.07477456]])

In [44]:
H = len(structure) - 2

for L in range(H, -1, -1):
    if L != H:
        delta = sigmoid_derivative(Z_n[L]) * np.dot(W_n[L+1], delta)
    else:
        delta = sigmoid_derivative(Z_n[L]) * (A_n[L] - y)
    e_Je_B_ns[L] = delta
    # print(f"{L} : {delta}")

2 : [[ 0.12170398]
 [-0.01825202]
 [ 0.0083915 ]]
1 : [[ 0.04555408]
 [-0.01044394]]
0 : [[ 4.79546853e-07]
 [-7.60981914e-06]]


In [54]:
H = len(structure) - 2
deltas = []
e_Je_W_n, e_Je_B_n = [], []

for L in range(H, -1, -1):
    if L == H:
        delta = sigmoid_derivative(Z_n[L]) * (A_n[L] - y)                
    else:
        delta = sigmoid_derivative(Z_n[L]) * np.dot(W_n[L+1], delta)    
    
    if L == 0:
        e_Je_W = np.dot(X.reshape(-1,1), delta.reshape(-1,1).T)
    else:              
        e_Je_W = np.dot(A_n[L-1].reshape(-1,1), delta.reshape(-1,1).T)
    e_Je_B = delta
    
    deltas.append(delta)
    e_Je_W_n.append(e_Je_W)
    e_Je_B_n.append(e_Je_B)

(2, 1)

(1, 3)


In [57]:
print(deltas)
print(e_Je_B_n)
print(e_Je_W_n)

[array([ 0.02341772, -0.03778126,  0.14210594]), array([ 5.67217781e-06, -3.31249893e-07])]
[array([ 0.02341772, -0.03778126,  0.14210594]), array([ 5.67217781e-06, -3.31249893e-07])]
[array([[ 0.02341685, -0.03777985,  0.14210064],
       [ 0.02341766, -0.03778117,  0.14210559]]), array([[ 3.91380269e-05, -2.28562426e-06],
       [ 1.75837512e-05, -1.02687467e-06],
       [ 2.77936713e-05, -1.62312447e-06],
       [ 8.50826672e-06, -4.96874839e-07]])]


In [58]:
deltas, e_Je_W_n, e_Je_B_n = deltas[::-1], e_Je_W_n[::-1], e_Je_B_n[::-1]

print(deltas)
print()
print(e_Je_B_n)
print()
print(e_Je_W_n)

[array([ 5.67217781e-06, -3.31249893e-07]), array([ 0.02341772, -0.03778126,  0.14210594])]

[array([ 5.67217781e-06, -3.31249893e-07]), array([ 0.02341772, -0.03778126,  0.14210594])]

[array([[ 3.91380269e-05, -2.28562426e-06],
       [ 1.75837512e-05, -1.02687467e-06],
       [ 2.77936713e-05, -1.62312447e-06],
       [ 8.50826672e-06, -4.96874839e-07]]), array([[ 0.02341685, -0.03777985,  0.14210064],
       [ 0.02341766, -0.03778117,  0.14210559]])]


In [36]:
sigmoid_derivative(Z_n[1]) * (A_n[1] - y)  

array([ 0.02341772, -0.03778126,  0.14210594])

In [38]:
deltas[0] = sigmoid_derivative(Z_n[1]) * (A_n[1] - y) 

IndexError: list assignment index out of range

In [41]:
print(deltas)

[array([ 0.02341772, -0.03778126,  0.14210594]), array([ 5.67217781e-06, -3.31249893e-07])]


In [42]:
deltas[::-1]

[array([ 5.67217781e-06, -3.31249893e-07]),
 array([ 0.02341772, -0.03778126,  0.14210594])]

In [None]:
e_Je_W, e_Je_B = [], []

for L in range(H, -1, -1):
    if L == H:
        e_Je_W       

In [None]:
for L in range(H, -1, -1):
            if L != H:
                delta = sigmoid_derivative(Z_n[L]) * np.dot(self.W_n[L+1], delta)
            else:
                delta = sigmoid_derivative(Z_n[L]) * (A_n[L] - y)
                
if L != 0:
                e_Je_W_ns[L] = np.dot(A_n[L-1], delta.T)    
            else:
                e_Je_W_ns[L] = np.dot(x, delta.T) 

In [54]:
B_n

[array([[-0.07351368],
        [ 1.81403277],
        [-0.44199761]]),
 array([[ 1.38951142],
        [-1.07746533],
        [ 0.23848917]])]

In [14]:
q = np.array([[ 0.08290516,  1.15085031], [-0.46114854, -0.34309745],
              [-0.01330717, -1.15759124], [-1.68601068, -0.13406605]])

r = np.array([-0.6379268, -0.59526757])

t = np.dot(q.T, X) + r
u = sigmoid(t)

print(t)
print(u)

[-4.08966282  0.40870132]
[0.01646911 0.60077644]
