In [1]:
import numpy as np
from sklearn.datasets import load_iris

In [2]:
# Construct a list that contains the number of neurons of each layer in your network.
# The sizes are typed as whitespace-separated integers (the recorded run used "4 2 2 3").
# split() with no argument ignores repeated, leading and trailing whitespace; split(' ')
# would turn those into empty strings, which int() rejects with a ValueError.
structure = input("Enter the number of neurons of each layer in your network").split()
structure = [int(x) for x in structure]
print(structure)

[4, 2, 2, 3]


In [20]:
# Build B_n, the list holding one bias column vector per non-input layer
# (the _n suffix means "for all layers"). Each vector has shape (layer size, 1).
np.random.seed(123)
B_n = []
for layer_size in structure[1:]:
    B_n.append(np.random.randn(layer_size, 1))

print("bias vector of each layer\n")
for B in B_n:
    # A blank line follows every vector.
    print(B, end="\n\n")

bias vector of each layer

[[-1.0856306 ]
 [ 0.99734545]]

[[ 0.2829785 ]
 [-1.50629471]]

[[-0.57860025]
 [ 1.65143654]
 [-2.42667924]]



In [21]:
# Build W_n, the list holding one weight matrix per pair of consecutive layers
# (the _n suffix means "for all layers"). Each matrix has shape
# (size of the feeding layer, size of the receiving layer).
# NOTE(review): the generator is reseeded with the same value as in the bias cell, so
# the bias entries repeat the first weight entries - confirm this is acceptable.
np.random.seed(123)
W_n = []
for fan_in, fan_out in zip(structure, structure[1:]):
    W_n.append(np.random.randn(fan_in, fan_out))

print("weight matrix of each layer\n")
for W in W_n:
    # A blank line follows every matrix.
    print(W, end="\n\n")

weight matrix of each layer

[[-1.0856306   0.99734545]
 [ 0.2829785  -1.50629471]
 [-0.57860025  1.65143654]
 [-2.42667924 -0.42891263]]

[[ 1.26593626 -0.8667404 ]
 [-0.67888615 -0.09470897]]

[[ 1.49138963 -0.638902   -0.44398196]
 [-0.43435128  2.20593008  2.18678609]]



##### Load training and test datasets

In [5]:
# Show which keys the object returned by load_iris() exposes.
load_iris().keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])

In [6]:
# Load the Iris dataset once and keep the feature matrix and the label vector.
iris = load_iris()
data = iris['data']
target = iris['target']

# Quick sanity check of the container types and sizes.
print(type(data), type(target))
print(data.shape, target.shape)
print(len(data), len(target))

<class 'numpy.ndarray'> <class 'numpy.ndarray'>
(150, 4) (150,)
150 150


In [7]:
# Pick a small, fixed subset of 6 samples out of the dataset.
indexes = [0, 52, 108, 49, 142, 88]

# Index a freshly loaded copy of the dataset instead of `data` / `target` themselves:
# overwriting them with their own subset made the cell fail with an IndexError when it
# was run a second time (index 52 no longer exists in a 6-row array).
iris_full = load_iris()
data = iris_full['data'][indexes]
target = iris_full['target'][indexes]

print(data)
print()
print(target)

[[5.1 3.5 1.4 0.2]
 [6.9 3.1 4.9 1.5]
 [6.7 2.5 5.8 1.8]
 [5.  3.3 1.4 0.2]
 [5.8 2.7 5.1 1.9]
 [5.6 3.  4.1 1.3]]

[0 1 2 0 2 1]


In [8]:
# X holds the selected feature rows, one sample per row.
X = data
print(X)
print()

# One-hot encode the labels: row i of Y has a single 1 in column target[i].
# The number of rows follows the number of selected samples instead of being hard-coded.
n_classes = 3   # number of label columns; the labels used here are 0, 1 and 2
Y = np.zeros((len(target), n_classes))
Y[np.arange(len(target)), target] = 1
print(Y)

[[5.1 3.5 1.4 0.2]
 [6.9 3.1 4.9 1.5]
 [6.7 2.5 5.8 1.8]
 [5.  3.3 1.4 0.2]
 [5.8 2.7 5.1 1.9]
 [5.6 3.  4.1 1.3]]

[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [1. 0. 0.]
 [0. 0. 1.]
 [0. 1. 0.]]


In [9]:
# B_n is a list of 3 np.ndarrays with (2, 1),(2, 1),(3, 1)
# W_n is a list of 3 np.ndarrays with (4, 2),(2, 2),(2, 3)

# X is a np.ndarray with shape (6,4)
# Y is a np.ndarray with shape (6,3)

# Z_n is a list of 3 np.ndarrays with (2,1),(2,1),(3,1)
# A_n is a list of 3 np.ndarrays with (2,1),(2,1),(3,1)

# e_Je_B_ns is a list of 3 np.ndarrays with (2,1), (2,1), (3,1)
# e_Je_W_ns is a list of 3 np.ndarrays with (4,2), (2,2), (2,3)

In [10]:
# Provide sigmoid and sigmoid_derivative function
def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)) element-wise.

    The exponential is only ever taken of a non-positive number, so large
    negative inputs do not overflow inside np.exp.

    Parameters
    ----------
    x : scalar or array-like of numbers

    Returns
    -------
    NumPy scalar or ndarray with the same shape as ``x``.
    """
    x = np.asarray(x)
    e = np.exp(-np.abs(x))      # lies in [0, 1], cannot overflow
    # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x)) - the same value.
    return np.where(x >= 0, 1.0, e) / (1.0 + e)

def sigmoid_derivative(x):
    """Return the derivative of the logistic function, s(x) * (1 - s(x)), element-wise."""
    s = sigmoid(x)              # evaluate the sigmoid only once
    return s * (1.0 - s)

In [26]:
# Run one pass over the samples: forward propagate, backpropagate, then update the
# parameters after every sample (sigmoid activations, error term A - y at the output).
# Set the output layer to H and let L begin from the first hidden layer(not from the input layer)
H = len(structure) - 2
learning_rate = 10

for x, y in zip(X, Y):     # x has shape (4,), y has shape (3,)
    y_col = y.reshape(-1, 1)

    # Forward Propagate
    # Z_n / A_n collect the pre- and post-activation column vector of each layer.
    Z_n, A_n = [], []
    a = x                   # the raw sample is the activation fed into the first layer
    for b, W in zip(B_n, W_n):
        z = np.dot(W.T, a).reshape(-1, 1) + b
        a = sigmoid(z)

        Z_n.append(z)
        A_n.append(a)

    # Backpropagate
    # layer_inputs[L] is the column vector that was fed into layer L.
    layer_inputs = [x.reshape(-1, 1)] + A_n[:-1]

    # e_Je_W_ns / e_Je_B_ns receive one gradient per layer; the shapes match W_n / B_n.
    e_Je_W_ns = [None] * len(W_n)    # (4, 2), (2, 2), (2, 3)
    e_Je_B_ns = [None] * len(B_n)    # (2, 1), (2, 1), (3, 1)

    for L in range(H, -1, -1):
        if L == H:
            # Output layer: error between the prediction and the one-hot target.
            delta = sigmoid_derivative(Z_n[L]) * (A_n[L] - y_col)
        else:
            # Hidden layer: pull the error of the following layer back through its weights.
            delta = sigmoid_derivative(Z_n[L]) * np.dot(W_n[L+1], delta)

        e_Je_B_ns[L] = delta
        e_Je_W_ns[L] = np.dot(layer_inputs[L], delta.T)

    # Gradient step, applied in place right after each sample.
    # NOTE(review): the step is scaled by 1/len(X) although the gradients are not
    # accumulated over the samples - confirm per-sample updates with this scaling are intended.
    for i, (wn, ejew, bn, ejeb) in enumerate(zip(W_n, e_Je_W_ns, B_n, e_Je_B_ns)):
        W_n[i] -= learning_rate/len(X) * ejew
        B_n[i] -= learning_rate/len(X) * ejeb
        # wn / bn refer to the arrays that were just updated in place.
        print(wn)
        print(ejew)
        print(bn)
        print(ejeb)
        print()

[[-1.08465082  0.98290076]
 [ 0.2836509  -1.51620773]
 [-0.57833129  1.64747133]
 [-2.42664082 -0.42947909]]
[[-5.87872410e-04  8.66681145e-03]
 [-4.03441850e-04  5.94781178e-03]
 [-1.61376740e-04  2.37912471e-03]
 [-2.30538200e-05  3.39874959e-04]]
[[-1.08543849]
 [ 0.99451316]]
[[-0.00011527]
 [ 0.00169937]]

[[ 1.26603976 -0.86681109]
 [-0.57806606 -0.16356601]]
[[-6.21037426e-05  4.24149591e-05]
 [-6.04920568e-02  4.13142269e-02]]
[[ 0.38863094]
 [-1.57845211]]
[[-0.06339147]
 [ 0.04329444]]

[[ 1.57850233 -0.7119062  -0.44969403]
 [-0.39859391  2.17596387  2.18444144]]
[[-0.05226762  0.04380252  0.00342724]
 [-0.02145442  0.01797973  0.00140679]]
[[-0.36617398]
 [ 1.47341416]
 [-2.44060825]]
[[-0.12745576]
 [ 0.10681343]
 [ 0.0083574 ]]

[[-1.08465151  0.98291083]
 [ 0.28365059 -1.51620321]
 [-0.57833178  1.64747848]
 [-2.42664097 -0.4294769 ]]
[[ 4.17393938e-07 -6.04316926e-06]
 [ 1.87524813e-07 -2.71504706e-06]
 [ 2.96410188e-07 -4.29152600e-06]
 [ 9.07378126e-08 -1.31373245e-06

In [17]:
# Number of training samples; this is the divisor used in the update step of the training loop.
len(X)

6

In [25]:
# Inspect `wn`, the loop variable left over from the update loop of the training cell
# (the weight matrix of the last layer it visited).
wn

array([[ 1.49138963, -0.638902  , -0.44398196],
       [-0.43435128,  2.20593008,  2.18678609]])

In [27]:
# Show the weight matrix of every layer after training.
# The loop variable is not called `x`, so the sample variable `x` of the training
# loop is not overwritten by a weight matrix.
for layer_weights in W_n:
    print(layer_weights)
    print()

[[-1.08351899  0.96795702]
 [ 0.28439881 -1.52608556]
 [-0.57801736  1.64333696]
 [-2.42659682 -0.43005548]]

[[ 1.26616273 -0.86689311]
 [-0.74103299 -0.20884277]]

[[ 1.31657222 -0.9086986  -0.36094287]
 [-0.47968939  2.10309494  2.214709  ]]



In [None]:
# Reference copy of the initial weight matrices as printed by the weight-initialisation
# cell. Kept as comments because a raw array printout is not valid Python and would stop
# a full re-run of the notebook.
#
# [[-1.0856306   0.99734545]
#  [ 0.2829785  -1.50629471]
#  [-0.57860025  1.65143654]
#  [-2.42667924 -0.42891263]]
#
# [[ 1.26593626 -0.8667404 ]
#  [-0.67888615 -0.09470897]]
#
# [[ 1.49138963 -0.638902   -0.44398196]
#  [-0.43435128  2.20593008  2.18678609]]

In [37]:
# Show the weight gradients left over from the last sample processed by the training loop.
# The loop variable is not called `x`, so the sample variable `x` of the training
# loop is not overwritten by a gradient matrix.
for layer_gradient in e_Je_W_ns:
    print(layer_gradient)
    print()

[[ 2.68546237e-06 -4.26149872e-05]
 [ 1.43864056e-06 -2.28294574e-05]
 [ 1.96614210e-06 -3.12002585e-05]
 [ 6.23410908e-07 -9.89276488e-06]]

[[ 3.27416513e-07 -7.50649946e-08]
 [ 4.55425009e-02 -1.04412803e-02]]

[[ 0.04896634 -0.00734351  0.00337623]
 [ 0.02042727 -0.00306349  0.00140846]]

