In [1]:
import numpy as np
from sklearn.datasets import load_iris
import matplotlib.pyplot as plt

In [2]:
# Construct a list that contains the number of neurons of each layer in your network.
structure = input("Enter the number of neurons of each layer in your network").split(' ')
structure = [int(x) for x in structure]
print(structure)    

[4, 5, 5, 3]


In [3]:
# Construct a list that contains all the bias vectors of your network(_n means it has all vectors)
np.random.seed(123)
B_n = [np.random.randn(l, 1) for l in structure[1:]]
print("bias vector of each layer\n")
for B in B_n:
    print(B)
    print()

bias vector of each layer

[[-1.0856306 ]
 [ 0.99734545]
 [ 0.2829785 ]
 [-1.50629471]
 [-0.57860025]]

[[ 1.65143654]
 [-2.42667924]
 [-0.42891263]
 [ 1.26593626]
 [-0.8667404 ]]

[[-0.67888615]
 [-0.09470897]
 [ 1.49138963]]



In [4]:
# Construct a list that contains all the weight matrices of your network(_n means it has all matrices)
np.random.seed(123)
W_n = [np.random.randn(l, next_l) for l, next_l in zip(structure[:-1], structure[1:])]
print("weight matrix of each layer\n")
for W in W_n:
    print(W)
    print()

weight matrix of each layer

[[-1.0856306   0.99734545  0.2829785  -1.50629471 -0.57860025]
 [ 1.65143654 -2.42667924 -0.42891263  1.26593626 -0.8667404 ]
 [-0.67888615 -0.09470897  1.49138963 -0.638902   -0.44398196]
 [-0.43435128  2.20593008  2.18678609  1.0040539   0.3861864 ]]

[[ 0.73736858  1.49073203 -0.93583387  1.17582904 -1.25388067]
 [-0.6377515   0.9071052  -1.4286807  -0.14006872 -0.8617549 ]
 [-0.25561937 -2.79858911 -1.7715331  -0.69987723  0.92746243]
 [-0.17363568  0.00284592  0.68822271 -0.87953634  0.28362732]
 [-0.80536652 -1.72766949 -0.39089979  0.57380586  0.33858905]]

[[-0.01183049  2.39236527  0.41291216]
 [ 0.97873601  2.23814334 -1.29408532]
 [-1.03878821  1.74371223 -0.79806274]
 [ 0.02968323  1.06931597  0.89070639]
 [ 1.75488618  1.49564414  1.06939267]]



In [8]:
load_iris().keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])

In [9]:
data = load_iris()['data']
target = load_iris()['target']

print(type(data), type(target))
print(data.shape, target.shape)
print(len(data), len(target))

<class 'numpy.ndarray'> <class 'numpy.ndarray'>
(150, 4) (150,)
150 150


In [10]:
# Just pick up 5 indexes out of dataset randomly
ind = [73, 41, 106]

data = data[ind]
target = target[ind]

print(data)
print()
print(target)

[[6.1 2.8 4.7 1.2]
 [4.5 2.3 1.3 0.3]
 [4.9 2.5 4.5 1.7]]

[1 0 2]


In [11]:
kind_labels, count_labels = np.unique(target, return_counts=True)
num_labels = len(kind_labels)
print(f"label의 종류 : {kind_labels},  label종류의 갯수 : {num_labels}")

label의 종류 : [0 1 2],  label종류의 갯수 : 3


In [12]:
one_hot_labels = np.zeros((len(target), num_labels))
for i in range(len(one_hot_labels)):
    for j in range(len(one_hot_labels[i])):
        one_hot_labels[i, target[i]] = 1
print(one_hot_labels)

[[0. 1. 0.]
 [1. 0. 0.]
 [0. 0. 1.]]


In [14]:
# B_n is a list of 3 np.ndarrays with (2, 1),(2, 1),(3, 1)
# W_n is a list of 3 np.ndarrays with (4, 2),(2, 2),(2, 3)

# X is a np.ndarray with shape (150,4)
# Y is a np.ndarray with shape (150,3)

# Z_n is a list of 3 np.ndarrays with (2,1),(2,1),(3,1)
# A_n is a list of 3 np.ndarrays with (2,1),(2,1),(3,1)

# e_Je_B_ns a list of 3 np.ndarrays with (2,1), (2,1), (3,1)
# e_Je_W_ns a list of 3 np.ndarrays with (4,2), (2,2), (2,3)

In [13]:
# Provide activation and activation_derivative funcitions
def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def sigmoid_derivative(x):
    return sigmoid(x) * (1.0 - sigmoid(x))

def softmax(x):
    exp_total = np.sum([np.exp(i) for i in x])
    exp_x = np.array([np.exp(i)/exp_total for i in x])
    return exp_x

In [26]:
# Set the output layer to H and let L begin from the first hidden layer(not from the input layer)
H = len(structure) - 2
learning_rate = 0.10
epochs = 1

all_epoch_errors = []

for epoch in range(epochs):
    # Create a list that contains errors of all data points for each epoch
    per_epoch_errors = []
    
    W_n = [np.random.randn(l, next_l) for l, next_l in zip(structure[:-1], structure[1:])]
    B_n = [np.random.randn(l, 1) for l in structure[1:]]
    
    for x, y in zip(data, one_hot_labels):       # x : (4,), y : (3,)
        # Forward Propagate
        # Create two lists that contain pre and post activation vector of each layer, respectively
        Z_n, A_n = [], []
        
        for i, (b, w) in enumerate(zip(B_n, W_n)):
            if i == 0:
                z = np.dot(w.T, x).reshape(-1,1) + np.array(b)                
                a = sigmoid(z)
            elif (i > 0) and (i < H):
                z = np.dot(w.T, a)
                a = sigmoid(z)
            else:
                z = np.dot(w.T, a)
                a = softmax(z)

            Z_n.append(z)
            A_n.append(a)
            
        # Backpropagate
        e_Je_W_ns = [np.zeros(w.shape) for w in W_n]
        e_Je_B_ns = [np.zeros(b.shape) for b in B_n]
        
        for L in range(H, -1, -1):
            if L != H:
                delta = sigmoid_derivative(Z_n[L]) * np.dot(W_n[L+1], delta)
            else:
                delta = A_n[L] - y.reshape(-1,1)
            
            e_Je_B_ns = delta
            
            if L != 0:
                e_Je_W_ns[L] = np.dot(A_n[L-1], delta.T)
            else:
                e_Je_W_ns[L] = np.dot(x.reshape(-1,1), delta.T)
                
            for i, (wn, ejew, bn, ejeb) in enumerate(zip(W_n, e_Je_W_ns, B_n, e_Je_B_ns)):
                W_n[i] -= learning_rate/len(data) * ejew
            B_n[i] -= learning_rate/len(data) * ejeb
            """
            print(wn)
            print(ejew)
            print(bn)
            print(ejeb)
            print()
            """    
    
            datapoint_error = calculate_loss(A_n[-1], y)
            per_epoch_errors.append(datapoint_error)
        total_errors = np.sum(per_epoch_errors)
        all_epoch_errors.append(total_errors)
        print(f"{epoch:3d}th epoch total error : {total_errors:.3f}")

NameError: name 'calculate_loss' is not defined

In [17]:
for z in Z_n:
    print(z)
    print()

[[  1.83154471]
 [ -0.96947549]
 [  6.09361791]
 [-15.10743014]
 [ -3.23182179]]

[[ 1.95534369]
 [-2.13481311]
 [ 1.30590787]
 [ 2.54368828]
 [-0.56127734]]

[[ 1.87108937]
 [ 0.37920913]
 [-3.28865198]]



In [18]:
for a in A_n:
    print(a)
    print()

[[8.61945643e-01]
 [2.74985060e-01]
 [9.97747856e-01]
 [2.74742811e-07]
 [3.79856179e-02]]

[[0.87602815]
 [0.10575893]
 [0.78682759]
 [0.92714834]
 [0.36325196]]

[[0.81255067]
 [0.1827827 ]
 [0.00466663]]



In [6]:
qb = np.array([[-0.67888615],[-0.09470897],[ 1.49138963]])

qw = np.array([[-0.01183049, 2.39236527, 0.41291216],
               [ 0.97873601, 2.23814334,-1.29408532],
               [-1.03878821, 1.74371223, -0.79806274],
               [ 0.02968323, 1.06931597, 0.89070639],
               [ 1.75488618, 1.49564414, 1.06939267]])

qa = np.array()

In [7]:
qw

array([[-0.01183049,  2.39236527,  0.41291216],
       [ 0.97873601,  2.23814334, -1.29408532],
       [-1.03878821,  1.74371223, -0.79806274],
       [ 0.02968323,  1.06931597,  0.89070639],
       [ 1.75488618,  1.49564414,  1.06939267]])

In [19]:
qr = np.array([[ 1.87108937],[ 0.37920913], [-3.28865198]])
softmax(qr)

array([[0.81255067],
       [0.1827827 ],
       [0.00466663]])

In [21]:
np.exp(1.87108937)+np.exp(0.37920913)+np.exp(-3.28865198)

7.993801077857318

In [23]:
np.exp(1.87108937) / 7.993801077857318

0.8125506680475049

In [157]:
for z in Z_n:
    print(z)
print()
    
for a in A_n:
    print(a)

[[ -3.08463061]
 [-10.99406957]
 [  1.47841105]]
[[ 0.20035037]
 [-0.61525204]
 [-1.41450794]]

[[4.37456981e-02]
 [1.68007611e-05]
 [8.14332459e-01]]
[[0.60927625]
 [0.26952722]
 [0.12119653]]


In [159]:
w1 = np.exp(4.37456981e-02)
w2 = np.exp(1.68007611e-05)
w3 = np.exp(8.14332459e-01)
w = w1 + w2 + w3
print(w1/w, w2/w, w3/w)

0.24282174500874198 0.23243223430314788 0.5247460206881103


In [145]:
q1 = np.array([[ 1.49073203, -1.25388067, -1.4286807,  -0.25561937],
               [-0.93583387, -0.6377515,  -0.14006872, -2.79858911],
               [ 1.17582904,  0.9071052,  -0.8617549,  -1.7715331 ]])
q2 = np.array([[6.1], [2.8], [4.7], [1.2]])
q3 = np.array([[-0.39089979],[ 0.57380586],[ 0.33858905]])

np.dot(q1, q2) + q3

array([[ -1.82984282],
       [-10.93711486],
       [  3.874953  ]])

In [125]:
np.dot(np.array([[ 0.17872861, -0.6912346, -0.7062834, -1.16791303],[1.48206927, 0.64496092, -1.64362799, -0.18604671], [-1.62516337, 0.58674198, 0.05285727, -0.81424207]]), 
       np.array([[6.1], [2.8], [4.7], [1.2]])) + np.array([[0.82334188],[1.95710475],[0.28557921]])

array([[-4.7428981 ],
       [ 4.85531027],
       [-8.71370112]])

In [60]:
data[0]

array([6.5, 3.2, 5.1, 2. ])

In [37]:
for w in W_n:
    print(w)
    print()

[[-0.83029444  1.74819998  0.15065959]
 [-0.64190703 -0.23828275  0.73563826]
 [ 1.14152941 -0.40749578  0.96474426]
 [ 0.34891558  0.46111748 -1.33873322]]

[[ 1.31356114 -0.37492699 -2.61059038]
 [-0.62066217  0.02652033 -3.25676797]
 [ 2.15103401 -0.22474109 -0.19109542]]



In [39]:
for b in B_n:
    print(b)
    print()

[[-0.30884409]
 [-0.29061399]
 [ 0.70296863]]

[[-0.65857087]
 [-0.41272655]
 [-0.51388702]]



In [None]:
data[0]

In [42]:
# Provide softmax and softmax_derivative function
def softmax(x):
    exp_sum = np.sum([np.exp(i) for i in x])
    softmax_x = np.array([np.exp(i)/exp_sum for i in x])
    return softmax_x

In [59]:
# Calculate the Loss
def calculate_loss(actual, expected):
    loss = - np.dot(actual.T, expected)
    return loss

In [52]:
a = np.array([2,5,1]).reshape(-1,1)
y = np.array([0,1,0]).reshape(-1,1)

exp_sum = np.sum([np.exp(i)[0] for i in a])
print(exp_sum)
softmax_x = [np.exp(i[0])/exp_sum for i in a]
print(softmax_x)

158.5204970299663
[0.04661262257797389, 0.9362395518765058, 0.01714782554552039]


In [62]:
 - np.dot(np.array([0,1,0]).T, np.array([0.04661262257797389, 0.9362395518765058, 0.01714782554552039]))

-0.9362395518765058

In [53]:
[np.exp(i)[0] for i in a]

[7.38905609893065, 148.4131591025766, 2.718281828459045]

In [57]:
2.718281828459045 /158.5204970299663

0.01714782554552039

In [63]:
softmax(a)

array([[0.04661262],
       [0.93623955],
       [0.01714783]])

In [66]:
calculate_loss(softmax(a), y)[0][0]

-0.9362395518765058