# Single Layer Neural Network

이번 시간에는 딥러닝 알고리즘의 가장 기본이 되는 인공신경망(artificial neural network, ANN), 그 중에서도 single-layer neural network 모델을 구현합니다. 오늘은 크게 크게 세 가지 방식, 1) Random Search, 2) h-step Search, 3) Gradient Descent 로 모델을 학습하는 법을 배우며, 이 중에 어떤 것이 가장 좋고 어떤 것을 선택해야하는지를 배웁니다.


In [1]:
import numpy as np

## Case 1 - 0.3 x X1 + 0.5 x X2

### Load Dataset

In [2]:
x1 = np.random.rand(100)

print(x1.shape)
x1[:10]

(100,)


array([ 0.87501009,  0.93544611,  0.22861649,  0.83660367,  0.52134873,
        0.07942708,  0.61426657,  0.54570074,  0.79643769,  0.47304706])

In [3]:
x2 = np.random.rand(100)

print(x2.shape)
x2[:10]

(100,)


array([ 0.12552812,  0.17650489,  0.60231202,  0.43837184,  0.27706285,
        0.21046588,  0.23374773,  0.40589112,  0.64111608,  0.72430626])

In [4]:
y = 0.3 * x1 + 0.5 * x2 + 0.1

print(y.shape)
y[:10]

(100,)


array([ 0.42526709,  0.46888628,  0.46974096,  0.57016702,  0.39493604,
        0.22906107,  0.40115384,  0.46665578,  0.65948935,  0.60406725])

### First idea: Random Search

In [5]:
num_epoch = 10000

best_error = np.inf
best_epoch = None
best_w1 = None
best_w2 = None
best_b = None

for epoch in range(num_epoch):
    w1 = np.random.uniform(low=-1.0, high=1.0)
    w2 = np.random.uniform(low=-1.0, high=1.0)
    b = np.random.uniform(low=-1.0, high=1.0)

    y_predict = x1 * w1 + x2 * w2 + b
    
    error = np.abs(y_predict - y).mean()
    
    if error < best_error:
        best_error = error
        best_epoch = epoch
        best_w1 = w1
        best_w2 = w2
        best_b = b

        print("{0:4} w1 = {1:.5f}, w2 = {2:.5f}, b = {3:.5f}, error = {4:.5f}".format(epoch, w1, w2, b, error))

print("----" * 15)
print("{0:4} w1 = {1:.5f}, w2 = {2:.5f}, b = {3:.5f}, error = {4:.5f}".format(best_epoch, best_w1, best_w2, best_b, best_error))

   0 w1 = 0.24811, w2 = -0.35289, b = -0.57835, error = 1.10892
   1 w1 = -0.86683, w2 = -0.37232, b = 0.01836, error = 1.08027
   3 w1 = 0.96589, w2 = -0.70855, b = 0.18263, error = 0.34246
   5 w1 = -0.33856, w2 = 0.01300, b = 0.83380, error = 0.23888
  20 w1 = 0.84390, w2 = 0.71370, b = -0.37859, error = 0.15919
  26 w1 = 0.74251, w2 = 0.44318, b = -0.22392, error = 0.14365
  44 w1 = 0.38717, w2 = 0.20687, b = 0.30972, error = 0.12332
 139 w1 = 0.55367, w2 = 0.31878, b = 0.03360, error = 0.07334
 393 w1 = 0.35406, w2 = 0.47005, b = 0.07242, error = 0.01834
------------------------------------------------------------
 393 w1 = 0.35406, w2 = 0.47005, b = 0.07242, error = 0.01834


### Case 2 - h-step Search

In [6]:
num_epoch = 10000

w1 = np.random.uniform(low=-1.0, high=1.0)
w2 = np.random.uniform(low=-1.0, high=1.0)
b = np.random.uniform(low=-1.0, high=1.0)

h = 0.01

for epoch in range(num_epoch):
    y_predict = x1 * w1 + x2 * w2 + b
    current_error = np.abs(y_predict - y).mean()

    if current_error < 0.005:
        break

    y_predict = x1 * (w1 + h) + x2 * w2 + b
    h_plus_error = np.abs(y_predict - y).mean()
    if h_plus_error < current_error:
        w1 = w1 + h
    else:
        y_predict = x1 * (w1 - h) + x2 * w2 + b
        h_minus_error = np.abs(y_predict - y).mean()
        if h_minus_error < current_error:
            w1 = w1 - h
            
    y_predict = x1 * w1 + x2 * (w2 + h) + b
    h_plus_error = np.abs(y_predict - y).mean()
    if h_plus_error < current_error:
        w2 = w2 + h
    else:
        y_predict = x1 * w1 + x2 * (w2 - h) + b
        h_minus_error = np.abs(y_predict - y).mean()
        if h_minus_error < current_error:
            w2 = w2 - h

    y_predict = x1 * w1 + x2 * w2 + (b + h)
    h_plus_error = np.abs(y_predict - y).mean()
    if h_plus_error < current_error:
        b = b + h
    else:
        y_predict = x1 * w1 + x2 * w2 + (b - h)
        h_minus_error = np.abs(y_predict - y).mean()
        if h_minus_error < current_error:
            b = b - h

print("{0} w1 = {1:.5f}, w2 = {2:.5f} b = {3:.5f} error = {4:.5f}".format(epoch, w1, w2, b, current_error))

9999 w1 = 0.29563, w2 = 0.47930 b = 0.11422 error = 0.00575


### Third Idea - Gradient Descent

In [7]:
num_epoch = 100
learning_rate = 1.1

w1 = np.random.uniform(low=-1.0, high=1.0)
w2 = np.random.uniform(low=-1.0, high=1.0)
b = np.random.uniform(low=-1.0, high=1.0)

for epoch in range(num_epoch):
    y_predict = x1 * w1 + x2 * w2 + b

    error = np.abs(y_predict - y).mean()
    if error < 0.005:
        break

    w1 = w1 - learning_rate * ((y_predict - y) * x1).mean()
    w2 = w2 - learning_rate * ((y_predict - y) * x2).mean()
    b = b - learning_rate * (y_predict - y).mean()

    if epoch % 10 == 0:
        print("{0:2} w1 = {1:.5f}, w2 = {2:.5f} b = {3:.5f} error = {4:.5f}".format(epoch, w1, w2, b, error))
    
print("----" * 15)
print("{0:2} w1 = {1:.5f}, w2 = {2:.5f} b = {3:.5f} error = {4:.5f}".format(epoch, w1, w2, b, error))

 0 w1 = -0.02221, w2 = 1.39444 b = 0.68339 error = 1.29798
10 w1 = 0.03895, w2 = 0.72976 b = 0.13437 error = 0.08916
20 w1 = 0.18386, w2 = 0.58134 b = 0.12047 error = 0.03578
30 w1 = 0.24780, w2 = 0.52802 b = 0.11343 error = 0.01506
40 w1 = 0.27592, w2 = 0.50891 b = 0.10822 error = 0.00659
------------------------------------------------------------
44 w1 = 0.28081, w2 = 0.50616 b = 0.10703 error = 0.00479


In [8]:
num_epoch = 100
learning_rate = 1.1

w1 = np.random.uniform(low=-1.0, high=1.0)
w2 = np.random.uniform(low=-1.0, high=1.0)
b = np.random.uniform(low=-1.0, high=1.0)

for epoch in range(num_epoch):
    y_predict = x1 * w1 + x2 * w2 + b

    error = np.abs(y_predict - y).mean()
    if error < 0.005:
        break

    w1 = w1 - learning_rate * ((y_predict - y) * x1).mean()
    w2 = w2 - learning_rate * ((y_predict - y) * x2).mean()
    b = b - learning_rate * (y_predict - y).mean()

    if epoch % 10 == 0:
        print("{0:2} w1 = {1:.5f}, w2 = {2:.5f} b = {3:.5f} error = {4:.5f}".format(epoch, w1, w2, b, error))

print("----" * 15)
print("{0:2} w1 = {1:.5f}, w2 = {2:.5f} b = {3:.5f} error = {4:.5f}".format(epoch, w1, w2, b, error))

 0 w1 = 0.50063, w2 = 0.46433 b = 1.20119 error = 1.76257
10 w1 = 0.16942, w2 = 0.30835 b = 0.28277 error = 0.06494
20 w1 = 0.21699, w2 = 0.40647 b = 0.19127 error = 0.03228
30 w1 = 0.25138, w2 = 0.45446 b = 0.14865 error = 0.01703
40 w1 = 0.27224, w2 = 0.47735 b = 0.12609 error = 0.00910
------------------------------------------------------------
50 w1 = 0.28347, w2 = 0.48771 b = 0.11493 error = 0.00489
