# Gradient Descent

In [29]:
import numpy as np

### Load Dataset

y(label) = 0.3(weight) * x(feature)

In [30]:
x1 = np.random.uniform(low = -1.0, high = 1.0, size = 100)

print(x1.shape)
x1[:10]

(100,)


array([ 0.40813975,  0.30417597,  0.51924879, -0.51279441, -0.85439036,
       -0.58365537,  0.92831435,  0.06351102, -0.65990622, -0.77224845])

In [31]:
x2 = np.random.uniform(low = -1.0, high = 1.0, size = 100)

print(x1.shape)
x1[:10]

(100,)


array([ 0.40813975,  0.30417597,  0.51924879, -0.51279441, -0.85439036,
       -0.58365537,  0.92831435,  0.06351102, -0.65990622, -0.77224845])

In [40]:
# y == label, 0.3, 0.5 == weight, x1 == feature, 0.1 == bias
y = 0.3 * x1 + 0.5 * x2 + 0.1

print(y.shape)
y[:10]

(100,)


array([ 0.05730338,  0.65852775,  0.73821945, -0.48326039, -0.29297003,
        0.36186463,  0.02824399,  0.39147665, -0.51708804,  0.16775779])

### Random Search

In [44]:
num_epoch = 1000000

best_error = 9999
best_w1 = None
best_w2 = None
best_epoch = None

for epoch in range(num_epoch):
    w1 = np.random.uniform(low = -1.0, high = 1.0)
    w2 = np.random.uniform(low = -1.0, high = 1.0)
    b = np.random.uniform(low = -1.0, high = 1.0)
    
    y_predict = w1 * x1 + w2 * x2 + b
    
    error = np.abs(y_predict - y).mean()
    
    if error < best_error:
        best_error = error
        best_w1 = w1
        best_w2 = w2
        best_b = b
        best_epoch = epoch
        
        print(f"{best_epoch}, w1 = {best_w1:.6f}, w2 = {best_w2:.6f}, b = {best_b:.6f}, error = {best_error:.6f}")
        
print("____" * 10)
print(f"{best_epoch}, w1 = {best_w1:.6f}, w2 = {best_w2:.6f}, b = {best_b:.6f}, error = {best_error:.6f}")

0, w1 = 0.463623, w2 = 0.207027, b = 0.906408, error = 0.823845
1, w1 = -0.480681, w2 = 0.664055, b = -0.676942, error = 0.764949
2, w1 = 0.985197, w2 = 0.058394, b = -0.620715, error = 0.715907
5, w1 = 0.030290, w2 = 0.274146, b = -0.637660, error = 0.711718
6, w1 = 0.142581, w2 = 0.088174, b = -0.193458, error = 0.302438
18, w1 = 0.097667, w2 = 0.654438, b = 0.179266, error = 0.126915
170, w1 = 0.385855, w2 = 0.435261, b = 0.107406, error = 0.046345
8483, w1 = 0.285073, w2 = 0.548138, b = 0.143664, error = 0.040884
14829, w1 = 0.275467, w2 = 0.432356, b = 0.118732, error = 0.040610
16626, w1 = 0.358551, w2 = 0.458130, b = 0.128159, error = 0.038059
16659, w1 = 0.360052, w2 = 0.512365, b = 0.094084, error = 0.032383
21075, w1 = 0.332206, w2 = 0.521309, b = 0.097148, error = 0.020565
31148, w1 = 0.311279, w2 = 0.496604, b = 0.111656, error = 0.011562
411684, w1 = 0.293698, w2 = 0.489857, b = 0.105367, error = 0.008008
528950, w1 = 0.293332, w2 = 0.506252, b = 0.099829, error = 0.003917

In [6]:
y_example = [-1, 0, +1]
y_predict_example = [+1, -1, 0]

y_example = np.array(y_example)
y_predict_example = np.array(y_predict_example)

np.abs(y_predict_example - y_example).mean()

1.3333333333333333

### h-step search

In [7]:
# num_epoch = 100
# h = 0.1

# w1 = np.random.uniform(low = -1.0, high = 1.0)

# for epoch in range(num_epoch):
#     y_predict = w1 * x1
    
#     current_error = np.abs(y_predict - y).mean()
    
#     y_predict = (w1 + h) * x1
    
#     h_plus_error = np.abs(y_predict - y).mean()
    
#     if h_plus_error < current_error:
#         w1 = w1 + h
#         print(f"{epoch}, w1 = {w1:.6f}, error = {h_plus_error:.6f}")    
#         continue
    
#     y_predict = (w1 - h) * x1
    
#     h_minus_error = np.abs(y_predict - y).mean()
    
#     if h_plus_error < current_error:
#         w1 = w1 - h
#         print(f"{epoch}, w1 = {w1:.6f}, error = {h_minus_error:.6f}")    
#         continue    
    
#     break
    
# print("____" * 10)        
# print(f"{epoch}, w1 = {w1:.6f}, error = {current_error:.6f}")

________________________________________
0, w1 = 0.392849, error = 0.042388


### Gradient Descent

In [50]:
num_epoch = 100
learing_late = 1.0

w1 = np.random.uniform(low = -1.0, high = 1.0)
w2 = np.random.uniform(low = -1.0, high = 1.0)
b = np.random.uniform(low = -1.0, high = 1.0)

for epoch in range(num_epoch):
    y_predict = w1 * x1 + w2 * x2 + b
    
    error = np.abs(y_predict - y).mean()
    
    if error < 0.0005:
        break
        
    print(f"{epoch}, w1 = {w1:.6f}, w2 = {w2:.6f}, b = {best_b:.6f}, error = {error:.6f}")
    
    w1 = w1 - learing_late * ((y_predict - y) * x1).mean()
    w2 = w2 - learing_late * ((y_predict - y) * x2).mean()
    b = b - learing_late * (y_predict - y).mean()
    
print("____" * 10)
print(f"{epoch}, w1 = {w1:.6f}, w2 = {w2:.6f}, b = {best_b:.6f}, error = {error:.6f}")

0, w1 = 0.991098, w2 = 0.614512, b = 0.099829, error = 0.359791
1, w1 = 0.758947, w2 = 0.536334, b = 0.099829, error = 0.231654
2, w1 = 0.607287, w2 = 0.497597, b = 0.099829, error = 0.151034
3, w1 = 0.507333, w2 = 0.480289, b = 0.099829, error = 0.099874
4, w1 = 0.440968, w2 = 0.474453, b = 0.099829, error = 0.067051
5, w1 = 0.396569, w2 = 0.474408, b = 0.099829, error = 0.045736
6, w1 = 0.366635, w2 = 0.476990, b = 0.099829, error = 0.031705
7, w1 = 0.346298, w2 = 0.480499, b = 0.099829, error = 0.022331
8, w1 = 0.332377, w2 = 0.484072, b = 0.099829, error = 0.015900
9, w1 = 0.322778, w2 = 0.487311, b = 0.099829, error = 0.011383
10, w1 = 0.316114, w2 = 0.490068, b = 0.099829, error = 0.008201
11, w1 = 0.311456, w2 = 0.492327, b = 0.099829, error = 0.005945
12, w1 = 0.308181, w2 = 0.494131, b = 0.099829, error = 0.004321
13, w1 = 0.305865, w2 = 0.495545, b = 0.099829, error = 0.003149
14, w1 = 0.304220, w2 = 0.496639, b = 0.099829, error = 0.002300
15, w1 = 0.303045, w2 = 0.497476, b