## Gradient Descent

In [1]:
import numpy as np
import matplotlib.pyplot as plt


x_data = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]
y_data = [2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0]

w = 3.0  # a random guess: random value, 1.0

# our model for the forward pass
def forward(x):
    return x * w

# Loss function
def loss(x, y):
    y_pred = forward(x)
    return (y_pred - y) * (y_pred - y)

# compute gradient
def gradient(x, y):  # d_loss/d_w
    return 2 * x * (x * w - y)

# Before training
print("predict (before training)",  4, forward(4))

# Training loop
for epoch in range(100):
    grad = 0
    for x_val, y_val in zip(x_data, y_data):
        gradi = gradient(x_val, y_val)
        grad += gradi #把所有樣本的梯度相加起來
        print("\tgrad: ", x_val, y_val, grad)
        l = loss(x_val, y_val)
    w = w - 0.01 * grad/len(x_data) # sgd是把每一批的梯度相加除以總數，這邊用整體的梯度總和
    print("progress:", epoch, "w=", w, "loss=", l)

# After training
print("predict (after training)",  "4 hours", forward(4))


predict (before training) 4 12.0
	grad:  1.0 2.0 2.0
	grad:  2.0 4.0 10.0
	grad:  3.0 6.0 28.0
	grad:  4.0 8.0 60.0
	grad:  5.0 10.0 110.0
	grad:  6.0 12.0 182.0
	grad:  7.0 14.0 280.0
	grad:  8.0 16.0 408.0
progress: 0 w= 2.49 loss= 64.0
	grad:  1.0 2.0 0.9800000000000004
	grad:  2.0 4.0 4.900000000000002
	grad:  3.0 6.0 13.720000000000006
	grad:  4.0 8.0 29.400000000000013
	grad:  5.0 10.0 53.90000000000002
	grad:  6.0 12.0 89.18000000000004
	grad:  7.0 14.0 137.20000000000005
	grad:  8.0 16.0 199.92000000000007
progress: 1 w= 2.2401 loss= 15.366400000000013
	grad:  1.0 2.0 0.48019999999999996
	grad:  2.0 4.0 2.401
	grad:  3.0 6.0 6.722799999999999
	grad:  4.0 8.0 14.405999999999999
	grad:  5.0 10.0 26.410999999999998
	grad:  6.0 12.0 43.6982
	grad:  7.0 14.0 67.228
	grad:  8.0 16.0 97.96079999999999
progress: 2 w= 2.117649 loss= 3.6894726399999995
	grad:  1.0 2.0 0.23529800000000023
	grad:  2.0 4.0 1.1764900000000011
	grad:  3.0 6.0 3.294172000000003
	grad:  4.0 8.0 7.05894000000000

## Multi-variable gradient descent

In [5]:
import numpy as np
import matplotlib.pyplot as plt
x_data = [1.0, 2.0, 3.0]
y_data = [2.0, 4.0, 6.0]


w1 = 1.0  # a random guess: random value, 1.0
w2 = 1.0
b = 1.0

# our model for the forward pass
def forward(x):
    return x ** 2 * w2 + x * w1 +b

# Loss function
def loss(x, y):
    y_pred = forward(x)
    return (y_pred - y) * (y_pred - y)

# compute gradient
def gradientb(x, y):  # d_loss/d_b
    return 2 * (b - y + w2 * x **2 + w1 * x)

# compute gradient
def gradientw1(x, y):  # d_loss/d_w1
    return 2 * x * (x * w1 - y + w2 * x ** 2 + b)

# compute gradient
def gradientw2(x, y):  # d_loss/d_w2
    return 2 * x ** 2 * (x ** 2 * w2 - y + w1 * x + b)


# Before training
print("predict (before training)",  4, forward(4))

# Training loop
for epoch in range(100):
    gradbsum = 0
    gradw1sum = 0
    gradw2sum = 0
    for x_val, y_val in zip(x_data, y_data):
        gradb = gradientb(x_val, y_val)
        gradw1 = gradientw1(x_val, y_val)
        gradw2 = gradientw2(x_val, y_val)
        gradbsum += gradb
        gradw1sum += gradw1
        gradw2sum += gradw2
    b = b - 0.01 * gradbsum/len(x_data)
    w1 = w1 - 0.01 * gradw1sum/len(x_data)
    w2 = w2 - 0.01 * gradw2sum/len(x_data)
    print("\tw,y: ", x_val, y_val)
    print("\tgradw1: ",gradw1)
    print("\tgradw2: ",gradw2)
    print("\tgradb: ", gradb)
    l = loss(x_val, y_val)

    print("progress:", epoch, "w1,w2=", w1,w2, "loss=", l)

# After training
print("predict (after training)",  "Some epochs", forward(4))


predict (before training) 4 21.0
	w,y:  3.0 6.0
	gradw1:  42.0
	gradw2:  126.0
	gradb:  14.0
progress: 0 w1,w2= 0.8133333333333334 0.4933333333333333 loss= 3.264044444444441
	w,y:  3.0 6.0
	gradw1:  10.839999999999996
	gradw2:  32.51999999999999
	gradb:  3.6133333333333324
progress: 1 w1,w2= 0.7686222222222222 0.36933333333333335 loss= 0.29097633382716026
	w,y:  3.0 6.0
	gradw1:  3.2365333333333357
	gradw2:  9.709600000000007
	gradb:  1.078844444444445
progress: 2 w1,w2= 0.7585285925925925 0.33867437037037035 loss= 0.052809674145975406
	w,y:  3.0 6.0
	gradw1:  1.3788213333333326
	gradw2:  4.1364639999999975
	gradb:  0.4596071111111115
progress: 3 w1,w2= 0.7568714706172839 0.33078639802469134 loss= 0.02364420357208466
	w,y:  3.0 6.0
	gradw1:  0.922600308148144
	gradw2:  2.767800924444432
	gradb:  0.3075334360493809
progress: 4 w1,w2= 0.7572650755423868 0.32845649079835393 loss= 0.018146411577649638
	w,y:  3.0 6.0
	gradw1:  0.8082517038617283
	gradw2:  2.424755111585185
	gradb:  0.269417