## Gradient Descent

In [None]:
import numpy as np
import matplotlib.pyplot as plt


# Training set: inputs 1.0 .. 8.0, each target is exactly twice its input.
x_data = [float(i) for i in range(1, 9)]
y_data = [2.0 * x for x in x_data]

# Starting value of the single trainable weight (deliberately off from 2.0).
w = 3.0

# Forward pass of the linear model y = x * w
def forward(x):
    """Return the model prediction for input ``x`` using the module-level weight ``w``."""
    prediction = x * w
    return prediction

# Squared-error loss for a single sample
def loss(x, y):
    """Return the squared difference between ``forward(x)`` and the target ``y``."""
    residual = forward(x) - y
    return residual * residual

# Gradient of the per-sample squared error with respect to w
def gradient(x, y):  # d_loss/d_w
    """Return ``2 * x * (x * w - y)``, the derivative of ``(x * w - y) ** 2`` in ``w``."""
    residual = x * w - y
    return 2 * x * residual

# Before training
print("predict (before training)",  4, forward(4))

# Training loop: full-batch gradient descent on the single weight w.
# Each epoch sums the per-sample gradients, then takes one step against
# their mean over the whole dataset.
learning_rate = 0.01
n_samples = len(x_data)
for epoch in range(100):
    grad = 0
    for x_val, y_val in zip(x_data, y_data):
        grad += gradient(x_val, y_val)  # add up the gradients of all samples
        # NOTE: the value printed here is the running sum, not the per-sample gradient.
        print("\tgrad: ", x_val, y_val, grad)
    w = w - learning_rate * grad / n_samples
    # Report the mean squared error over every sample, evaluated with the
    # weight that was just updated, so "w=" and "loss=" describe the same state.
    mean_loss = sum(loss(x_i, y_i) for x_i, y_i in zip(x_data, y_data)) / n_samples
    print("progress:", epoch, "w=", w, "loss=", mean_loss)

# After training
print("predict (after training)",  "4 hours", forward(4))


## Multi-parameter gradient descent (quadratic model y = w2 * x^2 + w1 * x + b)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
# Training set: inputs 1.0 .. 3.0, each target is exactly twice its input.
x_data = [float(i) for i in range(1, 4)]
y_data = [2.0 * x for x in x_data]

# Starting values of the three trainable parameters.
w1, w2, b = 1.0, 1.0, 1.0

# Forward pass of the quadratic model y = w2 * x^2 + w1 * x + b
def forward(x):
    """Return the prediction for ``x`` using the module-level ``w2``, ``w1`` and ``b``."""
    quadratic_term = x ** 2 * w2
    linear_term = x * w1
    return quadratic_term + linear_term + b

# Squared-error loss for a single sample
def loss(x, y):
    """Return the squared difference between ``forward(x)`` and the target ``y``."""
    residual = forward(x) - y
    return residual * residual

# Gradient of the per-sample squared error with respect to b
def gradientb(x, y):  # d_loss/d_b
    """Return twice the residual ``b - y + w2 * x**2 + w1 * x``."""
    residual = b - y + w2 * x ** 2 + w1 * x
    return 2 * residual

# Gradient of the per-sample squared error with respect to w1
def gradientw1(x, y):  # d_loss/d_w1
    """Return ``2 * x`` times the residual ``x * w1 - y + w2 * x**2 + b``."""
    residual = x * w1 - y + w2 * x ** 2 + b
    return 2 * x * residual

# Gradient of the per-sample squared error with respect to w2
def gradientw2(x, y):  # d_loss/d_w2
    """Return ``2 * x**2`` times the residual ``x**2 * w2 - y + w1 * x + b``."""
    residual = x ** 2 * w2 - y + w1 * x + b
    scale = 2 * x ** 2
    return scale * residual


# Before training
print("predict (before training)",  4, forward(4))

# Training loop: full-batch gradient descent on b, w1 and w2.
# Each epoch sums the per-sample partial derivatives, then moves every
# parameter one step against its mean gradient over the whole dataset.
learning_rate = 0.01
n_samples = len(x_data)
for epoch in range(100):
    gradbsum = 0
    gradw1sum = 0
    gradw2sum = 0
    for x_val, y_val in zip(x_data, y_data):
        # All three partials use the parameter values from the start of the
        # epoch; the parameters are only updated after this inner loop.
        gradb = gradientb(x_val, y_val)
        gradw1 = gradientw1(x_val, y_val)
        gradw2 = gradientw2(x_val, y_val)
        gradbsum += gradb
        gradw1sum += gradw1
        gradw2sum += gradw2
        # Per-sample diagnostics, printed while the values still match the
        # parameters they were computed from.
        print("\tx,y: ", x_val, y_val)
        print("\tgradw1: ",gradw1)
        print("\tgradw2: ",gradw2)
        print("\tgradb: ", gradb)
    b = b - learning_rate * gradbsum / n_samples
    w1 = w1 - learning_rate * gradw1sum / n_samples
    w2 = w2 - learning_rate * gradw2sum / n_samples
    # Report the mean squared error over every sample, evaluated with the
    # parameters that were just updated.
    mean_loss = sum(loss(x_i, y_i) for x_i, y_i in zip(x_data, y_data)) / n_samples

    print("progress:", epoch, "w1,w2=", w1,w2, "b=", b, "loss=", mean_loss)

# After training
print("predict (after training)",  "Some epochs", forward(4))
