In [263]:
import numpy as np

Define variables and functions

In [264]:
# Single training sample: the two input features and the target value.
x_1, x_2 = 2, -1
y = 1

In [265]:
# Parameter vector laid out as [w_1, w_2, b]: indices 0 and 1 multiply
# x_1 and x_2 in y_hat, index 2 is the additive bias term.
w = np.asanyarray([-0.5, 0.5, 1.5])

In [266]:
# Learning rate: scales each gradient before it is subtracted from the weights.
alpha = 0.1

Forward pass: Calculate the predicted output $\hat{y}$ and the loss $L$ using the formula from the lecture.

In [267]:
def y_hat(x_1, x_2, w):
    """Return the linear prediction ``x_1 * w[0] + x_2 * w[1] + w[2]``.

    Args:
        x_1: First input feature.
        x_2: Second input feature.
        w: Indexable sequence whose first three entries are used as
            ``[w_1, w_2, b]``.

    Returns:
        The weighted sum of the inputs plus the bias term ``w[2]``.
    """
    w_1, w_2, b = w[0], w[1], w[2]
    weighted_inputs = x_1 * w_1 + x_2 * w_2
    return weighted_inputs + b

Backpropagation: Calculate the gradients of the loss $L$ with respect to $w_1$, $w_2$ and $b$ using the chain rule.

In [268]:
def gradient_w1(y_hat, y, x_1):
    """Return the gradient used to update ``w_1``: ``(y_hat - y) * x_1``.

    NOTE(review): this matches the derivative of a squared-error loss
    ``0.5 * (y_hat - y) ** 2`` -- presumably the lecture's loss; confirm.

    Args:
        y_hat: Predicted output.
        y: Target output.
        x_1: Input feature that is multiplied by ``w_1`` in the forward pass.

    Returns:
        The prediction error scaled by ``x_1``.
    """
    error = y_hat - y
    return error * x_1

In [269]:
def gradient_w2(y_hat, y, x_2):
    """Return the gradient used to update ``w_2``: ``(y_hat - y) * x_2``.

    NOTE(review): this matches the derivative of a squared-error loss
    ``0.5 * (y_hat - y) ** 2`` -- presumably the lecture's loss; confirm.

    Args:
        y_hat: Predicted output.
        y: Target output.
        x_2: Input feature that is multiplied by ``w_2`` in the forward pass.

    Returns:
        The prediction error scaled by ``x_2``.
    """
    error = y_hat - y
    return error * x_2

In [270]:
def gradient_b(y_hat, y):
    """Return the gradient used to update the bias ``b``: ``y_hat - y``.

    NOTE(review): this matches the derivative of a squared-error loss
    ``0.5 * (y_hat - y) ** 2`` -- presumably the lecture's loss; confirm.

    Args:
        y_hat: Predicted output.
        y: Target output.

    Returns:
        The prediction error itself, since the bias enters the forward
        pass with a factor of one.
    """
    error = y_hat - y
    return error

Update: As we know from the lecture, the gradient points in the direction of steepest ascent. To move towards a minimum, we take a step in the opposite direction, scaled by the learning rate $\alpha$. Update the weights $w_1$, $w_2$ and the bias $b$ using the gradients and the learning rate.

In [271]:
# Forward pass with the initial weights; the bare name displays the value.
y_hat_val = y_hat(x_1=x_1, x_2=x_2, w=w)
y_hat_val

0.0

In [272]:
# Gradient for w_1, evaluated at the current prediction.
w1_gradient = gradient_w1(y_hat=y_hat_val, y=y, x_1=x_1)
w1_gradient

-2.0

In [273]:
# Gradient for w_2, evaluated at the current prediction.
w2_gradient = gradient_w2(y_hat=y_hat_val, y=y, x_2=x_2)
w2_gradient

1.0

In [274]:
# Gradient for the bias b, evaluated at the current prediction.
b_gradient = gradient_b(y_hat=y_hat_val, y=y)
b_gradient

-1.0

In [275]:
# One gradient-descent step: move each parameter against its gradient.
# This modifies w in place, so re-running the cell applies another step.
w[0] -= alpha * w1_gradient
w[1] -= alpha * w2_gradient
w[2] -= alpha * b_gradient

In [276]:
# Display the weights after the update step above.
w

array([-0.3,  0.4,  1.6])

In [277]:
# Repeat the forward pass with the updated weights to see the new prediction.
new_y = y_hat(x_1, x_2, w)
new_y

0.6000000000000001

Try to adapt the weights in a loop

In [285]:
def adapt_weights(x_1, x_2, y, inital_weights, epochs, learning_rate=None):
    """Run repeated gradient-descent updates on a single training sample.

    Each epoch computes the prediction with ``y_hat``, the three gradients
    with ``gradient_w1`` / ``gradient_w2`` / ``gradient_b``, subtracts the
    scaled gradients from the weights, and prints the resulting prediction
    and weights.

    Args:
        x_1: First input feature.
        x_2: Second input feature.
        y: Target output.
        inital_weights: Sequence ``[w_1, w_2, b]`` to start from. It is
            copied, so the caller's object is left untouched.
        epochs: Number of update steps to perform.
        learning_rate: Step size for each update. When ``None`` (the
            default) the notebook-level ``alpha`` is used, which keeps
            existing calls behaving as before.

    Returns:
        A new float NumPy array holding the weights after the last epoch.
    """
    # Fall back to the notebook-level learning rate for existing callers.
    step_size = alpha if learning_rate is None else learning_rate

    # Work on a float copy: the caller's array is not mutated, and integer
    # starting weights are not silently truncated on each update.
    w = np.array(inital_weights, dtype=float)

    for epoch in range(epochs):
        y_hat_val = y_hat(x_1, x_2, w)

        w1_gradient = gradient_w1(y_hat=y_hat_val, y=y, x_1=x_1)
        w2_gradient = gradient_w2(y_hat=y_hat_val, y=y, x_2=x_2)
        b_gradient = gradient_b(y_hat=y_hat_val, y=y)

        # Step against the gradient to reduce the loss.
        w[0] = w[0] - (step_size * w1_gradient)
        w[1] = w[1] - (step_size * w2_gradient)
        w[2] = w[2] - (step_size * b_gradient)

        print(f'Epoch: {epoch} - Prediction: {y_hat(x_1, x_2, w)} - Weights: {w}')

    return w

In [292]:
# Run 1000 update steps on the same sample and starting weights used in the
# manual single-step walkthrough above; progress is printed every epoch.
adapt_weights(
    x_1=2,
    x_2=-1,
    y=1,
    inital_weights=np.asanyarray([-0.5, 0.5, 1.5]),
    epochs=1000
)

Epoch: 0 - Prediction: 0.6000000000000001 - Weights: [-0.3  0.4  1.6]
Epoch: 1 - Prediction: 0.8400000000000001 - Weights: [-0.22  0.36  1.64]
Epoch: 2 - Prediction: 0.9360000000000002 - Weights: [-0.188  0.344  1.656]
Epoch: 3 - Prediction: 0.9743999999999999 - Weights: [-0.1752  0.3376  1.6624]
Epoch: 4 - Prediction: 0.9897600000000002 - Weights: [-0.17008  0.33504  1.66496]
Epoch: 5 - Prediction: 0.9959039999999999 - Weights: [-0.168032  0.334016  1.665984]
Epoch: 6 - Prediction: 0.9983616 - Weights: [-0.1672128  0.3336064  1.6663936]
Epoch: 7 - Prediction: 0.9993446399999999 - Weights: [-0.16688512  0.33344256  1.66655744]
Epoch: 8 - Prediction: 0.9997378559999999 - Weights: [-0.16675405  0.33337702  1.66662298]
Epoch: 9 - Prediction: 0.9998951424 - Weights: [-0.16670162  0.33335081  1.66664919]
Epoch: 10 - Prediction: 0.9999580569600002 - Weights: [-0.16668065  0.33334032  1.66665968]
Epoch: 11 - Prediction: 0.999983222784 - Weights: [-0.16667226  0.33333613  1.66666387]
Epoch: 12