In [8]:
# From the tutorial
# https://pytorch.org/tutorials/beginner/pytorch_with_examples.html
import numpy as np

# Problem sizes: N samples per batch, D_in input features,
# H hidden units, D_out output features.
N, D_in, H, D_out = 64, 1000, 100, 10

# Synthetic regression data drawn from a standard normal distribution:
# x has shape (N, D_in) and the targets y have shape (N, D_out).
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)

# Weight matrices of the two linear layers, also standard-normal initialised:
# w1 maps input -> hidden, w2 maps hidden -> output.
w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)

# Plain gradient descent with a fixed step size for a fixed number of steps.
learning_rate = 1e-6
for t in range(500):
    # ---- Forward pass: linear -> ReLU -> linear ----
    h = x @ w1
    h_relu = np.maximum(h, 0)
    y_pred = h_relu @ w2

    # ---- Loss: sum of squared errors over the whole batch ----
    loss = np.sum(np.square(y_pred - y))
    if not t % 30:
        # Report progress on every 30th step only.
        print(f"Epoch {t:3d} loss {loss:12.2f}")

    # ---- Backward pass: manual chain rule, from the output back to w1 ----
    # d(loss)/d(y_pred) for the squared-error loss above.
    grad_y_pred = (y_pred - y) * 2.0
    # Second linear layer: gradient w.r.t. its weights and w.r.t. its input.
    grad_w2 = h_relu.T @ grad_y_pred
    grad_h_relu = grad_y_pred @ w2.T
    # ReLU passes the gradient through unchanged except where its input was
    # negative, where the gradient is zero.
    grad_h = np.where(h < 0, 0.0, grad_h_relu)
    # First linear layer: gradient w.r.t. its weights.
    grad_w1 = x.T @ grad_h

    # ---- Gradient-descent step (in place, so w1/w2 keep their identity) ----
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2

Epoch   0 loss  34355354.64
Epoch  30 loss     77138.53
Epoch  60 loss      9466.16
Epoch  90 loss      2097.32
Epoch 120 loss       579.41
Epoch 150 loss       176.98
Epoch 180 loss        57.05
Epoch 210 loss        18.95
Epoch 240 loss         6.42
Epoch 270 loss         2.21
Epoch 300 loss         0.77
Epoch 330 loss         0.27
Epoch 360 loss         0.10
Epoch 390 loss         0.03
Epoch 420 loss         0.01
Epoch 450 loss         0.00
Epoch 480 loss         0.00
