In [1]:
# For tips on running notebooks in Google Colab, see
# https://docs.pytorch.org/tutorials/beginner/colab
%matplotlib inline

PyTorch: optim
==============

A third order polynomial, trained to predict $y=\sin(x)$ from $-\pi$ to
$\pi$ by minimizing squared Euclidean distance.

This implementation uses the nn package from PyTorch to build the
network.

Rather than manually updating the weights of the model as we have been
doing, we use the optim package to define an Optimizer that will update
the weights for us. The optim package defines many optimization
algorithms that are commonly used for deep learning, including
SGD+momentum, RMSProp, Adam, etc.


In [2]:
import torch
import math


# Training data: 2000 evenly spaced points on [-pi, pi] and the sine of each.
x = torch.linspace(-math.pi, math.pi, steps=2000)
y = x.sin()

# Polynomial features (x, x^2, x^3): the (2000, 1) column of x values is
# broadcast against the three exponents, giving a (2000, 3) input matrix.
p = torch.tensor([1, 2, 3])
xx = torch.pow(x.unsqueeze(-1), p)

# The model is one linear layer over the three features, i.e. it computes
# bias + w1*x + w2*x^2 + w3*x^3 for each row. Flatten(0, 1) collapses the
# (2000, 1) output to shape (2000,) so it lines up with y.
model = torch.nn.Sequential(
    torch.nn.Linear(in_features=3, out_features=1),
    torch.nn.Flatten(start_dim=0, end_dim=1),
)
# reduction='sum' makes the loss the summed (not averaged) squared error.
loss_fn = torch.nn.MSELoss(reduction='sum')

# Let an Optimizer from torch.optim apply the weight updates. RMSprop is used
# here (torch.optim offers many other algorithms); its first argument is the
# collection of Tensors it is responsible for updating.
learning_rate = 1e-3
optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)
for t in range(2000):
    # Forward pass: run the features through the model and score the result.
    y_pred = model(xx)
    loss = loss_fn(y_pred, y)

    # Report progress on every 100th iteration (t = 99, 199, ..., 1999).
    if (t + 1) % 100 == 0:
        print(t, loss.item())

    # Gradients are accumulated (i.e. added to, not overwritten) each time
    # .backward() is called, so reset the gradients of the parameters owned
    # by the optimizer before computing new ones. See the documentation of
    # torch.autograd.backward for details.
    optimizer.zero_grad()

    # Backward pass: gradient of the loss w.r.t. the model parameters.
    loss.backward()

    # Apply one RMSprop update to those parameters.
    optimizer.step()


# Read the fitted coefficients back out of the linear layer.
linear_layer = model[0]
print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')

99 44053.05859375
199 23541.119140625
299 11738.2041015625
399 5148.92333984375
499 2081.39453125
599 1086.286865234375
699 874.7413330078125
799 772.8781127929688
899 655.7171020507812
999 530.6312255859375
1099 411.5893859863281
1199 306.6992492675781
1299 218.80564880371094
1399 148.18389892578125
1499 94.09329986572266
1599 55.30048751831055
1699 30.11427879333496
1799 16.290241241455078
1899 10.492270469665527
1999 9.022298812866211
Result: y = 6.376117411832638e-09 + 0.8442161679267883 x + -1.1338995520304707e-08 x^2 + -0.09116604179143906 x^3


In [3]:
import torch
import math


# Fit y = sin(x) on [-pi, pi] with a small ReLU network that is fed the raw
# x values directly.
x = torch.linspace(-math.pi, math.pi, steps=2000)
y = x.sin()

# The first Linear layer takes one feature per sample, so give x a trailing
# axis: (2000,) -> (2000, 1).
xx = x[:, None]

# Network: 1 input -> 4 hidden units with ReLU -> 1 output. Flatten(0, 1)
# collapses the (2000, 1) output to shape (2000,) so it lines up with y.
model = torch.nn.Sequential(
    torch.nn.Linear(in_features=1, out_features=4),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=4, out_features=1),
    torch.nn.Flatten(start_dim=0, end_dim=1),
)
# reduction='sum' makes the loss the summed (not averaged) squared error.
loss_fn = torch.nn.MSELoss(reduction='sum')

# Let an Optimizer from torch.optim apply the weight updates. RMSprop is used
# here (torch.optim offers many other algorithms); its first argument is the
# collection of Tensors it is responsible for updating.
learning_rate = 1e-3
optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)
for t in range(2000):
    # Forward pass: run the inputs through the model and score the result.
    y_pred = model(xx)
    loss = loss_fn(y_pred, y)

    # Report progress on every 100th iteration (t = 99, 199, ..., 1999).
    if (t + 1) % 100 == 0:
        print(t, loss.item())

    # Gradients are accumulated (i.e. added to, not overwritten) each time
    # .backward() is called, so reset the gradients of the parameters owned
    # by the optimizer before computing new ones. See the documentation of
    # torch.autograd.backward for details.
    optimizer.zero_grad()

    # Backward pass: gradient of the loss w.r.t. the model parameters.
    loss.backward()

    # Apply one RMSprop update to those parameters.
    optimizer.step()


99 378.9967346191406
199 236.29583740234375
299 220.4607696533203
399 204.55612182617188
499 180.48464965820312
599 147.78109741210938
699 110.41084289550781
799 75.55534362792969
899 48.785335540771484
999 31.466646194458008
1099 22.016956329345703
1199 17.04204750061035
1299 13.597269058227539
1399 10.42985725402832
1499 7.673007488250732
1599 5.823636531829834
1699 4.908355712890625
1799 4.549322128295898
1899 4.42875862121582
1999 4.394473075866699
