In [None]:
"""
https://pytorch.org/tutorials/beginner/pytorch_with_examples.html#pytorch-custom-nn-modules
"""

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim

In [3]:
class TwoLayerNet(nn.Module):
    """Two fully connected layers with a ReLU-style clamp in between."""

    def __init__(self, D_in, H, D_out):
        """
        Build the two nn.Linear layers and register them as submodules.

        :param D_in: number of input features
        :param H: number of hidden units
        :param D_out: number of output features
        """
        super().__init__()
        # Keep this creation order: it fixes both the parameter order and the
        # order in which the layers draw their initial weights from the RNG.
        self.linear1 = nn.Linear(D_in, H)
        self.linear2 = nn.Linear(H, D_out)

    def forward(self, x):
        """
        Map a batch of inputs to a batch of predictions.

        :param x: input Tensor whose last dimension is D_in
        :return: output Tensor whose last dimension is D_out
        """
        hidden = self.linear1(x)
        # Clamping at zero from below is the ReLU non-linearity.
        activated = hidden.clamp(min=0)
        return self.linear2(activated)

In [15]:
# Seed PyTorch's global RNG before anything random happens. Both the synthetic
# data and the initial nn.Linear weights are drawn from it, so without a seed
# the losses printed during training differ on every Restart & Run All.
# (Results are repeatable for a given machine and PyTorch version.)
SEED = 0
torch.manual_seed(SEED)

# N is the batch size, D_in is the input dimension
# H is the hidden dimension, D_out is the output dimension
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random tensors to hold inputs and outputs
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

# Construct our model by instantiating the class defined above
model = TwoLayerNet(D_in, H, D_out)

# Loss function: squared error summed (not averaged) over all elements.
criterion = nn.MSELoss(reduction='sum')

# Optimizer: plain SGD. model.parameters() yields the learnable parameters of
# the two nn.Linear modules that are members of the model, so those are the
# tensors the optimizer will update.
optimizer = optim.SGD(model.parameters(), lr=2e-4)

### Training

In [16]:
for t in range(500):
    # Start every iteration with clean gradients; backward() accumulates
    # into .grad rather than overwriting it.
    optimizer.zero_grad()

    # Forward pass: predictions for the whole batch, then the scalar loss
    y_pred = model(x)
    loss = criterion(y_pred, y)

    # Report the loss on every 100th iteration (t = 99, 199, ...)
    if (t + 1) % 100 == 0:
        print("{}, {:.15f}".format(t, loss.item()))

    # Backward pass, then one SGD update of the weights
    loss.backward()
    optimizer.step()

99, 0.030925258994102
199, 0.000018127344447
299, 0.000000022634291
399, 0.000000000390407
499, 0.000000000110511
