SOURCE: http://seba1511.net/tutorials/beginner/pytorch_with_examples.html#annotations:E9HdvPynEemYwidYvwe30g

Up to this point we have updated the weights of our models by manually 
mutating the .data member for Variables holding learnable parameters. 
This is not a huge burden for simple optimization algorithms like stochastic 
gradient descent, but in practice we often train neural networks using more 
sophisticated optimizers like AdaGrad, RMSProp, Adam, etc.

In [15]:
import random 
import torch
from torch.autograd import Variable 

In [3]:
# Problem dimensions used throughout the notebook:
#   N     - batch size
#   D_in  - input dimension
#   H     - hidden dimension
#   D_out - output dimension
N = 64
D_in = 1000
H = 100
D_out = 10

In [4]:
# Create random tensors holding the input and target data. These are not
# placeholders: they contain concrete values (standard-normal samples) as soon
# as they are created.
# Tensors and Variables were merged in PyTorch 0.4, so the deprecated Variable
# wrapper is not needed. Tensors default to requires_grad=False, which is what
# we want for data that is not being learned.
X = torch.randn(N, D_in)
Y = torch.randn(N, D_out)
print(X)

tensor([[ 0.4982, -0.0304, -1.0053,  ...,  0.5441,  0.1433,  0.6231],
        [-0.9197,  1.0919, -0.0601,  ..., -0.0545, -1.3559,  1.0987],
        [ 0.9304, -2.2657, -0.1634,  ...,  1.8251, -0.8529, -1.2458],
        ...,
        [-0.0934,  1.6510, -0.9247,  ...,  1.3982,  1.1994, -0.7206],
        [ 0.2294, -1.0295,  1.2909,  ..., -0.5983,  1.1539, -0.9727],
        [ 1.8106, -0.0495,  0.2552,  ...,  1.0680, -0.5148, -0.0756]])


In [16]:
class DynamicNet(torch.nn.Module):
    """Fully connected ReLU network whose depth is picked at random per call."""

    def __init__(self, D_in, H, D_out):
        """
        Build the three nn.Linear layers that the forward pass draws on.

        :param D_in: number of features in each input sample
        :param H: number of features in the hidden representation
        :param D_out: number of features in each output sample
        """
        super(DynamicNet, self).__init__()
        self.inputLinear = torch.nn.Linear(D_in, H)
        self.middleLinear = torch.nn.Linear(H, H)
        self.outputLinear = torch.nn.Linear(H, D_out)

    def forward(self, X):
        """
        Run the network with a randomly chosen number of hidden layers.

        A random integer between 0 and 3 (inclusive) is drawn on every call,
        and the single middleLinear module is applied that many times, each
        application followed by a ReLU (implemented as clamp(min=0)).

        Because the computation graph is rebuilt on each forward pass,
        ordinary Python control flow can decide the depth, and the same
        module can safely appear several times in one graph.

        :param X: input batch whose last dimension is D_in (e.g. an
            N x D_in matrix with one input vector per row)
        :return: predictions whose last dimension is D_out
        """
        activations = torch.clamp(self.inputLinear(X), min=0)

        # Draw the depth once, then apply the shared middle layer that often.
        remaining = random.randint(0, 3)
        while remaining > 0:
            activations = torch.clamp(self.middleLinear(activations), min=0)
            remaining -= 1

        return self.outputLinear(activations)

In [17]:
# Construct our model by instantiating the class defined above; D_in, H and
# D_out set the sizes of its input, middle and output linear layers.
model = DynamicNet(D_in, H, D_out)

In [18]:
# Show the module's repr, which lists its registered linear sub-layers.
print(model)

DynamicNet(
  (inputLinear): Linear(in_features=1000, out_features=100, bias=True)
  (middleLinear): Linear(in_features=100, out_features=100, bias=True)
  (outputLinear): Linear(in_features=100, out_features=10, bias=True)
)


In [14]:


learningRate = 1e-4
NUM_ITER = 500

# The nn package contains definitions of commonly used loss functions.
# In this case we use Mean Squared Error (MSE). reduction='sum' adds up the
# squared errors instead of averaging them; it replaces the deprecated
# size_average=False argument.
lossFunction = torch.nn.MSELoss(reduction='sum')

# Using the SGD optimizer with momentum.
optimizer = torch.optim.SGD(model.parameters(), lr=learningRate,
                            momentum=0.9)
# note: using momentum since training this strange model with SGD is hard

for t in range(NUM_ITER):
    # Forward pass: compute predicted y by passing x to the model.
    yPred = model(X)

    # Compute and print loss.
    loss = lossFunction(yPred, Y)

    if t % 50 == 0:
        # loss is a 0-dim tensor; .item() extracts its Python number
        # (indexing it with [0] fails on current PyTorch versions).
        print("iter = ", t, "; loss = ", loss.item())

    # Zero gradients, do the backward pass, and update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()


NameError: name 'random' is not defined