In [1]:
%matplotlib inline


PyTorch: Custom nn Modules
--------------------------

A fully-connected ReLU network with one hidden layer, trained to predict y from x
by minimizing squared Euclidean distance.

This implementation defines the model as a custom Module subclass. Whenever you
want a model more complex than a simple sequence of existing Modules, you will
need to define your model this way.



In [2]:
import torch

In [3]:
class TwoLayerNet(torch.nn.Module):
    """Two linear layers with a ReLU non-linearity between them."""

    def __init__(self, D_in, H, D_out):
        """Build the two linear layers.

        Args:
            D_in: number of input features of the first layer.
            H: number of hidden units shared by both layers.
            D_out: number of output features of the second layer.
        """
        super(TwoLayerNet, self).__init__()
        self.linear1 = torch.nn.Linear(in_features=D_in, out_features=H)
        self.linear2 = torch.nn.Linear(in_features=H, out_features=D_out)

    def forward(self, x):
        """Map the input tensor ``x`` to predictions."""
        hidden = self.linear1(x)
        # Clamping at zero from below is the ReLU activation.
        activated = torch.clamp(hidden, min=0)
        return self.linear2(activated)

In [5]:
# Sizes used to build the data tensors and the network.
N = 64
D_in = 1000
H = 100
D_out = 10

# Step size handed to the optimizer.
learning_rate = 1e-4

In [6]:
# Random input and target tensors drawn from a standard normal distribution.
x = torch.randn((N, D_in))
y = torch.randn((N, D_out))

In [7]:
# Instantiate the network with the configured layer sizes.
model = TwoLayerNet(D_in=D_in, H=H, D_out=D_out)

In [9]:
# Mean squared error, averaged over every element of the prediction.
# `reduction='mean'` is the supported spelling of the deprecated
# `size_average=True` argument, which triggers a deprecation warning.
criterion = torch.nn.MSELoss(reduction='mean')
# Plain SGD over the model's learnable parameters.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [11]:
for t in range(500):
    # Forward pass: run the inputs through the model.
    y_pred = model(x)

    # Evaluate the loss and report it for this iteration.
    loss = criterion(y_pred, y)
    print(t, loss.item())

    # Clear stale gradients through the optimizer (it holds exactly the
    # parameters being updated), then backpropagate and take one SGD step.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

0 0.9655946493148804
1 0.965490996837616
2 0.9653887748718262
3 0.9652856588363647
4 0.9651826024055481
5 0.9650802612304688
6 0.9649775624275208
7 0.9648748636245728
8 0.9647719264030457
9 0.9646687507629395
10 0.9645661115646362
11 0.964464545249939
12 0.9643615484237671
13 0.9642587900161743
14 0.9641561508178711
15 0.9640539288520813
16 0.9639504551887512
17 0.9638481140136719
18 0.9637454748153687
19 0.9636428952217102
20 0.963540256023407
21 0.9634381532669067
22 0.9633353352546692
23 0.9632333517074585
24 0.9631308317184448
25 0.9630283117294312
26 0.9629265666007996
27 0.9628232717514038
28 0.9627203941345215
29 0.9626185297966003
30 0.9625163078308105
31 0.9624133110046387
32 0.9623115658760071
33 0.9622087478637695
34 0.9621056318283081
35 0.9620047807693481
36 0.9619023203849792
37 0.961800754070282
38 0.9616984128952026
39 0.961596667766571
40 0.9614948034286499
41 0.9613922834396362
42 0.9612910151481628
43 0.9611890912055969
44 0.9610875844955444
45 0.960985541343689
46 0

393 0.9266740083694458
394 0.9265782237052917
395 0.9264830350875854
396 0.9263880848884583
397 0.9262923002243042
398 0.9261962175369263
399 0.926101803779602
400 0.9260061979293823
401 0.9259108304977417
402 0.9258154630661011
403 0.9257208704948425
404 0.9256254434585571
405 0.9255297780036926
406 0.9254347085952759
407 0.9253396987915039
408 0.9252440333366394
409 0.9251489639282227
410 0.9250540733337402
411 0.9249591827392578
412 0.9248638153076172
413 0.9247690439224243
414 0.9246742129325867
415 0.9245792627334595
416 0.9244837760925293
417 0.9243882298469543
418 0.9242933988571167
419 0.9241987466812134
420 0.9241039156913757
421 0.9240094423294067
422 0.9239141345024109
423 0.9238198399543762
424 0.9237247705459595
425 0.9236294031143188
426 0.9235346913337708
427 0.9234403371810913
428 0.9233449101448059
429 0.9232503771781921
430 0.9231551885604858
431 0.9230605959892273
432 0.9229663014411926
433 0.9228718876838684
434 0.9227768778800964
435 0.9226819276809692
436 0.922587

In [None]:
import torch


class TwoLayerNet(torch.nn.Module):
    """Custom Module: a linear layer, a ReLU, and a second linear layer."""

    def __init__(self, D_in, H, D_out):
        """
        Instantiate the two nn.Linear sub-modules and store them as attributes
        so that their parameters are registered with this Module.
        """
        super(TwoLayerNet, self).__init__()
        self.linear1 = torch.nn.Linear(in_features=D_in, out_features=H)
        self.linear2 = torch.nn.Linear(in_features=H, out_features=D_out)

    def forward(self, x):
        """
        Take a Tensor of input data and return a Tensor of output data, using
        the sub-modules created in the constructor plus a Tensor operation.
        """
        pre_activation = self.linear1(x)
        # Clamping below at zero implements the ReLU non-linearity.
        h_relu = torch.clamp(pre_activation, min=0)
        return self.linear2(h_relu)


# Problem sizes:
#   N     - batch size
#   D_in  - input dimension
#   H     - hidden dimension
#   D_out - output dimension
N = 64
D_in = 1000
H = 100
D_out = 10

# Random Tensors holding the inputs (N x D_in) and the targets (N x D_out)
x = torch.randn((N, D_in))
y = torch.randn((N, D_out))

# Build the model by instantiating the custom Module with the chosen sizes
model = TwoLayerNet(D_in=D_in, H=H, D_out=D_out)

# Construct our loss function and an Optimizer. The call to model.parameters()
# in the SGD constructor will contain the learnable parameters of the two
# nn.Linear modules which are members of the model.
# `reduction='sum'` is the supported spelling of the deprecated
# `size_average=False` argument: the squared errors are summed, not averaged.
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4)
for t in range(500):
    # Reset accumulated gradients before this iteration's backward pass.
    optimizer.zero_grad()

    # Forward pass: feed x through the model, then compute and print the loss.
    y_pred = model(x)
    loss = criterion(y_pred, y)
    print(t, loss.item())

    # Backward pass followed by the parameter update.
    loss.backward()
    optimizer.step()