In [1]:
import torch 
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """Small convolutional network: two conv -> ReLU -> max-pool stages
    followed by three fully connected layers producing 10 outputs.
    """

    def __init__(self):
        super(Net, self).__init__()
        # Convolutional stages, each with a 5x5 square kernel:
        # 1 input channel -> 6 feature maps -> 16 feature maps.
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # Fully connected stages: 16 * 5 * 5 flattened features -> 120 -> 84 -> 10.
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run the forward pass and return the raw (un-activated) fc3 output."""
        # Each convolution is followed by ReLU and a 2x2 max-pool; the window is
        # spelled as a tuple for the first stage and as a single int for the second.
        for conv, window in ((self.conv1, (2, 2)), (self.conv2, 2)):
            x = F.max_pool2d(F.relu(conv(x)), window)

        # Collapse every non-batch dimension into one feature axis.
        x = x.view(-1, self.num_flat_features(x))

        # The two hidden linear layers use ReLU; the final layer has no activation.
        for hidden in (self.fc1, self.fc2):
            x = F.relu(hidden(x))
        return self.fc3(x)

    def num_flat_features(self, x):
        """Return the product of all dimensions of ``x`` except the first (batch) one."""
        count = 1
        for extent in x.size()[1:]:
            count = count * extent
        return count
    
# Build one instance of the network and print its layer summary.
net = Net()
print(net)

Net (
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear (400 -> 120)
  (fc2): Linear (120 -> 84)
  (fc3): Linear (84 -> 10)
)


In [2]:
# Materialize the network's parameters as a list so they can be counted and indexed.
params = list(net.parameters())

In [4]:
# Number of parameter tensors (10 in the recorded output, consistent with a
# weight and a bias for each of the five layers).
print(len(params))

10


In [6]:
# Shape of the first parameter tensor; the recorded 6x1x5x5 matches conv1's weight.
print(params[0].size())

torch.Size([6, 1, 5, 5])


In [8]:
# Random input with a batch of 1, 1 channel and 32x32 pixels, pushed through the network.
# NOTE: `input` shadows the Python builtin of the same name; later cells
# reuse this variable, so the name is load-bearing in this notebook.
# NOTE(review): no random seed is set, so the printed values differ on every run.
input = Variable(torch.randn(1, 1, 32, 32))
out = net(input)
print(out)

Variable containing:
 0.1129  0.0227  0.0112  0.0528  0.0080 -0.0374 -0.0213  0.0778  0.0475 -0.1033
[torch.FloatTensor of size 1x10]



In [9]:
# Reset the gradient buffers of all parameters before backpropagating.
net.zero_grad()

In [11]:
# Print the layer summary again (same output as when the network was created).
print(net)

Net (
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear (400 -> 120)
  (fc2): Linear (120 -> 84)
  (fc3): Linear (84 -> 10)
)


In [12]:
# `out` is 1x10 rather than a scalar, so backward() is given an explicit
# gradient of the same shape (random values here).
out.backward(torch.randn(1,10))

In [13]:
# Show the output again; the recorded values are the same as before backward().
print(out)

Variable containing:
 0.1129  0.0227  0.0112  0.0528  0.0080 -0.0374 -0.0213  0.0778  0.0475 -0.1033
[torch.FloatTensor of size 1x10]



In [14]:
# Forward pass whose result is compared against a dummy target.
output = net(input)
# Dummy target holding the ten values 1..10 (torch.range includes its end point).
# It is reshaped to 1x10 so that it has exactly the same shape as `output`
# (batch of 1, ten values): MSELoss expects input and target of matching shape,
# and newer PyTorch versions warn about broadcasting when they differ.
target = Variable(torch.range(1, 10).view(1, -1))
# Mean squared error between the network output and the target.
criterion = nn.MSELoss()

loss = criterion(output, target)
print(loss)

Variable containing:
 38.4863
[torch.FloatTensor of size 1]



In [15]:
# The autograd function object that produced `loss` (an MSELoss function in the recorded output).
print(loss.creator)

<torch.nn._functions.thnn.auto.MSELoss object at 0x7fdda0015668>


In [16]:
# Second element of the first entry in previous_functions; the recorded output is 0.
# NOTE(review): presumably an output index of the producing function -- confirm
# against the autograd internals of the PyTorch version in use.
print(loss.creator.previous_functions[0][1])

0


In [17]:
# First element of that entry: the function one step back from the loss
# (a Linear function in the recorded output, consistent with fc3 being the last layer).
print(loss.creator.previous_functions[0][0])

<torch.nn._functions.linear.Linear object at 0x7fdda00154a8>


In [18]:
# NOTE(review): exact duplicate of the In [17] cell (same expression, same recorded output).
print(loss.creator.previous_functions[0][0])

<torch.nn._functions.linear.Linear object at 0x7fdda00154a8>


In [19]:
# Two steps back from the loss: a Threshold function in the recorded output.
# NOTE(review): presumably the ReLU applied after fc2 -- confirm.
print(loss.creator.previous_functions[0][0].previous_functions[0][0])

<torch.nn._functions.thnn.auto.Threshold object at 0x7fdda00153c8>


In [20]:
# Clear gradients left over from the earlier out.backward(...) call so the
# values printed after loss.backward() come from this loss only.
net.zero_grad()

print('conv1.bias.grad before backward')
print(net.conv1.bias.grad)

# Backpropagate the loss; this fills in .grad on the network's parameters.
loss.backward()

print('conv1.bias.grad after backward')
print(net.conv1.bias.grad)

conv1.bias.grad before backward
Variable containing:
 0
 0
 0
 0
 0
 0
[torch.FloatTensor of size 6]

conv1.bias.grad after backward
Variable containing:
 0.0265
-0.0734
-0.0454
-0.0740
 0.1944
 0.1470
[torch.FloatTensor of size 6]



In [21]:
# Gradient of the loss with respect to conv2's bias (one value per output channel, 16 in total).
print(net.conv2.bias.grad)

Variable containing:
-0.0192
-0.0429
 0.0026
-0.0026
 0.2553
 0.0195
 0.0324
 0.0839
-0.0230
-0.0344
 0.0253
 0.0309
-0.0457
 0.0954
 0.0755
 0.0278
[torch.FloatTensor of size 16]



In [22]:
# Gradient of the loss with respect to fc3's bias (one value per network output, 10 in total).
print(net.fc3.bias.grad)

Variable containing:
-0.1774
-0.3955
-0.5978
-0.7894
-0.9984
-1.2075
-1.4043
-1.5844
-1.7905
-2.0207
[torch.FloatTensor of size 10]



In [23]:
learning_rate = 0.01
# Hand-written gradient-descent step: move every parameter against its
# gradient, in place. The loop variable `f` is read by the next cell.
for f in net.parameters():
    scaled_grad = f.grad.data * learning_rate
    f.data.sub_(scaled_grad)

In [24]:
# `f` is the loop variable left over from the update loop in the previous cell,
# i.e. the last parameter it visited (10 values in the recorded output,
# consistent with fc3's bias).
print(f)

Parameter containing:
 0.0321
 0.0899
-0.0477
 0.1054
 0.1120
 0.0598
-0.0388
 0.0847
 0.1203
-0.0341
[torch.FloatTensor of size 10]



In [25]:
import torch.optim as optim

In [26]:
# Stochastic gradient descent over all of the network's parameters with a learning rate of 0.01.
optimizer = optim.SGD(net.parameters(), lr=0.01)

In [27]:
#in training loop:

In [28]:
# One complete optimization step, deliberately kept in a single cell.
# The graph built by a forward pass can only be backpropagated through once,
# so running backward() again without a fresh forward pass raises
# "Trying to backward through the graph second time". Keeping the forward
# pass, loss, backward pass and update together makes the cell safe to re-run.
optimizer.zero_grad()               # zero the gradient buffers
output = net(input)                 # fresh forward pass builds a fresh graph
loss = criterion(output, target)
loss.backward()
optimizer.step()                    # does the update

In [33]:
import torch
import torchvision
import torchvision.transforms as transforms