In [1]:
# Source tutorial: https://pytorch.org/tutorials/beginner/blitz/neural_networks_tutorial.html

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [3]:
class Net(nn.Module):
    """Small convolutional network: two conv/ReLU/max-pool stages, then three linear layers.

    The first convolution takes single-channel input. ``fc1`` is sized for
    16 * 5 * 5 features, which is what the two conv/pool stages produce for
    32x32 inputs. The network returns 10 raw scores per sample (no softmax
    or other normalisation is applied).
    """

    def __init__(self):
        super(Net, self).__init__()

        # Convolutional stages: 1 -> 6 -> 16 channels, 5x5 kernels.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)

        # Fully connected head: 400 -> 120 -> 84 -> 10.
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Run a batch through the network and return a (batch, 10) tensor of scores."""
        # Each stage: convolution, ReLU, then 2x2 max pooling.
        for conv in (self.conv1, self.conv2):
            x = F.max_pool2d(F.relu(conv(x)), 2)

        # Collapse everything except the batch dimension before the linear layers.
        flat = x.view(-1, self.num_flat_features(x))

        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

    def num_flat_features(self, x):
        """Return the product of all dimensions of ``x`` except the first (batch) one."""
        per_sample_dims = x.size()[1:]
        total = 1
        for extent in per_sample_dims:
            total = total * extent
        return total

In [4]:
# Instantiate the convolutional network defined in the previous cell.
net = Net()

In [5]:
# Printing a module lists its registered sub-layers in the order they were assigned.
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [6]:
# Materialise the learnable parameters as a list so they can be counted and indexed.
params = list(net.parameters())

In [7]:
# Five layers, each contributing a weight and a bias tensor, give 10 entries.
print(len(params))

10


In [8]:
# The first parameter is conv1's weight: (out_channels, in_channels, kernel_h, kernel_w).
print(params[0].size())

torch.Size([6, 1, 5, 5])


In [9]:
# One random single-channel 32x32 sample in (batch, channels, height, width) layout.
# 32x32 is the size for which the two conv/pool stages yield the 16*5*5 features fc1 expects.
# NOTE(review): this name shadows Python's builtin `input`; it is kept because later cells reuse it.
# NOTE(review): no random seed is set, so these values and every recorded output differ between runs.
input = torch.randn(1, 1, 32, 32)

In [10]:
input

tensor([[[[-3.6777e-02, -1.5759e+00, -8.3049e-01,  ...,  9.7108e-01,
           -1.0438e-02, -3.1723e-01],
          [ 1.1913e+00,  4.5891e-02, -1.9957e-01,  ...,  1.3491e+00,
            2.7154e-01, -9.1257e-02],
          [ 1.2028e+00,  2.0688e+00,  3.5501e-01,  ..., -6.6973e-01,
           -5.8035e-01,  1.9300e-01],
          ...,
          [-1.0324e+00, -7.2281e-01,  3.8218e-01,  ...,  1.8009e-01,
           -1.1884e-01,  5.3104e-01],
          [ 1.7706e+00,  1.0478e+00,  5.6744e-02,  ..., -1.2359e-01,
            4.3303e-01, -8.9871e-01],
          [ 1.2281e+00, -8.7393e-01,  2.2251e-01,  ...,  4.3049e-01,
            8.4921e-01, -2.2699e-01]]]])

In [11]:
# Forward pass: calling the module runs Net.forward and yields one row of 10 scores.
out = net(input)

In [12]:
out

tensor([[-0.1511,  0.0547,  0.0148,  0.0079,  0.0648, -0.1065,  0.0079,  0.0408,
          0.0398, -0.0847]], grad_fn=<ThAddmmBackward>)

In [13]:
# Clear any gradients already stored on the parameters; backward() accumulates into them.
net.zero_grad()

In [14]:
# `out` is not a scalar, so backward() needs an explicit gradient of matching shape (1, 10).
# Random values are used here only to exercise backpropagation.
out.backward(torch.randn(1, 10))

In [15]:
# Fresh forward pass: the loss computed later is built on this new result rather than on `out`,
# whose graph was already used by the backward() call in the previous cell.
output = net(input)

In [22]:
# NOTE(review): execution counts jump from 15 to 22 here, so cells were re-run or removed;
# confirm the notebook still works with Restart & Run All.
# Dummy regression target: the floats 1..10, one value per network output.
target = torch.arange(1, 11, dtype=torch.float)

In [23]:
target

tensor([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.])

In [24]:
# Reshape from (10,) to (1, 10) so the target has the same shape as the network output.
target = target.view(1, -1)

In [25]:
target

tensor([[ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.]])

In [26]:
# Mean-squared-error loss with its default settings (averages the squared differences).
criterion = nn.MSELoss()

In [27]:
# Scalar loss between the latest forward pass and the dummy target.
loss = criterion(output, target)

In [28]:
loss

tensor(38.5828, grad_fn=<MseLossBackward>)

In [29]:
# The autograd node that produced `loss`; this is the starting point for walking the graph backwards.
loss.grad_fn

<MseLossBackward at 0x10c6ed160>

In [30]:
# One step back along the loss's first input: the node that produced `output` (the last linear layer).
loss.grad_fn.next_functions[0][0]

<ThAddmmBackward at 0x10c6ed400>

In [31]:
# Two steps back, following the first input of each node in turn.
loss.grad_fn.next_functions[0][0].next_functions[0][0]

<ExpandBackward at 0x10c6ed630>

In [32]:
# Reset the stored gradients so what is printed after backward() comes from this loss alone.
net.zero_grad()

In [33]:
# Baseline: conv1's bias gradient before backpropagating the loss.
print('conv1 bias grad before backward')
print(net.conv1.bias.grad)

conv1 bias grad before backward
tensor([0., 0., 0., 0., 0., 0.])


In [34]:
# Backpropagate the loss; gradients are accumulated into each parameter's .grad.
loss.backward()

In [35]:
# Same tensor as printed before backward(), now holding the computed gradient.
print('conv1 bias grad after backward')
print(net.conv1.bias.grad)

conv1 bias grad after backward
tensor([ 0.0230, -0.0347,  0.0514,  0.0391, -0.0974,  0.0789])


In [36]:
# Step size used by the hand-written gradient-descent update in the next cell.
learning_rate = 0.01

In [37]:
# Plain SGD step done by hand: weight <- weight - learning_rate * gradient.
# The in-place update runs under no_grad() so autograd does not record it;
# this replaces editing `.data` directly, which bypasses autograd's checks.
with torch.no_grad():
    for f in net.parameters():
        # A parameter that received no gradient has .grad == None; skip it
        # rather than failing on the attribute access.
        if f.grad is None:
            continue
        f.sub_(f.grad * learning_rate)

In [38]:
import torch.optim as optim

In [39]:
# Same plain-SGD update rule as the manual loop, now delegated to torch.optim.
# NOTE(review): lr=0.01 repeats the value of `learning_rate` instead of reusing it.
optimizer = optim.SGD(net.parameters(), lr=0.01)

In [40]:
# One training iteration follows: clear old gradients, forward, loss, backward, update.
optimizer.zero_grad()

In [41]:
output = net(input)

In [42]:
loss = criterion(output, target)

In [43]:
loss.backward()

In [44]:
# Apply the SGD update to every parameter using the gradients just computed.
optimizer.step()