## Network

In [68]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    
    def __init__(self):
        super(Net, self).__init__()
        
        # 1 input image channel, 6 output channels, 5x5 square convolution kernel
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84,10)
        
    
    def forward(self, x):
        # Max pooling over a (2,2) winwdow
        x = F.max_pool2d(F.relu(self.conv1(x)), (2,2))
        
        # if the size is a square you ca only specify a single number
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
    
    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        print(f"x {x.size()}")
        print(f"size: {size}")
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [87]:
params = list(net.parameters())
print(len(params))
print(params[0].size())  # conv1's weight

10
torch.Size([6, 1, 5, 5])


In [70]:
input = torch.randn(1,1,32,32)
print(input.size())
print(input)

torch.Size([1, 1, 32, 32])
tensor([[[[-0.1549,  0.2317,  0.7331,  ...,  0.3165,  2.9773,  0.6735],
          [ 1.9057, -1.1844,  1.8970,  ..., -0.9171, -0.6107, -0.7593],
          [ 0.1875, -0.1552,  0.8687,  ..., -0.0136,  1.1841,  0.9335],
          ...,
          [-1.8315,  0.9363,  0.7049,  ...,  1.3065, -0.2141, -0.3227],
          [-0.4135,  0.9510, -0.6166,  ..., -1.1781, -0.3725,  0.3292],
          [-0.1789, -0.7750, -0.3001,  ..., -0.9187, -0.2137, -0.4583]]]])


In [71]:
out = net(input)
print(out)

x torch.Size([1, 16, 5, 5])
size: torch.Size([16, 5, 5])
tensor([[ 1.3309e-01, -9.1314e-05, -1.3529e-01, -9.4567e-02,  3.2741e-02,
          1.4250e-02, -1.0340e-01, -2.3946e-01, -1.8203e-02, -1.1430e-01]],
       grad_fn=<AddmmBackward>)


In [72]:
net.zero_grad()
out.backward(torch.randn(1,10))

## Loss Function

In [77]:
output = net(input)
target = torch.randn(10) # a dummy target, e.g.
print(f"output: {output}")
print(f"target: {target}")

x torch.Size([1, 16, 5, 5])
size: torch.Size([16, 5, 5])
output: tensor([[ 1.3309e-01, -9.1314e-05, -1.3529e-01, -9.4567e-02,  3.2741e-02,
          1.4250e-02, -1.0340e-01, -2.3946e-01, -1.8203e-02, -1.1430e-01]],
       grad_fn=<AddmmBackward>)
target: tensor([-0.6125,  1.8788, -0.1974,  1.1310,  0.1582, -0.0340,  0.0494, -0.0104,
         1.0798,  1.7806])


In [89]:
target = target.view(1, -1)
print(target)

tensor([[-0.6125,  1.8788, -0.1974,  1.1310,  0.1582, -0.0340,  0.0494, -0.0104,
          1.0798,  1.7806]])


In [80]:
criterion = nn.MSELoss()

loss = criterion(output, target)
print(loss)

tensor(1.0482, grad_fn=<MseLossBackward>)


In [81]:
print(loss.grad_fn) # MSE loss

<MseLossBackward object at 0x11ce97860>


In [82]:
print(loss.grad_fn.next_functions[0][0]) # Linear

<AddmmBackward object at 0x11cbb56d8>


In [83]:
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])  # ReLU

<AccumulateGrad object at 0x11cbba518>


## Backprop

In [84]:
net.zero_grad()  # zeroes the gradient buffers of all parameters

print('conv2.bias.grad before backward')
print(net.conv1.bias.grad)

loss.backward()

print('conv1.bias.grad aferter backward')
print(net.conv1.bias.grad)


conv2.bias.grad before backward
tensor([0., 0., 0., 0., 0., 0.])
conv1.bias.grad aferter backward
tensor([ 0.0175,  0.0441,  0.0084, -0.0087,  0.0126, -0.0050])


## Update the weights

In [85]:
learning_rate = 0.01
for f in net.parameters():
    f.data.sub_(f.grad.data * learning_rate)

In [92]:
import torch.optim as optim

# create your optimizer
optimizer = optim.SGD(net.parameters(), lr=0.01)

# in your training loop:
optimizer.zero_grad() # zero the gradient buffers
output = net(input)
loss = criterion(output, target)
loss.backward()
optimizer.step() # Does the update


x torch.Size([1, 16, 5, 5])
size: torch.Size([16, 5, 5])
