In [19]:
import torch
import torch.nn as nn
from torchvision.transforms import transforms
import torchvision.datasets as dsets
from torch.autograd import Variable
import torch.nn.functional as F


In [2]:
# MNIST training split, stored under ./data (downloaded on demand), with each
# image converted to a tensor by the ToTensor transform.
train_dataset = dsets.MNIST(
    root='./data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)

In [4]:
import matplotlib.pyplot as plt
%matplotlib inline

import numpy as np

### class torch.nn.Module:
-  Base class for all neural network modules
-  Your model should be a subclass of this class

A typical training procedure for a neural network is as follows:

- Define the neural network that has some learnable parameters (or weights)
- Iterate over a dataset of inputs
- Process input through the network
- Compute the loss (how far is the output from being correct)
- Propagate gradients back into the network’s parameters
- Update the weights of the network, typically using a simple update rule:<br> weight = weight - learning_rate * gradient

In [21]:
# Define a model

class Net(nn.Module):
    """Small convolutional classifier with 10 outputs.

    Two (5x5 convolution -> ReLU -> 2x2 max-pool) stages are followed by three
    fully connected layers. The input is expected to be a batch of
    single-channel square images of shape (N, 1, input_size, input_size).

    Args:
        input_size: Height/width of the square input images. The default of 32
            reproduces the original fixed layout (16 * 5 * 5 = 400 features
            into ``fc1``). Pass 28 to feed unpadded 28x28 images.

    Raises:
        ValueError: If ``input_size`` is too small to survive both
            convolution + pooling stages.
    """

    def __init__(self, input_size=32):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)

        # Each stage applies an unpadded 5x5 convolution (side shrinks by 4)
        # followed by a 2x2 max-pool (side is halved, rounding down).
        side = input_size
        for _ in range(2):
            side = (side - 4) // 2
        if side < 1:
            raise ValueError(
                "input_size={} is too small for two 5x5 conv + 2x2 pool stages".format(input_size)
            )

        # Fully connected layers
        self.fc1 = nn.Linear(16 * side * side, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run the network on ``x`` and return the raw (un-normalised) 10-way scores."""
        # Max-pooling over a 2x2 window after each convolution + ReLU
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # Collapse everything except the batch dimension before the linear layers
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        """Return the number of elements per sample, i.e. the product of all dims but the first."""
        num_features = 1
        for dim in x.size()[1:]:
            num_features *= dim
        return num_features
    
# Build the model and print its layer summary.
net = Net()
print(net)
        

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


You only have to define the `forward` function; the `backward` function (where gradients are computed) is automatically defined for you by autograd. You can use any of the tensor operations in the `forward` function. 

In [16]:
# net.parameters() yields the model's learnable tensors; gather them in a list
# so they can be counted and indexed.
params = [p for p in net.parameters()]

print(len(params))
print(type(params))
print(params[0].shape)

10
<class 'list'>
torch.Size([6, 1, 5, 5])


In [17]:
# Prints the full repr of every tensor in `params`, so the output is long.
print(params)

[Parameter containing:
tensor([[[[ 0.0043,  0.1449, -0.0757,  0.1509, -0.1416],
          [-0.0628, -0.1500, -0.1048,  0.1889,  0.0139],
          [-0.1857,  0.1333, -0.1004, -0.0047,  0.0245],
          [ 0.0720,  0.0767,  0.1357,  0.1698, -0.0366],
          [-0.1198,  0.1219, -0.0371,  0.0031, -0.0397]]],


        [[[-0.0076,  0.1303,  0.1844,  0.1236, -0.0340],
          [ 0.1379, -0.0718,  0.1191, -0.0846, -0.1061],
          [-0.0607,  0.1391, -0.1566,  0.0469,  0.1271],
          [ 0.0205,  0.0887, -0.1513,  0.1967, -0.0682],
          [ 0.1568,  0.1424,  0.0641, -0.1676, -0.0275]]],


        [[[ 0.1285,  0.1013,  0.0299,  0.0101,  0.1316],
          [-0.0561, -0.0435,  0.0160, -0.1659, -0.0466],
          [-0.1446,  0.0914,  0.1892,  0.0248,  0.1240],
          [ 0.1655,  0.0301,  0.1945, -0.0931, -0.0078],
          [ 0.0942, -0.1644, -0.0446, -0.0619,  0.1189]]],


        [[[ 0.0284,  0.0422, -0.1441,  0.0645,  0.0809],
          [ 0.1331, -0.0740, -0.0891, -0.0011,  0.022

         0.0981, -0.0925], requires_grad=True)]


In [22]:
# Push one random single-channel 32x32 sample through the network.
# NOTE: `input` shadows the Python builtin; the name is kept because later
# cells refer to it.
input = torch.randn((1, 1, 32, 32))
out = net(input)
print(out)

tensor([[-0.0819,  0.0738, -0.0424,  0.0829,  0.0856,  0.0842,  0.0516, -0.0940,
         -0.0035,  0.1297]], grad_fn=<ThAddmmBackward>)


#### Loss Function:

A loss function takes the (output, target) pair of inputs and computes a value that estimates how far the output is from the target. <br>
There are several loss functions under nn package. <br>
nn.MSELoss computes the mean squared error between the input and the target. 

In [38]:
# Forward pass, then compare the result with a random stand-in target.
output = net(input)
target = torch.randn(10).view(1, -1)  # dummy target, reshaped to a single row
print(target.size())

criterion = nn.MSELoss()
loss = criterion(output, target)
print(loss)

torch.Size([1, 10])
tensor(0.5003, grad_fn=<MseLossBackward>)


In [42]:
# Walk backwards through the autograd graph, starting from the node that produced the loss.
print(loss.grad_fn)
print(loss.grad_fn.next_functions[0][0])  # Linear
# NOTE: [0][0] follows the *first* input of the Linear node, which is not the
# ReLU; in the run recorded with this notebook it printed ExpandBackward.
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])

<MseLossBackward object at 0x000002AB7B5A8A20>
<ThAddmmBackward object at 0x000002AB7B5A85F8>
<ExpandBackward object at 0x000002AB7B5A8A20>


### Backprop:
To backpropagate the error, all we have to do is call `loss.backward()`. You need to clear the existing gradients first, though; otherwise the new gradients will be accumulated into the existing ones.<br>

Now we shall call loss.backward(), and have a look at conv1’s bias gradients before and after the backward.

In [43]:
# Reset the gradient buffers of all parameters so nothing carries over.
net.zero_grad()

print('conv1.bias.grad before backward', net.conv1.bias.grad, sep='\n')

# Backpropagate from the loss; this fills in .grad on the parameters.
loss.backward()

print('conv1.bias.grad after backward', net.conv1.bias.grad, sep='\n')

conv1.bias.grad before backward
None
conv1.bias.grad after backward
tensor([ 0.0157,  0.0053,  0.0004, -0.0038, -0.0032,  0.0048])
