### Defining a Network

In [1]:
import torch 
import torch.nn as nn
import torch.nn.functional as F

# nn.Module is the base class that every PyTorch network (and every layer in torch.nn) subclasses
class NeuralNet(nn.Module):
    """LeNet-style CNN: two conv + max-pool stages followed by three linear layers.

    Per-sample shapes for a 1 x 32 x 32 input:

        input                     1 x 32 x 32
        conv1 (5 x 5) + ReLU      6 x 28 x 28
        max-pool (2 x 2)          6 x 14 x 14
        conv2 (5 x 5) + ReLU     16 x 10 x 10
        max-pool (2 x 2)         16 x  5 x  5
        flatten                  400
        fc1 -> fc2 -> fc3        120 -> 84 -> 10

    Only ``forward`` has to be written; autograd derives the backward pass.
    ReLU and max-pooling hold no learnable state, so they are applied
    functionally in ``forward`` rather than being created in ``__init__``.
    ``forward`` is not meant to be called directly: use ``net(x)``, which goes
    through ``nn.Module.__call__`` and runs any registered hooks.
    """

    def __init__(self):
        """Create the learnable layers (two convolutions, three linear layers)."""
        super().__init__()
        # First convolution: 1 input channel -> 6 feature maps, 5 x 5 kernel.
        self.conv1 = nn.Conv2d(1, 6, 5)
        # Second convolution: 6 feature maps -> 16 feature maps, 5 x 5 kernel.
        self.conv2 = nn.Conv2d(6, 16, 5)
        # Fully connected layers; the first consumes the flattened 16 x 5 x 5 maps.
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run the network on a batch.

        Args:
            x: tensor of shape (batch, 1, 32, 32); other spatial sizes will not
               match the 400 input features expected by ``fc1``.

        Returns:
            Tensor of shape (batch, 10) holding the raw (un-normalised) outputs.
        """
        # 2 x 2 max-pool: 6 x 28 x 28 -> 6 x 14 x 14.
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # A single number suffices for a square window: 16 x 10 x 10 -> 16 x 5 x 5.
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # Keep the batch dimension and collapse everything else: (batch, 400).
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # No activation on the last layer: the loss receives raw outputs.
        x = self.fc3(x)
        return x

    def num_of_flat_features(self, x):
        """Return the number of features per sample in ``x``.

        This is the product of every dimension except the first (batch) one,
        e.g. 400 for a tensor of shape (1, 16, 5, 5). ``forward`` no longer
        needs it but it is kept so existing callers continue to work.
        """
        num_features = 1
        for dim in x.size()[1:]:
            num_features *= dim
        return num_features

# Instantiate the network; the bare `net` on the last line displays its layer summary.
net = NeuralNet()
net

NeuralNet(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [2]:
# Gather the model's learnable parameters (weights and biases) into a list.
param = [tensor for tensor in net.parameters()]
len(param)

10

In [3]:
# Print the shape of every learnable tensor. Iterating over the list itself
# avoids hard-coding the parameter count, so this keeps working if layers change.
for p in param:
    print(p.size())

torch.Size([6, 1, 5, 5])
torch.Size([6])
torch.Size([16, 6, 5, 5])
torch.Size([16])
torch.Size([120, 400])
torch.Size([120])
torch.Size([84, 120])
torch.Size([84])
torch.Size([10, 84])
torch.Size([10])


In [4]:
# A detailed view of the parameters: a list of (name, tensor) pairs.
# Kept under its own name so the `param` list of plain tensors is not replaced
# by a list of tuples (which would break the size-printing cell on a re-run).
named_params = list(net.named_parameters())
len(named_params)

10

In [5]:
# Show only the first (name, values) pair; printing every tensor would flood the output.
# The loop variables are deliberately not called `param`, so the notebook-level
# `param` list is not overwritten by a single tensor.
for layer_name, layer_param in net.named_parameters():
    print(layer_name, layer_param.data)
    break

conv1.weight tensor([[[[-0.1973,  0.1441, -0.1931,  0.1865, -0.1701],
          [-0.1249, -0.1571, -0.1246,  0.1344, -0.1944],
          [-0.1563, -0.0145, -0.1876,  0.0460,  0.1662],
          [-0.1878,  0.0535, -0.0005,  0.0971,  0.0039],
          [ 0.1449, -0.1251, -0.0664, -0.1351,  0.1475]]],


        [[[ 0.0729, -0.1037, -0.1384,  0.1544,  0.0736],
          [-0.1383, -0.0221,  0.0405,  0.1948, -0.0636],
          [-0.0050, -0.0425, -0.0581, -0.1180,  0.1647],
          [-0.1828,  0.1465,  0.1235,  0.0464, -0.0210],
          [ 0.0247,  0.1393,  0.0507, -0.0561,  0.0082]]],


        [[[ 0.0055,  0.1133,  0.1573, -0.1209, -0.0170],
          [-0.0975,  0.0385,  0.1862, -0.0741, -0.1965],
          [ 0.0360,  0.0911,  0.1235,  0.1954, -0.1332],
          [-0.0523, -0.1640, -0.1746,  0.0640, -0.1908],
          [-0.0339, -0.0097,  0.1895,  0.0439, -0.0177]]],


        [[[-0.0361,  0.1131, -0.1163,  0.0054, -0.1796],
          [ 0.1649,  0.0555,  0.1771, -0.1388,  0.1147],
      

In [6]:
# nn.Conv2d takes a 4D tensor: nSamples x nChannels x Height x Width.
# Here that is one random single-channel 32 x 32 image.
# NOTE(review): `input` shadows the Python builtin of the same name; a later cell reads it.
input = torch.randn(1,1,32,32)
out = net(input)
out

tensor([[-0.0015,  0.1118,  0.1037, -0.1606, -0.1546,  0.0788,  0.0225,  0.0484,
         -0.0402,  0.1156]], grad_fn=<ThAddmmBackward>)

### Loss function

In [7]:
# Dummy regression target, reshaped to (1, 10) to match the network output.
target = torch.randn(10)
target = target.view(1, -1)
criterion = nn.MSELoss()
# nn.MSELoss is called as criterion(input, target): the prediction comes first.
# The value is the same either way (squared error is symmetric), but this order
# makes the prediction the first input of the loss node in the autograd graph.
loss = criterion(out, target)
loss

tensor(0.9376, grad_fn=<MeanBackward1>)

In [8]:
# grad_fn is the autograd node that produced `loss`.
print(loss.grad_fn)

<MeanBackward1 object at 0x106521160>


In [9]:
# Step one node back in the graph: the first input of loss.grad_fn.
print(loss.grad_fn.next_functions[0][0])

<PowBackward0 object at 0x1065213c8>


In [10]:
# Step two nodes back by following the first input twice.
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])

<SubBackward0 object at 0x1065216d8>


In [11]:
# Prints the whole graph of computations, one node per line.
def print_graph(g, level=0):
    """Recursively print the autograd graph rooted at ``g``.

    Args:
        g: a graph node exposing ``next_functions`` (an iterable of tuples
           whose first element is the upstream node), or ``None``.
        level: current depth; each level is prefixed with four more ``*``.

    ``None`` entries (inputs with no upstream node) are skipped silently.
    """
    # Identity check: `== None` would defer to the node's own __eq__.
    if g is None:
        return
    print('*' * level * 4, g)
    for subg in g.next_functions:
        print_graph(subg[0], level + 1)

# Walk the graph starting from the node that produced `loss`.
print_graph(loss.grad_fn, 0)

 <MeanBackward1 object at 0x106521cc0>
**** <PowBackward0 object at 0x106521c50>
******** <SubBackward0 object at 0x106521940>
************ <ThAddmmBackward object at 0x106521e80>
**************** <ExpandBackward object at 0x106521e48>
******************** <AccumulateGrad object at 0x107b6f320>
**************** <ReluBackward object at 0x106521ef0>
******************** <ThAddmmBackward object at 0x107b6f320>
************************ <ExpandBackward object at 0x10c4f9390>
**************************** <AccumulateGrad object at 0x10c4f9470>
************************ <ReluBackward object at 0x10c4f93c8>
**************************** <ThAddmmBackward object at 0x10c4f9470>
******************************** <ExpandBackward object at 0x10c4f9588>
************************************ <AccumulateGrad object at 0x10c4f9668>
******************************** <ViewBackward object at 0x10c4f95c0>
************************************ <MaxPool2DWithIndicesBackward object at 0x10c4f9668>
******************

### Backpropagation

In [12]:
# net.zero_grad()
# print('conv1.bias.grad before backprop')
# print(net.conv1.bias.grad)
# Output:
# conv1.bias.grad before backprop
# None

In [13]:
# loss.backward()
# print('conv1.bias.grad after backward')
# print(net.conv1.bias.grad)
# Output:
# conv1.bias.grad after backward
# tensor([-0.0182,  0.0038,  0.0034,  0.0181,  0.0007,  0.0107])

### Update weights

In [14]:
# loss.backward()
# lr = 0.01
# for f in net.parameters():
#    f.data.sub_(f.grad.data * lr)

In [15]:
"""
Instead of the plain weight-update rule, w -= lr * grad, we can use an optimizer from
torch.optim (e.g. SGD, Nesterov SGD, Adam) for better convergence.
One training step = forward pass + loss + backpropagation + weight update.

"""
import torch.optim as optim
optimizer = optim.SGD(net.parameters(), lr=0.01)

# While training, one step is:
# clear old gradients -> forward pass -> loss -> backward pass -> weight update.
optimizer.zero_grad()
out = net(input)
# Compare the fresh prediction with the target. The previous `loss` tensor must
# not be fed back in: it is a scalar result, not the data being fitted.
loss = criterion(out, target)
loss.backward()
optimizer.step()