In [53]:
# Define network 
import torch 
import torch.nn as nn 
import torch.nn.functional as F 

In [54]:
class SampleNet(nn.Module):
    """Small convolutional classifier: two conv/pool stages followed by three linear layers.

    The first linear layer takes 16 * 5 * 5 flattened features, which is what a
    single-channel 32x32 input produces after the two 5x5 convolutions and the
    two 2x2 max-poolings (32 -> 28 -> 14 -> 10 -> 5).
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor: 1 -> 6 -> 16 channels, 5x5 kernels.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # Fully connected head: 400 -> 120 -> 84 -> 10.
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Run the network on ``x`` and return the 10 raw output scores per row."""
        # Stage 1: convolution, ReLU, then max pooling over a (2, 2) window.
        stage1 = F.relu(self.conv1(x))
        stage1 = F.max_pool2d(stage1, kernel_size=(2, 2))
        # Stage 2: same pattern; a square window may be given as a single number.
        stage2 = F.relu(self.conv2(stage1))
        stage2 = F.max_pool2d(stage2, kernel_size=2)
        # Collapse every non-batch dimension into one feature axis.
        flat = stage2.view(-1, self.num_flat_features(stage2))
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        # No activation on the last layer: the raw scores are returned.
        return self.fc3(hidden)

    def num_flat_features(self, x):
        """Return the product of all dimensions of ``x`` except the first (batch) one."""
        # torch.Size.numel() multiplies the remaining extents (1 if there are none).
        return x.size()[1:].numel()
# Instantiate the model; later cells refer to it through the name `network`.
network = SampleNet()
# Printing an nn.Module lists its registered sub-layers and their settings.
print(network)

SampleNet(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [55]:
# Gather every learnable tensor registered on the network.
params = list(network.parameters())
print(len(params))
# Ten tensors in registration order: each layer (conv1, conv2, fc1, fc2, fc3)
# contributes its weight tensor followed by its bias vector.
for position in range(10):
    print(params[position].size())


10
torch.Size([6, 1, 5, 5])
torch.Size([6])
torch.Size([16, 6, 5, 5])
torch.Size([16])
torch.Size([120, 400])
torch.Size([120])
torch.Size([84, 120])
torch.Size([84])
torch.Size([10, 84])
torch.Size([10])


In [56]:
# nn.Conv2d consumes 4-D batches laid out as (n_samples, n_channels, height, width);
# here that is one single-channel 32x32 sample of random noise.
# NOTE: the name `input` shadows the Python builtin, but later cells read it,
# so it is kept as-is.
input = torch.randn((1, 1, 32, 32))
out = network(input)  # forward pass
print(out)  # the output scores
print(out.shape)  # and their shape

tensor([[ 0.0946, -0.0267, -0.0753, -0.0472,  0.0171, -0.0872,  0.0157, -0.1682,
         -0.1327,  0.1094]], grad_fn=<AddmmBackward>)
torch.Size([1, 10])


In [57]:
# Clear the gradient buffers of every parameter before back-propagating.
network.zero_grad()
# `out` is not a scalar, so an explicit upstream gradient of matching
# shape (1, 10) is supplied; random values are used for demonstration.
upstream_grad = torch.randn(1, 10)
out.backward(gradient=upstream_grad)

In [58]:
# LOSS FUNCTION
# A criterion takes an (output, target) pair and returns a single loss value.
# nn.MSELoss is the mean-squared-error criterion.
output = network(input)
# Dummy target: 10 random values reshaped to one row so it lines up with `output`.
target = torch.randn(10).view(1, -1)
criterion = nn.MSELoss()
loss = criterion(output, target)

print(loss)

tensor(1.2932, grad_fn=<MseLossBackward>)


In [59]:
# Follow the autograd graph backwards from the loss, two hops deep,
# always taking the first input edge of each node.
node = loss.grad_fn
print(node)
node = node.next_functions[0][0]
print(node)
node = node.next_functions[0][0]
print(node)


<MseLossBackward object at 0x7f7547cdc0f0>
<AddmmBackward object at 0x7f7547cdc748>
<AccumulateGrad object at 0x7f7547cdc0f0>


In [63]:
# BACKPROP
# Reset all gradient buffers so the values printed afterwards come from this
# backward pass only.
network.zero_grad()
conv1_bias = network.conv1.bias
print('conv1.bias.grad before backward')
print(conv1_bias.grad)

# Back-propagate the loss; this fills in .grad on the parameters.
loss.backward()
print('conv1.bias.grad after backward')
print(conv1_bias.grad)

conv1.bias.grad before backward
tensor([0., 0., 0., 0., 0., 0.])
conv1.bias.grad after backward
tensor([-0.0089,  0.0088,  0.0109,  0.0067, -0.0049, -0.0068])


In [68]:
# UPDATE WEIGHTS
# Manual gradient-descent step: weight <- weight - learning_rate * gradient

learning_rate = 0.01
# Update in place under torch.no_grad() instead of going through `.data`:
# the step is still excluded from the autograd graph, but autograd's
# in-place safety checks keep working.
with torch.no_grad():
    for f in network.parameters():
        # A parameter that received no gradient has nothing to apply;
        # skipping it avoids an AttributeError on `None`.
        if f.grad is not None:
            f.sub_(f.grad * learning_rate)
        # Show the parameter (its weights) after the step.
        print(f)


Parameter containing:
tensor([[[[-0.1408, -0.0170, -0.0969, -0.0608,  0.0046],
          [-0.1818, -0.0775,  0.0273,  0.1258,  0.0628],
          [-0.0653,  0.0213, -0.0523, -0.1444,  0.0847],
          [-0.0170,  0.0501, -0.0195,  0.1502, -0.0193],
          [-0.1992, -0.0709, -0.1096,  0.0889,  0.1241]]],


        [[[ 0.0285,  0.1648, -0.1416, -0.0024,  0.0607],
          [-0.1614,  0.0245,  0.1147, -0.1550,  0.0521],
          [ 0.1117, -0.0486, -0.0786,  0.1713, -0.1478],
          [ 0.0646, -0.0362, -0.1495,  0.0876, -0.0660],
          [ 0.0031,  0.1198,  0.1130, -0.1571, -0.0186]]],


        [[[-0.1160,  0.0977,  0.1923,  0.0328,  0.1802],
          [ 0.1391, -0.0715,  0.1625,  0.0352,  0.0357],
          [ 0.1789,  0.1234, -0.0009, -0.1871, -0.1321],
          [ 0.1473,  0.1229,  0.1120,  0.0729,  0.1495],
          [-0.0807,  0.1287,  0.1632,  0.0615,  0.0804]]],


        [[[ 0.1682,  0.0984, -0.1596, -0.1722,  0.1733],
          [ 0.0711, -0.0523, -0.1400, -0.1401,  0.0379

In [70]:
# Update rules
# SGD, Nesterov-SGD, Adam, RMSProp, ... are all provided by torch.optim,
# so the manual update loop does not have to be hand-written.
from torch import optim

optimizer = optim.SGD(params=network.parameters(), lr=0.01)

# One iteration of a training loop:
optimizer.zero_grad()  # clear the gradient buffers of the optimized parameters
output = network(input)  # forward pass
loss = criterion(output, target)  # compare against the target
loss.backward()  # compute gradients
optimizer.step()  # apply the update
