In [None]:
import torch 
import torch.nn as nn 
import torch.nn.functional as f

In [None]:
class Net(nn.Module):
  """Small convolutional classifier: two conv/pool stages, then three linear layers.

  With a (N, 1, 32, 32) input the second pooling stage yields 16 maps of 5x5,
  which is the 16*5*5 feature count fc1 expects. The output is (N, 10).
  """

  def __init__(self):
    """Create the layers (conv1, conv2, fc1, fc2, fc3) in that order."""
    super().__init__()
    # Convolutions: 1 -> 6 -> 16 channels, each with a 5x5 kernel.
    self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
    self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
    # Fully connected head: 400 -> 120 -> 84 -> 10.
    self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
    self.fc2 = nn.Linear(in_features=120, out_features=84)
    self.fc3 = nn.Linear(in_features=84, out_features=10)

  def forward(self, x):
    """Run a 4-D batch `x` through the network and return the raw scores."""
    # Stage 1: convolution -> ReLU -> 2x2 max pooling.
    stage1 = f.relu(self.conv1(x))
    stage1 = f.max_pool2d(stage1, (2, 2))
    # Stage 2: same pattern; a bare 2 means a square 2x2 window.
    stage2 = f.relu(self.conv2(stage1))
    stage2 = f.max_pool2d(stage2, 2)
    # Collapse everything except the batch dimension.
    flat = torch.flatten(stage2, 1)
    hidden = f.relu(self.fc1(flat))
    hidden = f.relu(self.fc2(hidden))
    # No activation on the last layer: the scores are returned as-is.
    return self.fc3(hidden)


net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [None]:
#The learnable parameters of a model are returned by net.parameters()

In [None]:
# Gather every learnable tensor (weights and biases) of the network into a list.
params = [*net.parameters()]
print(len(params))
# The first entry is conv1's weight: (out_channels, in_channels, kH, kW).
print(params[0].shape)

10
torch.Size([6, 1, 5, 5])


In [None]:
# Random dummy batch: 1 sample, 1 channel, 32x32 pixels.
# NOTE(review): `input` shadows the Python builtin of the same name; it is kept
# because later cells pass this variable to net() again.
input = torch.randn(1, 1, 32, 32)
out = net(input)  # forward pass through the network
print(out)

tensor([[-0.1423,  0.0736,  0.0151, -0.0994, -0.0295,  0.0184, -0.0251, -0.0560,
         -0.0715,  0.0347]], grad_fn=<AddmmBackward>)


  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


In [None]:
# Clear the gradients held by the network's parameters, then backpropagate a
# random (1, 10) gradient through `out`. `out` is not a scalar, so backward()
# needs an explicit gradient of matching shape.
net.zero_grad()
out.backward(torch.randn(1,10))

In [None]:
# 1-D tensor holding the integers 0 through 17.
t = torch.arange(18)
t

tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17])

In [None]:
# Reshape into a single column; -1 asks PyTorch to infer the row count (18 here).
t.view(-1,1)

tensor([[ 0],
        [ 1],
        [ 2],
        [ 3],
        [ 4],
        [ 5],
        [ 6],
        [ 7],
        [ 8],
        [ 9],
        [10],
        [11],
        [12],
        [13],
        [14],
        [15],
        [16],
        [17]])

In [None]:
#torch.nn only supports mini-batches. The entire torch.nn package only supports inputs that are a mini-batch of samples, and not a single sample.

#For example, nn.Conv2d will take in a 4D Tensor of nSamples x nChannels x Height x Width.

#If you have a single sample, just use input.unsqueeze(0) to add a fake batch dimension.

In [None]:
# Forward pass, then score the prediction against a random dummy target.
output = net(input)
# Draw 10 random values and reshape them to (1, 10) so they line up with `output`.
target = torch.randn(10).view(1, -1)
# Mean-squared-error criterion between prediction and target.
criterion = nn.MSELoss()
loss = criterion(output, target)
print(loss)

tensor(1.1464, grad_fn=<MseLossBackward>)


In [None]:
#Now, if we follow loss in the backward direction, using its .grad_fn attribute, we will see a graph of computations that looks like this:

In [None]:
# Bare string literal (no logic): the chain of operations from input to loss.
'''input -> conv2d -> relu -> maxpool2d -> conv2d -> relu -> maxpool2d
      -> flatten -> linear -> relu -> linear -> relu -> linear
      -> MSELoss
      -> loss '''

In [None]:
# Walk three nodes back through the autograd graph, starting at the loss.
# Each step follows the first entry of `next_functions`. The walk stops after
# the third node and never steps past it.
node = loss.grad_fn
print(node)
node = node.next_functions[0][0]
print(node)
node = node.next_functions[0][0]
print(node)

<MseLossBackward object at 0x7fb63a851410>
<AddmmBackward object at 0x7fb63a851410>
<AccumulateGrad object at 0x7fb63ea69ad0>


In [None]:
# Reset the gradient buffers first so the "after" values come only from the
# backward pass below rather than accumulating on top of earlier ones.
net.zero_grad()
print("conv1.bias.grad before backward", net.conv1.bias.grad, sep="\n")
loss.backward()
print("conv1.bias.grad after backward", net.conv1.bias.grad, sep="\n")

conv1.bias.grad before backward
tensor([0., 0., 0., 0., 0., 0.])
conv1.bias.grad after backward
tensor([-0.0018, -0.0195, -0.0070, -0.0055,  0.0228,  0.0029])


In [None]:
# Manual SGD step: weight = weight - learning_rate * gradient.
learning_rate = 0.01
# The loop variable must NOT be called `f`: `f` is this notebook's alias for
# torch.nn.functional, and Net.forward looks it up at call time. Rebinding it
# to a parameter tensor here would break every later `net(...)` call.
with torch.no_grad():  # keep the in-place update out of the autograd graph
  for param in net.parameters():
    # Parameters that took no part in the last backward pass have no gradient.
    if param.grad is not None:
      param.sub_(param.grad * learning_rate)

In [None]:
# One training step using a built-in optimizer instead of a hand-written update.
# NOTE(review): notebook convention is to keep imports in the first cell.
import torch.optim as optim
# Plain SGD over all of the network's parameters, learning rate 0.01.
optimizer = optim.SGD(net.parameters(),lr=0.01)
optimizer.zero_grad()  # clear gradients left over from earlier backward() calls
output = net(input)
loss = criterion(output,target)
loss.backward()
optimizer.step()  # apply the update using the gradients just computed