# Tensors

# Autograd



# nn package

https://pytorch.org/tutorials/beginner/former_torchies/nn_tutorial.html#sphx-glr-beginner-former-torchies-nn-tutorial-py

## Example 1: ConvNet

In [29]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class MNISTConvNet(nn.Module):
    def __init__(self):
        super(MNISTConvNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, 5)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(10, 20, 5)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)
        
    def forward(self, input):
        x = self.pool1(F.relu(self.conv1(input)))
#         print('out1', x.shape)
        
        x = self.pool2(F.relu(self.conv2(x)))
#         print('out2', x.shape)
        
        x = x.view(x.size(0), -1)
#         print('view out', x.shape)
        
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        
        return x
    
net = MNISTConvNet()
print(net)

params = list(net.parameters())
for p in params:
    print(p.shape)

MNISTConvNet(
  (conv1): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=320, out_features=50, bias=True)
  (fc2): Linear(in_features=50, out_features=10, bias=True)
)
torch.Size([10, 1, 5, 5])
torch.Size([10])
torch.Size([20, 10, 5, 5])
torch.Size([20])
torch.Size([50, 320])
torch.Size([50])
torch.Size([10, 50])
torch.Size([10])


In [12]:
input = torch.randn(1, 1, 28, 28)
out = net(input)
print(out.size())

torch.Size([1, 10])


In [13]:
params = list(net.parameters())
for p in params:
    print(p.shape)

torch.Size([10, 1, 5, 5])
torch.Size([10])
torch.Size([20, 10, 5, 5])
torch.Size([20])
torch.Size([50, 320])
torch.Size([50])
torch.Size([10, 50])
torch.Size([10])


In [14]:
target = torch.tensor([3], dtype=torch.long)
loss_fn = nn.CrossEntropyLoss()
err = loss_fn(out, target)
err.backward()

print(err)

tensor(2.2193)


In [15]:
print(net.conv1.weight.grad.size())

torch.Size([10, 1, 5, 5])


In [16]:
print(net.conv1.weight.data.norm())
print(net.conv1.weight.grad.data.norm())

tensor(1.8797)
tensor(0.6236)


## Forward and Backward Function Hooks

In [30]:
def printnorm(self, input, output):
    print('Inside ' + self.__class__.__name__ + ' forward')
    print('')
    print('input: ', type(input))
    print('input.shape: ', len(input))
    print('input[0]: ', type(input[0]))
    print('output: ', type(output))
    print('')
    print('input size: ', input[0].size())
    print('output size: ', output.data.size())
    print('output norm: ', output.data.norm())
    
net.conv2.register_forward_hook(printnorm)

out = net(input)

Inside Conv2d forward

input:  <class 'tuple'>
input.shape:  1
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size:  torch.Size([1, 10, 12, 12])
output size:  torch.Size([1, 20, 8, 8])
output norm:  tensor(13.0571)


In [31]:
def printgradnorm(self, grad_input, grad_output):
    print('Inside ' + self.__class__.__name__ + ' backward')
    print('Inside class: ' + self.__class__.__name__)
    print('')
    print('grad_input: ', type(grad_input))
    print('grad_input.len: ', len(grad_input))
    print('grad_input[0]: ', type(grad_input[0]))
    print('grad_output: ', type(grad_output))
    print('grad_output.len: ', len(grad_output))
    print('grad_output[0]: ', type(grad_output))
    print('')
    print('grad_input[0] size: ', grad_input[0].size())
    print('grad_input[1] size: ', grad_input[1].size())
    print('grad_input[2] size: ', grad_input[2].size())
    print('grad_output size: ', grad_output[0].size())
    print('grad_input norm: ', grad_input[0].norm())
    
net.conv2.register_backward_hook(printgradnorm)

out = net(input)
err = loss_fn(out, target)
err.backward()

Inside Conv2d forward

input:  <class 'tuple'>
input.shape:  1
input[0]:  <class 'torch.Tensor'>
output:  <class 'torch.Tensor'>

input size:  torch.Size([1, 10, 12, 12])
output size:  torch.Size([1, 20, 8, 8])
output norm:  tensor(13.0571)
Inside Conv2d backward
Inside class: Conv2d

grad_input:  <class 'tuple'>
grad_input.len:  3
grad_input[0]:  <class 'torch.Tensor'>
grad_output:  <class 'tuple'>
grad_output.len:  1
grad_output[0]:  <class 'tuple'>

grad_input[0] size:  torch.Size([1, 10, 12, 12])
grad_input[1] size:  torch.Size([20, 10, 5, 5])
grad_input[2] size:  torch.Size([20])
grad_output size:  torch.Size([1, 20, 8, 8])
grad_input norm:  tensor(1.00000e-02 *
       1.5216)


## Example 2: Recurrent Net

In [33]:
class RNN(nn.Module):
    def __init__(self, data_size, hidden_size, output_size):
        super(RNN, self).__init__()
        
        self.hidden_size = hidden_size
        input_size = data_size + hidden_size
        
        self.i2h = nn.Linear(input_size, hidden_size)
        self.h2o = nn.Linear(hidden_size, output_size)
        
    def forward(self, data, last_hidden):
        input = torch.cat((data, last_hidden), 1)
        hidden = self.i2h(input)
        output = self.h2o(hidden)
        return hidden, output
    
rnn = RNN(50, 20, 10)

In [34]:
loss_fn = nn.MSELoss()

batch_size = 10
TIMESTEPS = 5

batch = torch.randn(batch_size, 50)
hidden = torch.zeros(batch_size, 20)
target = torch.zeros(batch_size, 10)

loss = 0
for t in range(TIMESTEPS):
    hidden, output = rnn(batch, hidden)
    loss += loss_fn(output, target)
    
loss.backward()

In [35]:
loss

tensor(0.7342)

# Multi-GPU examples

## DataParallel

In [37]:
import torch
import torch.nn as nn

class DataParallelModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.block1 = nn.Linear(10, 20)
        
        self.block2 = nn.Linear(20, 20)
        self.block2 = nn.DataParallel(self.block2)
        
        self.block3 = nn.Linear(20, 20)
        
    def forward(self, x):
        x = self.block1(x)
        x = self.block2(x)
        x = self.blcok3(x)
        
        return x

## Part of the model in CPU and part on the GPU

In [38]:
device = torch.device('cuda:0')

class DistributedModel(nn.Module):
    def __init__(self):
        super().__init__(
            embedding=nn.Embedding(1000, 10), 
            rnn=nn.Linear(10, 10).to(device))
        
    def forward(self, x):
        x = self.embedding(x)
        x = x.to(device)
        x = self.rnn(x)
        return x