In [2]:
from pprint import pprint

import torch
import torch.nn as nn
import torch.nn.functional as F

## Define nn.Module

class Module:
    """
    Documentation-only stand-in for ``torch.nn.Module``.

    This class defines no attributes or methods of its own; it only
    reproduces the description of the real base class for reference.

    ``nn.Module`` is the base class for every neural network module, and
    user-defined models are expected to subclass it.

    A module may hold other modules as ordinary attributes, which lets
    models be composed as a tree of submodules::

        import torch.nn as nn
        import torch.nn.functional as F

        class Model(nn.Module):
            def __init__(self):
                super(Model, self).__init__()
                self.conv1 = nn.Conv2d(1, 20, 5)
                self.conv2 = nn.Conv2d(20, 20, 5)

            def forward(self, x):
                x = F.relu(self.conv1(x))
                return F.relu(self.conv2(x))

    Submodules assigned this way are registered, so their parameters are
    converted as well when methods such as :meth:`to` are called.

    :ivar training: Whether the module is in training mode (``True``) or
                    evaluation mode (``False``).
    :vartype training: bool
    """

In [18]:
class Model(nn.Module):
    """Small demo network: conv1 -> bn1 -> ReLU -> conv2 -> ReLU."""

    def __init__(self):
        super().__init__()
        # Assigning layers as attributes registers them as submodules,
        # in this order: conv1, bn1, conv2.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=5, kernel_size=3, bias=True)
        self.bn1 = nn.BatchNorm2d(num_features=5)
        self.conv2 = nn.Conv2d(in_channels=5, out_channels=10, kernel_size=3, bias=False)

    def forward(self, x):
        """Run ``x`` through conv1, bn1 and ReLU, then conv2 and ReLU."""
        hidden = self.conv1(x)
        hidden = self.bn1(hidden)
        hidden = F.relu(hidden)
        return F.relu(self.conv2(hidden))

In [19]:
# Build the demo network; the bare name on the last line makes the notebook display its repr.
model = Model()
model

Model(
  (conv1): Conv2d(1, 5, kernel_size=(3, 3), stride=(1, 1))
  (bn1): BatchNorm2d(5, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(5, 10, kernel_size=(3, 3), stride=(1, 1), bias=False)
)

### Debugging model weights interactively

In [20]:
# 1. using named_parameters()
# Each item yielded is a (name, parameter) pair.
separator = "-" * 100
for name, tensor in model.named_parameters():
    print(f"{name:20} - shape: {tensor.size()}")
    print(tensor)
    print(separator)

conv1.weight         - shape: torch.Size([5, 1, 3, 3])
Parameter containing:
tensor([[[[-0.2852, -0.0701, -0.2741],
          [-0.2300,  0.2451,  0.1722],
          [ 0.0412, -0.2351, -0.3152]]],


        [[[-0.3312, -0.2288,  0.1602],
          [-0.2320, -0.2592, -0.1576],
          [ 0.3008,  0.2347, -0.3115]]],


        [[[ 0.0540,  0.3207, -0.0226],
          [-0.2521,  0.0766,  0.2607],
          [-0.1463, -0.2773, -0.2015]]],


        [[[-0.1584,  0.2522, -0.2695],
          [-0.0198,  0.2126,  0.0056],
          [ 0.0169,  0.3250,  0.1753]]],


        [[[-0.2620, -0.2368, -0.1383],
          [-0.1316, -0.2939,  0.2736],
          [-0.1345,  0.2951,  0.3140]]]], requires_grad=True)
----------------------------------------------------------------------------------------------------
conv1.bias           - shape: torch.Size([5])
Parameter containing:
tensor([-0.1654, -0.0731, -0.0374, -0.2080,  0.2781], requires_grad=True)
--------------------------------------------------------

In [22]:
# 2. directly access with member variable
print(model.conv1.weight)
print(model.conv1.bias)

Parameter containing:
tensor([[[[-0.2852, -0.0701, -0.2741],
          [-0.2300,  0.2451,  0.1722],
          [ 0.0412, -0.2351, -0.3152]]],


        [[[-0.3312, -0.2288,  0.1602],
          [-0.2320, -0.2592, -0.1576],
          [ 0.3008,  0.2347, -0.3115]]],


        [[[ 0.0540,  0.3207, -0.0226],
          [-0.2521,  0.0766,  0.2607],
          [-0.1463, -0.2773, -0.2015]]],


        [[[-0.1584,  0.2522, -0.2695],
          [-0.0198,  0.2126,  0.0056],
          [ 0.0169,  0.3250,  0.1753]]],


        [[[-0.2620, -0.2368, -0.1383],
          [-0.1316, -0.2939,  0.2736],
          [-0.1345,  0.2951,  0.3140]]]], requires_grad=True)
Parameter containing:
tensor([-0.1654, -0.0731, -0.0374, -0.2080,  0.2781], requires_grad=True)


## Save Trained Model

In [23]:
import os

# Make sure the output folder exists before anything is written into it.
save_folder = "./runs/"
os.makedirs(save_folder, exist_ok=True)
save_path = os.path.join(save_folder, "best.pth")   # ./runs/best.pth

# Only the state_dict is written to disk, not the module object itself.
torch.save(model.state_dict(), save_path)
print(f"Model saving success at {save_path}")
print(f"Saved models : {os.listdir(save_folder)}")

Model saving success at ./runs/best.pth
Saved models : ['best.pth']


## Load Saved Model

In [24]:
new_model = Model()
# map_location="cpu" lets the checkpoint load even when its tensors were saved from a GPU
# and this machine has no CUDA device; load_state_dict then copies the values into new_model.
# NOTE: torch.load unpickles the file, so only load checkpoints from a source you trust.
saved_state = torch.load(save_path, map_location="cpu")
new_model.load_state_dict(saved_state)
print(f"Model loading success from {save_path}")

Model loading success from ./runs/best.pth


### Check if weights loaded successfully

In [25]:
# Walk the parameters of both models in lockstep and check that each pair holds identical values.
param_pairs = zip(model.named_parameters(), new_model.named_parameters())
for (name, trained_weight), (_, saved_weight) in param_pairs:
    matches = torch.equal(trained_weight, saved_weight)
    print(f"parameter {name:15} from trained model and loaded model is equal? -> {matches}")

parameter conv1.weight    from trained model and loaded model is equal? -> True
parameter conv1.bias      from trained model and loaded model is equal? -> True
parameter bn1.weight      from trained model and loaded model is equal? -> True
parameter bn1.bias        from trained model and loaded model is equal? -> True
parameter conv2.weight    from trained model and loaded model is equal? -> True


### What is state_dict()?
 - Very similar to .named_parameters()
 - A Python dictionary that maps each parameter name (key) to its weight tensor (value)
 - Type: collections.OrderedDict

In [26]:
# state_dict() behaves like a dictionary, so iterate over its (key, tensor) items.
separator = "-" * 100
for key, tensor in model.state_dict().items():
    print(f"{key:20} - shape: {tensor.size()}")
    print(tensor)
    print(separator)

conv1.weight         - shape: torch.Size([5, 1, 3, 3])
tensor([[[[-0.2852, -0.0701, -0.2741],
          [-0.2300,  0.2451,  0.1722],
          [ 0.0412, -0.2351, -0.3152]]],


        [[[-0.3312, -0.2288,  0.1602],
          [-0.2320, -0.2592, -0.1576],
          [ 0.3008,  0.2347, -0.3115]]],


        [[[ 0.0540,  0.3207, -0.0226],
          [-0.2521,  0.0766,  0.2607],
          [-0.1463, -0.2773, -0.2015]]],


        [[[-0.1584,  0.2522, -0.2695],
          [-0.0198,  0.2126,  0.0056],
          [ 0.0169,  0.3250,  0.1753]]],


        [[[-0.2620, -0.2368, -0.1383],
          [-0.1316, -0.2939,  0.2736],
          [-0.1345,  0.2951,  0.3140]]]])
----------------------------------------------------------------------------------------------------
conv1.bias           - shape: torch.Size([5])
tensor([-0.1654, -0.0731, -0.0374, -0.2080,  0.2781])
----------------------------------------------------------------------------------------------------
bn1.weight           - shape: torch.Siz

In [27]:
from collections import OrderedDict

# Build the state_dict once and reuse it for both the printout and the check.
state = model.state_dict()
print(f"model.state_dict() type is : {type(state)}")
# isinstance() is the idiomatic type check; comparing type objects with == is flagged as E721.
isinstance(state, OrderedDict)

model.state_dict() type is : <class 'collections.OrderedDict'>


True

### What is the difference between named_parameters() and state_dict()?
 - named_parameters(): returns only parameters
 - state_dict(): returns both parameters and buffers (e.g. BN running_mean, running_var)
 
https://stackoverflow.com/a/54747245

In [28]:
# named_parameters() : returns only parameters
parameter_names = [name for name, _ in model.named_parameters()]
pprint(parameter_names)
print()
# state_dict(): returns both parameters and buffers
state_keys = list(model.state_dict().keys())
pprint(state_keys)

['conv1.weight', 'conv1.bias', 'bn1.weight', 'bn1.bias', 'conv2.weight']

['conv1.weight',
 'conv1.bias',
 'bn1.weight',
 'bn1.bias',
 'bn1.running_mean',
 'bn1.running_var',
 'bn1.num_batches_tracked',
 'conv2.weight']


## CPU vs GPU
 - When training a DL model, different types of processors (CPU, GPU, etc.) are used.
 - Therefore, you should explicitly choose whether your model's parameters live in CPU or GPU memory.

### cpu()
Moves all model parameters and buffers to the CPU.

In [29]:
# Move the model to the CPU, then report the device of every parameter.
model.cpu()
for tensor in model.parameters():
    print(f"model device: {tensor.device}")

model device: cpu
model device: cpu
model device: cpu
model device: cpu
model device: cpu


### cuda()
Moves all model parameters and buffers to the GPU.

In [30]:
# Move the model to the GPU, then report the device of every parameter.
model.cuda()
for tensor in model.parameters():
    print(f"model device: {tensor.device}")

model device: cuda:0
model device: cuda:0
model device: cuda:0
model device: cuda:0
model device: cuda:0


### to()
Moves and/or casts the parameters and buffers

In [31]:
# Only include 'cuda' when a GPU is actually available, so this cell also runs on
# CPU-only machines. On a GPU machine the options and the output are unchanged.
device_options = ['cpu']
if torch.cuda.is_available():
    device_options.append('cuda')

for device_option in device_options:
    # `device` keeps the last option after the loop, i.e. where the model ends up.
    device = torch.device(device_option)
    model.to(device)

    print(f"Set model device to {device_option}")
    for weight in model.parameters():
        print(f"model device: {weight.device}")
    print()

Set model device to cpu
model device: cpu
model device: cpu
model device: cpu
model device: cpu
model device: cpu

Set model device to cuda
model device: cuda:0
model device: cuda:0
model device: cuda:0
model device: cuda:0
model device: cuda:0



## Forward
Defines the computation performed at every call

In [33]:
# Create a random input, move it to `device`, and run it through the model.
cpu_noise = torch.randn(1, 1, 28, 28)
dummy_input = cpu_noise.to(device)
output = model(dummy_input)
print(f"model output: {output.size()}")
output

model output: torch.Size([1, 10, 24, 24])


tensor([[[[0.0000, 0.4362, 0.0000,  ..., 0.1937, 0.6275, 0.2357],
          [0.3993, 0.0000, 0.0000,  ..., 0.1419, 0.4596, 0.0000],
          [0.2169, 0.0000, 0.0000,  ..., 0.0000, 0.2461, 0.0000],
          ...,
          [0.0000, 0.0000, 0.7581,  ..., 0.4056, 0.0000, 0.0000],
          [0.5377, 0.0000, 0.3348,  ..., 0.0000, 0.1200, 0.0665],
          [0.0597, 0.3673, 0.0000,  ..., 0.0000, 0.5259, 0.6356]],

         [[0.3605, 0.0000, 0.3674,  ..., 0.0000, 0.0000, 0.1606],
          [0.1811, 0.0768, 0.0000,  ..., 0.2405, 0.2088, 0.5318],
          [0.3466, 0.0000, 0.0000,  ..., 0.0000, 0.0161, 0.0000],
          ...,
          [0.0000, 0.0693, 0.1541,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.5182, 0.4892,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]],

         [[0.4361, 0.7166, 0.0000,  ..., 0.5499, 0.1858, 0.0000],
          [0.0000, 0.1994, 0.3416,  ..., 0.1077, 0.4668, 0.0000],
          [0.7642, 0.2475, 0.0000,  ..., 0

### Tips
The model's device and the input's device must be identical; otherwise a RuntimeError is raised.

In [34]:
cpu_device = torch.device('cpu')
gpu_device = torch.device('cuda')

# device is same
# Put both the model and the input on the GPU before calling forward.
model.to(gpu_device)
dummy_input = dummy_input.to(gpu_device)
output = model(dummy_input)
print(f"model output: {output.size()}")

model output: torch.Size([1, 10, 24, 24])


In [35]:
dummy_input = dummy_input.to(cpu_device)
model.to(gpu_device)

# device is different
# The forward pass raises a RuntimeError because the input is on the CPU while the
# weights are on the GPU (the exact wording of the message varies between PyTorch versions).
# The error is caught and printed so that the demonstration does not abort "Run All".
try:
    output = model(dummy_input)
    print(f"model output: {output.size()}")
except RuntimeError as err:
    print(f"RuntimeError: {err}")

RuntimeError: Expected object of device type cuda but got device type cpu for argument #1 'self' in call to _thnn_conv2d_forward

### requires_grad_()
Changes whether autograd should record operations on the parameters in this module.

In [36]:
# requires_grad = False
model.requires_grad_(requires_grad=False)
for param, weight in model.named_parameters():
    print(f"param {param:15} required gradient? -> {weight.requires_grad}")

param conv1.weight    required gradient? -> False
param conv1.bias      required gradient? -> False
param bn1.weight      required gradient? -> False
param bn1.bias        required gradient? -> False
param conv2.weight    required gradient? -> False


In [37]:
# requires_grad = True
model.requires_grad_(requires_grad=True)
for param, weight in model.named_parameters():
    print(f"param {param:15} required gradient? -> {weight.requires_grad}")

param conv1.weight    required gradient? -> True
param conv1.bias      required gradient? -> True
param bn1.weight      required gradient? -> True
param bn1.bias        required gradient? -> True
param conv2.weight    required gradient? -> True


### train(), eval()
Sets the module in training(evaluation) mode.

This only affects certain modules that behave differently in training and evaluation,
such as Dropout or BatchNorm.

Set the model to train mode during the training phase, and to eval mode otherwise.

[Below is pytorch implementation of BatchNorm2d](https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/batchnorm.py#L111-L118).
Note that the running statistics (running_mean, running_var) are tracked only when `self.training` is `True` (and `track_running_stats` is enabled).
```
if self.training and self.track_running_stats:
    # TODO: if statement only here to tell the jit to skip emitting this when it is None
    if self.num_batches_tracked is not None:
        self.num_batches_tracked = self.num_batches_tracked + 1
        if self.momentum is None:  # use cumulative moving average
            exponential_average_factor = 1.0 / float(self.num_batches_tracked)
        else:  # use exponential moving average
            exponential_average_factor = self.momentum
```

In [38]:
model.train()  # set model to train mode
# The mode set on the model is visible on its submodules, here the batch-norm layer.
bn_training_flag = model.bn1.training
print(f"model.bn1.training: {bn_training_flag}")

model.bn1.training: True


In [39]:
model.eval()  # set model to eval mode
# The mode set on the model is visible on its submodules, here the batch-norm layer.
bn_training_flag = model.bn1.training
print(f"model.bn1.training: {bn_training_flag}")

model.bn1.training: False


### You can find more details about nn.Module in the official PyTorch documentation
https://pytorch.org/docs/stable/generated/torch.nn.Module.html

It is highly recommended to check the official documentation if you have any questions about PyTorch.