# nn explain
`torch.nn` has two main parts: model data and model components.


Containers are responsible for the model components, while parameters and buffers are responsible for the model data.


containers : Module, Sequential, ModuleList, ModuleDict, ParameterList, ParameterDict — used for module construction


parameters : nn.Parameter(...) — tensors that are trained by the optimizer

buffers    : register_buffer(...) — auxiliary tensors that belong to the model but are not trained

In [1]:
import torch
import torch.nn as nn

## 0.parameters and buffers

**A parameter is just a tensor wrapped in nn.Parameter (requires_grad=True by default); it is listed in model.parameters() and stored in the model's ordered state dict.**


**A buffer is just a tensor (requires_grad may be True or False, normally False) registered with register_buffer(); it is listed in model.buffers() and stored in the model's ordered state dict.**


In one model, 

a parameter receives a gradient in backward() and is updated by optimizer.step()


a buffer takes part in the forward/backward computation but is not updated by optimizer.step()


Both kinds of data belong to the module as a whole, so they are saved by model.state_dict() in the form of an OrderedDict. Moreover, they are restored by model.load_state_dict(...).

nn.Parameter(...) should be created in the `__init__` function so that the parameters exist, already initialized, as soon as the module is constructed.

In [2]:
class test(nn.Module):
    """Toy module contrasting registered parameters with a plain tensor attribute."""

    def __init__(self):
        super().__init__()
        # Wrapped in nn.Parameter: listed by model.parameters().
        self.a = nn.Parameter(torch.randn(4, 4))
        # Sub-modules are registered too, so their weight/bias are listed as well.
        self.linear = nn.Linear(4, 5)
        # A bare tensor, even with requires_grad=True, is NOT registered.
        self.tensor_test = torch.rand((1, 1), requires_grad=True)
        print(f"Not added in nn.Module parameters : {self.tensor_test}")


model = test()
print(model)
# One parameter per line: `a`, then the linear layer's weight and bias.
print(*model.parameters(), sep="\n")

Not added in nn.Module parameters : tensor([[0.8124]], requires_grad=True)
test(
  (linear): Linear(in_features=4, out_features=5, bias=True)
)
Parameter containing:
tensor([[-0.6988, -1.2292, -0.4337, -0.0189],
        [-0.1070, -0.6613, -0.3620,  0.7364],
        [-0.4378,  1.9396,  0.4627,  1.7673],
        [-0.6651,  0.5262, -1.1829,  0.8627]], requires_grad=True)
Parameter containing:
tensor([[ 0.3213, -0.2416, -0.4556,  0.0478],
        [-0.0612,  0.1238, -0.3602,  0.4339],
        [ 0.1471, -0.1525, -0.3646,  0.1010],
        [ 0.4652,  0.1764,  0.3794,  0.2159],
        [ 0.4705, -0.1052,  0.1555,  0.4119]], requires_grad=True)
Parameter containing:
tensor([ 0.1092,  0.3478, -0.1668, -0.4925,  0.1740], requires_grad=True)


In [18]:
class MyModule(nn.Module):
    """Toy module showing two ways to register a parameter and how to register a buffer."""

    def __init__(self):
        super().__init__()

        # First way to get a parameter:
        # assigning an nn.Parameter to an attribute registers it directly.
        self.param1 = nn.Parameter(torch.tensor(1.))

        # Second way to get a parameter:
        # create the nn.Parameter first, then register it under an explicit name.
        param2 = nn.Parameter(torch.tensor(2.))
        self.register_parameter('param2', param2)

        # Buffers are registered with register_buffer(); a plain tensor
        # attribute would not be tracked by the module.
        buff = torch.tensor(3.)
        self.register_buffer('buffer', buff)

    def forward(self, x):
        """Scale ``x`` element-wise by ``param1`` and ``param2``.

        Args:
            x: input tensor of any shape.

        Returns:
            A tensor with the same shape as ``x``.
        """
        # Element-wise product: both parameters are 0-dim scalars, and
        # torch.mm only accepts 2-D matrices, so it cannot be used here.
        return x * self.param1 * self.param2

model = MyModule()

# Registered parameters, one per line.
print("=====para=====")
print(*model.parameters(), sep="\n")

# Registered buffers, one per line.
print("=====buff=====")
print(*model.buffers(), sep="\n")

# The state dict holds parameters and buffers together, in registration order.
print("=====orderlist=====")
print(model.state_dict())

print("=====save&load=====")
# Round trip: write the state dict to disk, then restore it into a fresh instance.
PATH = './MyModule_dict'
saved_state = model.state_dict()
torch.save(saved_state, PATH)
model2 = MyModule()
restored_state = torch.load(PATH)
model2.load_state_dict(restored_state)
print(model2.state_dict())

tensor(3., requires_grad=True)
=====para=====
Parameter containing:
tensor(1., requires_grad=True)
Parameter containing:
tensor(2., requires_grad=True)
=====buff=====
tensor(3., requires_grad=True)
=====orderlist=====
OrderedDict([('param1', tensor(1.)), ('param2', tensor(2.)), ('buffer', tensor(3.))])
=====save&load=====
tensor(3., requires_grad=True)
OrderedDict([('param1', tensor(1.)), ('param2', tensor(2.)), ('buffer', tensor(3.))])


## 1. containers include Module, Sequential, ModuleList, ModuleDict, ParameterList, ParameterDict

Among them, nn.Module is the base class, and the other five containers are meant to be used inside an nn.Module subclass.


These containers can be used for adding module components.


**It is important to notice that nn supports nesting. Once a class derives from nn.Module, any nn.Linear or other nn.Module assigned as an attribute inside it is automatically registered as part of the whole nn.Module.**

In [4]:
class MyModule(nn.Module):
    """Toy module that keeps ten 10x10 weight matrices in an nn.ParameterList."""

    def __init__(self):
        super().__init__()
        weights = [nn.Parameter(torch.randn(10, 10)) for _ in range(10)]
        self.params = nn.ParameterList(weights)

    def forward(self, x):
        # A ParameterList supports both iteration and integer indexing.
        for idx, weight in enumerate(self.params):
            partner = self.params[idx // 2]
            x = partner.mm(x) + weight.mm(x)
        return x


model = MyModule()
# Every entry of the ParameterList is a registered parameter of the module.
print(*model.parameters(), sep="\n")

Parameter containing:
tensor([[ 3.4073e-01, -6.1802e-01,  2.0644e+00,  9.2256e-01, -1.3488e+00,
          4.9499e-01,  5.7657e-01, -9.7584e-03, -1.2473e+00,  7.5066e-01],
        [-2.0932e+00, -6.6143e-01,  5.4935e-01,  3.0891e-01, -9.4306e-01,
         -2.2446e-01,  1.0764e+00,  2.2019e+00,  6.3417e-01,  5.4304e-01],
        [ 5.3346e-02, -4.5061e-01, -1.0908e-01, -1.5221e+00,  5.8349e-01,
         -6.2337e-01,  7.8563e-01, -1.4640e+00,  6.3888e-01,  4.1768e-01],
        [ 1.8526e-01, -1.3341e+00, -1.8586e-01, -2.9592e-01, -7.3462e-04,
         -1.5999e+00,  4.0736e-01, -6.1220e-01,  1.2491e+00,  1.1794e+00],
        [ 1.1951e+00, -9.0779e-02, -1.1695e-03, -1.4847e+00, -1.8719e+00,
          2.3298e-01, -3.7501e-02,  5.4963e-02, -2.4846e-01,  1.2055e+00],
        [ 7.6458e-01,  1.1144e-01,  1.4895e+00,  1.9522e+00,  4.3273e-01,
         -1.0283e+00,  8.5294e-01, -3.0160e-01, -1.4007e+00, -5.2178e-01],
        [ 1.2018e+00,  3.4727e-01,  1.9874e+00, -1.4930e+00,  1.2953e+00,
         -

In [5]:
class MyModule(nn.Module):
    """Toy module that selects one of two 5x10 weight matrices by name."""

    def __init__(self):
        super().__init__()
        named_weights = {
            'left': nn.Parameter(torch.randn(5, 10)),
            'right': nn.Parameter(torch.randn(5, 10)),
        }
        self.params = nn.ParameterDict(named_weights)

    def forward(self, x, choice):
        # torch.mm(a, b) is the matrix product a @ b;
        # torch.mul(a, b) is the element-wise product a * b.
        selected = self.params[choice]
        return torch.mm(selected, x)


model = MyModule()
model(torch.ones((10, 10)), 'left')

tensor([[-2.3720, -2.3720, -2.3720, -2.3720, -2.3720, -2.3720, -2.3720, -2.3720,
         -2.3720, -2.3720],
        [-1.2823, -1.2823, -1.2823, -1.2823, -1.2823, -1.2823, -1.2823, -1.2823,
         -1.2823, -1.2823],
        [-3.2999, -3.2999, -3.2999, -3.2999, -3.2999, -3.2999, -3.2999, -3.2999,
         -3.2999, -3.2999],
        [-3.7744, -3.7744, -3.7744, -3.7744, -3.7744, -3.7744, -3.7744, -3.7744,
         -3.7744, -3.7744],
        [ 3.8576,  3.8576,  3.8576,  3.8576,  3.8576,  3.8576,  3.8576,  3.8576,
          3.8576,  3.8576]], grad_fn=<MmBackward>)

In [6]:
class MyModule(nn.Module):
    """Toy module that picks a layer and an activation by name from two ModuleDicts."""

    def __init__(self):
        super().__init__()
        # A ModuleDict can be built from a mapping ...
        layer_options = {
            'conv': nn.Conv2d(10, 10, 3),
            'pool': nn.MaxPool2d(3),
        }
        self.choices = nn.ModuleDict(layer_options)
        # ... or from an iterable of [key, module] pairs.
        activation_pairs = [
            ['lrelu', nn.LeakyReLU()],
            ['prelu', nn.PReLU()],
        ]
        self.activations = nn.ModuleDict(activation_pairs)

    def forward(self, x, choice, act):
        # Run the chosen layer first, then the chosen activation.
        hidden = self.choices[choice](x)
        return self.activations[act](hidden)


model = MyModule()
model(torch.ones((10, 10, 3, 3)), 'conv', 'prelu')

tensor([[[[ 0.1652]],

         [[ 0.4522]],

         [[-0.0740]],

         [[ 0.4692]],

         [[ 0.2617]],

         [[-0.0157]],

         [[ 0.0117]],

         [[ 0.1744]],

         [[ 0.2167]],

         [[ 0.6162]]],


        [[[ 0.1652]],

         [[ 0.4522]],

         [[-0.0740]],

         [[ 0.4692]],

         [[ 0.2617]],

         [[-0.0157]],

         [[ 0.0117]],

         [[ 0.1744]],

         [[ 0.2167]],

         [[ 0.6162]]],


        [[[ 0.1652]],

         [[ 0.4522]],

         [[-0.0740]],

         [[ 0.4692]],

         [[ 0.2617]],

         [[-0.0157]],

         [[ 0.0117]],

         [[ 0.1744]],

         [[ 0.2167]],

         [[ 0.6162]]],


        [[[ 0.1652]],

         [[ 0.4522]],

         [[-0.0740]],

         [[ 0.4692]],

         [[ 0.2617]],

         [[-0.0157]],

         [[ 0.0117]],

         [[ 0.1744]],

         [[ 0.2167]],

         [[ 0.6162]]],


        [[[ 0.1652]],

         [[ 0.4522]],

         [[-0.0740]],

   

## 2. Difference between nn.Sequential and nn.ModuleList
Both of them are container classes in torch.nn (and both are subclasses of nn.Module).

The Sequential class stores an ordered sequence of modules and runs them one after another.

In [7]:
class seq_net(nn.Module):
    """Two conv + ReLU stages wrapped in a single nn.Sequential."""

    def __init__(self):
        super().__init__()
        stages = [
            nn.Conv2d(1, 20, 5),
            nn.ReLU(),
            nn.Conv2d(20, 64, 5),
            nn.ReLU(),
        ]
        self.seq = nn.Sequential(*stages)

    def forward(self, x):
        # Sequential provides its own forward(), chaining the stages in order.
        return self.seq(x)


model = seq_net()
print(model)

seq_net(
  (seq): Sequential(
    (0): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
    (1): ReLU()
    (2): Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))
    (3): ReLU()
  )
)


A ModuleList can be used like a regular Python list (its elements can be indexed and iterated like list elements), but the modules it holds are automatically registered with the enclosing network, so their parameters automatically become parameters of the whole nn.Module model.

In [8]:
class modlist_net(nn.Module):
    """Two conv + ReLU stages stored in an nn.ModuleList and applied by hand."""

    def __init__(self):
        super().__init__()
        stages = [
            nn.Conv2d(1, 20, 5),
            nn.ReLU(),
            nn.Conv2d(20, 64, 5),
            nn.ReLU(),
        ]
        self.modlist = nn.ModuleList(stages)

    def forward(self, x):
        # ModuleList has no forward() of its own, so each stage is called explicitly.
        out = x
        for stage in self.modlist:
            out = stage(out)
        return out


model = modlist_net()
print(model)

modlist_net(
  (modlist): ModuleList(
    (0): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
    (1): ReLU()
    (2): Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))
    (3): ReLU()
  )
)


Diff 1 : nn.ModuleList has no forward() function, so its modules must be called explicitly, whereas nn.Sequential has a default forward() that chains its modules.

Diff 2 : the modules in nn.Sequential can be named by passing an OrderedDict, but the modules in nn.ModuleList cannot (they are always indexed by position).

In [9]:
from collections import OrderedDict
class seq_net(nn.Module):
    """Two conv + ReLU stages in an nn.Sequential whose entries carry explicit names."""

    def __init__(self):
        super().__init__()
        # Passing an OrderedDict gives each stage a name instead of an index.
        named_stages = OrderedDict()
        named_stages['conv1'] = nn.Conv2d(1, 20, 5)
        named_stages['relu1'] = nn.ReLU()
        named_stages['conv2'] = nn.Conv2d(20, 64, 5)
        named_stages['relu2'] = nn.ReLU()
        self.seq = nn.Sequential(named_stages)

    def forward(self, x):
        return self.seq(x)


model = seq_net()
print(model)

seq_net(
  (seq): Sequential(
    (conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
    (relu1): ReLU()
    (conv2): Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))
    (relu2): ReLU()
  )
)


Diff 3 : nn.ModuleList imposes no execution order; its modules are stored in list order, but forward() may call them in whatever order we like.

Diff 4 : we can use a "for" loop (or a list comprehension) to create duplicate modules in nn.ModuleList.

In [10]:
class modlist_net(nn.Module):
    """Ten identical 10 -> 10 linear layers held in an nn.ModuleList."""

    def __init__(self):
        super().__init__()
        # A comprehension is a convenient way to create repeated layers.
        layers = [nn.Linear(10, 10) for _ in range(10)]
        self.modlist = nn.ModuleList(layers)

    def forward(self, x):
        # Apply the layers one after another, in list order.
        out = x
        for layer in self.modlist:
            out = layer(out)
        return out


model = modlist_net()
print(model)

modlist_net(
  (modlist): ModuleList(
    (0): Linear(in_features=10, out_features=10, bias=True)
    (1): Linear(in_features=10, out_features=10, bias=True)
    (2): Linear(in_features=10, out_features=10, bias=True)
    (3): Linear(in_features=10, out_features=10, bias=True)
    (4): Linear(in_features=10, out_features=10, bias=True)
    (5): Linear(in_features=10, out_features=10, bias=True)
    (6): Linear(in_features=10, out_features=10, bias=True)
    (7): Linear(in_features=10, out_features=10, bias=True)
    (8): Linear(in_features=10, out_features=10, bias=True)
    (9): Linear(in_features=10, out_features=10, bias=True)
  )
)


## 3. Other APIs for nn.Module base class

This section collects other APIs not mentioned above.


train : affects Dropout & BatchNorm layers (puts the module in training mode)


eval  : affects Dropout & BatchNorm layers ---> equivalent to self.train(False)


requires_grad_ : changes whether autograd should record operations on the module's parameters


register_forward_pre_hook : the registered hook is called every time before forward() is invoked


register_forward_hook : the registered hook is called every time after forward() has computed an output


named_parameters / named_buffers / named_modules / named_children 
parameters / buffers / modules / children

add_module

apply

In [15]:
# For a tensor, either the in-place method requires_grad_() or a direct
# assignment to the requires_grad attribute switches gradient tracking.
x = torch.rand((4, 4))
x.requires_grad_(False)
x.requires_grad = False
print(x)

# For an nn.Module, use the method requires_grad_(); it sets requires_grad on
# every parameter of the module. This is how layers are frozen when fine
# tuning: frozen parameters receive no gradient.
#
# ========= QUITE IMPORTANT ============
# Assigning `module.requires_grad = False` does NOT freeze anything: it only
# creates an unrelated Python attribute on the module object. The attribute
# form has to be applied to each parameter instead.

y = nn.Linear(2, 2)
y.requires_grad_(False)
# or, equivalently, parameter by parameter
for frozen_param in y.parameters():
    frozen_param.requires_grad = False
print(y)

class MyModule(nn.Module):
    """Toy module that keeps ten 10x10 weight matrices in an nn.ParameterList."""

    def __init__(self):
        super().__init__()
        # nn.Parameter wraps a tensor so that it requires grad and is
        # registered in the module's ordered parameter collection.
        weights = [nn.Parameter(torch.randn(10, 10)) for _ in range(10)]
        self.params = nn.ParameterList(weights)

    def forward(self, x):
        # A ParameterList supports both iteration and integer indexing.
        for idx, weight in enumerate(self.params):
            partner = self.params[idx // 2]
            x = partner.mm(x) + weight.mm(x)
        return x


model = MyModule()
# The forward pass runs while the parameters still require grad ...
x = model(torch.ones((10, 10)))
# ... and only afterwards is every parameter of the module frozen.
model.requires_grad_(False)
loss = x.sum()
loss.backward()


tensor([[0.3697, 0.0677, 0.0544, 0.7485],
        [0.9978, 0.9166, 0.8169, 0.5677],
        [0.3757, 0.7005, 0.2651, 0.6418],
        [0.9936, 0.0302, 0.4438, 0.9563]])
Linear(in_features=2, out_features=2, bias=True)
