In [1]:
import datetime

In [2]:
import datetime
def printbar():
    """Print a blank line, then an 80-character '=' rule followed by the current local time."""
    rule = "==========" * 8
    timestamp = format(datetime.datetime.now(), '%Y-%m-%d %H:%M:%S')
    print(f"\n{rule}{timestamp}")


printbar()




In [3]:
import torch
from torch import nn
import torch.nn.functional as F
import matplotlib.pyplot as plt

In [4]:
# Wrap a random 2x2 tensor in nn.Parameter; it tracks gradients by default.
w = nn.Parameter(torch.randn(2, 2))
print(w, w.requires_grad, sep='\n')

Parameter containing:
tensor([[-1.5599, -0.4003],
        [ 0.3306,  0.7081]], requires_grad=True)
True


In [5]:
# Narration (English): nn.ParameterList groups several nn.Parameter objects into a list.
print('nn.ParameterList 可以将多个nn.nn.Parameter组成一个列表')
# Two trainable tensors with 8 rows and 1 and 2 columns respectively.
params_list = nn.ParameterList(
    [nn.Parameter(torch.randn(8, n_cols)) for n_cols in (1, 2)]
)
params_list

nn.ParameterList 可以将多个nn.nn.Parameter组成一个列表


ParameterList(
    (0): Parameter containing: [torch.FloatTensor of size 8x1]
    (1): Parameter containing: [torch.FloatTensor of size 8x2]
)

In [6]:
# Check whether the first parameter stored in the list tracks gradients.
params_list[0].requires_grad

True

In [11]:
# Narration (English): nn.ParameterDict groups several nn.Parameter objects into a dict.
print('nn.ParameterDict可以将多个nn.Parameter组成一个字典')
# 'a' is a random 2x2 parameter; 'b' is a zero-initialised vector of length 2.
params_dict = nn.ParameterDict(
    dict(
        a=nn.Parameter(torch.randn(2, 2)),
        b=nn.Parameter(torch.zeros(2)),
    )
)

nn.ParameterDict可以将多个nn.Parameter组成一个字典


In [12]:
# Display the ParameterDict; its repr lists each key with the parameter's type and size.
params_dict

ParameterDict(
    (a): Parameter containing: [torch.FloatTensor of size 2x2]
    (b): Parameter containing: [torch.FloatTensor of size 2]
)

In [13]:
# Check whether the parameter stored under key 'a' tracks gradients.
params_dict['a'].requires_grad

True

In [7]:
# Narration (English): these can be managed together with a Module;
# module.parameters() returns a generator over all parameters in its structure.
# NOTE(review): the printed text spells "Module" as "Moudule"/"moudle"; the
# strings are kept unchanged here.
print(
    '可以用Moudule将它们管理起来',
    'moudle.parameters()返回一个生成器,包括其结构下的所有parameters',
    sep='\n',
)

可以用Moudule将它们管理起来
moudle.parameters()返回一个生成器,包括其结构下的所有parameters


In [8]:
# Create a bare nn.Module to serve as a container; parameters are attached to
# it as attributes in the cells that follow.
model = nn.Module()
model

Module()

In [9]:
# Attach the standalone parameter `w` to the module as an attribute; it then
# shows up when iterating model.parameters().
model.w = w
model.w

Parameter containing:
tensor([[-1.5599, -0.4003],
        [ 0.3306,  0.7081]], requires_grad=True)

In [14]:
# Attach the ParameterList and ParameterDict as attributes; both then appear
# as named entries in the module's repr.
model.params_list = params_list
model.params_dict = params_dict

In [15]:
# Display the ParameterList now reachable through the module.
model.params_list

ParameterList(
    (0): Parameter containing: [torch.FloatTensor of size 8x1]
    (1): Parameter containing: [torch.FloatTensor of size 8x2]
)

In [16]:
# Display the ParameterDict now reachable through the module.
model.params_dict

ParameterDict(
    (a): Parameter containing: [torch.FloatTensor of size 2x2]
    (b): Parameter containing: [torch.FloatTensor of size 2]
)

In [17]:
# NOTE: there are no call parentheses here, so this displays the bound method
# object (whose repr embeds the module's repr) rather than the parameters.
model.parameters

<bound method Module.parameters of Module(
  (params_list): ParameterList(
      (0): Parameter containing: [torch.FloatTensor of size 8x1]
      (1): Parameter containing: [torch.FloatTensor of size 8x2]
  )
  (params_dict): ParameterDict(
      (a): Parameter containing: [torch.FloatTensor of size 2x2]
      (b): Parameter containing: [torch.FloatTensor of size 2]
  )
)>

In [19]:
# Print every parameter tensor registered on the module, then report how many
# tensors there are (this counts tensors, not individual elements).
num_params = 0  # stays 0 if the module holds no parameters
for num_params, param in enumerate(model.parameters(), start=1):
    print(param, '\n')
print('number of Parameters = ', num_params)

Parameter containing:
tensor([[-1.5599, -0.4003],
        [ 0.3306,  0.7081]], requires_grad=True) 

Parameter containing:
tensor([[-1.2270],
        [-2.5816],
        [ 2.3891],
        [ 0.5333],
        [ 1.3588],
        [-0.1677],
        [-0.8788],
        [-0.3797]], requires_grad=True) 

Parameter containing:
tensor([[ 0.0115, -1.2934],
        [ 0.6264,  0.5675],
        [-1.1727, -0.4857],
        [-0.7341,  0.1562],
        [ 0.6782, -0.9346],
        [ 0.1886,  1.8006],
        [ 0.8892,  1.8200],
        [-0.5503,  0.8568]], requires_grad=True) 

Parameter containing:
tensor([[-1.8071, -0.3070],
        [-0.4366, -0.1803]], requires_grad=True) 

Parameter containing:
tensor([0., 0.], requires_grad=True) 

number of Parameters =  5


### 2. 使用nn.Module来管理子模块

In [25]:
class Net(nn.Module):
    """Small 1-D convolutional binary classifier over sequences of token ids.

    Pipeline: embedding -> two Conv1d/MaxPool1d/ReLU stages -> flatten ->
    linear -> sigmoid. The linear layer expects 6144 input features, which
    is 128 channels x 48 positions, the conv output for length-200 inputs.
    """

    def __init__(self):
        super().__init__()

        self.embedding = nn.Embedding(num_embeddings=10000, embedding_dim=3, padding_idx=1)

        # Layers are built in the same order in which they are registered.
        conv_layers = (
            ('conv_1', nn.Conv1d(in_channels=3, out_channels=16, kernel_size=5)),
            ('pool_1', nn.MaxPool1d(kernel_size=2)),
            ('relu_1', nn.ReLU()),
            ('conv_2', nn.Conv1d(in_channels=16, out_channels=128, kernel_size=2)),
            ('pool_2', nn.MaxPool1d(kernel_size=2)),
            ('relu_2', nn.ReLU()),
        )
        self.conv = nn.Sequential()
        for layer_name, layer in conv_layers:
            self.conv.add_module(layer_name, layer)

        dense_layers = (
            ('flatten', nn.Flatten()),
            ('linear', nn.Linear(6144, 1)),
            ('sigmoid', nn.Sigmoid()),
        )
        self.dense = nn.Sequential()
        for layer_name, layer in dense_layers:
            self.dense.add_module(layer_name, layer)

    def forward(self, x):
        """Map a batch of token-id sequences to one sigmoid output per sequence."""
        # Embedding yields (batch, length, channels); Conv1d wants channels on dim 1.
        embedded = self.embedding(x).transpose(1, 2)
        features = self.conv(embedded)
        return self.dense(features)


net = Net()

In [26]:
# Display the model's repr: the nested tree of named submodules.
net

Net(
  (embedding): Embedding(10000, 3, padding_idx=1)
  (conv): Sequential(
    (conv_1): Conv1d(3, 16, kernel_size=(5,), stride=(1,))
    (pool_1): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (relu_1): ReLU()
    (conv_2): Conv1d(16, 128, kernel_size=(2,), stride=(1,))
    (pool_2): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (relu_2): ReLU()
  )
  (dense): Sequential(
    (flatten): Flatten(start_dim=1, end_dim=-1)
    (linear): Linear(in_features=6144, out_features=1, bias=True)
    (sigmoid): Sigmoid()
  )
)

In [27]:
# Print each direct child of the network and count them.
i = 0  # remains 0 when the network has no children
for i, child in enumerate(net.children(), start=1):
    print(child, '\n')
print('child number:', i)

Embedding(10000, 3, padding_idx=1) 

Sequential(
  (conv_1): Conv1d(3, 16, kernel_size=(5,), stride=(1,))
  (pool_1): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (relu_1): ReLU()
  (conv_2): Conv1d(16, 128, kernel_size=(2,), stride=(1,))
  (pool_2): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (relu_2): ReLU()
) 

Sequential(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear): Linear(in_features=6144, out_features=1, bias=True)
  (sigmoid): Sigmoid()
) 

child number: 3


In [28]:
# Same walk over direct children, this time printing each child's attribute name too.
i = 0  # remains 0 when the network has no children
for i, (name, child) in enumerate(net.named_children(), start=1):
    print(name, ":", child, '\n')
print('child number:', i)

embedding : Embedding(10000, 3, padding_idx=1) 

conv : Sequential(
  (conv_1): Conv1d(3, 16, kernel_size=(5,), stride=(1,))
  (pool_1): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (relu_1): ReLU()
  (conv_2): Conv1d(16, 128, kernel_size=(2,), stride=(1,))
  (pool_2): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (relu_2): ReLU()
) 

dense : Sequential(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear): Linear(in_features=6144, out_features=1, bias=True)
  (sigmoid): Sigmoid()
) 

child number: 3


In [29]:
# net.modules() yields the network itself first and then its nested
# submodules; print each one and count them.
i = 0  # remains 0 only if nothing is yielded
for i, module in enumerate(net.modules(), start=1):
    print(module)
print('module number:', i)

Net(
  (embedding): Embedding(10000, 3, padding_idx=1)
  (conv): Sequential(
    (conv_1): Conv1d(3, 16, kernel_size=(5,), stride=(1,))
    (pool_1): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (relu_1): ReLU()
    (conv_2): Conv1d(16, 128, kernel_size=(2,), stride=(1,))
    (pool_2): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (relu_2): ReLU()
  )
  (dense): Sequential(
    (flatten): Flatten(start_dim=1, end_dim=-1)
    (linear): Linear(in_features=6144, out_features=1, bias=True)
    (sigmoid): Sigmoid()
  )
)
Embedding(10000, 3, padding_idx=1)
Sequential(
  (conv_1): Conv1d(3, 16, kernel_size=(5,), stride=(1,))
  (pool_1): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (relu_1): ReLU()
  (conv_2): Conv1d(16, 128, kernel_size=(2,), stride=(1,))
  (pool_2): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (relu_2): ReLU()
)
Conv1d(3, 16, kernel_size=(5,), st

In [30]:
# Narration (English): next, use named_children to locate the embedding layer
# and mark its parameters as non-trainable, i.e. freeze the Embedding layer.
print('下面我们通过named_children方法来找到embedding层,并将其参数设置为不可训练(相当于冻结Embedding层)')

下面我们通过named_children方法来找到embedding层,并将其参数设置为不可训练(相当于冻结Embedding层)


In [31]:
# Map each direct child's attribute name to the child module itself.
children_dict = dict(net.named_children())
children_dict

{'embedding': Embedding(10000, 3, padding_idx=1),
 'conv': Sequential(
   (conv_1): Conv1d(3, 16, kernel_size=(5,), stride=(1,))
   (pool_1): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
   (relu_1): ReLU()
   (conv_2): Conv1d(16, 128, kernel_size=(2,), stride=(1,))
   (pool_2): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
   (relu_2): ReLU()
 ),
 'dense': Sequential(
   (flatten): Flatten(start_dim=1, end_dim=-1)
   (linear): Linear(in_features=6144, out_features=1, bias=True)
   (sigmoid): Sigmoid()
 )}

In [32]:
# Look up the embedding layer by its child name. This is the module object
# held by `net`, so changes made through it affect the network.
embedding = children_dict['embedding']
embedding

Embedding(10000, 3, padding_idx=1)

In [33]:
# Freeze the layer: requires_grad_(False) turns off gradient tracking for its
# parameters and returns the module, which is why its repr is echoed.
print('冻结其参数')
embedding.requires_grad_(False)

冻结其参数


Embedding(10000, 3, padding_idx=1)

In [34]:
# Confirm that the embedding layer's parameters are no longer trainable, and
# show how many elements each parameter tensor holds.
for param in embedding.parameters():
    print(param.requires_grad, param.numel(), sep='\n')

False
30000


In [35]:
from torchkeras import summary

In [36]:
# Summarize the network for a length-200 input of integer (LongTensor) token ids;
# with the embedding frozen, its parameters are reported as non-trainable.
summary(net, input_shape = (200, ), input_dtype = torch.LongTensor)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
         Embedding-1               [-1, 200, 3]          30,000
            Conv1d-2              [-1, 16, 196]             256
         MaxPool1d-3               [-1, 16, 98]               0
              ReLU-4               [-1, 16, 98]               0
            Conv1d-5              [-1, 128, 97]           4,224
         MaxPool1d-6              [-1, 128, 48]               0
              ReLU-7              [-1, 128, 48]               0
           Flatten-8                 [-1, 6144]               0
            Linear-9                    [-1, 1]           6,145
          Sigmoid-10                    [-1, 1]               0
Total params: 40,625
Trainable params: 10,625
Non-trainable params: 30,000
----------------------------------------------------------------
Input size (MB): 0.000763
Forward/backward pass size (MB): 0.287796
Params size (MB): 0.154

In [37]:
embedding.requires_grad_(True) # this unfreezes the embedding layer again

Embedding(10000, 3, padding_idx=1)

In [38]:
# Re-run the same summary to show the trainable/non-trainable split after unfreezing.
summary(net, input_shape = (200, ), input_dtype = torch.LongTensor)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
         Embedding-1               [-1, 200, 3]          30,000
            Conv1d-2              [-1, 16, 196]             256
         MaxPool1d-3               [-1, 16, 98]               0
              ReLU-4               [-1, 16, 98]               0
            Conv1d-5              [-1, 128, 97]           4,224
         MaxPool1d-6              [-1, 128, 48]               0
              ReLU-7              [-1, 128, 48]               0
           Flatten-8                 [-1, 6144]               0
            Linear-9                    [-1, 1]           6,145
          Sigmoid-10                    [-1, 1]               0
Total params: 40,625
Trainable params: 40,625
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.000763
Forward/backward pass size (MB): 0.287796
Params size (MB): 0.154972
E