In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
%matplotlib inline

## 1. 参数类型Parameter

In [2]:
## Parameter是Tensor的子类
## 默认添加了 requires_grad 为 True
p=nn.Parameter(torch.randn(3))
p

Parameter containing:
tensor([-0.3616,  0.6998,  0.1822], requires_grad=True)

In [3]:
p.is_leaf

True

In [4]:
p.requires_grad

True

In [5]:
p.data

tensor([-0.3616,  0.6998,  0.1822])

In [8]:
p.grad is None

True

In [9]:
p.grad_fn is None

True

In [12]:
z=(p**2).sum()
z.backward()
p.grad

tensor([-0.7231,  1.3996,  0.3643])

## 2.Parameter容器（一）：ParameterList

In [16]:
##初始化或者为None,或者为一个可推断的迭代器
pl=nn.ParameterList([nn.Parameter(torch.randn(i)) for i in range(1,3)])
pl

ParameterList(
    (0): Parameter containing: [torch.FloatTensor of size 1]
    (1): Parameter containing: [torch.FloatTensor of size 2]
)

In [17]:
pl[0]

Parameter containing:
tensor([0.3579], requires_grad=True)

In [18]:
pl[1]

Parameter containing:
tensor([-1.7983,  0.8289], requires_grad=True)

In [19]:
##添加与扩展
pl.append(nn.Parameter(torch.randn(3)))
pl.extend([nn.Parameter(torch.randn(i)) for i in range(4,6)])
pl

ParameterList(
    (0): Parameter containing: [torch.FloatTensor of size 1]
    (1): Parameter containing: [torch.FloatTensor of size 2]
    (2): Parameter containing: [torch.FloatTensor of size 3]
    (3): Parameter containing: [torch.FloatTensor of size 4]
    (4): Parameter containing: [torch.FloatTensor of size 5]
)

## 3.Parameter容器（二）：ParameterDict

In [25]:
##初始化或者为None，或者为可迭代的推断
pd=nn.ParameterDict({a:nn.Parameter(torch.randn(i)) for i,a in enumerate('abc')})
pd

ParameterDict(
    (a): Parameter containing: [torch.FloatTensor of size 0]
    (b): Parameter containing: [torch.FloatTensor of size 1]
    (c): Parameter containing: [torch.FloatTensor of size 2]
)

In [29]:
##添加
pd['d']=nn.Parameter(torch.randn(ord('d')-ord('a')))

In [30]:
pd

ParameterDict(
    (a): Parameter containing: [torch.FloatTensor of size 0]
    (b): Parameter containing: [torch.FloatTensor of size 1]
    (c): Parameter containing: [torch.FloatTensor of size 2]
    (d): Parameter containing: [torch.FloatTensor of size 3]
)

In [31]:
pd.keys()

odict_keys(['a', 'b', 'c', 'd'])

In [32]:
pd.values()

odict_values([Parameter containing:
tensor([], requires_grad=True), Parameter containing:
tensor([-0.3087], requires_grad=True), Parameter containing:
tensor([-0.1636, -1.9639], requires_grad=True), Parameter containing:
tensor([-0.0884, -2.0917, -0.7969], requires_grad=True)])

In [34]:
##更新
pd.update({a:nn.Parameter(torch.randn(i)) for i,a in enumerate('efg')})

In [35]:
pd

ParameterDict(
    (a): Parameter containing: [torch.FloatTensor of size 0]
    (b): Parameter containing: [torch.FloatTensor of size 1]
    (c): Parameter containing: [torch.FloatTensor of size 2]
    (d): Parameter containing: [torch.FloatTensor of size 3]
    (e): Parameter containing: [torch.FloatTensor of size 0]
    (f): Parameter containing: [torch.FloatTensor of size 1]
    (g): Parameter containing: [torch.FloatTensor of size 2]
)

## 4.Module

所有神经网络的模块和层的基类，可以通过它嵌套构成树状结构。

#### 重要成员：
-  _parameters:OrderedDict类型，value类型是Parameter. 登记在本模块的参数.
-  _buffers:OrderedDict类型，value类型是torch.Tensor. 登记在本模块供持久化的缓存.
-  _modules:OrderedDict类型，value类型是Module. 添加在本模块的子模块.
-  _backward_hooks
-  _forward_pre_hooks
-  _forward_hooks

### 重要函数：
- forward:任何继承类都需要重写的函数
- register_parameter
- register_buffer
- add_module
- register_backward_hook
- register_forward_hook
- register_forward_pre_hook
- parameters/named_parameters：递归yield本模块和所有子模块的所有参数
- children/named_children：yield本模块的所有子模块
- modules/named_modules：递归yield本模块和所有子模块

In [52]:
##4.1 1个卷积层+1个最大池化+2个ReLU激活的神经网络
class Net(nn.Module):
    """Small demo network: one 3x3 convolution, one adaptive max pooling, ReLU after each.

    The submodules are registered under the names ``conv1`` and ``pool2``.
    """

    def __init__(self):
        super().__init__()
        # 3 input channels -> 16 output channels; a 3x3 kernel with padding 1
        # leaves the spatial size of the feature map unchanged.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1)
        # Adaptive pooling with a target output size of 8 (8x8 feature maps).
        self.pool2 = nn.AdaptiveMaxPool2d(output_size=8)

    def forward(self, x):
        """Return ``relu(pool2(relu(conv1(x))))``."""
        activated = F.relu(self.conv1(x))
        pooled = self.pool2(activated)
        return F.relu(pooled)

In [40]:
input=torch.randn(20*3*64*64).reshape(20,3,64,64)

In [53]:
n=Net()
out=n(input)
out.shape

torch.Size([20, 16, 8, 8])

In [55]:
##4.2 apply(fn)
##对模块自身及其所有子模块（递归）施加fn的作用，一般用以初始化
def init(m):
    """Fill the weight and bias of a ``Conv2d`` module with the constant 0.1.

    Meant to be passed to ``Module.apply``; modules of any other type are
    left untouched.

    Args:
        m: The module handed over by ``apply``.
    """
    if type(m) is not nn.Conv2d:
        return
    # nn.init.constant_ fills the parameter in place without going through `.data`.
    nn.init.constant_(m.weight, 0.1)
    # A layer built with bias=False has `m.bias` set to None; skip it rather
    # than fail with AttributeError.
    if m.bias is not None:
        nn.init.constant_(m.bias, 0.1)

n2=Net()
n2.apply(init)
n2._modules['conv1'].bias

Parameter containing:
tensor([0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000,
        0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000],
       requires_grad=True)

In [56]:
##4.3 children
##children只迭代直接的子模块
for m in n2.children():
    print(m)

Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
AdaptiveMaxPool2d(output_size=8)


In [57]:
##4.4 named_children
# The loop variable is `name` rather than `n`, so it does not overwrite the
# Net instance that an earlier cell bound to `n`.
for name,m in n2.named_children():
    print(name,'-->',m)

conv1 --> Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
pool2 --> AdaptiveMaxPool2d(output_size=8)


In [59]:
##4.5 modules
##递归所有的模块，包括子模块
for m in n2.modules():
    print(m)

Net(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool2): AdaptiveMaxPool2d(output_size=8)
)
Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
AdaptiveMaxPool2d(output_size=8)


In [62]:
##4.6 named_modules
for name,m in n2.named_modules():
    print(name,'-->',m)

 --> Net(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool2): AdaptiveMaxPool2d(output_size=8)
)
conv1 --> Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
pool2 --> AdaptiveMaxPool2d(output_size=8)


In [65]:
##4.7 parameters
# The loop variable is `param` rather than `p`, so the Parameter bound to `p`
# in section 1 is not overwritten.
for param in n2.parameters():
    print(param.shape)

torch.Size([16, 3, 3, 3])
torch.Size([16])


In [68]:
##4.8 named_parameters
# `name`/`param` instead of `n`/`p`: those short names are already bound to
# the Net instance and the section-1 Parameter in earlier cells.
for name,param in n2.named_parameters():
    print(name,'-->',param.data.shape)

conv1.weight --> torch.Size([16, 3, 3, 3])
conv1.bias --> torch.Size([16])


In [69]:
##登记一个参数
n2.register_parameter('param1',nn.Parameter(torch.randn(3)))

In [70]:
# List the parameters again; `name`/`param` are used as loop variables so the
# notebook-level names `n` and `p` from earlier cells are not overwritten.
for name,param in n2.named_parameters():
    print(name,'-->',param.data.shape)

param1 --> torch.Size([3])
conv1.weight --> torch.Size([16, 3, 3, 3])
conv1.bias --> torch.Size([16])


In [72]:
n2._parameters['param1'].data

tensor([-0.0036, -1.6693, -0.6486])

In [74]:
##4.9 register_backward_hook
def b_hook(m,grad_in,grad_out):
    """Backward hook that only reports that it ran; its arguments are not used."""
    message='backward hook called'
    print(message)

n2.register_backward_hook(b_hook)
out=n2(input)
type(out)

torch.Tensor

In [75]:
z=out.sum()
z.backward()

backward hook called


In [76]:
##4.10 登记一个buffer
n2.register_buffer('buf1',torch.randn(3))

In [78]:
##4.11 register_forward_hook
def f_hook(m,input,out):
    """Forward hook that only reports that it ran; its arguments are not used."""
    message='forward hook called'
    print(message)
    
n2.register_forward_hook(f_hook)    
out2=n2(input)

forward hook called


In [79]:
##4.12 register_forward_pre_hook
def pf_hook(m,input):
    """Forward pre-hook that only reports that it ran; its arguments are not used."""
    message='pre forward_hook called'
    print(message)

n2.register_forward_pre_hook(pf_hook)
out2=n2(input)

pre forward_hook called
forward hook called


In [82]:
##4.13 state_dict
##以字典形式返回模块内所有的参数和buf
##包括所有子模块的参数和buf
n2._modules['conv1'].register_parameter('param2',nn.Parameter(torch.randn(2)))
d=n2.state_dict()
d.keys()

odict_keys(['param1', 'buf1', 'conv1.weight', 'conv1.bias', 'conv1.param2'])

In [84]:
##4.14 转移或转型方法：to
linear=nn.Linear(2,2)
linear.weight

Parameter containing:
tensor([[ 0.2380,  0.2048],
        [-0.3255,  0.1231]], requires_grad=True)

In [85]:
##参数的类型转换
linear.to(torch.double)
linear.weight

Parameter containing:
tensor([[ 0.2380,  0.2048],
        [-0.3255,  0.1231]], dtype=torch.float64, requires_grad=True)

In [87]:
# Use the first GPU when CUDA is available; otherwise fall back to the CPU so
# this cell still runs on a machine without a GPU.
gpu1=torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
linear.to(gpu1,dtype=torch.half,non_blocking=True)

Linear(in_features=2, out_features=2, bias=True)

In [88]:
linear.weight

Parameter containing:
tensor([[ 0.2380,  0.2047],
        [-0.3254,  0.1231]],
       device='cuda:0', dtype=torch.float16, requires_grad=True)

In [89]:
cpu=torch.device('cpu')
linear.to(cpu)

Linear(in_features=2, out_features=2, bias=True)

In [90]:
linear.weight

Parameter containing:
tensor([[ 0.2380,  0.2047],
        [-0.3254,  0.1231]], dtype=torch.float16, requires_grad=True)

In [98]:
## 4.15 zero_grad
##把所有参数的梯度清零（grad.detach_()+grad.zero_()）
##如果没有梯度，即为None，则不进行处理
n2._parameters['param1'].grad is None

True

In [99]:
n2._modules['conv1']._parameters['param2'].grad is None

True

In [93]:
n2.zero_grad()
print(n2._parameters['param1'],'\n',n2._modules['conv1']._parameters['param2'])

Parameter containing:
tensor([-0.0036, -1.6693, -0.6486], requires_grad=True) 
 Parameter containing:
tensor([-0.9009, -1.4198], requires_grad=True)


## 5.Sequential容器

顺序容器，按传入module的顺序，构建网络，此时默认以序号（从0开始）作为module的名称，内部按add_module进行添加.

也可以用OrderedDict作为参数传入.内容同样用add_module添加.

In [101]:
s=nn.Sequential(
    nn.Conv2d(3,16,3,padding=1),
    nn.ReLU(),
    nn.Conv2d(16,16,3,padding=1),
    nn.ReLU()
)
s.apply(init)
s._modules['0'].bias

Parameter containing:
tensor([0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000,
        0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000],
       requires_grad=True)

In [103]:
for name,m in s.named_modules():
    print(name,'-->',m)

 --> Sequential(
  (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU()
  (2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU()
)
0 --> Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1 --> ReLU()
2 --> Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
3 --> ReLU()


In [105]:
##使用OrderedDict构建
from collections import OrderedDict

In [106]:
# Build the OrderedDict from (name, module) pairs. Going through a dict
# literal only keeps this order on interpreters whose plain dicts preserve
# insertion order.
s2=nn.Sequential(OrderedDict([
    ('conv1',nn.Conv2d(3,16,3,padding=1)),
    ('relu1',nn.ReLU()),
    ('conv2',nn.Conv2d(16,16,3,padding=1)),
    ('relu2',nn.ReLU()),
]))

for name,m in s2.named_modules():
    print(name,'-->',m)

 --> Sequential(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu1): ReLU()
  (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu2): ReLU()
)
conv1 --> Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
relu1 --> ReLU()
conv2 --> Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
relu2 --> ReLU()


## 6.ModuleList容器

以列表的方式维持子模块.

- 初始化：None或者可迭代的推断.
- 方法：
    - append
    - extend    

In [109]:
class Net2(nn.Module):
    """Chain of ``nn.Linear`` layers held in an ``nn.ModuleList``.

    One layer ``Linear(i, i + 1)`` is created for every ``i`` in
    ``range(2, n)``, so the stack takes 2 input features and, when ``n > 2``,
    produces ``n`` output features.

    Args:
        n: Exclusive upper bound of the layer input widths. No layers are
            created when ``n <= 2``; ``forward`` then returns its input as is.
    """

    def __init__(self,n):
        super(Net2,self).__init__()
        self.linears=nn.ModuleList([nn.Linear(i,i+1) for i in range(2,n)])

    def forward(self,x):
        """Pass ``x`` through every layer in order and return the result."""
        # A ModuleList is iterable, so no index bookkeeping is needed.
        for layer in self.linears:
            x=layer(x)

        return x

In [110]:
net=Net2(4)
input=torch.randn(2,2)
out=net(input)
out

tensor([[ 0.5733, -0.7317, -0.4228, -1.1397],
        [ 0.6553, -0.7369, -0.3785, -1.3431]], grad_fn=<ThAddmmBackward>)

In [113]:
type(out)

torch.Tensor

In [114]:
for name,m in net.named_modules():
    print(name,'-->',m)

 --> Net2(
  (linears): ModuleList(
    (0): Linear(in_features=2, out_features=3, bias=True)
    (1): Linear(in_features=3, out_features=4, bias=True)
  )
)
linears --> ModuleList(
  (0): Linear(in_features=2, out_features=3, bias=True)
  (1): Linear(in_features=3, out_features=4, bias=True)
)
linears.0 --> Linear(in_features=2, out_features=3, bias=True)
linears.1 --> Linear(in_features=3, out_features=4, bias=True)


In [116]:
for name,p in net.named_parameters():
    print(name,'-->',p)

linears.0.weight --> Parameter containing:
tensor([[-0.2193, -0.2656],
        [-0.0845,  0.2926],
        [ 0.5343, -0.5668]], requires_grad=True)
linears.0.bias --> Parameter containing:
tensor([ 0.5299, -0.4193, -0.4374], requires_grad=True)
linears.1.weight --> Parameter containing:
tensor([[-0.0166,  0.1842, -0.0968],
        [ 0.0312,  0.5146,  0.1864],
        [-0.5233, -0.0546, -0.0803],
        [-0.5487,  0.1799,  0.4886]], requires_grad=True)
linears.1.bias --> Parameter containing:
tensor([ 0.4810, -0.3749, -0.1120,  0.0878], requires_grad=True)


## 7.ModuleDict容器类

以字典的方式维持子模块.

- 初始化：以字典或者可迭代推断进行.
- 方法：
    - 字典添加
    - pop/clear
    - items/keys/values
    - update

In [132]:
class Net3(nn.Module):
    """Demo network whose layers and activations live in ``nn.ModuleDict`` containers.

    ``forward`` computes ``sigmoid(pool1(relu(conv1(x))))``, looking the
    layers up in ``self.layers`` by the keys ``'conv1'`` and ``'pool1'``.

    Args:
        mapping: Layers to register under ``self.layers``. Anything
            ``nn.ModuleDict`` accepts is allowed (a mapping or an iterable of
            ``(name, module)`` pairs). ``None`` registers no layers.

    Raises:
        KeyError: From ``forward`` when ``'conv1'`` or ``'pool1'`` is not
            present in ``self.layers``.
    """

    # Keys that forward() reads from self.layers.
    _REQUIRED_LAYERS=('conv1','pool1')

    def __init__(self,mapping=None):
        super(Net3,self).__init__()
        # Always create the container, so `self.layers` exists even when no
        # mapping is given and non-dict inputs are no longer silently dropped.
        self.layers=nn.ModuleDict(mapping)

        # NOTE(review): in the run recorded in this notebook, ModuleDict listed
        # the keys of this plain dict in sorted order (asigmoid before relu).
        # Do not rely on the insertion order here; the behaviour may depend on
        # the PyTorch version - TODO confirm.
        self.act=nn.ModuleDict({
            'relu':nn.ReLU(),
            'asigmoid':nn.Sigmoid()
        })

    def forward(self,x):
        """Apply conv1 -> ReLU -> pool1 -> Sigmoid to ``x``."""
        # Fail early with a message naming the missing layers.
        missing=[key for key in self._REQUIRED_LAYERS if key not in self.layers]
        if missing:
            raise KeyError('Net3.layers is missing required module(s): '+', '.join(missing))

        x=self.layers['conv1'](x)
        x=self.act['relu'](x)
        x=self.layers['pool1'](x)

        return self.act['asigmoid'](x)

In [133]:
layers={'conv1':nn.Conv2d(3,16,3,padding=1),'pool1':nn.AdaptiveMaxPool2d(8)}
net=Net3(layers)
input=torch.randn(4,3,12,12)
out=net(input)
out.shape

torch.Size([4, 16, 8, 8])

In [128]:
for name,p in net.named_parameters():
    print(name,'-->',p.data.shape)

layers.conv1.weight --> torch.Size([16, 3, 3, 3])
layers.conv1.bias --> torch.Size([16])


In [134]:
for name,m in net.named_modules():
    print(name,'-->',m)

 --> Net3(
  (layers): ModuleDict(
    (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (pool1): AdaptiveMaxPool2d(output_size=8)
  )
  (act): ModuleDict(
    (asigmoid): Sigmoid()
    (relu): ReLU()
  )
)
layers --> ModuleDict(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool1): AdaptiveMaxPool2d(output_size=8)
)
layers.conv1 --> Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
layers.pool1 --> AdaptiveMaxPool2d(output_size=8)
act --> ModuleDict(
  (asigmoid): Sigmoid()
  (relu): ReLU()
)
act.asigmoid --> Sigmoid()
act.relu --> ReLU()
