# Pytorch中的神经网络基本单元

In [106]:
import torch
from torch import nn

# nn.Module

nn.Module是神经网络结构的表示，它可以表示一个层，也可以表示一个结构块，也可以表示一个完整的模型结构。

## 自定义一个layer

In [107]:
class ReluLayer(nn.Module):
    def __init__(self):
        super(ReluLayer, self).__init__()
    def forward(self, x):
        return (x > 0) * x
relu = ReluLayer()
print(relu)

ReluLayer()


In [108]:
a = torch.randn(2,3)
relu(a)

tensor([[0.2632, 0.6988, -0.0000],
        [0.4950, 0.9098, -0.0000]])

In [109]:
# 带参数的Layer
class MyFCLayer(nn.Module):
    def __init__(self, in_dim, out_dim):
        super().__init__()
        self.weights = nn.Parameter(torch.randn(in_dim, out_dim))
        self.bias = nn.Parameter(torch.randn(out_dim))
    def forward(self, x):
        return x.matmul(self.weights.data) + self.bias.data
fclayer = MyFCLayer(25, 10)
print(fclayer)

MyFCLayer()


In [101]:
a = torch.randn(1, 25)
fclayer(a)

tensor([[ 6.2853, -3.7129,  2.6538, -0.9810,  1.2909, -5.7321,  4.7858, -0.5158,
         -8.0954,  4.7340]])

## 自定义一个Block

In [102]:
class LinearReluStack(nn.Module):
    def __init__(self):
        super(LinearReluStack, self).__init__()
        self.stack = nn.Sequential(
            nn.Linear(28 * 28, 100),
            ReluLayer(),
            nn.Linear(100, 100),
            ReluLayer(),
            MyFCLayer(100, 10)
        )
    def forward(self, x):
        return self.stack(x)
linear_relu_stack = LinearReluStack()
print(linear_relu_stack)

LinearReluStack(
  (stack): Sequential(
    (0): Linear(in_features=784, out_features=100, bias=True)
    (1): ReluLayer()
    (2): Linear(in_features=100, out_features=100, bias=True)
    (3): ReluLayer()
    (4): MyFCLayer()
  )
)


In [103]:
a = torch.randn(1,28*28)
linear_relu_stack(a)

tensor([[ 1.0247, -0.1692, -2.0172,  3.7668, -1.2313, -1.1466, -0.7156, -2.9107,
         -1.1382,  0.1247]], grad_fn=<AddBackward0>)

## 自定义一个模型

In [104]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.stack = LinearReluStack()
    def forward(self, x):
        return self.stack(self.flatten(x))
model = NeuralNetwork()
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (stack): LinearReluStack(
    (stack): Sequential(
      (0): Linear(in_features=784, out_features=100, bias=True)
      (1): ReluLayer()
      (2): Linear(in_features=100, out_features=100, bias=True)
      (3): ReluLayer()
      (4): MyFCLayer()
    )
  )
)


In [105]:
a = torch.randn(1,28,28)
model(a)

tensor([[ 0.2746,  0.5344, -2.2537, -0.1526, -2.7846,  1.7382, -1.8701,  0.2607,
          1.9809,  0.8051]], grad_fn=<AddBackward0>)

## 模仿nn.Sequential

In [70]:
class MySequential(nn.Module):
    def __init__(self, *args):
        super().__init__()
        for idx, module in enumerate(args):
            self.add_module(str(idx), module)
    def forward(self, x):
        # _modules是内部的一个OrderedDict
        for module in self._modules.values():
            x = module(x)
        return x
mlp = MySequential(nn.Linear(25, 100),nn.ReLU(), nn.Linear(100, 10))
print(mlp)

MySequential(
  (0): Linear(in_features=25, out_features=100, bias=True)
  (1): ReLU()
  (2): Linear(in_features=100, out_features=10, bias=True)
)


# 参数

每一层的参数，我们可以通过`layer.bias`和`layer.weight`来访问，得到的是一个`nn.parameter.Parameter`的类型对象。

对于Sequential的模型，我们可以通过下标来访问每一层：`seqmodel[i]`

我们也可以通过`state_dict`来获取nn.Module中的所有层的参数。

In [84]:
mlp = nn.Sequential(nn.Linear(25, 100), nn.ReLU(), nn.Linear(100,10))
first_layer = mlp[0]
first_layer.bias
first_layer.weight
first_layer.state_dict()
type(mlp.state_dict())

collections.OrderedDict

In [88]:
# 获取所有参数
print(*[(name, param.shape) for name, param in mlp.named_parameters()])

('0.weight', torch.Size([100, 25])) ('0.bias', torch.Size([100])) ('2.weight', torch.Size([10, 100])) ('2.bias', torch.Size([10]))


In [90]:
# 访问OrderedDict
mlp.state_dict()['2.weight'].data.shape

torch.Size([10, 100])

对于`nn.parameter.Parameter`类型的对象，我们可以通过`.data`与`.grad`拿到其数据与梯度。

In [86]:
first_layer.bias.data.shape, first_layer.bias.grad

(torch.Size([100]), None)

# 参数初始化

对整个网络应用某个初始化函数

In [91]:
def norm_init(m):
    if type(m) == nn.Linear:
        nn.init.normal_(m.weight, mean=0, std=0.01)
        nn.init.zeros_(m.bias)
mlp.apply(norm_init)

Sequential(
  (0): Linear(in_features=25, out_features=100, bias=True)
  (1): ReLU()
  (2): Linear(in_features=100, out_features=10, bias=True)
)

单独的某层layer应用初始化 

In [94]:
def xiaver_init(m):
    if type(m) == nn.Linear:
        nn.init.xavier_normal_(m.weight)
        
mlp[2].apply(xiaver_init)

Linear(in_features=100, out_features=10, bias=True)

# 多个layer共享参数

In [97]:
shared = nn.Linear(8, 8) #需要共享参数的layer
net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), shared, nn.ReLU(), shared,
                    nn.ReLU(), nn.Linear(8, 1))

net[2]和net[4]是共享参数的，梯度累加。