# Build model
1. 神经网络模型通过layers/modules对数据进行操作。pytorch中的namespace **torch.nn**提供了NN构造所需的所有building blocks，比如conv, affine, pooling, dropout, normalization, transformer和loss functions等layers，还有containers和utilities
2. container中包括**torch.nn.Module**。它是所有NN modules的base class，pytorch中所有的module都是**nn.Module**的子类
3. pytorch中的module可以nest：每个神经网络模型自身都是一个module，该module又由其他modules(layers)构成。这种nest structure可以很方便的构造复杂的网络架构。
4. **nn.Module**的子类会自动track参数，可以用两个method来查看：parameters()和named_parameters() 

In [1]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

  warn(


In [2]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device} device")

Using cuda device


## 定义一个NN
1. 自定义model也得定义为**nn.Module**的子类
2. 每个子类都需要定义\__init__和forward两个method。模型对input data的操作都放在forward中，调用model(x)时会自动执行forward

In [3]:
# Custom neural network
class RKNet(nn.Module):
    """Fully connected classifier that produces 10 logits per sample.

    Every dimension after the first is flattened into one, and the result is
    passed through Linear(784, 512) -> ReLU -> Linear(512, 512) -> ReLU ->
    Linear(512, 10). The non-leading dimensions of the input therefore have
    to multiply to 28 * 28 = 784.
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        # Collect the layers first; nn.Sequential applies them in list order.
        stack_layers = [
            nn.Linear(28 * 28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        ]
        self.linear_relu_stack = nn.Sequential(*stack_layers)

    def forward(self, x):
        """Flatten ``x`` and return the raw (unnormalized) class scores."""
        return self.linear_relu_stack(self.flatten(x))

In [4]:
# Create an instance of the custom NN
model = RKNet()
model = model.to(device)  # move the model onto the selected device (GPU if available)
print(model)              # print the model's structure

RKNet(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [5]:
X = torch.rand((1, 28, 28), device=device)
scores = model(X)

softmax_layer = nn.Softmax(dim=1)  # dim selects the axis along which softmax is computed
prob = softmax_layer(scores)
y_pred = prob.argmax(dim=1)
print(f'predict class:{y_pred}')

predict class:tensor([6], device='cuda:0')


## 典型layers

### nn.Flatten
1. 参数：torch.nn.Flatten(start_dim=1, end_dim=-1)
2. 压缩[start_dim, end_dim]范围的dims
3. 默认将输入的data压成2维数据：保留原第一维，压缩剩下的所有维度，比如输入(N, H, W)时输出(N, H*W)

In [6]:
input_image = torch.rand((3, 28, 28))
print(input_image.shape)

# Default arguments (start_dim=1, end_dim=-1): keep dim 0, flatten the rest.
flatten = nn.Flatten()
flat_image = flatten(input_image)
print(flat_image.shape)

flatten2 = nn.Flatten(start_dim=0, end_dim=1)  # flatten the dims in the range [0, 1]
flat_image2 = flatten2(input_image)
print(flat_image2.shape)

torch.Size([3, 28, 28])
torch.Size([3, 784])
torch.Size([84, 28])


### nn.Linear
1. affine layer
2. 参数：torch.nn.Linear(in_features, out_features, bias=True, device=None, dtype=None)
   · in_features (int) – size of each input sample
   · out_features (int) – size of each output sample
   · bias (bool)取False时, 就不会learn bias. Default: True

In [7]:
# Affine layer: 28 * 28 input features per sample, 6 output features.
layer1 = nn.Linear(28 * 28, 6)
hidden1 = layer1(flat_image)
print(hidden1.shape)

torch.Size([3, 6])


### nn.ReLU

In [8]:
print(f"Before ReLU:\n {hidden1}\n")
relu = nn.ReLU()  # negative entries become 0, the rest pass through unchanged
hidden1 = relu(hidden1)
print(f"After ReLU:\n {hidden1}")

Before ReLU:
 tensor([[ 0.3974,  0.0036,  0.5583, -0.3212,  0.2824, -0.1104],
        [ 0.2649, -0.2224,  0.7471, -0.5801,  0.0676,  0.0150],
        [ 0.3988, -0.1779,  0.4358, -0.5818,  0.4039,  0.0568]],
       grad_fn=<AddmmBackward0>)

After ReLU:
 tensor([[0.3974, 0.0036, 0.5583, 0.0000, 0.2824, 0.0000],
        [0.2649, 0.0000, 0.7471, 0.0000, 0.0676, 0.0150],
        [0.3988, 0.0000, 0.4358, 0.0000, 0.4039, 0.0568]],
       grad_fn=<ReluBackward0>)


### nn.Sequential
1. an ordered container of modules.
2. 数据会按照Sequential中定义的layer顺序做处理

In [9]:
# The input passes through the modules in exactly the order listed here.
pipeline_layers = [flatten, layer1, nn.ReLU(), nn.Linear(6, 10)]
seq_modules = nn.Sequential(*pipeline_layers)
input_image = torch.rand((3, 28, 28))
scores = seq_modules(input_image)

# Apply softmax along dim 1 of the scores.
softmax = nn.Softmax(dim=1)
pred_probab = softmax(scores)

## 模型参数
1. NN中的一些layers带有参数，比如nn.Linear有weights和bias，这些参数会在training过程中被优化
2. 把model定义为nn.Module的子类后，nn.Module能自动track所有model object中定义的fields，而参数可以通过model的parameters()和named_parameters()两种method来获取。

In [10]:
print(f"Model structure: {model}\n\n")

# Walk every parameter registered on the model, together with its dotted name.
# param[:2] keeps only the first two entries along dim 0 so the output stays short.
for name, param in model.named_parameters():
    print(f"Layer: {name} | Size: {param.size()} | Values : {param[:2]} \n")

Model structure: RKNet(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values : tensor([[ 0.0093,  0.0292, -0.0226,  ..., -0.0265,  0.0247, -0.0258],
        [-0.0104,  0.0330,  0.0024,  ...,  0.0071,  0.0025,  0.0158]],
       device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values : tensor([-0.0007,  0.0023], device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values : tensor([[-0.0363,  0.0007,  0.0311,  ...,  0.0019, -0.0224,  0.0272],
        [ 0.0262,  0.0339,  0.0396,  ...,  0.0277,  0.0087,  0.0132]],
       device='cuda:0', grad_fn=<SliceBackw