In [25]:
import numpy as np
import torch # 基础库
# import torch.utils.data as Data # Data.TensorDataset ,Data.DataLoader
from torch.utils.data import TensorDataset,DataLoader # 数据加载
from torch import nn # 网络模型
from torch.nn import init # 参数初始化
import torch.optim as optim # 优化算法

from collections import OrderedDict

In [26]:
num_inputs = 2
num_examples = 1000
true_w = [2, -3.4]
true_b = 4.2
features = torch.tensor(torch.normal(0, 1, (num_examples, num_inputs)), dtype=torch.float)
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
labels += torch.tensor(torch.normal(0, 0.01, size=labels.size()), dtype=torch.float)

  features = torch.tensor(torch.normal(0, 1, (num_examples, num_inputs)), dtype=torch.float)
  labels += torch.tensor(torch.normal(0, 0.01, size=labels.size()), dtype=torch.float)


In [27]:
# 定义模型
class LinearNet(nn.Module):
    def __init__(self, n_feature):
        super(LinearNet, self).__init__()
        self.linear = nn.Linear(n_feature, 1)
        # 可以用nn.Sequential来更加方便地搭建网络，Sequential是一个有序的容器，网络层将按照在传入Sequential的顺序依次被添加到计算图中。
        self.linear_seq_1 = nn.Sequential(
            nn.Linear(num_inputs, 1)
            # 此处还可以传入其他层
        )
        self.linear_seq_2 = nn.Sequential()
        self.linear_seq_2.add_module('linear', nn.Linear(num_inputs, 1))
        # self.linear_seq_2.add_module ......

        self.linear_seq_3 = nn.Sequential(OrderedDict([
          ('linear', nn.Linear(num_inputs, 1))
          # ......
        ]))

    # forward 定义前向传播
    def forward(self, x):
        y = self.linear(x) # init里面定义的网络层，或者叫hide层
        return y

In [28]:
# 加载数据
batch_size = 10
# 将训练数据的特征和标签组合
dataset = TensorDataset(features, labels)
# 随机读取小批量
data_iter = DataLoader(dataset, batch_size, shuffle=True)
# 这里可以分别加载训练集和测试集
# train_dataloader = DataLoader(training_data, batch_size=batch_size)
# test_dataloader = DataLoader(test_data, batch_size=batch_size)


In [29]:
net = LinearNet(num_inputs)
print(net) # 使用print可以打印出网络的结构

# 查看模型所有的可学习参数
# 这里的model中每个参数的名字都是系统自动命名的，只要是权值都是带有weight，偏置都带有bias，
for param in net.parameters():
    print(param)


LinearNet(
  (linear): Linear(in_features=2, out_features=1, bias=True)
  (linear_seq_1): Sequential(
    (0): Linear(in_features=2, out_features=1, bias=True)
  )
  (linear_seq_2): Sequential(
    (linear): Linear(in_features=2, out_features=1, bias=True)
  )
  (linear_seq_3): Sequential(
    (linear): Linear(in_features=2, out_features=1, bias=True)
  )
)
Parameter containing:
tensor([[0.0794, 0.6773]], requires_grad=True)
Parameter containing:
tensor([0.4389], requires_grad=True)
Parameter containing:
tensor([[-0.1447,  0.0336]], requires_grad=True)
Parameter containing:
tensor([-0.4959], requires_grad=True)
Parameter containing:
tensor([[-0.1717, -0.6898]], requires_grad=True)
Parameter containing:
tensor([-0.5886], requires_grad=True)
Parameter containing:
tensor([[0.6263, 0.4640]], requires_grad=True)
Parameter containing:
tensor([-0.1312], requires_grad=True)


In [30]:
# 初始化模型参数(这个也可以不需要)
init.normal_(net.linear.weight, mean=0, std=0.01)
init.constant_(net.linear.bias, val=0)  # 也可以直接修改bias的data: net[0].bias.data.fill_(0)

# 定义损失函数
loss = nn.MSELoss()
# nn.L1Loss()

# 在pytorch中没有明确的添加L1和L2正则化的方法，但是可以直接的采用优化器自带的weight_decay选项来制订权重衰减，相当于L2正则化中的λ

# 定义优化算法
optimizer = optim.SGD(net.parameters(), lr=0.03, weight_decay=1e-5)

# 可以为不同子网络设置不同的学习率
# optimizer =optim.SGD([
#                 # 如果对某个参数不指定学习率，就使用最外层的默认学习率
#                 {'params': net.linear_seq_1.parameters()}, # lr=0.03
#                 {'params': net.linear_seq_2.parameters(), 'lr': 0.01}
#             ], lr=0.03)

# 调整学习率
# for param_group in optimizer.param_groups:
#     param_group['lr'] *= 0.1 # 学习率为之前的0.1倍

In [31]:
# 训练模型

num_epochs = 3
for epoch in range(1, num_epochs + 1):
    for X, y in data_iter:
        # 计算预测误差
        output = net(X)
        l = loss(output, y.view(-1, 1))

        # 反向传播
        optimizer.zero_grad() # 梯度清零，等价于net.zero_grad()
        l.backward() # retain_graph=True
        optimizer.step()
    print('epoch %d, loss: %f' % (epoch, l.item()))

epoch 1, loss: 0.000207
epoch 2, loss: 0.000029
epoch 3, loss: 0.000107


In [32]:
for param in net.parameters():
    print(param)

Parameter containing:
tensor([[ 2.0003, -3.3996]], requires_grad=True)
Parameter containing:
tensor([4.2001], requires_grad=True)
Parameter containing:
tensor([[-0.1447,  0.0336]], requires_grad=True)
Parameter containing:
tensor([-0.4959], requires_grad=True)
Parameter containing:
tensor([[-0.1717, -0.6898]], requires_grad=True)
Parameter containing:
tensor([-0.5886], requires_grad=True)
Parameter containing:
tensor([[0.6263, 0.4640]], requires_grad=True)
Parameter containing:
tensor([-0.1312], requires_grad=True)


In [33]:
print(true_w, net.linear.weight)
print(true_b, net.linear.bias)

[2, -3.4] Parameter containing:
tensor([[ 2.0003, -3.3996]], requires_grad=True)
4.2 Parameter containing:
tensor([4.2001], requires_grad=True)


pip install torchinfo
```
from torchinfo import summary

model = ConvNet()
batch_size = 16
summary(model, input_size=(batch_size, 1, 28, 28))
```
```
================================================================================================================
Layer (type:depth-idx)          Input Shape          Output Shape         Param #            Mult-Adds
================================================================================================================
SingleInputNet                  --                   --                   --                  --
├─Conv2d: 1-1                   [7, 1, 28, 28]       [7, 10, 24, 24]      260                1,048,320
├─Conv2d: 1-2                   [7, 10, 12, 12]      [7, 20, 8, 8]        5,020              2,248,960
├─Dropout2d: 1-3                [7, 20, 8, 8]        [7, 20, 8, 8]        --                 --
├─Linear: 1-4                   [7, 320]             [7, 50]              16,050             112,350
├─Linear: 1-5                   [7, 50]              [7, 10]              510                3,570
================================================================================================================
Total params: 21,840
Trainable params: 21,840
Non-trainable params: 0
Total mult-adds (M): 3.41
================================================================================================================
Input size (MB): 0.02
Forward/backward pass size (MB): 0.40
Params size (MB): 0.09
Estimated Total Size (MB): 0.51
================================================================================================================
```

## 一般测试不需要梯度更新

```
def test(dataloader, model):
    size = len(dataloader.dataset)


    model.eval() # 不启用 BatchNormalization 和 Dropout
    # 在 model(test) 之前，需要加上model.eval()，否则只要有输入数据，即使不训练，model 也会改变权值。
    #这是model中含有的 batch normalization 层所带来的的性质。


    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= size
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
```

## model.train 和model.eval
### model.train()
启用 BatchNormalization 和 Dropout

如果模型中有BN层(Batch Normalization）和Dropout，需要在训练时添加model.train()。
model.train()是保证BN层能够用到每一批数据的均值和方差。对于Dropout，model.train()是随机取一部分网络连接来训练更新参数。

```
# 实例化这个网络
Model = Net()
# 训练模式使用.train()
Model.train(mode=True)
```

## 保存模型或网络
- 仅保存和加载模型参数
torch.save(model.state_dict(), "model.pth")

- 保存和加载整个模型
torch.save(model, PATH)
## 加载模型
- 仅保存和加载模型参数
model = NeuralNetwork()
model.load_state_dict(torch.load("model.pth"))

- 保存和加载整个模型
model = torch.load(PATH)