### 3.3 线性回归的简洁实现

In [5]:
import torch
from torch import nn
import matplotlib.pyplot as plt
import numpy as np

# Seed both random number generators used in this notebook (PyTorch and
# NumPy) with the same value so that repeated runs draw the same numbers.
SEED = 666
torch.manual_seed(SEED)
np.random.seed(SEED)

### 3.3.1 生成数据集

In [2]:
# Ground-truth linear model: y = 2 * x1 - 3.4 * x2 + 4.2
num_inputs = 2
num_examples = 1000
true_w = [2, -3.4]
true_b = 4.2

# torch.randn(*size) draws samples from the standard normal distribution.
features = torch.randn(num_examples, num_inputs, dtype=torch.float32)

# Noise-free targets computed from the two feature columns.
labels = features[:, 0] * true_w[0] + features[:, 1] * true_w[1] + true_b

# Add Gaussian observation noise (mean 0, standard deviation 0.01) in place.
labels += torch.tensor(
    np.random.normal(loc=0, scale=0.01, size=labels.size()),
    dtype=torch.float32,
)

### 3.3.2 读取数据

In [3]:
import torch.utils.data as Data

batch_size = 10

# Pair every feature row with its label so both are indexed together.
dataset = Data.TensorDataset(features, labels)

# Wrap the dataset in a DataLoader that yields mini-batches.
data_iter = Data.DataLoader(
    dataset,                # a torch TensorDataset
    batch_size=batch_size,  # samples per mini-batch
    shuffle=True,           # reshuffle the samples each time the loader is iterated
    num_workers=2,          # load data in 2 worker subprocesses (not threads)
)

In [4]:
# Fetch just the first mini-batch and show its features and labels.
X, y = next(iter(data_iter))
print(X, '\n', y)

tensor([[ 0.3599,  0.4855],
        [-0.4741,  0.3136],
        [-1.1171, -0.3931],
        [ 0.6882,  0.6378],
        [ 0.1022, -0.2862],
        [-0.0714,  0.3247],
        [ 0.5803,  1.6024],
        [-0.8415, -0.0101],
        [-1.1748,  0.0288],
        [-0.0035,  1.1548]]) 
 tensor([ 3.2656,  2.1721,  3.3192,  3.4052,  5.3677,  2.9468, -0.0950,  2.5465,
         1.7524,  0.2820])


### 3.3.3 定义模型

In [8]:
class LinearNet(nn.Module):
    """Linear regression model made of a single fully connected layer.

    Args:
        n_features: Number of input features; passed to ``nn.Linear`` as the
            input size. The layer always has exactly one output.
    """

    def __init__(self, n_features):
        super().__init__()
        self.linear = nn.Linear(n_features, 1)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        return self.linear(x)

# Build the model with one input per feature and show its layer structure.
net = LinearNet(n_features=num_inputs)
print(net)

LinearNet(
  (linear): Linear(in_features=2, out_features=1, bias=True)
)


In [9]:
# Option 1: pass the layers to nn.Sequential directly.
net = nn.Sequential(
    nn.Linear(in_features=num_inputs, out_features=1),
    # further layers could be listed here
)

# Option 2: start from an empty container and register named layers one by one.
net = nn.Sequential()
net.add_module('linear', nn.Linear(in_features=num_inputs, out_features=1))
# additional net.add_module(...) calls would append more layers

# Option 3: pass an OrderedDict that maps layer names to layers.
from collections import OrderedDict
net = nn.Sequential(OrderedDict(
    linear=nn.Linear(in_features=num_inputs, out_features=1),
    # ...
))
print(net)
print(net[0])

Sequential(
  (linear): Linear(in_features=2, out_features=1, bias=True)
)
Linear(in_features=2, out_features=1, bias=True)


In [10]:
# Print every learnable parameter tensor registered on `net`.
for param in net.parameters():
    print(param)

Parameter containing:
tensor([[-0.6959, -0.3507]], requires_grad=True)
Parameter containing:
tensor([0.0732], requires_grad=True)


### 3.3.4 初始化模型参数

In [11]:
from torch.nn import init

# Re-initialise the linear layer: weights drawn from a normal distribution
# with mean 0 and standard deviation 0.01, bias set to the constant 0.
init.normal_(net[0].weight, 0.0, 0.01)
# The bias could also be zeroed directly via net[0].bias.data.fill_(0).
init.constant_(net[0].bias, 0.0)

Parameter containing:
tensor([0.], requires_grad=True)

In [12]:
# Print the parameters of `net` again to inspect the values set by the
# initialisation cell.
for param in net.parameters():
    print(param)

Parameter containing:
tensor([[0.0259, 0.0045]], requires_grad=True)
Parameter containing:
tensor([0.], requires_grad=True)


### 3.3.5 定义损失函数

In [13]:
# Mean squared error, averaged over all elements (the default reduction).
loss = nn.MSELoss(reduction='mean')

### 3.3.6 定义优化算法

In [15]:
import torch.optim as optim

# Plain mini-batch SGD over all parameters of `net`, learning rate 0.03.
# NOTE(review): `optimzer` is a misspelling of `optimizer`; the name is kept
# because the training cell refers to it.
optimzer = optim.SGD(params=net.parameters(), lr=0.03)
print(optimzer)

SGD (
Parameter Group 0
    dampening: 0
    lr: 0.03
    momentum: 0
    nesterov: False
    weight_decay: 0
)


In [17]:
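# Example: use a different learning rate for each sub-network.
# (Illustrative only: `net` in this notebook has no `subnet1`/`subnet2`.)
# optimzer = optim.SGD([
#     {'params': net.subnet1.parameters()},              # uses the default lr=0.03
#     {'params': net.subnet2.parameters(), 'lr': 0.01},  # per-group override
# ], lr=0.03)

# Example: scale the learning rate of every parameter group to 0.1x its current value.
# for param_group in optimzer.param_groups:
#     param_group['lr'] *= 0.1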

### 3.3.7 训练模型

In [20]:
num_epochs = 3
for epoch in range(num_epochs):
    for X, y in data_iter:
        # Clear gradients left over from the previous step. The optimizer was
        # built from net.parameters(), so this matches net.zero_grad() here.
        optimzer.zero_grad()
        output = net(X)
        # The labels are 1-D; view(-1, 1) reshapes them to (num, 1) so that
        # they have the same shape as `output`.
        l = loss(output, y.view(-1, 1))
        l.backward()
        optimzer.step()
    # Reports the loss of the last mini-batch processed in this epoch.
    print('epoch %d, loss: %f' % (epoch, l.item()))

epoch 0, loss: 0.000537
epoch 1, loss: 0.000063
epoch 2, loss: 0.000133


In [21]:
# Compare the learned parameters of the linear layer with the ground truth
# used to generate the data.
dense = net[0]
# .detach() returns the values without autograd tracking; it is the
# recommended replacement for the legacy `.data` attribute and prints the
# same tensor.
print(true_w, dense.weight.detach())
print(true_b, dense.bias.detach())

[2, -3.4] tensor([[ 2.0004, -3.4002]])
4.2 tensor([4.1997])
