In [1]:
import numpy as np
import torch
from torch import nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

1. 我们手工设定一组真实参数 w 和 b，然后用这组参数生成数据
2. 再用这组数据学习出 w' 和 b'，并比较学到的参数与真实参数之间的差异

In [2]:
# Ground-truth parameters that the synthetic data is generated from.
true_w = torch.tensor([2.0, -3.4, 5.0, -3.0, 1.0, -2.0])
true_b = 4.2
# .shape and .size() report the same torch.Size.
(
    true_w.shape,
    true_w.size(),
)

(torch.Size([6]), torch.Size([6]))

In [3]:
# Build the synthetic dataset
def synthetic_data(w, b, num_examples):
    """Sample a noisy linear dataset ``y = Xw + b + noise``.

    Args:
        w: weight tensor; its length sets the number of feature columns
            (a 1-D tensor at the call site in this notebook).
        b: bias added to every target.
        num_examples: number of rows to draw.

    Returns:
        ``(X, Y)``: ``X`` has shape ``(num_examples, len(w))`` and ``Y`` is
        reshaped to a single column.
    """
    num_features = len(w)
    # Features are standard-normal draws.
    X = torch.normal(mean=0, std=1, size=(num_examples, num_features))
    clean_targets = torch.matmul(X, w) + b
    # Gaussian noise with std 0.01 on top of the exact linear response.
    noise = torch.normal(mean=0, std=0.01, size=clean_targets.shape)
    return X, (clean_targets + noise).reshape((-1, 1))

In [4]:
# Draw 10000 examples from the ground-truth linear model.
features, labels = synthetic_data(w=true_w, b=true_b, num_examples=10000)

In [5]:
# Sanity check: both operands are 1-D, so `@` is a dot product and the operand order does not matter.
(
    features[0],
    features[0] @ true_w,
    true_w @ features[0],
    len(true_w),
)

(tensor([-0.8054, -0.2786,  2.3090,  0.3210,  0.7938, -0.5885]),
 tensor(11.8891),
 tensor(11.8891),
 6)

In [6]:
# Bundle features and labels in a plain list; the list itself has two entries,
# so the sample count is read from its first element.
dataset = [features, labels]
(
    len(dataset[0]),
    dataset[0].shape,
    type(dataset),
)

(10000, torch.Size([10000, 6]), list)

In [7]:
class MyDataset(Dataset):
    """Map-style dataset over an in-memory ``[X, y]`` pair.

    A small dataset can be held in memory like this; for a very large one,
    index by file name or file index instead.
    """
    def __init__(self, dataset):
        """Store the features and labels.

        Args:
            dataset: sequence whose first element is ``X`` and second is
                ``y``. ``len(dataset)`` would be 2 for such a list, so the
                sample count is taken from ``X``.

        Raises:
            ValueError: if ``X`` and ``y`` hold a different number of samples.
        """
        X, y = dataset[0], dataset[1]
        # Fail fast: a shorter y would raise IndexError in the middle of an
        # epoch, and a longer y would be silently truncated.
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same number of samples, got {len(X)} and {len(y)}"
            )
        self.len = len(X)
        self.X = X
        self.y = y
    def __getitem__(self, index):
        """Return the ``(x, y)`` pair at ``index``; implements ``dataset[index]``."""
        return self.X[index], self.y[index]
    def __len__(self):
        """Return the number of samples; implements ``len(dataset)``."""
        return self.len
dataset = MyDataset([features, labels])
# The tensors are already in memory, so worker processes would only add
# inter-process overhead. num_workers=0 also avoids failures when a Dataset
# class defined inside a notebook has to be sent to spawned worker processes.
train_loader = DataLoader(dataset=dataset, batch_size=10, shuffle=True, num_workers=0)

In [8]:
from torch import nn
# The input X has len(true_w) features (6 in this notebook); the output is one scalar per example.
net = nn.Sequential(
    nn.Linear(in_features=len(true_w), out_features=1),
)

In [9]:
def init_normal(m):
  """Initialise a linear layer in place: weight ~ N(0, 0.01^2), bias = 0.

  Written as a callback for ``Module.apply``; any module that is not an
  ``nn.Linear`` is left untouched.

  Args:
      m: the module to (possibly) initialise.
  """
  # isinstance, unlike an exact type comparison, also covers subclasses of nn.Linear.
  if isinstance(m, nn.Linear):
    nn.init.normal_(m.weight, mean=0, std=0.01)
    # A layer built with bias=False has m.bias set to None; skip it instead of crashing.
    if m.bias is not None:
      nn.init.zeros_(m.bias)
# apply() calls init_normal on each module in turn, so every linear layer gets initialised by it.
net.apply(init_normal)
(
    net[0].weight.data[0],
    net[0].bias.data[0],
)

(tensor([ 0.0157, -0.0046,  0.0050, -0.0123, -0.0059,  0.0047]), tensor(0.))

In [12]:
def train(epochs, weight_decay, lr):
    """Fit the notebook-level ``net`` with mini-batch SGD, printing the loss after each epoch.

    Relies on names defined elsewhere in the notebook: ``net`` (indexed as
    ``net[0]`` for the layer whose weight and bias are trained),
    ``train_loader`` (yields ``(x, y)`` mini-batches) and ``features`` /
    ``labels`` (the data the per-epoch loss is evaluated on).

    Args:
        epochs: number of passes over ``train_loader``.
        weight_decay: weight-decay coefficient applied to the weight only;
            the bias parameter group does not set one.
        lr: SGD learning rate, shared by both parameter groups.
    """
    criterion = nn.MSELoss()
    optimizer = torch.optim.SGD([
        {"params": net[0].weight, "weight_decay": weight_decay},
        {"params": net[0].bias},
    ], lr=lr)
    for epoch in range(epochs):
        for x, y in train_loader:
            # MSELoss takes (input, target): prediction first, label second.
            batch_loss = criterion(net(x), y)
            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()
        # Evaluation only: do not build an autograd graph for this forward pass.
        with torch.no_grad():
            epoch_loss = criterion(net(features), labels)
        print(f'epoch {epoch + 1}, loss {epoch_loss:f}')

In [13]:
# Ten epochs of plain SGD without weight decay.
train(epochs=10, weight_decay=0, lr=0.03)

epoch 1, loss 0.000100
epoch 2, loss 0.000102
epoch 3, loss 0.000103
epoch 4, loss 0.000101
epoch 5, loss 0.000100
epoch 6, loss 0.000100
epoch 7, loss 0.000101
epoch 8, loss 0.000103
epoch 9, loss 0.000101
epoch 10, loss 0.000101


In [14]:
# Compare the learned parameters with the ground truth used to generate the data.
w = net[0].weight.data
b = net[0].bias.data
# The learned weight is reshaped to true_w's shape before subtracting.
print('w的估计误差：', true_w - w.reshape(true_w.shape))
print('b的估计误差：', true_b - b)

w的估计误差： tensor([-3.9530e-04, -1.0347e-04, -3.8624e-04,  5.5313e-05,  2.5928e-05,
         9.5844e-05])
b的估计误差： tensor([-0.0016])
