In [21]:
import torch
import torch.nn as nn
import numpy as np

### 生成数据

In [22]:
num_inputs = 2
num_examples = 1000

# 生成特征数据(1000, 2)
features = torch.tensor(np.random.normal(0, 1, (num_examples, num_inputs)), dtype=torch.float)
features.shape

torch.Size([1000, 2])

In [23]:
true_w = [2, -3.4]
true_b = 4.2

# 生成目标值(1000, 1)
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
# 添加噪声
labels += torch.tensor(np.random.normal(0, 0.01, size=labels.size()), dtype=torch.float) 
labels.shape

torch.Size([1000])

### 读取数据

In [24]:
import torch.utils.data as Data


# 将训练数据的特征和标签组合
dataset = Data.TensorDataset(features, labels)

# 随机读取小批量
batch_size = 10
data_iter = Data.DataLoader(dataset, batch_size, shuffle=True)

In [25]:
for X, y in data_iter:
    print(X, y)
    break

tensor([[-0.4973, -0.4906],
        [-1.4452,  1.8044],
        [-0.4910,  0.7050],
        [-0.3675,  0.2225],
        [ 0.7671, -0.2331],
        [-0.1940,  0.5269],
        [ 2.4738,  0.7782],
        [ 0.0836, -0.2440],
        [-2.5807, -1.4374],
        [ 0.5497, -0.8408]]) tensor([ 4.8641, -4.8244,  0.8262,  2.7176,  6.5351,  2.0360,  6.5134,  5.2122,
         3.9283,  8.1641])


### 构建模型

In [26]:
net = nn.Sequential(
    nn.Linear(num_inputs, 1)
    # 
    )
print(net)

# net = nn.Sequential()
# net.add_module('linear', nn.Linear(num_inputs, 1))

Sequential(
  (0): Linear(in_features=2, out_features=1, bias=True)
)


In [27]:
for param in net.parameters():
    print(param)

Parameter containing:
tensor([[0.6703, 0.4235]], requires_grad=True)
Parameter containing:
tensor([0.5059], requires_grad=True)


#### 参数初始化

In [28]:
from torch.nn import init

init.normal_(net[0].weight, mean=0, std=0.01)
init.constant_(net[0].bias, val=0)

Parameter containing:
tensor([0.], requires_grad=True)

In [29]:
for param in net.parameters():
    print(param)

Parameter containing:
tensor([[0.0088, 0.0149]], requires_grad=True)
Parameter containing:
tensor([0.], requires_grad=True)


In [30]:
'''
w = torch.tensor(np.random.normal(0, 0.01, (num_inputs, 1)), dtype=torch.float32)
b = torch.zeros(1, dtype=torch.float32)

w.requires_grad_(requires_grad=True)
b.requires_grad_(requires_grad=True) 

def linreg(X, w, b): 
    return torch.mm(X, w) + b
'''

'\nw = torch.tensor(np.random.normal(0, 0.01, (num_inputs, 1)), dtype=torch.float32)\nb = torch.zeros(1, dtype=torch.float32)\n\nw.requires_grad_(requires_grad=True)\nb.requires_grad_(requires_grad=True) \n\ndef linreg(X, w, b): \n    return torch.mm(X, w) + b\n'

#### 损失函数

In [31]:
loss = nn.MSELoss()

In [32]:
'''
def squared_loss(y_hat, y):  
    return (y_hat - y.view(y_hat.size())) ** 2 / 2
'''

'\ndef squared_loss(y_hat, y):  \n    return (y_hat - y.view(y_hat.size())) ** 2 / 2\n'

#### 优化器

In [33]:
import torch.optim as optim

optimizer = optim.SGD(net.parameters(), lr=0.03)
print(optimizer)

SGD (
Parameter Group 0
    dampening: 0
    lr: 0.03
    momentum: 0
    nesterov: False
    weight_decay: 0
)


In [34]:
'''
def sgd(params, lr, batch_size):  
    for param in params:
        param.data -= lr * param.grad / batch_size
'''

'\ndef sgd(params, lr, batch_size):  \n    for param in params:\n        param.data -= lr * param.grad / batch_size\n'

### 训练模型

In [35]:
num_epochs = 3
for epoch in range(1, num_epochs + 1):
    for X, y in data_iter:
        output = net(X)
        l = loss(output, y.view(-1, 1))
        optimizer.zero_grad() # 梯度清零，等价于net.zero_grad()
        l.backward()
        optimizer.step()
    print('epoch %d, loss: %f' % (epoch, l.item()))

epoch 1, loss: 0.000210
epoch 2, loss: 0.000092
epoch 3, loss: 0.000062


In [36]:
dense = net[0]
print(true_w, dense.weight)
print(true_b, dense.bias)

[2, -3.4] Parameter containing:
tensor([[ 1.9998, -3.4003]], requires_grad=True)
4.2 Parameter containing:
tensor([4.2000], requires_grad=True)


In [37]:
'''
lr = 0.03
num_epochs = 3
net = linreg
loss = squared_loss

for epoch in range(num_epochs):  
    for X, y in data_iter(batch_size, features, labels):
        l = loss(net(X, w, b), y).sum()  
        l.backward()  
        sgd([w, b], lr, batch_size)  

        # 梯度清零
        w.grad.data.zero_()
        b.grad.data.zero_()
    train_l = loss(net(features, w, b), labels)
    print('epoch %d, loss %f' % (epoch + 1, train_l.mean().item()))
'''

"\nlr = 0.03\nnum_epochs = 3\nnet = linreg\nloss = squared_loss\n\nfor epoch in range(num_epochs):  \n    for X, y in data_iter(batch_size, features, labels):\n        l = loss(net(X, w, b), y).sum()  \n        l.backward()  \n        sgd([w, b], lr, batch_size)  \n\n        # 梯度清零\n        w.grad.data.zero_()\n        b.grad.data.zero_()\n    train_l = loss(net(features, w, b), labels)\n    print('epoch %d, loss %f' % (epoch + 1, train_l.mean().item()))\n"