## Part 1 - 标量与矢量运算

In [2]:
import torch
from time import time


In [6]:
# 测试tensor加法的运算效率

size = 1024 ** 2

x = torch.ones(size)
y = torch.ones(size)
z = torch.zeros(size)

start = time()
for i in range(size):
    z[i] = x[i] + y[i]
print("use for loop to calc, cost : %f"%(time() - start))

use for loop to calc, cost : 7.745594


In [7]:
start = time()
z = x + y
print("use vector add to calc, cost : %f"%(time() - start))

use vector add to calc, cost : 0.001416


In [None]:
# 测试使用ｃｕｄａ加速后的矢量运算效率
a = torch.ones(size)
b = torch.ones(size)
c = torch.zeros(size)
start = time()
c = a + b
print("cudause vector add to calc, cost : %f"%(time() - start))

## Part 2 - 手工实现线性回归算法

In [1]:
%matplotlib inline
import torch 
import numpy as np
import matplotlib.pyplot as plt
import random
from IPython import display

In [8]:
# 原文为两个变量，此处测试三变量
num_inputs = 3
num_examples = 1000
true_w = [2.05, -3.4, 1.28]
true_b = 0.2
features = torch.randn(num_examples, num_inputs,
                       dtype=torch.float32)
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1]  \
            + true_w[2] * features[:, 2]+ true_b
labels += torch.tensor(np.random.normal(0, 0.01, size=labels.size()),
                       dtype=torch.float32)

print(features[0], labels[0])

tensor([0.1670, 0.1013, 0.3756]) tensor(0.6782)


In [9]:
# 本函数已保存在d2lzh包中方便以后使用
def data_iter(batch_size, features, labels):
    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices)  # 样本的读取顺序是随机的
    for i in range(0, num_examples, batch_size):
        j = torch.LongTensor(indices[i: min(i + batch_size, num_examples)]) # 最后一次可能不足一个batch
        yield  features.index_select(0, j), labels.index_select(0, j)

        
batch_size = 10

for X, y in data_iter(batch_size, features, labels):
    print(X, y)
    break

tensor([[ 0.5778, -1.2826, -0.6243],
        [ 0.9409, -1.7390, -0.3835],
        [ 0.1676, -0.9237, -1.5869],
        [-1.0868, -0.4981, -0.9275],
        [ 0.0693, -0.3614,  0.7760],
        [-0.4783,  1.2052,  0.6201],
        [ 0.2797, -0.6210, -0.5013],
        [ 0.3057, -0.0477,  1.0127],
        [-0.1686,  0.3785, -0.3210],
        [ 0.5045, -0.9925,  0.4847]]) tensor([ 4.9342,  7.5444,  1.6726, -1.5184,  2.5661, -4.0903,  2.2346,  2.3030,
        -1.8396,  5.2212])


In [11]:
def linreg(X, w, b):  # 本函数已保存在d2lzh_pytorch包中方便以后使用
    return torch.mm(X, w) + b

def squared_loss(y_hat, y):  # 本函数已保存在d2lzh_pytorch包中方便以后使用
    # 注意这里返回的是向量, 另外, pytorch里的MSELoss并没有除以 2
    return (y_hat - y.view(y_hat.size())) ** 2 / 2

def sgd(params, lr, batch_size):  # 本函数已保存在d2lzh_pytorch包中方便以后使用
    for param in params:
        param.data -= lr * param.grad / batch_size # 注意这里更改param时用的param.data

In [43]:
w = torch.tensor(np.random.normal(0, 0.01, (num_inputs, 1)), dtype=torch.float32)
b = torch.zeros(1, dtype=torch.float32)

w.requires_grad_(requires_grad=True)
b.requires_grad_(requires_grad=True) 

lr = 0.01
num_epochs = 12
net = linreg
loss = squared_loss

for epoch in range(num_epochs):  # 训练模型一共需要num_epochs个迭代周期
    # 在每一个迭代周期中，会使用训练数据集中所有样本一次（假设样本数能够被批量大小整除）。X
    # 和y分别是小批量样本的特征和标签
    for X, y in data_iter(batch_size, features, labels):
        l = loss(net(X, w, b), y).sum()  # l是有关小批量X和y的损失
        l.backward()  # 小批量的损失对模型参数求梯度
        sgd([w, b], lr, batch_size)  # 使用小批量随机梯度下降迭代模型参数

        # 不要忘了梯度清零
        w.grad.data.zero_()
        b.grad.data.zero_()
    train_l = loss(net(features, w, b), labels)
    print('epoch %d, loss %f' % (epoch + 1, train_l.mean().item()))

epoch 1, loss 1.138208
epoch 2, loss 0.146106
epoch 3, loss 0.019103
epoch 4, loss 0.002582
epoch 5, loss 0.000390
epoch 6, loss 0.000098
epoch 7, loss 0.000057
epoch 8, loss 0.000051
epoch 9, loss 0.000050
epoch 10, loss 0.000050
epoch 11, loss 0.000050
epoch 12, loss 0.000050


In [54]:
print('real weights: \n', true_w, '\n** \ncalculated weights: \n', w)
print('<------>\n<------>\nreal bias: \n', true_b, '\n** \ncalculated bias: \n', b)

real weights: 
 [2.05, -3.4, 1.28] 
** 
calculated weights: 
 tensor([[ 2.0496],
        [-3.3998],
        [ 1.2792]], requires_grad=True)
<------>
<------>
real bias: 
 0.2 
** 
calculated bias: 
 tensor([0.2006], requires_grad=True)


## Part 3 - 用torch API简洁实现线性回归

In [57]:
# 原文为两个变量，此处测试三变量
num_inputs = 3
num_examples = 1000
true_w = [2.05, -3.4, 1.28]
true_b = 0.2
features = torch.randn(num_examples, num_inputs,
                       dtype=torch.float32)
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1]  \
            + true_w[2] * features[:, 2]+ true_b
labels += torch.tensor(np.random.normal(0, 0.01, size=labels.size()),
                       dtype=torch.float32)

print(features[0], labels[0])

tensor([-1.1314, -0.0537, -2.0535]) tensor(-4.5778)


In [61]:
import torch.utils.data as Data
import torch.nn as nn

batch_size = 10
# 将训练数据的特征和标签组合
dataset = Data.TensorDataset(features, labels)
# 随机读取小批量
data_iter = Data.DataLoader(dataset, batch_size, shuffle=True)

for X, y in data_iter:
    print(X, y)
    break


tensor([[ 4.3551e-01, -1.4299e+00,  1.0157e+00],
        [ 1.3555e+00, -7.1962e-01, -3.1199e-01],
        [-1.5121e+00, -3.6046e-01, -1.4631e+00],
        [ 8.4685e-01,  1.2765e-02, -4.0271e-02],
        [-1.6655e+00, -1.5249e+00, -1.9606e-01],
        [ 5.3617e-01,  2.5399e-03,  1.3388e+00],
        [ 1.5466e+00, -6.6703e-01,  3.3518e-01],
        [ 3.1904e+00,  8.6668e-01, -9.0497e-02],
        [-9.5961e-01,  6.2080e-01, -1.9145e-01],
        [ 1.3016e+00, -2.6860e-02,  1.3234e+00]]) tensor([ 7.2694,  5.0125, -3.5275,  1.8693,  1.7118,  3.0186,  6.0611,  3.6762,
        -4.1122,  4.6583])


In [66]:
class LinearNet(nn.Module):
    def __init__(self, n_feature):
        super(LinearNet, self).__init__()
        self.linear = nn.Linear(n_feature, 1)
    # forward 定义前向传播
    def forward(self, x):
        y = self.linear(x)
        return y

net = LinearNet(num_inputs)
print(net) # 使用print可以打印出网络的结构


LinearNet(
  (linear): Linear(in_features=3, out_features=1, bias=True)
)


In [70]:
# 写法一
net1 = nn.Sequential(
    nn.Linear(num_inputs, 1)
    # 此处还可以传入其他层
    )
print('method 1:', net1)


# 写法二
net2 = nn.Sequential()
net2.add_module('linear', nn.Linear(num_inputs, 1))
print('method 2:', net2)
# net.add_module ......

# 写法三
from collections import OrderedDict
net = nn.Sequential(OrderedDict([
          ('linear', nn.Linear(num_inputs, 1)),
          # ......
        ]))
print('method 3:', net)

print(net[0])


method 1: Sequential(
  (0): Linear(in_features=3, out_features=1, bias=True)
)
method 2: Sequential(
  (linear): Linear(in_features=3, out_features=1, bias=True)
)
method 3: Sequential(
  (linear): Linear(in_features=3, out_features=1, bias=True)
)
Linear(in_features=3, out_features=1, bias=True)


In [87]:
from torch.nn import init

init.normal_(net[0].weight, mean=0, std=0.01)
init.constant_(net[0].bias, val=0)  # 也可以直接修改bias的data: net[0].bias.data.fill_(0)

loss = nn.MSELoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.01)
print(optimizer)

# 调整学习率
for param_group in optimizer.param_groups:
    param_group['lr'] *= 0.1 # 学习率为之前的0.1倍


SGD (
Parameter Group 0
    dampening: 0
    lr: 0.01
    momentum: 0
    nesterov: False
    weight_decay: 0
)


In [89]:
num_epochs = 30
for epoch in range(1, num_epochs + 1):
    for X, y in data_iter:
        output = net(X)
        l = loss(output, y.view(-1, 1))
        optimizer.zero_grad() # 梯度清零，等价于net.zero_grad()
        l.backward()
        optimizer.step()
    print('epoch %d, loss: %f' % (epoch, l.item()))


epoch 1, loss: 0.002726
epoch 2, loss: 0.003024
epoch 3, loss: 0.002236
epoch 4, loss: 0.001037
epoch 5, loss: 0.000336
epoch 6, loss: 0.000701
epoch 7, loss: 0.000426
epoch 8, loss: 0.000206
epoch 9, loss: 0.000188
epoch 10, loss: 0.000192
epoch 11, loss: 0.000169
epoch 12, loss: 0.000195
epoch 13, loss: 0.000147
epoch 14, loss: 0.000070
epoch 15, loss: 0.000070
epoch 16, loss: 0.000087
epoch 17, loss: 0.000124
epoch 18, loss: 0.000131
epoch 19, loss: 0.000148
epoch 20, loss: 0.000192
epoch 21, loss: 0.000091
epoch 22, loss: 0.000099
epoch 23, loss: 0.000178
epoch 24, loss: 0.000020
epoch 25, loss: 0.000096
epoch 26, loss: 0.000157
epoch 27, loss: 0.000093
epoch 28, loss: 0.000084
epoch 29, loss: 0.000030
epoch 30, loss: 0.000134


In [90]:
dense = net[0]
print(true_w, dense.weight)
print(true_b, dense.bias)

[2.05, -3.4, 1.28] Parameter containing:
tensor([[ 2.0502, -3.3994,  1.2806]], requires_grad=True)
0.2 Parameter containing:
tensor([0.2002], requires_grad=True)
