In [1]:
import torch

## 计算梯度

In [2]:
# Three trainable one-element leaf tensors (float32, gradients tracked).
x1, x2, x3 = (
    torch.tensor([value], dtype=torch.float, requires_grad=True)
    for value in (100, 200, 300)
)
# A constant: same dtype, but excluded from gradient tracking.
constant = torch.tensor([30], dtype=torch.float, requires_grad=False)

x1, x2, x3, constant

(tensor([100.], requires_grad=True),
 tensor([200.], requires_grad=True),
 tensor([300.], requires_grad=True),
 tensor([30.]))

In [3]:
# Every tensor here was built from a one-element list, so each shape is torch.Size([1]).
# (Previously the last item displayed `constant` itself instead of its shape.)
x1.shape, x2.shape, x3.shape, constant.shape

(torch.Size([1]), torch.Size([1]), torch.Size([1]), torch.Size([1]))

In [4]:
# f is linear in x1, x2, x3 with coefficients 10, 20 and `constant` (30),
# so the expected partial derivatives are exactly those coefficients.
f = 10 * x1 + 20 * x2 + constant * x3  # forward pass (forward computation)
f.backward()  # backward pass -> compute the partial derivative of f w.r.t. each parameter

x1.grad, x2.grad, x3.grad

(tensor([10.]), tensor([20.]), tensor([30.]))

In [5]:
constant.grad, type(constant.grad)  # the constant was created with requires_grad=False, so it has no gradient (None)

(None, NoneType)

## 更新权重(参数)

In [6]:
learning_rate = 0.01  # step size of the gradient-descent update

# Inside torch.no_grad() autograd records nothing, so the leaf tensors can be
# updated in place directly; reassigning the legacy `.data` attribute is not needed.
# The gradients are intentionally NOT zeroed here: the following section runs
# backward() again to show that .grad accumulates.
with torch.no_grad():
    # One plain SGD step per parameter: x <- x - lr * dL/dx
    x1 -= learning_rate * x1.grad
    x2 -= learning_rate * x2.grad
    x3 -= learning_rate * x3.grad

x1, x2, x3

(tensor([99.9000], requires_grad=True),
 tensor([199.8000], requires_grad=True),
 tensor([299.7000], requires_grad=True))

## 梯度是可以累加的

In [7]:
# Same linear function as before, evaluated on the updated x1, x2, x3.
f = 10 * x1 + 20 * x2 + constant * x3  # forward pass (forward computation)
f.backward()  # backward pass (differentiation); results are added to the existing .grad values

# .grad was never reset after the first backward(), so each entry is now
# twice the per-pass gradient (10+10, 20+20, 30+30).
x1.grad, x2.grad, x3.grad

(tensor([20.]), tensor([40.]), tensor([60.]))

## nn.Parameter 批量化创建权重

In [8]:
from torch import nn

# Wrap a 512x512 tensor of ones in nn.Parameter to create a whole weight matrix at once.
weights = nn.Parameter(torch.ones(512, 512))
# Inspect requires_grad on the Parameter itself and on its underlying .data tensor.
(
    weights.requires_grad,
    weights.data.requires_grad,
)

(True, False)

In [9]:
# Display the parameter; the repr is prefixed with "Parameter containing:" and reports requires_grad=True.
weights

Parameter containing:
tensor([[1., 1., 1.,  ..., 1., 1., 1.],
        [1., 1., 1.,  ..., 1., 1., 1.],
        [1., 1., 1.,  ..., 1., 1., 1.],
        ...,
        [1., 1., 1.,  ..., 1., 1., 1.],
        [1., 1., 1.,  ..., 1., 1., 1.],
        [1., 1., 1.,  ..., 1., 1., 1.]], requires_grad=True)

In [10]:
# Constant input: a 512x512 matrix in which every entry is 666 (not tracked by autograd).
X = 666 * torch.ones(512, 512)
# Element-wise product with the weights, reduced to a scalar so backward() can be called on it.
output = X.mul(weights).sum()
output

tensor(1.7459e+08, grad_fn=<SumBackward0>)

In [11]:
# d(output)/d(weights) equals X element-wise, so every gradient entry is 666.
output.backward()
# .grad is already a plain tensor that is not tracked by autograd; no `.data` needed.
weights.grad

tensor([[666., 666., 666.,  ..., 666., 666., 666.],
        [666., 666., 666.,  ..., 666., 666., 666.],
        [666., 666., 666.,  ..., 666., 666., 666.],
        ...,
        [666., 666., 666.,  ..., 666., 666., 666.],
        [666., 666., 666.,  ..., 666., 666., 666.],
        [666., 666., 666.,  ..., 666., 666., 666.]])

In [None]:
# X was built without requires_grad=True, so autograd does not populate a gradient for it;
# this is expected to evaluate to None (the cell has no recorded output).
X.grad

In [12]:
learning_rate = 2  # step size (deliberately large so the change is obvious)

# Update the parameter in place while autograd recording is disabled;
# this avoids the legacy `.data` reassignment.
with torch.no_grad():
    weights -= learning_rate * weights.grad

# Show the raw values only (detached view, without the Parameter wrapper): 1 - 2 * 666 = -1331.
weights.detach()

tensor([[-1331., -1331., -1331.,  ..., -1331., -1331., -1331.],
        [-1331., -1331., -1331.,  ..., -1331., -1331., -1331.],
        [-1331., -1331., -1331.,  ..., -1331., -1331., -1331.],
        ...,
        [-1331., -1331., -1331.,  ..., -1331., -1331., -1331.],
        [-1331., -1331., -1331.,  ..., -1331., -1331., -1331.],
        [-1331., -1331., -1331.,  ..., -1331., -1331., -1331.]])