In [1]:
import torch
import numpy as np


In [12]:
# ---------------------------------------------------------------------------
# 1. Gradient of a scalar input
#    f(x) = x**2 + 20   ->   df/dx = 2 * x
# ---------------------------------------------------------------------------
# The tensor must have a floating-point dtype to take part in autograd;
# requires_grad=True asks autograd to track operations on it.
x = torch.tensor(10.0, dtype=torch.float64, requires_grad=True)
f = x.pow(2) + 20
f.backward()      # back-propagate; the result is stored in x.grad
print(x.grad)     # 2 * 10 = 20

# ---------------------------------------------------------------------------
# 2. Gradient of a vector input
#    f2(x) = x**2 + 20  (element-wise)
# ---------------------------------------------------------------------------
x2 = torch.tensor([10.0, 20.0, 30.0, 40.0], dtype=torch.float64, requires_grad=True)
f2 = x2.pow(2) + 20
# backward() without arguments needs a scalar output, but f2 is the vector
# tensor([120., 420., 920., 1620.]), so it is reduced to a scalar first.
# With f2_ = mean(f2) = 1/4 * sum(x**2 + 20), the gradient is 1/4 * 2 * x.
f2_ = torch.mean(f2)
f2_.backward()
print(x2.grad)    # x / 2

# ---------------------------------------------------------------------------
# 3. Gradients of several scalar inputs
#    f3(x1, x2) = x1**2 + x2**2 + x1 * x2
# ---------------------------------------------------------------------------
x1 = torch.tensor(10.0, dtype=torch.float64, requires_grad=True)
x2 = torch.tensor(20.0, dtype=torch.float64, requires_grad=True)
f3 = x1.pow(2) + x2.pow(2) + x1 * x2
f3.backward()
print(x1.grad, x2.grad)   # (2*x1 + x2, 2*x2 + x1)

# ---------------------------------------------------------------------------
# 4. Gradients of several vector inputs
#    f4(x1, x2) = x1**2 + x2**2 + x1 * x2  (element-wise)
# ---------------------------------------------------------------------------
x1 = torch.tensor([10.0, 20.0], dtype=torch.float64, requires_grad=True)
x2 = torch.tensor([30.0, 40.0], dtype=torch.float64, requires_grad=True)
f4 = x1.pow(2) + x2.pow(2) + x1 * x2
f4_ = torch.sum(f4)       # reduce to a scalar before calling backward()
f4_.backward()
print(x1.grad, x2.grad)

tensor(20., dtype=torch.float64)
tensor([ 5., 10., 15., 20.], dtype=torch.float64)
tensor(40., dtype=torch.float64) tensor(50., dtype=torch.float64)
tensor([50., 80.], dtype=torch.float64) tensor([ 70., 100.], dtype=torch.float64)


In [17]:
# 1. Controlling whether autograd tracks operations
x = torch.tensor(10, requires_grad=True, dtype=torch.float64)
print(x.requires_grad)

# (a) Context manager: tracking is disabled only inside the `with` block.
with torch.no_grad():
    y = x ** 2
print(y.requires_grad)


# (b) Decorator: tracking is disabled for the whole function body.
@torch.no_grad()
def func():
    """Return the square of the notebook-level tensor ``x`` without tracking gradients."""
    return x ** 2


y = func()
print(y.requires_grad)

# (c) Global switch: torch.set_grad_enabled(False) turns tracking off for
# everything that follows. The previous mode is restored afterwards so the
# disabled state does not leak into later cells, whose backward() calls
# need tracked outputs.
prev_grad_mode = torch.is_grad_enabled()
torch.set_grad_enabled(False)
try:
    y = x ** 2
    print(y.requires_grad)
finally:
    torch.set_grad_enabled(prev_grad_mode)

True
False
False
False


In [5]:
# 2. Gradient accumulation and zeroing
x = torch.tensor([10, 20, 30, 40], requires_grad=True, dtype=torch.float64)
for _ in range(3):
    f1 = x ** 2 + 20   # forward computation
    f2 = f1.mean()     # reduce to a scalar so backward() can be called

    # backward() adds into x.grad instead of overwriting it, so the previous
    # gradient has to be cleared before every backward pass.
    # x.grad is None until the first backward(), hence the guard.
    if x.grad is not None:
        # Zero the gradient tensor in place directly rather than through the
        # legacy `.data` attribute.
        x.grad.zero_()

    f2.backward()
    print(x.grad)
    

tensor([ 5., 10., 15., 20.], dtype=torch.float64)
tensor([ 5., 10., 15., 20.], dtype=torch.float64)
tensor([ 5., 10., 15., 20.], dtype=torch.float64)


In [8]:
# 3. Example: minimising y = x**2 with plain gradient descent
LEARNING_RATE = 0.001   # step size of each update
N_STEPS = 1000          # number of update steps

x = torch.tensor(10, requires_grad=True, dtype=torch.float64)

for _ in range(N_STEPS):
    y = x ** 2                 # forward pass
    if x.grad is not None:     # clear the gradient left by the previous step
        x.grad.zero_()
    y.backward()               # backward pass: x.grad = 2 * x

    # Update the parameter in place with tracking switched off, so the step
    # itself is not recorded by autograd. This replaces rebinding `x.data`.
    with torch.no_grad():
        x -= LEARNING_RATE * x.grad

    print("%.10f" % x.item())

9.9800000000
9.9600400000
9.9401199200
9.9202396802
9.9003992008
9.8805984024
9.8608372056
9.8411155312
9.8214333001
9.8017904335
9.7821868527
9.7626224789
9.7430972340
9.7236110395
9.7041638174
9.6847554898
9.6653859788
9.6460552069
9.6267630965
9.6075095703
9.5882945511
9.5691179620
9.5499797261
9.5308797666
9.5118180071
9.4927943711
9.4738087824
9.4548611648
9.4359514425
9.4170795396
9.3982453805
9.3794488897
9.3606899920
9.3419686120
9.3232846747
9.3046381054
9.2860288292
9.2674567715
9.2489218580
9.2304240143
9.2119631662
9.1935392399
9.1751521614
9.1568018571
9.1384882534
9.1202112769
9.1019708543
9.0837669126
9.0655993788
9.0474681800
9.0293732437
9.0113144972
8.9932918682
8.9753052845
8.9573546739
8.9394399645
8.9215610846
8.9037179624
8.8859105265
8.8681387055
8.8504024281
8.8327016232
8.8150362200
8.7974061475
8.7798113352
8.7622517126
8.7447272091
8.7272377547
8.7097832792
8.6923637126
8.6749789852
8.6576290272
8.6403137692
8.6230331417
8.6057870754
8.5885755012
8.5713983502

In [6]:
# Converting a tensor that requires grad to NumPy
x1 = torch.tensor([10.0, 20.0], dtype=torch.float64, requires_grad=True)
# x1.numpy() raises:
#   Can't call numpy() on Tensor that requires grad. Use tensor.detach().numpy() instead.
x1_detached = x1.detach()
print(x1_detached.numpy())   # works: the detached tensor does not require grad

# A detached tensor shares its data with the original
x = torch.tensor([10.0, 20.0], dtype=torch.float64, requires_grad=True)  # leaf tensor x
x_ = x.detach()              # second tensor object, detached from the graph
print(id(x), id(x_))         # two different Python objects
x_[0] = 100                  # write through the detached tensor ...
print(x)                     # ... and x shows the change; x still requires grad
print(x_)                    # x_ does not require grad

[10. 20.]
1728479000400 1728459621712
tensor([100.,  20.], dtype=torch.float64, requires_grad=True)
tensor([100.,  20.], dtype=torch.float64)
