In [1]:
from __future__ import print_function
import torch as t
from torch.autograd import Variable as V

In [2]:
# grad_fn可以查看这个variable的反向传播函数
# z是add函数的输出，所以它的反向传播函数是AddBackward
x = V(t.ones(1))
b = V(t.rand(1), requires_grad = True)
w = V(t.rand(1), requires_grad = True)
y = w * x # 等价于y=w.mul(x)
z = y + b # 等价于z=y.add(b)
z.creator # 书上的版本用 z.grad_fn 在此我用creator代替grad_fn

<torch.autograd._functions.basic_ops.Add at 0x7f74cb4485c0>

In [3]:
# 后面的一部分巴拉巴拉的，我的PyTorch版本没有grad_fn进行不了，省略省略

In [4]:
def abs(x):
    if x.data[0]>0: return x
    else: return -x
x = V(t.ones(1),requires_grad=True)
y = abs(x)
y.backward()
x.grad

Variable containing:
 1
[torch.FloatTensor of size 1]

In [5]:
x = V(-1*t.ones(1),requires_grad=True)
y = abs(x)
y.backward()
print(x.grad)

Variable containing:
-1
[torch.FloatTensor of size 1]



In [6]:
def f(x):
    result = 1
    for ii in x:
        if ii.data[0]>0: result=ii*result
    return result
x = V(t.arange(-2,4),requires_grad=True)
y = f(x) # y = x[3]*x[4]*x[5]
y.backward()
x.grad

Variable containing:
 0
 0
 0
 6
 3
 2
[torch.FloatTensor of size 6]

In [7]:
# volatile=True是另外一个很重要的标志，它能够将所有依赖于它的节点全部设置为volatile=True，其优先级比requires_grad=True高。
# volatile=True的节点不会求导，即使requires_grad=True，也不会进行反向传播。
# 对于不需要反向传播的情景（如inference，测试推理时），该参数可实现一定程度的速度提升，并节省显存。
x = V(t.ones(1))
w = V(t.rand(1), requires_grad=True)
y = x * w
# y依赖于w，而w.requires_grad = True
x.requires_grad, w.requires_grad, y.requires_grad

(False, True, True)

In [8]:
x = V(t.ones(1), volatile=True)
w = V(t.rand(1), requires_grad = True)
y = x * w
# y依赖于w和x，但x.volatile = True, w.requires_grad = True
x.requires_grad, w.requires_grad, y.requires_grad

(False, True, False)

In [9]:
# 直接跳到91页底

In [11]:
x = V(t.ones(3), requires_grad=True)
w = V(t.ones(3), requires_grad=True)
y = x * w
# y依赖于w，而w.requires_grad = True
z = y.sum()
x.requires_grad, w.requires_grad, y.requires_grad

(True, True, True)

In [12]:
# 非叶子节点的grad计算完之后自动清空，y.grad是None
z.backward()
(x.grad, w.grad, y.grad)

(Variable containing:
  1
  1
  1
 [torch.FloatTensor of size 3], Variable containing:
  1
  1
  1
 [torch.FloatTensor of size 3], None)

In [13]:
# 法一：使用grad获取中间变量梯度
x = V(t.ones(3), requires_grad=True)
w = V(t.rand(3), requires_grad=True)
y = x * w
z = y.sum()
# z对y的梯度，隐式调用backward()
t.autograd.grad(z, y)

AttributeError: 'module' object has no attribute 'grad'

In [14]:
# 没有grad属性？怕又是因为我的PyTorch版本太旧

In [16]:
# 法二：使用hook
# hook是一个函数，输入是梯度，不应该有返回值
def variable_hook(grad):
    print('y的梯度： \r\n',grad)
    
x = V(t.ones(3), requires_grad=True)
w = V(t.rand(3), requires_grad=True)
y = x * w
# 注册hook
hook_handle = y.register_hook(variable_hook)
z = y.sum()
z.backward()

# 除非每次都要用hook，否则用完之后记得移除hook
hook_handle.remove()

y的梯度： 
 Variable containing:
 1
 1
 1
[torch.FloatTensor of size 3]



In [17]:
# ------------------------------------

In [18]:
x = V(t.arange(0,3), requires_grad=True)
y = x**2 + x*2
z = y.sum()
z.backward() # 从z开始反向传播
x.grad

Variable containing:
 2
 4
 6
[torch.FloatTensor of size 3]

In [19]:
x = V(t.arange(0,3), requires_grad=True)
y = x**2 + x*2
z = y.sum()
y_grad_variables = V(t.Tensor([1,1,1])) # dz/dy
y.backward(y_grad_variables) # 从y开始反向传播
x.grad

RuntimeError: element 0 of gradients tuple is not a Tensor or None

In [20]:
# 。。。告辞.jpg。。。