1、torch.autograd.functional.jacobian(func, inputs, ...)：计算给定函数 func 在输入 inputs 处的雅可比矩阵

In [11]:
import torch
from torch.autograd.functional import jacobian
def exp_reducer(x):
    """Exponentiate ``x`` element-wise, then sum the result along dim 1."""
    exponentials = x.exp()
    return exponentials.sum(dim=1)


# Random 2x2 input; no seed is set in this cell, so the values change per run.
inputs = torch.rand(2, 2)
# Jacobian of exp_reducer evaluated at `inputs`.
jacobian(exp_reducer, inputs)

tensor([[[2.1630, 1.1608],
         [0.0000, 0.0000]],

        [[0.0000, 0.0000],
         [2.0105, 1.4772]]])

In [9]:
# Same Jacobian call, but with create_graph=True the result carries a grad_fn
# (see the recorded output), so it can itself be differentiated.
# NOTE(review): the recorded values differ from the previous cell; the
# execution counts (In [9] vs In [11]) suggest `inputs` was re-sampled in
# between -- confirm by re-running top to bottom.
jacobian(exp_reducer,inputs,create_graph=True)

tensor([[[1.0200, 2.3459],
         [0.0000, 0.0000]],

        [[0.0000, 0.0000],
         [2.5761, 2.2953]]], grad_fn=<ViewBackward0>)

In [14]:
# Offset vector of 3 standard-normal samples; func adds it to its input.
a = torch.randn(3)
a

tensor([1.3250, 0.2230, 0.2210])

In [15]:
def func(x):
    """Shift ``x`` by the global tensor ``a`` (looked up when called)."""
    shifted = a + x
    return shifted


# Point at which the Jacobian of func is evaluated.
x = torch.randn(3)
x

tensor([-0.4069, -0.5715, -0.7206])

In [16]:
# func(x) = a + x, so each output depends only on the matching input with
# slope 1: the Jacobian is the identity matrix.
jacobian(func,x)

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])

2、在 PyTorch 中如何计算对向量的导数

In [17]:
# Method 1: compute the gradient with backward()
a

tensor([1.3250, 0.2230, 0.2210])

In [18]:
# Fresh leaf tensor that tracks gradients, so backward() can fill x.grad.
x = torch.randn(3, requires_grad=True)

In [20]:
# Forward pass; y records a grad_fn because x requires grad.
y=func(x)
y

tensor([ 1.4357, -1.7285,  0.4050], grad_fn=<AddBackward0>)

In [21]:
# backward() adds into x.grad rather than overwriting it, so drop any stale
# gradient first; a re-run then does not stack on the previous result.
x.grad = None
# y is a vector, so backward needs an explicit upstream gradient; with all
# ones, x.grad receives ones @ J, i.e. the gradient at the point x.
y.backward(torch.ones_like(y))

In [22]:
x.grad

tensor([1., 1., 1.])

In [23]:
# Method 2: compute it via the Jacobian
jacobian(func,x)

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])

In [24]:
# Vector-Jacobian product: multiplying the all-ones upstream gradient by J
# gives the same values that backward() stored in x.grad.
torch.ones_like(y) @ jacobian(func,x) # "@" is matrix multiplication

tensor([1., 1., 1.])

3、如何计算对矩阵的导数（梯度运算）

In [26]:
# Method 1: compute the gradients with backward()
# Two leaf matrices, (2, 3) and (3, 2), both tracking gradients.
a = torch.randn(2, 3, requires_grad=True)
b = torch.randn(3, 2, requires_grad=True)

In [27]:
a

tensor([[ 0.5412,  0.8503, -1.2114],
        [ 0.2175,  0.2749,  1.3860]], requires_grad=True)

In [28]:
b

tensor([[-1.1325, -0.2740],
        [-1.1552,  0.0024],
        [-1.2445, -0.0039]], requires_grad=True)

In [29]:
a @ b

tensor([[-0.0877, -0.1415],
        [-2.2887, -0.0643]], grad_fn=<MmBackward0>)

In [30]:
# backward() adds into .grad rather than overwriting it, so reset both leaves
# first; otherwise re-running this cell doubles a.grad and b.grad and they no
# longer match the Jacobian-based results computed later in the notebook.
a.grad = None
b.grad = None
y = a @ b
# y is a matrix, so backward needs an explicit upstream gradient (all ones):
# a.grad = ones @ b.T and b.grad = a.T @ ones.
y.backward(torch.ones_like(y))

In [31]:
a.grad

tensor([[-1.4065, -1.1528, -1.2484],
        [-1.4065, -1.1528, -1.2484]])

In [32]:
b.grad

tensor([[0.7588, 0.7588],
        [1.1252, 1.1252],
        [0.1745, 0.1745]])

In [33]:
# Method 2: compute it via the Jacobian
def func(a):
    """Multiply ``a`` by the global matrix ``b`` (``a @ b``).

    The parameter ``a`` shadows the global tensor of the same name inside the
    body, and this definition replaces the earlier ``func`` that added ``a``.
    """
    product = a @ b
    return product


a

tensor([[ 0.5412,  0.8503, -1.2114],
        [ 0.2175,  0.2749,  1.3860]], requires_grad=True)

In [34]:
func(a)

tensor([[-0.0877, -0.1415],
        [-2.2887, -0.0643]], grad_fn=<MmBackward0>)

In [35]:
func(a[0])

tensor([-0.0877, -0.1415], grad_fn=<SqueezeBackward3>)

In [38]:
# Gradient of the first row of a: ones @ J, with J the Jacobian of func at a[0].
# NOTE: this relies on the func(a) definition from the earlier cell; func is
# redefined further down with b as its argument, so running this cell after
# that redefinition computes something else.
torch.ones_like(func(a[0])) @ jacobian(func,a[0])

tensor([-1.4065, -1.1528, -1.2484])

In [39]:
a.grad

tensor([[-1.4065, -1.1528, -1.2484],
        [-1.4065, -1.1528, -1.2484]])

In [40]:
# Gradient of the second row of a: ones @ J, with J the Jacobian of func at a[1].
# The Jacobian of a[i] @ b with respect to a[i] does not depend on a[i], which
# is why every row of a.grad holds the same values.
torch.ones_like(func(a[1])) @ jacobian(func,a[1])

tensor([-1.4065, -1.1528, -1.2484])

In [41]:
a

tensor([[ 0.5412,  0.8503, -1.2114],
        [ 0.2175,  0.2749,  1.3860]], requires_grad=True)

In [42]:
b

tensor([[-1.1325, -0.2740],
        [-1.1552,  0.0024],
        [-1.2445, -0.0039]], requires_grad=True)

In [43]:
a@b

tensor([[-0.0877, -0.1415],
        [-2.2887, -0.0643]], grad_fn=<MmBackward0>)

In [44]:
def func(b):
    """Multiply the global matrix ``a`` by ``b`` (``a @ b``).

    Replaces the earlier ``func(a)``: here ``b`` is the argument and ``a`` is
    read from the notebook's global scope when the function is called.
    """
    product = a @ b
    return product

In [45]:
# Gradient of the first column of b: ones @ J, with J the Jacobian of func at b[:,0].
# Uses the func(b) definition (a is read from the global scope).
torch.ones_like(func(b[:,0])) @ jacobian(func,b[:,0])

tensor([0.7588, 1.1252, 0.1745])

In [47]:
b.grad

tensor([[0.7588, 0.7588],
        [1.1252, 1.1252],
        [0.1745, 0.1745]])

In [48]:
# Gradient of the second column of b: ones @ J, with J the Jacobian of func at b[:,1].
# The Jacobian of a @ v with respect to v does not depend on v, which is why
# every column of b.grad holds the same values.
torch.ones_like(func(b[:,1])) @ jacobian(func,b[:,1])

tensor([0.7588, 1.1252, 0.1745])

以上是把对矩阵求梯度的问题，拆成对其每一行（或每一列）向量求梯度来计算。