From 6b503545aacd723c3d6b0f57e07af3f32fbc288a Mon Sep 17 00:00:00 2001
From: Xiang Gao
Date: Thu, 13 Dec 2018 20:33:30 -0500
Subject: [PATCH] jacobian-vector-product

---
 beginner_source/blitz/autograd_tutorial.py | 51 ++++++++++++++++++++--
 1 file changed, 47 insertions(+), 4 deletions(-)

diff --git a/beginner_source/blitz/autograd_tutorial.py b/beginner_source/blitz/autograd_tutorial.py
index 92a1d17221b..4e52518111a 100644
--- a/beginner_source/blitz/autograd_tutorial.py
+++ b/beginner_source/blitz/autograd_tutorial.py
@@ -108,8 +108,48 @@
 # :math:`\frac{\partial o}{\partial x_i}\bigr\rvert_{x_i=1} = \frac{9}{2} = 4.5`.
 
 ###############################################################
-# You can do many crazy things with autograd!
+# Mathematically, if you have a vector-valued function :math:`\vec{y}=f(\vec{x})`,
+# then the gradient of :math:`\vec{y}` with respect to :math:`\vec{x}`
+# is a Jacobian matrix:
+#
+# .. math::
+#   J=\left(\begin{array}{ccc}
+#   \frac{\partial y_{1}}{\partial x_{1}} & \cdots & \frac{\partial y_{m}}{\partial x_{1}}\\
+#   \vdots & \ddots & \vdots\\
+#   \frac{\partial y_{1}}{\partial x_{n}} & \cdots & \frac{\partial y_{m}}{\partial x_{n}}
+#   \end{array}\right)
+#
+# Generally speaking, ``torch.autograd`` is an engine for computing
+# Jacobian-vector products. That is, given any vector
+# :math:`v=\left(\begin{array}{cccc} v_{1} & v_{2} & \cdots & v_{m}\end{array}\right)^{T}`,
+# it computes the product :math:`J\cdot v`. If :math:`v` happens to be
+# the gradient of a scalar function :math:`l=g\left(\vec{y}\right)`,
+# that is,
+# :math:`v=\left(\begin{array}{ccc}\frac{\partial l}{\partial y_{1}} & \cdots & \frac{\partial l}{\partial y_{m}}\end{array}\right)^{T}`,
+# then by the chain rule, the Jacobian-vector product is the
+# gradient of :math:`l` with respect to :math:`\vec{x}`:
+#
+# .. math::
+#   J\cdot v=\left(\begin{array}{ccc}
+#   \frac{\partial y_{1}}{\partial x_{1}} & \cdots & \frac{\partial y_{m}}{\partial x_{1}}\\
+#   \vdots & \ddots & \vdots\\
+#   \frac{\partial y_{1}}{\partial x_{n}} & \cdots & \frac{\partial y_{m}}{\partial x_{n}}
+#   \end{array}\right)\left(\begin{array}{c}
+#   \frac{\partial l}{\partial y_{1}}\\
+#   \vdots\\
+#   \frac{\partial l}{\partial y_{m}}
+#   \end{array}\right)=\left(\begin{array}{c}
+#   \frac{\partial l}{\partial x_{1}}\\
+#   \vdots\\
+#   \frac{\partial l}{\partial x_{n}}
+#   \end{array}\right)
+#
+# This property of the Jacobian-vector product makes it very
+# convenient to feed external gradients into a model that has a
+# non-scalar output.
+###############################################################
+# Now let's take a look at an example of a Jacobian-vector product:
 
 x = torch.randn(3, requires_grad=True)
 
 y = x * 2
@@ -120,9 +160,12 @@
 print(y)
 
 ###############################################################
-#
-gradients = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float)
-y.backward(gradients)
+# In this case, ``y`` is no longer a scalar. ``torch.autograd``
+# cannot compute the full Jacobian directly, but if we just
+# want the Jacobian-vector product, simply pass the vector to
+# ``backward`` as an argument:
+v = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float)
+y.backward(v)
 
 print(x.grad)
 
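Note (not part of the patch, and ignored by ``git am``): the claim that
``backward(v)`` returns the Jacobian-vector product can be sanity-checked by
hand for the tutorial's toy function, since there ``y = (2**k) * x`` for some
integer ``k`` and the Jacobian is just ``(2**k) * I``. A minimal sketch,
assuming the same setup as in the tutorial:

    import torch

    x = torch.randn(3, requires_grad=True)

    y = x * 2
    while y.data.norm() < 1000:
        y = y * 2

    v = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float)
    y.backward(v)

    # The Jacobian of y = (2**k) * x is (2**k) * I, so J.v = (2**k) * v.
    scale = (y / x).detach()  # elementwise scale factor, every entry is 2**k
    assert torch.allclose(x.grad, scale * v)
    print(x.grad)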
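A second illustrative sketch (again an assumption, not part of the patch): even
though ``torch.autograd`` only computes Jacobian-vector products, the full
Jacobian of a small function can still be recovered with one ``backward`` call
per output element, each call passing a standard basis vector and yielding the
gradient of one :math:`y_i` with respect to :math:`\vec{x}`:

    import torch

    x = torch.randn(3, requires_grad=True)
    y = x * 2  # toy function; its Jacobian is 2 * I

    rows = []
    for i in range(y.numel()):
        v = torch.zeros_like(y)
        v[i] = 1.0
        y.backward(v, retain_graph=True)  # keep the graph for the next pass
        rows.append(x.grad.clone())       # gradient of y_i with respect to x
        x.grad.zero_()                    # reset the accumulated gradient

    J = torch.stack(rows)
    print(J)  # 2 * identity, one slice per output element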