# Parametrized linear vector-valued function

Let $f: \mathbb{R^n} \times \mathbb{R}^{p} \rightarrow \mathbb{R}^m$ be a parametraized vector field given by
$$
f(x; \theta) = f(x; A, b) = x^T A + b^T, \quad A \in \mathbb{R}^{n \times m}, \quad b \in \mathbb{R}^m, \quad p = n(m + 1).
$$
Its partial derivatives are
$$
\frac{\partial}{\partial a_{ij}}f(x, \theta) = x_i, \quad 
\frac{\partial}{\partial b_j}f(x, \theta) = 1 .
$$

In [52]:
import torch

## backward method

1. no batch

In [53]:
def f(x, a, b):
    return torch.matmul(a, x) + b

In [54]:
n = 2
m = 3

x = torch.randn(n, requires_grad=False, dtype=torch.float)
a = torch.randn((m, n), requires_grad=True, dtype=torch.float)
b = torch.randn(m, requires_grad=True, dtype=torch.float)
print(x)

tensor([ 0.1635, -1.1557])


In [55]:
y = f(x, a, b)
basis_vectors = torch.eye(m)
v = basis_vectors[0]
y.backward(v, retain_graph=True)
    
print(a.grad.shape)
print(b.grad.shape)

a.grad.zero_()
b.grad.zero_()

torch.Size([3, 2])
torch.Size([3])


tensor([0., 0., 0.])

In [56]:
# evaluate f
y = f(x, a, b)

# basis vectors
basis_vectors = torch.eye(m)

# preallocate jacobian matrix
p = n * (m+1)
jac_y = torch.empty(m, p)

for i, v in enumerate(basis_vectors):
    
    # use vector-Jacobian product
    y.backward(v, retain_graph=True)
    
    # save gradients
    print(a.grad)
    print(b.grad)
    #jac_y[i, 1] = x2.grad
    #jac_y[i, 2] = x3.grad
    
    # reset gradients
    a.grad.zero_()
    b.grad.zero_()

tensor([[ 0.1635, -1.1557],
        [ 0.0000,  0.0000],
        [ 0.0000,  0.0000]])
tensor([1., 0., 0.])
tensor([[ 0.0000,  0.0000],
        [ 0.1635, -1.1557],
        [ 0.0000,  0.0000]])
tensor([0., 1., 0.])
tensor([[ 0.0000,  0.0000],
        [ 0.0000,  0.0000],
        [ 0.1635, -1.1557]])
tensor([0., 0., 1.])


In [57]:
jac_y.shape

torch.Size([3, 8])

In [11]:
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import grad
from torch.autograd.functional import jacobian

In [8]:
jacobian(model, (x,))

(tensor([[-0.1968, -0.0537],
         [ 0.2123,  0.5456],
         [-0.4865,  0.0271]]),)

In [None]:
# preallocate Jacobian matrix with respect to the coefficients
J = torch.empty(batch_size, 9)

for i in range(batch_size):
    # use vector-Jacobian product
    v = torch.eye(batch_size)[i].reshape(batch_size, 1)
    f.backward(v, retain_graph=True)
    
    # save gradients
    J[i, 0] = a.grad
    J[i, 1] = b.grad
    
    # reset gradients
    a.grad.zero_()
    b.grad.zero_()

# show Jacobian
print(J)

## grad() method

## jacobian() method

In [58]:
from torch.autograd.functional import jacobian

In [59]:
jac_y_a, jac_y_b = jacobian(lambda a, b : f(x, a, b), (a, b))

In [60]:
jac_y_a.shape, jac_y_b.shape

(torch.Size([3, 3, 2]), torch.Size([3, 3]))

In [65]:
#x, jac_y_a, jac_y_b