In [2]:
import torch

# Make a grid

In [None]:
N_side = 8

x_vec = torch.linspace(0, 1, N_side)

# xx colum is all zeros
xx, yy  = torch.meshgrid(x_vec, x_vec, indexing = 'xy')

# flatten and concat
x_full = torch.cat([xx.reshape(-1, 1), yy.reshape(-1, 1)], dim = 1)
print(x_full.shape)

torch.Size([64, 2])


# Understand order of outputs:

Stregtegy: All inputs/outputs have different dimensionality so that we can differentite them in the shape of the output.

## Case 1: 

- N inputs/outputs: 5
- (x) input dimensionality: 2D
- (y) output dimensionality: 3D 

In [31]:
x_N5 = x_full[[3, 18, 11, 60, 61], :]

def f_2dto3d(x):
    # Define the function f(x)
    y1 = x[:, 0] ** 2 + x[:, 1] ** 2
    # print(y1.shape)
    y2 = torch.sin(x[:, 0]) + torch.cos(x[:, 1])
    y3 = x[:, 0] * x[:, 1]
    return torch.concatenate([y1.unsqueeze(-1), y2.unsqueeze(-1), y3.unsqueeze(-1)], dim = -1)

autograd_jac = torch.autograd.functional.jacobian(f_2dto3d, x_N5)
autograd_jac.shape

torch.Size([5, 3, 5, 2])

- The output is [N, output_dim, N, input_dim].
- From the [documentation](https://pytorch.org/docs/stable/generated/torch.autograd.functional.jacobian.html) "Jacobian[i][j] will contain the Jacobian of the **ith output** and **jth input** and will have as size the concatenation of the sizes of the corresponding output and the corresponding input"
- rows i.e. [i] are the output dims, as in the proper Jacobian definition.

In [32]:
print(torch.func.jacfwd(f_2dto3d)(x_N5).shape)
print(torch.func.jacrev(f_2dto3d)(x_N5).shape)

torch.Size([5, 3, 5, 2])
torch.Size([5, 3, 5, 2])


## Case 2: 

- N inputs/outputs: 5
- (x) input dimensionality: 2D
- (y) output dimensionality: 3x4D 

In [33]:
x_N5 = x_full[[3, 18, 11, 60, 61], :]

def f_2dto3x4d(x):
    # Define the function f(x)
    y1 = x[:, 0] ** 2 + x[:, 1] ** 2
    # print(y1.shape)
    y2 = torch.sin(x[:, 0]) + torch.cos(x[:, 1])
    y3 = x[:, 0] * x[:, 1]
    out1 = torch.concatenate([y1.unsqueeze(-1), y2.unsqueeze(-1), y3.unsqueeze(-1)], dim = -1).unsqueeze(-1)
    return torch.concatenate([out1, 2 * out1, 3 * out1, 4 * out1], dim = -1)

print("Shape of output:")
print(f_2dto3x4d(x_N5).shape)

autograd_jac = torch.autograd.functional.jacobian(f_2dto3x4d, x_N5)
autograd_jac.shape

Shape of output:
torch.Size([5, 3, 4])


torch.Size([5, 3, 4, 5, 2])

- The output is [N, output_dim1, output_dim2, N, input_dim]

In [34]:
print(torch.func.jacfwd(f_2dto3x4d)(x_N5).shape)
print(torch.func.jacrev(f_2dto3x4d)(x_N5).shape)

torch.Size([5, 3, 4, 5, 2])
torch.Size([5, 3, 4, 5, 2])


# Case 3:

This has is most like ours because our input dim (of A) is N x 2 x 2 

- N inputs/outputs: 5
- (x) input dimensionality: 2x4D
- (y) output dimensionality: 3D 

In [37]:
x_N5 = x_full[[3, 18, 11, 60, 61], :]
x_N5_2x4 = torch.cat([x_N5.unsqueeze(-1), x_N5.unsqueeze(-1), x_N5.unsqueeze(-1), x_N5.unsqueeze(-1)], dim = -1)
x_N5_2x4.shape

torch.Size([5, 2, 4])

In [38]:
def f_2x4dto3d(x):
    # Define the function f(x)
    y1 = x[:, 0].mean(-1) ** 2 + x[:, 1].mean(-1) ** 2
    # print(y1.shape)
    y2 = torch.sin(x[:, 0].mean(-1)) + torch.cos(x[:, 1].mean(-1))
    y3 = x[:, 0].mean(-1) * x[:, 1].mean(-1)
    return torch.concatenate([y1.unsqueeze(-1), y2.unsqueeze(-1), y3.unsqueeze(-1)], dim = -1)

print(f_2dto3d(x_N5_2x4).shape)

autograd_jac = torch.autograd.functional.jacobian(f_2x4dto3d, x_N5_2x4)
autograd_jac.shape

torch.Size([5, 4, 3])


torch.Size([5, 3, 5, 2, 4])

- The output is [N, output_dim1, N, input_dim1, input_dim2]
- So autograd returns all output dims first, followed by N, and then all input dims.

In [41]:
print(torch.func.jacfwd(f_2x4dto3d)(x_N5_2x4).shape)
print(torch.func.jacrev(f_2x4dto3d)(x_N5_2x4).shape)

torch.Size([5, 3, 5, 2, 4])
torch.Size([5, 3, 5, 2, 4])


# Check values

In [None]:
def f(x):
    # Define the function f(x), where x is a tensor
    y1 = x[:, 0] ** 2 + x[:, 1] ** 2
    # print(y1.shape)
    y2 = torch.sin(x[:, 0]) + torch.cos(x[:, 1])
    return torch.concatenate([y1.unsqueeze(-1), y2.unsqueeze(-1)], dim = -1)

def analytical_jacobian_f(x):
    # Define the Jacobian of f(x)
    df1_dx = 2 * x[:, 0]
    df1_dy = 2 * x[:, 1]
    df2_dx = torch.cos(x[:, 0])
    df2_dy = - torch.sin(x[:, 1])
    # This is the correct concat
    row1 = torch.stack([df1_dx, df1_dy], dim = -1)
    row2 = torch.stack([df2_dx, df2_dy], dim = -1)
    # unqueeze at 1 because 0 is batch dim
    return torch.cat([row1.unsqueeze(1), row2.unsqueeze(1)], dim = 1)

torch.func.vmap(torch.func.jacfwd(f))(x_N5)

In [19]:
def f_single(x_single):
    # Define the function f(x), where x is a tensor
    y1 = x_single[0] ** 2 + x_single[1] ** 2
    # print(y1.shape)
    y2 = torch.sin(x_single[0]) + torch.cos(x_single[1])
    return torch.concatenate([y1.unsqueeze(-1), y2.unsqueeze(-1)], dim = -1)

In [29]:
# extract one value for single value formulation
x_N5_single = x_N5[3, :]
# pass through
f_single(x_N5_single)

torch.func.vmap(torch.func.jacfwd(f_single))(x_N5)

# torch.func.vmap(torch.func.jacfwd(f_single))(x_N5) == analytical_jacobian_f(x_N5)

tensor([[[ 0.8571,  0.0000],
         [ 0.9096,  0.0000]],

        [[ 0.5714,  0.5714],
         [ 0.9595, -0.2818]],

        [[ 0.8571,  0.2857],
         [ 0.9096, -0.1424]],

        [[ 1.1429,  2.0000],
         [ 0.8411, -0.8415]],

        [[ 1.4286,  2.0000],
         [ 0.7556, -0.8415]]])

In [26]:
analytical_jacobian_f(x_N5)

tensor([[[ 0.8571,  0.0000],
         [ 0.9096, -0.0000]],

        [[ 0.5714,  0.5714],
         [ 0.9595, -0.2818]],

        [[ 0.8571,  0.2857],
         [ 0.9096, -0.1424]],

        [[ 1.1429,  2.0000],
         [ 0.8411, -0.8415]],

        [[ 1.4286,  2.0000],
         [ 0.7556, -0.8415]]])

In [104]:
# Select 5 points
x_N5 = x_full[[3, 18, 11, 60, 61], :]
x_N5

tensor([[0.4286, 0.0000],
        [0.2857, 0.2857],
        [0.4286, 0.1429],
        [0.5714, 1.0000],
        [0.7143, 1.0000]])

In [106]:
analytical_jacobian_f(x_N5)

tensor([[[ 0.8571,  0.0000],
         [ 0.9096, -0.0000]],

        [[ 0.5714,  0.5714],
         [ 0.9595, -0.2818]],

        [[ 0.8571,  0.2857],
         [ 0.9096, -0.1424]],

        [[ 1.1429,  2.0000],
         [ 0.8411, -0.8415]],

        [[ 1.4286,  2.0000],
         [ 0.7556, -0.8415]]])

In [108]:
jac_autograd = torch.autograd.functional.jacobian(f, x_N5)
torch.einsum("bobi -> boi", jac_autograd)

tensor([[[ 0.8571,  0.0000],
         [ 0.9096,  0.0000]],

        [[ 0.5714,  0.5714],
         [ 0.9595, -0.2818]],

        [[ 0.8571,  0.2857],
         [ 0.9096, -0.1424]],

        [[ 1.1429,  2.0000],
         [ 0.8411, -0.8415]],

        [[ 1.4286,  2.0000],
         [ 0.7556, -0.8415]]])

In [23]:
from torch.func import jacrev, jacfwd, vmap

In [110]:
jacfwd(f)(x_N5).shape

torch.Size([5, 2, 5, 2])

# Jacobian

Row 1: f1, row2: f2
"Jacobian[i][j] will contain the Jacobian of the ith output and jth input"

## Learnings:
- **torch.autograd.functional.jacobian**: 
    - default: create_graph = False! We probably need to turn this on?!
    - function is quite slow
    - [N, output_dim, N, input_dim(s)]
    - vectorize does not work
- **torch.func.jacref**
    - the function passed into jacref is meant to handle a single input.
    - the vmap formulation - which saves a lot of times - is designed to take in a function that takes in one item at the time. 

In [60]:
def exp_reducer(x):
    return x.exp().sum(dim = 1)

inputs = torch.rand(3, 2)
print(exp_reducer(inputs)) # only f1 (i.e. y1) 1D output

# torch.autograd.functional.jacobian(exp_reducer, inputs)[0].shape
print(torch.autograd.functional.jacobian(exp_reducer, inputs).shape)
jac = torch.autograd.functional.jacobian(exp_reducer, inputs)

# Only one row expected f1/x1 f1/x2
jac[:, :, :]

torch.einsum("bbi -> bi", jac)

tensor([3.3938, 2.5333, 2.2979])
torch.Size([3, 3, 2])


tensor([[1.6762, 1.7176],
        [1.0402, 1.4931],
        [1.1861, 1.1117]])