In [46]:
import torch
import torch.nn as nn

# Example two-layer MLP: input_size -> hidden_size (ReLU) -> output_size
class MLP(nn.Module):
    """Two-layer perceptron with a single ReLU hidden layer.

    Args:
        input_size: Number of features in each input row.
        output_size: Number of values produced for each input row.
        hidden_size: Width of the hidden layer. Defaults to 4, the value
            that used to be hard-coded, so existing two-argument calls
            build exactly the same network.
    """

    def __init__(self, input_size, output_size, hidden_size=4):
        super().__init__()
        # fc1 is created before fc2 so parameter order (and RNG consumption
        # during initialisation) is the same as before.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return ``fc2(relu(fc1(x)))``; no activation is applied to the output."""
        hidden = torch.relu(self.fc1(x))
        return self.fc2(hidden)

# Fix the RNG seed so the random weights, the random input, and therefore the
# printed Jacobian are identical on every Restart & Run All.
torch.manual_seed(0)

# Initialize the MLP
mlp = MLP(input_size=4, output_size=10)
input_tensor = torch.randn(1, 4, requires_grad=True)  # Input tensor with requires_grad=True

# Forward pass
output = mlp(input_tensor)

# Get all the parameters (weights and biases) of the model
params = list(mlp.parameters())
print(sum(p.numel() for p in params))

# Compute the Jacobian of the output with respect to every parameter,
# one row per output element.
jacobian_rows = []
for i in range(output.size(1)):  # Iterate over each output element
    # Selector that is 1 in output column i and 0 elsewhere. With the batch
    # size of 1 used here it picks out a single scalar output; with a larger
    # batch the gradients of column i would be summed over the batch.
    grad_output = torch.zeros_like(output)
    grad_output[:, i] = 1
    gradients = torch.autograd.grad(
        outputs=output,
        inputs=params,
        grad_outputs=grad_output,
        # The same forward graph is differentiated once per output element,
        # so it must not be freed after the first call.
        retain_graph=True,
        # create_graph is left at its default (False): only first-order
        # derivatives are needed, so the rows are plain tensors and the
        # resulting Jacobian is not itself differentiable.
    )
    # Concatenate the per-parameter gradients into one flat row, in the same
    # order as `params`.
    jacobian_rows.append(torch.cat([grad.flatten() for grad in gradients]))

# Stack the Jacobian rows to form the full Jacobian matrix of shape
# (number of output elements, total number of parameters).
jacobian_wrt_weights = torch.stack(jacobian_rows, dim=0)


print(jacobian_wrt_weights)

70
torch.Size([70])
torch.Size([70])
torch.Size([70])
torch.Size([70])
torch.Size([70])
torch.Size([70])
torch.Size([70])
torch.Size([70])
torch.Size([70])
torch.Size([70])
tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00, -2.4993e-01, -4.9217e-02, -5.2218e-02,
         -1.9784e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  1.2275e-01,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  4.2927e-01,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0

Jacobian matrix:
(tensor([[[[0., 0.],
          [0., 0.]],

         [[0., 0.],
          [0., 0.]]],


        [[[0., 0.],
          [0., 0.]],

         [[0., 0.],
          [0., 0.]]]]), tensor([[[0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.]]]))
Jacobian for parameter 0:
tensor([[[[0., 0.],
          [0., 0.]],

         [[0., 0.],
          [0., 0.]]],


        [[[0., 0.],
          [0., 0.]],

         [[0., 0.],
          [0., 0.]]]])
Jacobian for parameter 1:
tensor([[[0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.]]])
