In [1]:
###################################################################
# I verified the correctness of my formula numerically, using randomly generated inputs.
#==================================================================

In [2]:
########################################################
# d(theta.T @ A @ theta)/d(theta) = (A + A.T ) @ theta
#========================================================

import torch

# Random 4x4 float64 matrix that defines the quadratic form
A = torch.rand(4, 4, dtype=torch.float64)

# Random float64 vector, flagged in place so autograd tracks it as a leaf
theta = torch.rand(4, dtype=torch.float64).requires_grad_()

# `@` associates left to right, so this is (theta @ A) @ theta.
# theta is 1-D, so no explicit transpose is needed; the result is a scalar tensor.
quadratic_form = theta @ A @ theta
quadratic_form

tensor(4.0056, dtype=torch.float64, grad_fn=<DotBackward0>)

In [3]:
# Backpropagate from the scalar quadratic form; autograd stores the result in theta.grad.
# NOTE: backward() accumulates into theta.grad, so this cell is meant to run once per fresh theta.
quadratic_form.backward()
theta.grad # d(quadratic_form)/d(theta), as computed by autograd

tensor([3.8649, 2.5111, 2.8573, 3.4208], dtype=torch.float64)

In [4]:
# Closed-form gradient of the quadratic form: (A + A^T) theta,
# written as an explicit matrix-vector product.
prediction = torch.mv(A + A.t(), theta)
prediction

tensor([3.8649, 2.5111, 2.8573, 3.4208], dtype=torch.float64,
       grad_fn=<MvBackward0>)

In [5]:
# Compare autograd's gradient with the closed-form prediction.
# The tolerances are kept well above float64 machine epsilon (~2.2e-16): the two
# results come from different operation orders and may legitimately differ by a few
# ulps, which a 1e-16 tolerance would sometimes report as a mismatch.
torch.allclose(theta.grad, prediction, rtol=1e-12, atol=1e-12)

True

In [6]:
# Test: repeat the comparison on several freshly sampled (A, theta) pairs
def compute_loss(A, theta):
    """Return the squared error between autograd's gradient and the closed form.

    Differentiates the quadratic form ``theta^T A theta`` with autograd and
    compares the result with the analytic gradient ``(A + A^T) theta``.

    Args:
        A: Square 2-D tensor defining the quadratic form.
        theta: 1-D leaf tensor with ``requires_grad=True``.

    Returns:
        float: Sum of squared element-wise differences between ``theta.grad``
        and the analytic gradient.

    Side effects:
        ``theta.grad`` is reset and then holds the gradient of the quadratic
        form for this call only.
    """
    # backward() accumulates into theta.grad; clear any gradient left by an
    # earlier call so repeated use on the same theta is not compared against a sum.
    theta.grad = None

    quadratic_form = (theta.t() @ A) @ theta # scalar tensor
    quadratic_form.backward()

    with torch.no_grad():
        # The analytic gradient needs no autograd graph of its own.
        prediction = (A + A.t()) @ theta
        loss = ((theta.grad - prediction) ** 2).sum()
    return loss.item()

# Largest squared error still accepted as floating-point round-off
epsilon = 1e-16
for _ in range(10):
    # Draw a fresh random problem for every trial
    A = torch.rand(4, 4, dtype=torch.float64)
    theta = torch.rand(4, dtype=torch.float64, requires_grad=True)
    loss = compute_loss(A, theta)
    if not loss < epsilon:
        print(f"incorrect: loss = {loss}")
    else:
        print(f"correct: loss = {loss}")


correct: loss = 3.944304526105059e-31
correct: loss = 6.162975822039155e-32
correct: loss = 2.9582283945787943e-31
correct: loss = 2.465190328815662e-31
correct: loss = 4.930380657631324e-32
correct: loss = 0.0
correct: loss = 6.162975822039155e-32
correct: loss = 1.9721522630525295e-31
correct: loss = 0.0
correct: loss = 4.930380657631324e-32
