In [1]:
###################################################################
# I verified the formula numerically on random inputs generated with torch.rand.
#==================================================================

$$\frac{{\partial(\theta^T A \theta)}}{{\partial\theta}} = (A + A^T) \theta$$
where $$ \theta \in \mathbb{R}^d, A \in \mathbb{R}^{d\times d} $$


In [2]:
########################################################
# d(theta.T @ A @ theta)/d(theta) = (A + A.T ) @ theta
#========================================================

import torch

# Random 4x4 float64 matrix; it is not symmetric in general.
A = torch.rand(4, 4, dtype=torch.float64)

# Random length-4 parameter vector, tracked by autograd so that the
# derivative of the quadratic form with respect to it can be requested.
theta = torch.rand(4, dtype=torch.float64, requires_grad=True)

# Quadratic form theta^T A theta, evaluated left to right; the result is 0-D.
q = torch.matmul(theta.t() @ A, theta)
q

tensor(3.0654, dtype=torch.float64, grad_fn=<DotBackward0>)

In [3]:
# Back-propagate through the scalar q; autograd accumulates dq/d(theta) into theta.grad.
q.backward()
left_side = theta.grad # dq/d(theta) from autograd: the left-hand side of the identity
left_side

tensor([2.5472, 3.6170, 2.3156, 2.1638], dtype=torch.float64)

In [4]:
# Closed-form gradient of theta^T A theta: (A + A^T) theta, the right-hand side of the identity.
right_side = torch.matmul(A + A.t(), theta)
right_side

tensor([2.5472, 3.6170, 2.3156, 2.1638], dtype=torch.float64,
       grad_fn=<MvBackward0>)

In [5]:
# Compare the autograd gradient with the closed form. The tolerances must sit
# above float64 machine epsilon (~2.2e-16): with rtol=atol=1e-16 two results
# that differ only by floating-point rounding are reported as different.
torch.allclose(left_side, right_side, rtol=1e-12, atol=1e-12)

True

In [6]:
# test
def compute_loss(A, theta):
    """Squared error between autograd's gradient and the closed form (A + A^T) theta.

    Args:
        A: square 2-D tensor defining the quadratic form theta^T A theta.
        theta: tensor with ``requires_grad=True``; the notebook passes a 1-D vector.

    Returns:
        A Python float: the sum of squared differences between
        d(theta^T A theta)/d(theta) obtained from autograd and (A + A^T) theta.

    Notes:
        The gradient is requested with ``torch.autograd.grad`` so nothing is
        accumulated into ``theta.grad``. Calling this function repeatedly with
        the same ``theta`` therefore gives the same answer every time, and
        ``theta.grad`` is left untouched.
    """
    q = (theta.t() @ A) @ theta  # scalar tensor
    # autograd.grad returns a tuple with one gradient per requested input.
    (left_side,) = torch.autograd.grad(q, theta)
    with torch.no_grad():
        # The closed form is only compared, never differentiated, so no graph is built.
        right_side = (A + A.t()) @ theta
        loss = ((left_side - right_side) ** 2).sum()
    return loss.item()

# Threshold on the squared error below which the two gradients count as equal.
epsilon = 1e-16

# Repeat the check on ten independent random (A, theta) pairs.
for _ in range(10):
    A = torch.rand(4, 4, dtype=torch.float64)
    theta = torch.rand(4, dtype=torch.float64, requires_grad=True)
    loss = compute_loss(A, theta)
    print(f"correct: loss = {loss}" if loss < epsilon else f"incorrect: loss = {loss}")


correct: loss = 4.930380657631324e-32
correct: loss = 1.232595164407831e-32
correct: loss = 4.930380657631324e-32
correct: loss = 5.9164567891575885e-31
correct: loss = 4.930380657631324e-32
correct: loss = 0.0
correct: loss = 2.7733391199176196e-32
correct: loss = 2.465190328815662e-31
correct: loss = 2.465190328815662e-31
correct: loss = 1.1093356479670479e-31


In [7]:
#####################################################
# What if A is symmetric?
######################################################

Let the matrix A be symmetric.

$$\frac{{\partial(\theta^T A \theta)}}{{\partial\theta}} = 2 A \theta$$
where $$ \theta \in \mathbb{R}^d, A \in \mathbb{R}^{d\times d} $$

In [8]:
import torch

# Random 4x4 float64 matrix, made symmetric by averaging it with its transpose.
A = torch.rand(4, 4, dtype=torch.float64)
A = 0.5 * (A + A.t())

# Random length-4 parameter vector, tracked by autograd.
theta = torch.rand(4, dtype=torch.float64, requires_grad=True)

# Quadratic form theta^T A theta; the result is a 0-D tensor.
quadratic_form = torch.matmul(theta.t() @ A, theta)
quadratic_form

tensor(0.5441, dtype=torch.float64, grad_fn=<DotBackward0>)

In [9]:
# Back-propagate through the scalar quadratic form; autograd accumulates the gradient into theta.grad.
quadratic_form.backward()
# NOTE: this is the derivative side of the identity, although it is named right_side here;
# the left/right naming is swapped relative to the first experiment in this notebook.
right_side = theta.grad # d(quadratic_form)/d(theta)
right_side

tensor([0.8044, 1.1669, 0.9762, 1.0060], dtype=torch.float64)

In [10]:
# Closed-form gradient for symmetric A: 2 A theta (the matrix is scaled first, then applied to theta).
left_side = torch.matmul(2 * A, theta)
left_side

tensor([0.8044, 1.1669, 0.9762, 1.0060], dtype=torch.float64,
       grad_fn=<MvBackward0>)

In [11]:
# Tolerances sit above float64 machine epsilon (~2.2e-16), so differences that are
# pure floating-point rounding cannot make the comparison fail.
torch.allclose(left_side, right_side, rtol=1e-12, atol=1e-12)

True