In [52]:
###################################################################
# I verified the formula below numerically on random inputs generated with torch.rand.
#==================================================================

$$\frac{{\partial(\theta^T A \theta)}}{{\partial\theta}} = (A + A^T) \theta$$
where $$ \theta \in \mathbb{R}^d, A \in \mathbb{R}^{d\times d} $$


In [53]:
########################################################
# d(theta.T @ A @ theta)/d(theta) = (A + A.T ) @ theta
#========================================================

import torch

# Random 4x4 float64 matrix; no symmetry is imposed on A in this section.
A = torch.rand(4, 4, dtype=torch.float64)

# Random parameter vector. requires_grad=True makes autograd record the
# operations applied to theta so that d(t)/d(theta) can be obtained later.
theta = torch.rand(4, dtype=torch.float64, requires_grad=True)

# Quadratic form theta^T A theta as a 0-D (scalar) tensor. theta is 1-D, so no
# explicit transpose is needed: vector @ matrix @ vector already contracts to a scalar.
t = theta @ A @ theta
t

tensor(0.9781, dtype=torch.float64, grad_fn=<DotBackward0>)

In [54]:
# Backpropagate through the scalar t; autograd stores d(t)/d(theta) in theta.grad.
t.backward()
left_side = theta.grad # d(t)/d(theta): left-hand side of the identity, computed by autograd
left_side

tensor([1.4762, 1.4292, 1.4856, 1.0659], dtype=torch.float64)

In [55]:
# Closed-form gradient (A + A^T) theta: the right-hand side of the identity.
right_side = (A + A.T) @ theta
right_side

tensor([1.4762, 1.4292, 1.4856, 1.0659], dtype=torch.float64,
       grad_fn=<MvBackward0>)

In [56]:
# Compare the autograd gradient with the closed-form gradient.
# float64 machine epsilon is about 2.2e-16, so tolerances of 1e-16 amount to a
# near-bitwise comparison that can fail on harmless rounding differences
# between the two computation paths. 1e-12 is still very strict for float64
# but is robust to last-bit rounding.
torch.allclose(left_side, right_side, rtol=1e-12, atol=1e-12)

True

In [57]:
# test
def compute_loss(A, theta):
    """Squared error between the autograd and closed-form gradients of theta^T A theta.

    Args:
        A: 2-D square tensor.
        theta: 1-D tensor with ``requires_grad=True`` whose length matches A.

    Returns:
        float: sum of squared differences between d(theta^T A theta)/d(theta)
        obtained from autograd and the closed form (A + A^T) theta.

    Note:
        The gradient is obtained with ``torch.autograd.grad`` rather than
        ``backward()``. ``backward()`` accumulates into ``theta.grad``, so
        calling this function twice with the same ``theta`` (or with a theta
        that already carries a gradient) would compare the closed form against
        a summed gradient and report a spurious error. As a consequence this
        function no longer writes to ``theta.grad``.
    """
    quadratic_form = (theta.t() @ A) @ theta  # scalar tensor
    # Returns a tuple with one gradient per requested input; nothing is accumulated.
    (left_side,) = torch.autograd.grad(quadratic_form, theta)
    with torch.no_grad():
        right_side = (A + A.t()) @ theta
        loss = ((left_side - right_side) ** 2).sum()
    return loss.item()

# Repeat the check on 10 independent random (A, theta) pairs and report, for
# each pair, whether the squared error falls below the threshold.
epsilon = 1e-16
for _ in range(10):
    A = torch.rand(4, 4, dtype=torch.float64)
    theta = torch.rand(4, dtype=torch.float64, requires_grad=True)
    loss = compute_loss(A, theta)
    print(f"correct: loss = {loss}" if loss < epsilon else f"incorrect: loss = {loss}")


correct: loss = 0.0
correct: loss = 2.9582283945787943e-31
correct: loss = 2.465190328815662e-31
correct: loss = 2.465190328815662e-31
correct: loss = 0.0
correct: loss = 4.930380657631324e-32
correct: loss = 1.9721522630525295e-31
correct: loss = 1.9721522630525295e-31
correct: loss = 1.9721522630525295e-31
correct: loss = 1.9721522630525295e-31


In [58]:
#####################################################
# What if A is symmetric?
######################################################

Let matrix A be symmetric, i.e. $A = A^T$. Then $(A + A^T)\theta = 2A\theta$, so the general formula reduces to:

$$\frac{{\partial(\theta^T A \theta)}}{{\partial\theta}} = 2 A \theta$$
where $$ \theta \in \mathbb{R}^d, A \in \mathbb{R}^{d\times d} $$

In [59]:
import torch

# Draw a random 4x4 float64 matrix, then symmetrize it by averaging it with
# its transpose so that A == A^T holds.
A = torch.rand(4, 4, dtype=torch.float64)
A = 0.5 * (A + A.T)

# Random parameter vector; requires_grad=True lets autograd differentiate
# the quadratic form with respect to theta.
theta = torch.rand(4, dtype=torch.float64, requires_grad=True)

# theta^T A theta as a scalar tensor (theta is 1-D, so no transpose is needed).
quadratic_form = theta @ A @ theta
quadratic_form

tensor(1.1511, dtype=torch.float64, grad_fn=<DotBackward0>)

In [60]:
# Backpropagate through the quadratic form; autograd stores the gradient in theta.grad.
quadratic_form.backward()
# NOTE(review): this is the autograd gradient, i.e. the left-hand side of the
# identity, yet it is stored as right_side here (the non-symmetric check earlier
# in the notebook calls the same quantity left_side).
right_side = theta.grad # d(quadratic_form)/d(theta)
right_side

tensor([1.6843, 2.6387, 1.4087, 0.9093], dtype=torch.float64)

In [61]:
# Closed-form gradient for symmetric A: 2 A theta.
# The section claims the gradient reduces to 2*A*theta when A is symmetric, so
# that expression is evaluated here instead of repeating the general
# (A + A^T) theta form, which would not exercise the symmetric special case.
# This is only valid because A was symmetrized when it was created.
left_side = (2 * A) @ theta
left_side

tensor([1.6843, 2.6387, 1.4087, 0.9093], dtype=torch.float64,
       grad_fn=<MvBackward0>)

In [62]:
# Compare the closed-form gradient with the autograd gradient.
# float64 machine epsilon is about 2.2e-16, so tolerances of 1e-16 amount to a
# near-bitwise comparison that can fail on harmless rounding differences
# between the two computation paths. 1e-12 is still very strict for float64
# but is robust to last-bit rounding.
torch.allclose(left_side, right_side, rtol=1e-12, atol=1e-12)

True