In [1]:
###################################################################
# I verified the correctness of my formulas numerically, using randomly generated inputs.
#==================================================================

In [2]:
###############################################
# Kronecker(A, B) vec(C) = vec(B C A.T) 
#===============================================

import numpy as np

# Draw three independent 4x4 matrices with entries uniform on [0, 1).
# np.random.random already returns float64, so no cast is needed.
A, B, C = (np.random.random((4, 4)) for _k in range(3))

# Left-hand side: kron(A, B) applied to the column-major vectorisation of C.
vec1 = np.dot(np.kron(A, B), C.ravel(order='F'))
# Right-hand side: column-major vectorisation of B C A^T.
vec2 = np.ravel(B @ C @ A.T, order='F')
# Sum of squared element-wise differences between the two sides.
loss = np.sum((vec1 - vec2) ** 2)

# Display both vectors, the squared error, and a tolerance-based comparison.
vec1, vec2, loss, np.allclose(vec1, vec2)

(array([0.24583714, 1.74250629, 1.57265487, 1.02410235, 0.59601792,
        3.58239405, 2.95536781, 2.0422207 , 0.51720152, 2.98875682,
        2.42672546, 1.67909167, 0.40372286, 2.59125342, 2.27852267,
        1.50320504]),
 array([0.24583714, 1.74250629, 1.57265487, 1.02410235, 0.59601792,
        3.58239405, 2.95536781, 2.0422207 , 0.51720152, 2.98875682,
        2.42672546, 1.67909167, 0.40372286, 2.59125342, 2.27852267,
        1.50320504]),
 1.0877652325899108e-30,
 True)

In [3]:
# Randomized test: re-check the Kronecker/vec identity on 10 freshly drawn (A, B, C).
def compute_loss(A, B, C):
    """Squared error between the two sides of the Kronecker/vec identity.

    Evaluates ``kron(A, B) @ vec(C)`` and ``vec(B @ C @ A.T)``, where ``vec``
    flattens in column-major ('F') order, and returns the sum of the squared
    element-wise differences between the two resulting vectors.
    """
    column_stacked_c = C.ravel(order='F')
    via_kronecker = np.dot(np.kron(A, B), column_stacked_c)
    via_product = np.ravel(B @ C @ A.T, order='F')
    return np.sum((via_kronecker - via_product) ** 2)

epsilon = 1e-16  # largest squared error still reported as "correct"
for _ in range(10):
    # Fresh 4x4 float64 matrices, entries uniform on [0, 1), for every trial.
    A, B, C = (np.random.random((4, 4)) for _k in range(3))
    loss = compute_loss(A, B, C)
    print(f"correct: loss = {loss}" if loss < epsilon else f"incorrect: loss = {loss}")
    

correct: loss = 1.0168910106364605e-31
correct: loss = 1.4791141972893971e-31
correct: loss = 6.162975822039155e-31
correct: loss = 7.888609052210118e-31
correct: loss = 7.395570986446986e-31
correct: loss = 1.232595164407831e-30
correct: loss = 1.3805065841367707e-30
correct: loss = 8.874685183736383e-31
correct: loss = 9.860761315262648e-31
correct: loss = 5.546678239835239e-31


In [4]:
########################################################
# d(theta.T @ A @ theta)/d(theta) = (A + A.T) @ theta
#========================================================

import torch

# Random 4x4 matrix with entries uniform on [0, 1). It is generally NOT
# symmetric, which is why the gradient formula involves both A and A.T.
A = torch.rand(4, 4, dtype=torch.float64)

# theta is created with requires_grad=True so autograd tracks operations on it.
theta = torch.rand(4, dtype=torch.float64, requires_grad=True)

# theta is 1-D, so no explicit transpose is needed: theta @ A is the row
# vector theta^T A, and its dot product with theta is the quadratic form.
quadratic_form = torch.dot(theta @ A, theta)  # 0-dim (scalar) tensor
quadratic_form

tensor(0.2510, dtype=torch.float64, grad_fn=<DotBackward0>)

In [5]:
# Back-propagate from the scalar; autograd accumulates
# d(quadratic_form)/d(theta) into theta.grad.
torch.autograd.backward(quadratic_form)
theta.grad

tensor([0.7090, 0.3917, 0.7746, 0.7281], dtype=torch.float64)

In [6]:
# Closed-form gradient claimed for theta.T @ A @ theta: (A + A.T) @ theta.
prediction = torch.mv(A + A.transpose(0, 1), theta)
prediction

tensor([0.7090, 0.3917, 0.7746, 0.7281], dtype=torch.float64,
       grad_fn=<MvBackward0>)

In [7]:
# Compare autograd's gradient with the closed-form prediction.
# The tolerances are kept well above float64 rounding error (machine epsilon
# is about 2.2e-16): with rtol = atol = 1e-16 a difference of just a couple of
# ULPs between two correctly computed gradients would be reported as a mismatch.
torch.allclose(theta.grad, prediction, rtol=1e-12, atol=1e-12)

True

In [8]:
# Randomized test: re-check the gradient formula on 10 freshly drawn (A, theta).
def compute_loss(A, theta):
    """Squared error between autograd's gradient and the closed-form gradient.

    Differentiates the quadratic form ``theta @ A @ theta`` with respect to
    ``theta`` using autograd and compares the result with the closed-form
    expression ``(A + A.T) @ theta``.

    NOTE: this definition reuses the name of the earlier
    ``compute_loss(A, B, C)`` in this notebook and shadows it once executed.

    Args:
        A: square 2-D tensor (the calling loop passes a 4x4 float64 tensor).
        theta: 1-D tensor created with ``requires_grad=True``.

    Returns:
        float: sum of squared element-wise differences between the two
        gradients.
    """
    quadratic_form = (theta.t() @ A) @ theta  # scalar tensor
    # torch.autograd.grad returns the gradient of this call directly instead
    # of accumulating it into theta.grad. With .backward(), a theta whose
    # .grad was already populated (e.g. a second call with the same theta)
    # would be compared against a summed gradient and reported as a mismatch.
    (autograd_gradient,) = torch.autograd.grad(quadratic_form, theta)
    with torch.no_grad():
        # The prediction is only compared numerically, so no graph is built.
        prediction = (A + A.t()) @ theta
        loss = ((autograd_gradient - prediction) ** 2).sum()
    return loss.item()

epsilon = 1e-16  # largest squared error still reported as "correct"
for _ in range(10):
    # Fresh float64 matrix and differentiable vector for every trial.
    A = torch.rand(4, 4, dtype=torch.float64)
    theta = torch.rand(4, dtype=torch.float64, requires_grad=True)
    loss = compute_loss(A, theta)
    print(f"correct: loss = {loss}" if loss < epsilon else f"incorrect: loss = {loss}")

correct: loss = 0.0
correct: loss = 1.9721522630525295e-31
correct: loss = 0.0
correct: loss = 0.0
correct: loss = 9.860761315262648e-32
correct: loss = 1.9721522630525295e-31
correct: loss = 4.930380657631324e-32
correct: loss = 4.930380657631324e-32
correct: loss = 2.9582283945787943e-31
correct: loss = 4.930380657631324e-32
