### Imports

In [214]:
import numpy as np
import pandas as pd
import pytensor
import pytensor.tensor as pt
import matplotlib.pyplot as plt
from pytensor.compile.builders import OpFromGraph
from time import perf_counter
from collections import defaultdict
import pymc_extras as pmx
from pymc_extras.statespace import structural as sts
import pytensor
from pytensor.graph.basic import explicit_graph_inputs
import numpy as np

### Generate a random dataset

In [None]:
# Structural state-space model assembled from three components:
# a level/trend component, an AR(1) component and a measurement-error term.
mod = (
    sts.LevelTrendComponent(order=2, innovations_order=[0, 1], name='level') +
    sts.AutoregressiveComponent(order=1, name='ar') +
    sts.MeasurementError(name='obs_error')
).build(verbose = False)

# Parameter values shared by the data simulator and the matrix function below.
param_values = {
    'initial_level': np.array([10, 0.1]),
    'sigma_level': np.array([1e-2]),
    'params_ar': np.array([0.95]),
    'sigma_ar': np.array(1e-2),
    'sigma_obs_error': np.array(1e-2),
}

# Compile a function of the parameters and call it to get hidden states and observations.
# NOTE(review): no seed is passed here, so the generated data are presumably different
# on every run — confirm whether `compile_statespace` accepts a seed/rng.
data_fn = pmx.statespace.compile_statespace(mod, steps=100)
hidden_state_data, obs_data = data_fn(**param_values)

# Symbolic state-space matrices of the model.
# NOTE(review): `_unpack_statespace_with_placeholders` is a private method (leading underscore).
matrices = mod._unpack_statespace_with_placeholders()

# Evaluate the matrices numerically; the function inputs are the explicit graph inputs
# of `matrices`, passed by keyword (the parameters plus `initial_state_cov`).
matrix_fn = pytensor.function(list(explicit_graph_inputs(matrices)),
                                   matrices)
a0, P0, c, d, T, Z, R, H, Q = matrix_fn(**param_values, initial_state_cov=np.eye(mod.k_states))

<function compile_statespace.<locals>.f at 0x000001820DC942C0>


### Symbolic variable

In [218]:
# Symbolic parameters
# Shapes below are inferred from how the variables are used in `predict` / `update`;
# presumably n = number of states and k = number of observed series — TODO confirm.
A_sym = pt.matrix("A")              # NOTE(review): not referenced by the cells shown in this notebook
H_sym = pt.matrix("H")              # observation noise covariance, added to Z P Z^T; presumably (k, k)
Q_sym = pt.matrix("Q")              # state noise covariance, added to T P T^T; presumably (n, n)
R_sym = pt.matrix("R")              # NOTE(review): not referenced by the cells shown in this notebook
T_sym = pt.matrix("T")              # transition matrix (multiplies the state); presumably (n, n)
Z_sym = pt.matrix("Z")              # observation matrix (maps the state to y); presumably (k, n)
y_sym = pt.matrix("y")              # NOTE(review): rebound to a vector before the OpFromGraph is built

a0_sym = pt.vector("a0")            # initial state mean; presumably (n,)
P0_sym = pt.matrix("P0")            # initial state covariance; presumably (n, n)

data_sym = pt.matrix('data_sym')    # observations scanned row by row: [T, obs_dim]

## Kalman filter with classic gradient

### The Loss

The negative log-likelihood loss is given in the paper by the following expression:

$$
L_{NLL} = \sum l_{n|n} + l_{n|n-1}
$$

Where :

$$
\begin{align}
&l_{n|n} = 0 \\
&l_{n|n-1} = \log \det(F_n) + v_n^T F_n^{-1} v_n
\end{align}
$$

In [219]:
def predict(a, P, T, Q):
    """Prediction step of the Kalman filter.

    Parameters
    ----------
    a : filtered state mean (x_{n-1|n-1}).
    P : filtered state covariance (P_{n-1|n-1}).
    T : transition matrix applied to the state.
    Q : covariance added to the propagated state covariance.

    Returns
    -------
    tuple
        ``(T @ a, T @ P @ T.T + Q)``, i.e. the predicted mean x_{n|n-1}
        and the predicted covariance P_{n|n-1}.
    """
    predicted_cov = T @ P @ T.T + Q
    predicted_mean = T @ a
    return predicted_mean, predicted_cov

def update(y, a, P, Z, H):
    """Update (correction) step of the Kalman filter for one observation.

    Parameters
    ----------
    y : observation at the current step.
    a : predicted state mean (x_{n|n-1}).
    P : predicted state covariance (P_{n|n-1}).
    Z : observation matrix.
    H : observation noise covariance.

    Returns
    -------
    list
        ``[a_filtered, P_filtered, y_hat, F, ll]`` where ``y_hat = Z a`` is the
        predicted observation, ``F = Z P Z^T + H`` the innovation covariance and
        ``ll = log det(F) + v^T F^{-1} v`` the per-step loss term.
    """
    predicted_obs = Z.dot(a)
    innovation = y - predicted_obs                       # v_n

    cross_cov = P.dot(Z.T)                               # P Z^T
    innovation_cov = Z.dot(cross_cov) + H                # F_n
    innovation_prec = pt.linalg.inv(innovation_cov)      # F_n^(-1)
    gain = cross_cov.dot(innovation_prec)                # K_n

    filtered_mean = a + gain.dot(innovation)             # x_n|n
    filtered_cov = (pt.eye(a.shape[0]) - gain.dot(Z)) @ P  # P_n|n = (I - K Z) P

    mahalanobis = innovation.T @ innovation_prec @ innovation
    _, logdet = pt.linalg.slogdet(innovation_cov)        # log det F_n
    step_loss = (logdet + mahalanobis).ravel()[0]

    return [filtered_mean, filtered_cov, predicted_obs, innovation_cov, step_loss]

def kalman_step(y, a, P, T, Z, H, Q):
    """One full filter iteration: update with ``y``, then predict the next state.

    The argument order matches what the scan passes to its step function:
    the current observation, the two recurrent states (predicted mean and
    covariance), then the non-sequence matrices.

    Returns
    -------
    list
        ``[a_filtered, a_hat, obs_mu, P_filtered, P_hat, obs_cov, ll]`` where
        ``a_hat`` / ``P_hat`` are the predictions for the next step.
    """
    filtered_mean, filtered_cov, predicted_obs, innovation_cov, step_loss = update(
        y=y, a=a, P=P, Z=Z, H=H
    )
    next_mean, next_cov = predict(a=filtered_mean, P=filtered_cov, T=T, Q=Q)
    return [
        filtered_mean,
        next_mean,
        predicted_obs,
        filtered_cov,
        next_cov,
        innovation_cov,
        step_loss,
    ]


# One entry per output of `kalman_step`, in its return order
# [a_filtered, a_hat, obs_mu, P_filtered, P_hat, obs_cov, ll].
# Only a_hat and P_hat are recurrent: they are initialised with a0 / P0 and fed back
# as the `a` and `P` arguments of the next step; `None` marks non-recurrent outputs.
outputs_info = [None, a0_sym, None, None, P0_sym, None, None]

# Iterate `kalman_step` over the rows of `data_sym`.
# NOTE(review): `updates` is not used by the cells shown in this notebook.
results_seq, updates = pytensor.scan(
    kalman_step,
    sequences=[data_sym],
    outputs_info=outputs_info,
    non_sequences=[T_sym, Z_sym, H_sym, Q_sym],
    strict=False,
)

# --- Loss ---
# Unpack the per-step sequences; the loss is the sum of the per-step terms `ll`.
a_upd_seq, a_pred_seq, y_hat_seq, P_upd_seq, P_pred_seq, obs_cov, ll_seq = results_seq
loss = pt.sum(ll_seq)

## Custom gradient

### Gradient with respect to **$a_{n-1|n-1}$**

From the article we have :

$$
\begin{align}
&\frac{dL}{da_{n-1|n-1}} = T_n^T \frac{dL}{da_{n|n-1}} 
+ \frac{dl_{n-1|n-1}}{da_{n-1|n-1}} \quad &\text{(equation 22)} \\
&\frac{dl_{n|n}}{da_{n|n}} = 0 \quad &\text{(equation 28)}
\end{align}
$$

Given these two equations, we now have:
$$
\begin{align}
&\frac{dL}{da_{n-1|n-1}} = T_n^T \frac{dL}{da_{n|n-1}} 
\end{align}
$$

### Gradient with respect to **$P_{n-1|n-1}$**

From the article we have :

$$
\begin{align}
&\frac{dL}{dP_{n-1|n-1}} = T_n^T \frac{dL}{dP_{n|n-1}} T_n
+ \frac{dl_{n-1|n-1}}{dP_{n-1|n-1}} \quad &\text{(equation 23)} \\
&\frac{dl_{n|n}}{dP_{n|n}} = 0 \quad &\text{(equation 28)}
\end{align}
$$

Given these two equations, we now have:
$$
\begin{align}
&\frac{dL}{dP_{n-1|n-1}} = T_n^T \frac{dL}{dP_{n|n-1}} T_n
\end{align}
$$


### Gradient with respect to **$a_{n|n-1}$**

From the article we have :

$$
\begin{align}
&\frac{dL}{da_{n|n-1}} = (I - K_n Z_n)^T \frac{dL}{da_{n|n}} + \frac{dl_{n|n-1}}{da_{n|n-1}} \quad &\text{(equation 20)} \\
&\frac{dl_{n|n-1}}{da_{n|n-1}} = -2 Z_n^{T}F_n^{-1} v_n \quad &\text{(equation 30)} \\
&\frac{dL}{da_{n|n}} = T_n^T \frac{dL}{da_{n+1|n}} \quad &\text{see gradient with respect to} \quad a_{n|n}
\end{align}
$$

Combining these equations, we now have:
$$
\begin{align}
&\frac{dL}{da_{n|n-1}} = (I - K_n Z_n)^T T_n^T \frac{dL}{da_{n+1|n}} - 2 Z_n^{T}F^{-1} v_n
\end{align}
$$


In [132]:
def grad_a_hat(inp, out, out_grad):
    """Custom gradient of the cost with respect to the predicted state mean a_{n|n-1}.

    Implements

        dL/da_{n|n-1} = (I - K Z)^T T^T dL/da_{n+1|n} + g_ll * (-2 Z^T F^{-1} v)

    where ``g_ll`` is the upstream gradient of the per-step loss ``ll``. The
    original version dropped ``g_ll`` (implicitly assuming it equals 1), which
    is only correct when the cost is exactly ``sum(ll)``.

    Parameters
    ----------
    inp : sequence ``(y, a, P, T, Z, H, Q)`` — the inputs of the Kalman step.
    out : outputs of the Kalman step (unused).
    out_grad : sequence of three upstream gradients, unpacked as
        ``(a_hat_grad, P_hat_grad, ll_grad)``.
        NOTE(review): assumes only the gradients of a_hat, P_hat and ll are
        passed, in that order — confirm against the OpFromGraph documentation.

    Returns
    -------
    The gradient with respect to ``a`` (same shape as ``a``).
    """
    y, a, P, T, Z, H, Q = inp
    a_hat_grad, _, ll_grad = out_grad

    v = y - Z.dot(a)                            # innovation v_n

    PZT = P.dot(Z.T)
    F = Z.dot(PZT) + H                          # innovation covariance F_n
    F_inv = pt.linalg.inv(F)

    K = PZT.dot(F_inv)                          # Kalman gain K_n
    I_KZ = pt.eye(a.shape[0]) - K.dot(Z)

    # Chain rule: contribution through a_hat plus contribution through ll,
    # the latter scaled by its upstream gradient.
    grad_a_pred = I_KZ.T @ T.T @ a_hat_grad - 2 * ll_grad * (Z.T @ F_inv @ v)

    return grad_a_pred

### Gradient with respect to **$P_{n|n-1}$**

From the article we have :

$$
\begin{align}
&\frac{dL}{dP_{n|n-1}} = (I - K_n Z_n)^T [
    \frac{dL}{dP_{n|n}}
    + \frac{1}{2} \frac{dL}{da_{n|n}} v_n^T H_n^{-1} Z_n
    + \frac{1}{2} Z_n^T H_n^{-1} v_n (\frac{dL}{da_{n|n}})^T
    ](I - K_n Z_n) 
    + \frac{dl_{n|n-1}}{dP_{n|n-1}}  \quad &\text{(equation 21)} \\
&\frac{dl_{n|n-1}}{dP_{n|n-1}} = Z_n^T F_n^{-1} Z_n - Z_n^T F_n^{-1} v_n v_n^T F_n^{-1} Z_n \quad &\text{(equation 29)} \\
&\frac{dL}{da_{n|n}} = T_n^T \frac{dL}{da_{n+1|n}} \quad &\text{see gradient with respect to} \quad a_{n|n} \\
&\frac{dL}{dP_{n|n}} = T_n^T \frac{dL}{dP_{n+1|n}} T_n \quad &\text{see gradient with respect to} \quad P_{n|n}
\end{align}
$$

Combining these equations, we now have:
$$
\begin{align}
&\frac{dL}{dP_{n|n-1}} = (I - K_n Z_n)^T [
    T_n^T \frac{dL}{dP_{n+1|n}} T_n
    + \frac{1}{2} T_n^T \frac{dL}{da_{n+1|n}} v_n^T H_n^{-1} Z_n
    + \frac{1}{2} Z_n^T H_n^{-1} v_n (T_n^T \frac{dL}{da_{n+1|n}})^T
    ](I - K_n Z_n) 
    + Z_n^T F_n^{-1} Z_n 
    - Z_n^T F_n^{-1} v_n v_n^T F_n^{-1} Z_n
\end{align}
$$

In [133]:
def grad_P_hat(inp, out, out_grad):
    """Custom gradient of the cost with respect to the predicted covariance P_{n|n-1}.

    Implements equation 21 in a form that never inverts ``H``. Because
    ``H^{-1} Z (I - K Z) = F^{-1} Z`` (with ``F = Z P Z^T + H``), the bracketed
    term of equation 21 can be expanded as

        (I-KZ)^T G_P (I-KZ)
        + 1/2 (I-KZ)^T g_a v^T F^{-1} Z
        + 1/2 Z^T F^{-1} v g_a^T (I-KZ)
        + g_ll * (Z^T F^{-1} Z - Z^T F^{-1} v v^T F^{-1} Z)

    with ``g_a = T^T dL/da_{n+1|n}`` and ``G_P = T^T dL/dP_{n+1|n} T``. This is
    mathematically identical to the original expression when ``H`` is
    invertible (and symmetric), and stays well defined when ``H`` is singular,
    e.g. a model without measurement noise. The per-step loss term is also
    scaled by its upstream gradient ``g_ll`` instead of assuming it is 1.

    Parameters
    ----------
    inp : sequence ``(y, a, P, T, Z, H, Q)`` — the inputs of the Kalman step.
    out : outputs of the Kalman step (unused).
    out_grad : sequence of three upstream gradients, unpacked as
        ``(a_hat_grad, P_hat_grad, ll_grad)``.
        NOTE(review): assumes only the gradients of a_hat, P_hat and ll are
        passed, in that order — confirm against the OpFromGraph documentation.

    Returns
    -------
    The gradient with respect to ``P`` (same shape as ``P``).
    """
    y, a, P, T, Z, H, Q = inp
    a_hat_grad, P_hat_grad, ll_grad = out_grad

    # Column-vector views so that outer products can be written with `@`.
    v = (y - Z.dot(a))[:, None]                       # innovation v_n, (k, 1)
    a_filtered_grad = (T.T @ a_hat_grad)[:, None]     # dL/da_{n|n}, (n, 1)
    P_filtered_grad = T.T @ P_hat_grad @ T            # dL/dP_{n|n}

    PZT = P.dot(Z.T)
    F = Z.dot(PZT) + H
    F_inv = pt.linalg.inv(F)

    K = PZT.dot(F_inv)
    I_KZ = pt.eye(a.shape[0]) - K.dot(Z)

    row = v.T @ F_inv @ Z                             # v^T F^{-1} Z, (1, n)
    col = Z.T @ F_inv @ v                             # Z^T F^{-1} v, (n, 1)

    through_P = I_KZ.T @ P_filtered_grad @ I_KZ
    through_a = 0.5 * (I_KZ.T @ a_filtered_grad) @ row + 0.5 * col @ (a_filtered_grad.T @ I_KZ)
    through_ll = Z.T @ F_inv @ Z - col @ row

    grad_P_hat = through_P + through_a + ll_grad * through_ll

    return grad_P_hat

### Gradient with respect to **y**

From the article we have :

$$
\begin{align}
&\frac{dL}{dy_n} = K_n^T\frac{dL}{da_{n|n}} + \frac{dl_{n|n-1}}{dy_n} \quad &\text{(equation 24)} \\
&\frac{dl_{n|n-1}}{dy_n} = 2F^{-1}v_n \quad &\text{(equation 31)} \\
&\frac{dL}{da_{n|n}} = T_n^T \frac{dL}{da_{n+1|n}} \quad &\text{see gradient with respect to} \quad a_{n|n} \\
\end{align}
$$

Combining these equations, we now have:
$$
\begin{align}
&\frac{dL}{dy_n} = K_n^TT_n^T\frac{dL}{da_{n+1|n}} + 2F^{-1}v_n
\end{align}
$$


In [134]:
def grad_y(inp, out, out_grad):
    """Custom gradient of the cost with respect to the observation y_n.

    Implements

        dL/dy_n = K^T T^T dL/da_{n+1|n} + g_ll * 2 F^{-1} v

    where ``g_ll`` is the upstream gradient of the per-step loss ``ll``. The
    original version bound that third gradient to the misleading name
    ``y_grad`` and never used it, i.e. it assumed ``g_ll == 1``.

    Parameters
    ----------
    inp : sequence ``(y, a, P, T, Z, H, Q)`` — the inputs of the Kalman step.
    out : outputs of the Kalman step (unused).
    out_grad : sequence of three upstream gradients, unpacked as
        ``(a_hat_grad, P_hat_grad, ll_grad)``.
        NOTE(review): assumes only the gradients of a_hat, P_hat and ll are
        passed, in that order — confirm against the OpFromGraph documentation.

    Returns
    -------
    The gradient with respect to ``y`` (same shape as ``y``).
    """
    y, a, P, T, Z, H, Q = inp
    a_hat_grad, _, ll_grad = out_grad

    v = y - Z.dot(a)                            # innovation v_n

    PZT = P.dot(Z.T)
    F = Z.dot(PZT) + H                          # innovation covariance F_n
    F_inv = pt.linalg.inv(F)

    K = PZT.dot(F_inv)                          # Kalman gain K_n

    return K.T @ T.T @ a_hat_grad + 2 * ll_grad * (F_inv @ v)

### Gradient with respect to Q

From the article we have :

$$
\begin{align}
\frac{dL}{dQ_n} = \frac{dL}{dP_{n|n-1}} & \quad \text{(equation 25)}
\end{align}
$$

In [11]:
def grad_Q(inp, out, out_grad):
    """Custom gradient with respect to Q (equation 25).

    Q is added directly to the predicted covariance, so its gradient is the
    upstream gradient of that covariance: the second of the three entries of
    ``out_grad``. ``inp`` and ``out`` are unused.
    """
    _first_grad, P_hat_grad, _last_grad = out_grad
    return P_hat_grad

### Gradient with respect to **H**

From the article we have :

$$
\begin{align}
&\frac{dL}{dH_n} = K_n^T\frac{dL}{dP_{n|n}}K_n 
- \frac{1}{2} K_n^T \frac{dL}{da_{n|n}} v_n^T F^{-1}
- \frac{1}{2} F_n^{-1} v_n (\frac{dL}{da_{n|n}})^T K_n
+ \frac{dl_{n|n-1}}{dH_n} 
\quad &\text{(equation 26)} \\
&\frac{dl_{n|n-1}}{dH_n} = F^{-1} - F_n^{-1} v_n v_n^T F_n^{-1} 
\quad &\text{(equation 31)} \\
&\frac{dL}{da_{n|n}} = T_n^T \frac{dL}{da_{n+1|n}} \quad &\text{see gradient with respect to} \quad a_{n|n} \\
&\frac{dL}{dP_{n|n}} = T_n^T \frac{dL}{dP_{n+1|n}} T_n \quad &\text{see gradient with respect to} \quad P_{n|n}
\end{align}
$$

Combining these equations, we now have:
$$
\begin{align}
&\frac{dL}{dH_n} = K_n^T T_n^T \frac{dL}{dP_{n+1|n}} T_n K_n 
- \frac{1}{2} K_n^T T_n^T \frac{dL}{da_{n+1|n}} v_n^T F^{-1}
- \frac{1}{2} F_n^{-1} v_n (T_n^T \frac{dL}{da_{n+1|n}})^T K_n
+ F^{-1} - F_n^{-1} v_n v_n^T F_n^{-1}
\end{align}
$$


In [135]:
def grad_H(inp, out, out_grad):
    """Custom gradient of the cost with respect to the observation noise covariance H.

    Implements equation 26:

        dL/dH = K^T G_P K
                - 1/2 K^T g_a v^T F^{-1}
                - 1/2 F^{-1} v g_a^T K
                + g_ll * (F^{-1} - F^{-1} v v^T F^{-1})

    with ``g_a = T^T dL/da_{n+1|n}``, ``G_P = T^T dL/dP_{n+1|n} T`` and
    ``g_ll`` the upstream gradient of the per-step loss ``ll``. The original
    version bound that third gradient to the misleading name ``y_grad`` and
    never used it, i.e. it assumed ``g_ll == 1``.

    Parameters
    ----------
    inp : sequence ``(y, a, P, T, Z, H, Q)`` — the inputs of the Kalman step.
    out : outputs of the Kalman step (unused).
    out_grad : sequence of three upstream gradients, unpacked as
        ``(a_hat_grad, P_hat_grad, ll_grad)``.
        NOTE(review): assumes only the gradients of a_hat, P_hat and ll are
        passed, in that order — confirm against the OpFromGraph documentation.

    Returns
    -------
    The gradient with respect to ``H`` (same shape as ``H``).
    """
    y, a, P, T, Z, H, Q = inp
    a_hat_grad, P_hat_grad, ll_grad = out_grad

    PZT = P.dot(Z.T)
    F = Z.dot(PZT) + H
    F_inv = pt.linalg.inv(F)

    K = PZT.dot(F_inv)

    # Column-vector views so that outer products can be written with `@`.
    v = (y - Z.dot(a))[:, None]                       # innovation v_n, (k, 1)
    a_filtered_grad = (T.T @ a_hat_grad)[:, None]     # dL/da_{n|n}, (n, 1)
    P_filtered_grad = T.T @ P_hat_grad @ T            # dL/dP_{n|n}

    through_P = K.T @ P_filtered_grad @ K
    through_a = 0.5 * K.T @ a_filtered_grad @ v.T @ F_inv + 0.5 * F_inv @ v @ a_filtered_grad.T @ K
    through_ll = F_inv - F_inv @ v @ v.T @ F_inv

    return through_P - through_a + ll_grad * through_ll

### Total grad

In [None]:
def custom_grad(inp, out, out_grad):
    """All custom gradients of one Kalman step, returned in the order of ``inp``.

    Changes with respect to the original version:

    * the gradients are returned in the same order as the inputs
      ``(y, a, P, T, Z, H, Q)``; the original returned
      ``[grad_P_hat, grad_a_pred, grad_y, ...]``, which pairs each gradient
      with the wrong input;
    * the per-step loss terms are scaled by the upstream gradient of ``ll``
      (the original named it ``y_grad`` and ignored it);
    * ``H`` is never inverted: ``H^{-1} Z (I - K Z) = F^{-1} Z`` gives an
      equivalent expression that also works when ``H`` is singular;
    * the unused unpacking of ``out`` was removed.

    Parameters
    ----------
    inp : sequence ``(y, a, P, T, Z, H, Q)`` — the inputs of the Kalman step.
    out : outputs of the Kalman step (unused).
    out_grad : sequence of three upstream gradients, unpacked as
        ``(a_hat_grad, P_hat_grad, ll_grad)``.
        NOTE(review): assumes only the gradients of a_hat, P_hat and ll are
        passed, in that order — confirm against the OpFromGraph documentation.

    Returns
    -------
    list
        ``[grad_y, grad_a_pred, grad_P_hat, grad_T, grad_Z, grad_H, grad_Q]``.
        ``grad_T`` and ``grad_Z`` are ``None`` because no formula is
        implemented for them.
        NOTE(review): a single-callable override presumably has to return a
        variable for every input — confirm how ``None`` is handled.
    """
    y, a, P, T, Z, H, Q = inp
    a_hat_grad, P_hat_grad, ll_grad = out_grad

    PZT = P.dot(Z.T)
    F = Z.dot(PZT) + H
    F_inv = pt.linalg.inv(F)

    K = PZT.dot(F_inv)
    I_KZ = pt.eye(a.shape[0]) - K.dot(Z)

    v = y - Z.dot(a)                                  # innovation v_n

    # Vector-valued gradients (equations 20 and 24).
    grad_a_pred = I_KZ.T @ T.T @ a_hat_grad - 2 * ll_grad * (Z.T @ F_inv @ v)
    grad_y = K.T @ T.T @ a_hat_grad + 2 * ll_grad * (F_inv @ v)

    # Column-vector views so that outer products can be written with `@`.
    v_col = v[:, None]
    a_filtered_grad = (T.T @ a_hat_grad)[:, None]     # dL/da_{n|n}
    P_filtered_grad = T.T @ P_hat_grad @ T            # dL/dP_{n|n}

    row = v_col.T @ F_inv @ Z                         # v^T F^{-1} Z
    col = Z.T @ F_inv @ v_col                         # Z^T F^{-1} v

    # Equation 21, written without H^{-1}.
    grad_P_hat = (
        I_KZ.T @ P_filtered_grad @ I_KZ
        + 0.5 * (I_KZ.T @ a_filtered_grad) @ row
        + 0.5 * col @ (a_filtered_grad.T @ I_KZ)
        + ll_grad * (Z.T @ F_inv @ Z - col @ row)
    )

    # Equation 26.
    grad_H = (
        K.T @ P_filtered_grad @ K
        - 0.5 * K.T @ a_filtered_grad @ v_col.T @ F_inv
        - 0.5 * F_inv @ v_col @ a_filtered_grad.T @ K
        + ll_grad * (F_inv - F_inv @ v_col @ v_col.T @ F_inv)
    )

    # Equation 25.
    grad_Q = P_hat_grad

    grad_T = None
    grad_Z = None

    return [grad_y,
            grad_a_pred,
            grad_P_hat,
            grad_T,
            grad_Z,
            grad_H,
            grad_Q]


## Custom Kalman Filter

In [259]:
# NOTE(review): this rebinds `y_sym`, which was a matrix in the "Symbolic variable" cell;
# here it is a single observation (one row of `data_sym`).
y_sym = pt.vector("y")

# Wrap one Kalman step in an OpFromGraph so its gradient can be overridden.
# `lop_overrides` has one entry per input, in the order of `inputs`:
# y -> grad_y, a -> grad_a_hat, P -> grad_P_hat, T -> None, Z -> None, H -> grad_H, Q -> grad_Q.
# NOTE(review): the `None` entries presumably fall back to the default gradient for T and Z —
# confirm against the OpFromGraph documentation.
kalman_step_op = OpFromGraph(
    inputs=[y_sym, a0_sym, P0_sym, T_sym, Z_sym, H_sym, Q_sym],
    outputs=kalman_step(y_sym, a0_sym, P0_sym, T_sym, Z_sym, H_sym, Q_sym),
    lop_overrides=[grad_y, grad_a_hat, grad_P_hat, None, None, grad_H, grad_Q],
    inline=True
)

# Same recurrence layout as the classic filter: only a_hat and P_hat are fed back.
outputs_info = [None, a0_sym, None, None, P0_sym, None, None]

# NOTE(review): `updates` is rebound here and is not used by the cells shown in this notebook.
results_op, updates = pytensor.scan(
    kalman_step_op,
    sequences=[data_sym],
    outputs_info=outputs_info,
    non_sequences=[T_sym, Z_sym, H_sym, Q_sym],
    strict=False,
)
# --- Loss ---
# NOTE(review): `obs_cov` is rebound here (it already held the classic filter's output).
a_upd_op, a_pred_op, y_hat_op, P_upd_op, P_pred_op, obs_cov, ll_op = results_op
loss_op = pt.sum(ll_op)

## Handmade Numpy Backpropagation 

In [264]:
def compute_grad_a0(observations, a0, P0, a_pred_seq, P_pred_seq, Z, H, T):
    """Back-propagate the loss gradient through the predicted state means with NumPy.

    Walks the observations from last to first and applies

        dL/da_{n|n-1} = (I - K_n Z)^T T^T dL/da_{n+1|n} - 2 Z^T F_n^{-1} v_n

    (the first term is absent for the last observation).

    Compared with the original version, the three copies of the step (last
    observation, middle observations, first observation) are merged into one
    loop, the inverse is computed with ``np.linalg.inv`` instead of solving
    against an identity matrix, and a single observation (or none) no longer
    raises ``IndexError``.

    Parameters
    ----------
    observations : sequence of observations, one entry per time step.
    a0, P0 : initial state mean (1-D array) and covariance, used for the first observation.
    a_pred_seq, P_pred_seq : predicted means / covariances from the forward pass;
        entry ``k`` is the prediction made after observation ``k``, i.e. the
        state used to process observation ``k + 1``.
    Z, H, T : observation matrix, observation noise covariance, transition matrix.

    Returns
    -------
    list
        One gradient per observation; ``grad[0]`` is the gradient with respect
        to ``a0``. Empty if there are no observations.
    """
    n_obs = len(observations)
    identity = np.eye(a0.shape[0])

    grad = [0] * n_obs
    downstream = None  # dL/da_{n+1|n}; None while processing the last observation

    for step in reversed(range(n_obs)):
        # State that was used to process this observation.
        if step == 0:
            a_prior, P_prior = a0, P0
        else:
            a_prior, P_prior = a_pred_seq[step - 1], P_pred_seq[step - 1]

        PZT = P_prior.dot(Z.T)
        F = Z.dot(PZT) + H
        F_inv = np.linalg.inv(F)

        # Gradient of this step's own loss term.
        local = -2 * Z.T @ F_inv @ (observations[step] - Z @ a_prior)

        if downstream is None:
            grad[step] = local
        else:
            K = PZT.dot(F_inv)
            I_KZ = identity - K.dot(Z)
            grad[step] = I_KZ.T @ T.T @ downstream + local

        downstream = grad[step]

    return grad

## Speed observation

### Benchmark for pytensor computed gradients

In [253]:
def benchmark_kalman_gradients(loss, obs_data, a0, P0, T, Z, R, H, Q, n_runs=20):
    """Time the compiled PyTensor gradient of ``loss`` with respect to ``a0_sym``.

    The gradient function is compiled once, outside the timed region. Its
    numerical inputs (the observations reshaped to a column and the state
    covariance ``R @ Q @ R.T``) are now also prepared once, outside the timed
    region, so that only the gradient evaluation itself is measured.

    Parameters
    ----------
    loss : symbolic scalar built from the notebook's symbolic inputs.
    obs_data : observations; a trailing axis is added with ``np.newaxis``
        before the call.
    a0, P0, T, Z, R, H, Q : numerical state-space matrices.
    n_runs : number of timed evaluations to average over (default 20, the
        value that used to be hard-coded).

    Returns
    -------
    defaultdict
        ``{"exec_time": mean wall-clock time of one evaluation, in seconds}``.

    Raises
    ------
    ValueError
        If ``n_runs`` is smaller than 1.
    """
    if n_runs < 1:
        raise ValueError("n_runs must be at least 1")

    grad_list = pt.grad(loss, [a0_sym])
    f_grad = pytensor.function(
        inputs=[data_sym, a0_sym, P0_sym, T_sym, Z_sym, H_sym, Q_sym],
        outputs=grad_list,
    )

    # Loop-invariant inputs: build them once so they are not part of the timing.
    observations = obs_data[:, np.newaxis]
    state_cov = R @ Q @ R.T

    total_time = 0.0
    for _ in range(n_runs):
        # --- execution ---
        t0 = perf_counter()
        f_grad(observations, a0, P0, T, Z, H, state_cov)
        total_time += perf_counter() - t0

    results = defaultdict(dict)
    results["exec_time"] = total_time / n_runs

    return results

### Benchmark for numpy computed gradient

In [None]:
def benchmark_kalman_gradients_np(a_pred_seq, P_pred_seq, obs_data, a0, P0, T, Z, R, H, Q, n_runs=20):
    """Time the PyTensor forward pass and the NumPy backward pass separately.

    The forward function (predicted means and covariances) is compiled once,
    outside the timed region. Its numerical inputs (the observations reshaped
    to a column and the state covariance ``R @ Q @ R.T``) are now also prepared
    once, outside the timed region.

    Parameters
    ----------
    a_pred_seq, P_pred_seq : symbolic outputs of the scan (predicted means and
        covariances); they are the outputs of the compiled forward function.
    obs_data : observations; a trailing axis is added with ``np.newaxis`` for
        the forward pass, the raw array is given to ``compute_grad_a0``.
    a0, P0, T, Z, R, H, Q : numerical state-space matrices.
    n_runs : number of timed repetitions to average over (default 20, the
        value that used to be hard-coded).

    Returns
    -------
    defaultdict
        ``{"Forward pass": ..., "Backprop": ...}``, mean wall-clock times in seconds.

    Raises
    ------
    ValueError
        If ``n_runs`` is smaller than 1.
    """
    if n_runs < 1:
        raise ValueError("n_runs must be at least 1")

    kalman_fn = pytensor.function(
        inputs=[data_sym, a0_sym, P0_sym, T_sym, Z_sym, H_sym, Q_sym],
        outputs=(a_pred_seq, P_pred_seq),
    )

    # Loop-invariant inputs: build them once so they are not part of the timing.
    observations = obs_data[:, np.newaxis]
    state_cov = R @ Q @ R.T

    forward_time = 0.0
    backprop_time = 0.0
    for _ in range(n_runs):
        # --- forward pass ---
        t0 = perf_counter()
        a_pred, P_pred = kalman_fn(observations, a0, P0, T, Z, H, state_cov)
        forward_time += perf_counter() - t0

        # --- backprop ---
        t0 = perf_counter()
        compute_grad_a0(obs_data, a0, P0, a_pred, P_pred, Z, H, T)
        backprop_time += perf_counter() - t0

    results = defaultdict(dict)
    results["Forward pass"] = forward_time / n_runs
    results["Backprop"] = backprop_time / n_runs

    return results

### Comparison

In [254]:
# Time the gradient obtained by differentiating through the plain scan.
results = benchmark_kalman_gradients(loss, obs_data, a0, P0, T, Z, R, H, Q)

In [257]:
# Mean time of one gradient evaluation, in seconds (perf_counter units).
print(results)

defaultdict(<class 'dict'>, {'exec_time': 0.016296579997288063})


In [260]:
# Time the gradient obtained with the OpFromGraph custom gradients.
# NOTE(review): this rebinds `results_op`, which previously held the outputs of the
# OpFromGraph scan; re-running the unpacking line of that cell afterwards would fail.
results_op = benchmark_kalman_gradients(loss_op, obs_data, a0, P0, T, Z, R, H, Q)

In [261]:
# Mean time of one gradient evaluation, in seconds (perf_counter units).
print(results_op)

defaultdict(<class 'dict'>, {'exec_time': 0.015451419999590144})


In [267]:
# Time the PyTensor forward pass and the handmade NumPy backward pass separately.
results_np = benchmark_kalman_gradients_np(a_pred_seq, P_pred_seq, obs_data, a0, P0, T, Z, R, H, Q)

In [268]:
# Mean times of the forward and backward passes, in seconds (perf_counter units).
print(results_np)

defaultdict(<class 'dict'>, {'Forward pass': 0.00269013000652194, 'Backprop': 0.002321220003068447})


## Error observation

### Comparing the gradient with respect to a0

In [274]:
# First the classic way with autodiff: differentiate the plain-scan loss w.r.t. a0.

grad_list = pt.grad(loss, [a0_sym])
f_grad = pytensor.function(
    inputs=[data_sym, a0_sym, P0_sym, T_sym, Z_sym, H_sym, Q_sym],
    outputs=grad_list,
)

grad_a0 = f_grad(obs_data[:, np.newaxis], a0, P0, T, Z, H, R @ Q @ R.T)

# Now using our OpFromGraph custom gradient.
# NOTE(review): `f_grad` is rebound here; the autodiff function above is no longer reachable.

grad_list_op = pt.grad(loss_op, [a0_sym])
f_grad = pytensor.function(
    inputs=[data_sym, a0_sym, P0_sym, T_sym, Z_sym, H_sym, Q_sym],
    outputs=grad_list_op,
)

grad_a0_op = f_grad(obs_data[:, np.newaxis], a0, P0, T, Z, H, R @ Q @ R.T)

# And here using our handmade NumPy backprop: run the forward pass with PyTensor to get
# the predicted means / covariances, then back-propagate by hand.

kalman_fn = pytensor.function(inputs=[data_sym, a0_sym, P0_sym, T_sym, Z_sym, H_sym, Q_sym],
                              outputs=(a_pred_seq, P_pred_seq))
a_pred, P_pred = kalman_fn(obs_data[:, np.newaxis], a0, P0, T, Z, H, R@Q@R.T)

# Entry 0 of the returned list is the gradient with respect to a0.
grad_a0_np = compute_grad_a0(obs_data, a0, P0, a_pred, P_pred, Z, H, T)[0]

In [278]:
# Check that the three ways of computing the a0 gradient agree (np.allclose default tolerances).
print("Comparison between classic a0 gradient and our custom OpFromGraph :", np.allclose(grad_a0,  grad_a0_op))
print("Comparison between classic a0 gradient and our handmaid NumPy backprop :", np.allclose(grad_a0,  grad_a0_np))

Comparison between classic a0 gradient and our custom OpFromGraph : True
Comparison between classic a0 gradient and our handmaid NumPy backprop : True


In [279]:
# First the classic way with autodiff: gradients w.r.t. the data, a0, P0, H and Q.
# NOTE(review): `grad_a0` and `grad_a0_op` are reused below but now hold the list of all
# five gradients, in the order [data, a0, P0, H, Q], not just the a0 gradient.

grad_list = pt.grad(loss, [data_sym, a0_sym, P0_sym, H_sym, Q_sym])
f_grad = pytensor.function(
    inputs=[data_sym, a0_sym, P0_sym, T_sym, Z_sym, H_sym, Q_sym],
    outputs=grad_list,
)

grad_a0 = f_grad(obs_data[:, np.newaxis], a0, P0, T, Z, H, R @ Q @ R.T)

# Now using our OpFromGraph custom gradient (same inputs, same order of gradients).

grad_list_op = pt.grad(loss_op, [data_sym, a0_sym, P0_sym, H_sym, Q_sym])
f_grad = pytensor.function(
    inputs=[data_sym, a0_sym, P0_sym, T_sym, Z_sym, H_sym, Q_sym],
    outputs=grad_list_op,
)

grad_a0_op = f_grad(obs_data[:, np.newaxis], a0, P0, T, Z, H, R @ Q @ R.T)

In [289]:
# Compare autodiff and custom gradients entry by entry (order: data, a0, P0, H, Q).
# The autodiff gradients w.r.t. P0 and Q are symmetrised, (G + G^T) / 2, before the comparison;
# presumably because the custom formulas return the symmetrised gradient — TODO confirm.
print("Comparison between classic y gradient and our custom OpFromGraph :", np.allclose(grad_a0[0],  grad_a0_op[0]))
print("Comparison between classic a0 gradient and our custom OpFromGraph :", np.allclose(grad_a0[1],  grad_a0_op[1]))
print("Comparison between classic P0 gradient and our custom OpFromGraph :", np.allclose((grad_a0[2] + grad_a0[2].T)/2,  grad_a0_op[2]))
print("Comparison between classic H gradient and our custom OpFromGraph :", np.allclose(grad_a0[3],  grad_a0_op[3]))
print("Comparison between classic Q gradient and our custom OpFromGraph :", np.allclose((grad_a0[4] + grad_a0[4].T)/2,  grad_a0_op[4]))

Comparison between classic y gradient and our custom OpFromGraph : True
Comparison between classic a0 gradient and our custom OpFromGraph : True
Comparison between classic P0 gradient and our custom OpFromGraph : True
Comparison between classic H gradient and our custom OpFromGraph : True
Comparison between classic Q gradient and our custom OpFromGraph : True
