In [1]:
import numpy as np
import tensorflow as tf
import nn
import nn_debug

## data

In [2]:
# Toy inputs for the gradient checks below.
# Seed the generator so that Restart & Run All reproduces the same scores.
SEED = 0
rng = np.random.default_rng(SEED)

s = np.array([2, 2, 2, 3])  # presumably relevance labels; passed as the first loss argument
h = rng.normal(size=4)      # random scores, one per item
# Fixed alternative for h:
# h = np.array([0.146667883, -0.0871001333, 0.308514476, -0.0197103918])

# Column-vector (4, 1) float32 variables fed to the TensorFlow losses.
s_tf = tf.Variable(s[:, np.newaxis], dtype=tf.float32)
h_tf = tf.Variable(h[:, np.newaxis], dtype=tf.float32)

# loss - lambdarank

## gradient (tape)

### as is - no (tape gradient differs from the analytic gradient below)

In [3]:
# Fetch the loss registered under the name "lambdarank" from the nn module.
loss_function = nn.get_loss_function("lambdarank")

In [4]:
# Autodiff gradient of the library lambdarank loss with respect to the scores h_tf.
with tf.GradientTape() as tape:
    tape.watch([s_tf, h_tf])
    loss = loss_function(s_tf, h_tf)
tape.gradient(loss, h_tf)

<tf.Tensor: shape=(4, 1), dtype=float32, numpy=
array([[ 0.20678273],
       [ 0.06932775],
       [ 0.02377368],
       [-0.29988414]], dtype=float32)>

### stop gradient - no (same tape gradient as the as-is loss; still differs from the analytic gradient)

In [5]:
@tf.function
def loss_func(s, h):
    """Pairwise sigmoid cross-entropy with a stop-gradient delta-NDCG weight.

    Args:
        s: labels; axis 1 must have size 1 (it is squeezed), e.g. an (n, 1) column.
        h: scores, laid out like ``s``.

    Returns:
        The element-wise cross-entropy over all (i, j) pairs, not reduced.
    """
    # Broadcasting a column against its squeezed self yields pairwise differences.
    label_sign = tf.cast(tf.math.sign(s - tf.squeeze(s, 1)), tf.float32)
    score_diff = tf.cast(h - tf.squeeze(h, 1), tf.float32)

    # The |delta NDCG| weight is treated as a constant during differentiation.
    ndcg_weight = tf.stop_gradient(tf.abs(nn.get_delta_ndcg(s, h)))

    # Map the sign in {-1, 0, 1} to a target probability in {0, 0.5, 1}.
    target_prob = (1 / 2) * (1 + label_sign)

    return tf.nn.sigmoid_cross_entropy_with_logits(
        logits=score_diff * ndcg_weight,
        labels=target_prob,
    )

In [6]:
# Autodiff gradient of the hand-written loss_func with respect to the scores h_tf.
with tf.GradientTape() as tape:
    tape.watch([s_tf, h_tf])
    loss = loss_func(s_tf, h_tf)
tape.gradient(loss, h_tf)

<tf.Tensor: shape=(4, 1), dtype=float32, numpy=
array([[ 0.20678273],
       [ 0.06932775],
       [ 0.02377368],
       [-0.29988414]], dtype=float32)>

## gradient (analytic)

In [9]:
@tf.function()
def get_C_term_dev_tf1(s, h):
    """Analytic gradient term of the pairwise cost with respect to the scores.

    Args:
        s: labels; axis 1 must have size 1 (it is squeezed), e.g. an (n, 1) column.
        h: scores, laid out like ``s``.

    Returns:
        The pairwise term weighted by |delta NDCG| and summed over axis 1
        (``keepdims=True``), giving one value per row.
    """
    # Broadcasting a column against its squeezed self yields pairwise differences.
    S_ij = tf.cast(tf.math.sign(s - tf.squeeze(s, 1)), tf.float32)
    delta_h = tf.cast(h - tf.squeeze(h, 1), tf.float32)
    C_term_dev = (1 - S_ij) / 2 - 1 / (1 + tf.math.exp(delta_h))
    # Weight by |delta NDCG| of the *arguments*; reading the notebook globals
    # here would make the result ignore the inputs the caller passed.
    C_term_dev *= tf.abs(nn.get_delta_ndcg(s, h))
    C_term_dev = tf.reduce_sum(C_term_dev, 1, keepdims=True)
    return C_term_dev

In [10]:
# Evaluate the analytic gradient term on the notebook's sample tensors.
get_C_term_dev_tf1(s_tf, h_tf)

<tf.Tensor: shape=(4, 1), dtype=float32, numpy=
array([[ 0.12702468],
       [ 0.03988631],
       [ 0.01276071],
       [-0.1796717 ]], dtype=float32)>

### custom gradient - yes! (the tape now returns the analytic gradient)

In [11]:
@tf.custom_gradient
def loss_func(s, h):
    """Pairwise sigmoid cross-entropy whose backward pass uses the analytic gradient.

    The forward value is the weighted pairwise cross-entropy. The gradient
    with respect to ``h`` is replaced by ``get_C_term_dev_tf1(s, h)``.

    Args:
        s: labels; axis 1 must have size 1 (it is squeezed), e.g. an (n, 1) column.
        h: scores, laid out like ``s``.

    Returns:
        The element-wise cross-entropy over all (i, j) pairs, together with
        the custom gradient function required by ``tf.custom_gradient``.
    """
    S_ij = tf.cast(tf.math.sign(s - tf.squeeze(s, 1)), tf.float32)
    delta_h = tf.cast(h - tf.squeeze(h, 1), tf.float32)
    # The |delta NDCG| weight is treated as a constant during differentiation.
    delta_ndcg = tf.stop_gradient(tf.abs(nn.get_delta_ndcg(s, h)))
    P_ij = (1 / 2) * (1 + S_ij)
    C = tf.nn.sigmoid_cross_entropy_with_logits(logits=delta_h * delta_ndcg, labels=P_ij)

    def grad(dy):
        # s only enters through sign() and a stop-gradient weight, so its
        # gradient is zero. Return zeros shaped like s: `s * dy` would take
        # the pairwise shape of dy instead of the shape of the input.
        # NOTE(review): dy is not applied to the h gradient, so this is only
        # exact when the upstream gradient is all ones (as when the tape
        # differentiates the loss tensor directly) - confirm before composing
        # this loss with further ops.
        return tf.zeros_like(s), get_C_term_dev_tf1(s, h)

    return C, grad

In [12]:
# Tape gradient of the custom-gradient loss_func with respect to the scores h_tf.
with tf.GradientTape() as tape:
    tape.watch([s_tf, h_tf])
    loss = loss_func(s_tf, h_tf)
tape.gradient(loss, h_tf)

<tf.Tensor: shape=(4, 1), dtype=float32, numpy=
array([[ 0.12702468],
       [ 0.03988631],
       [ 0.01276071],
       [-0.1796717 ]], dtype=float32)>