In [1]:
import numpy as np
import tensorflow as tf
import nn
import nn_debug

## data

In [2]:
# Toy example with four items; presumably `s` holds relevance labels and `h`
# the model scores (the gradients below are taken w.r.t. `h`) - TODO confirm.
s = np.array([2, 2, 2, 3])
h = np.array([0.146667883, -0.0871001333, 0.308514476, -0.0197103918])
# Column vectors of shape (4, 1), stored as float32 TensorFlow variables.
s_tf = tf.Variable(s.reshape(-1, 1), dtype=tf.float32)
h_tf = tf.Variable(h.reshape(-1, 1), dtype=tf.float32)

# loss - ranknet

In [3]:
# Ask the local `nn` module for the loss callable identified by "ranknet";
# it is invoked below as loss_function(s_tf, h_tf).
loss_function = nn.get_loss_function("ranknet")

## gradient (tape)

In [4]:
# Autodiff reference: record the loss evaluation on a tape, then differentiate
# it with respect to h_tf.
with tf.GradientTape() as g:
    # NOTE(review): s_tf and h_tf are tf.Variable objects, which a tape watches
    # automatically when trainable (the default), so these explicit watch calls
    # look redundant but harmless.
    g.watch(s_tf)
    g.watch(h_tf)
    loss = loss_function(s_tf, h_tf)
# Trailing expression: d(loss)/d(h_tf), shown as the cell output.
g.gradient(loss, h_tf)

<tf.Tensor: shape=(4, 1), dtype=float32, numpy=
array([[ 1.1186054],
       [ 0.654696 ],
       [ 1.438669 ],
       [-3.2119703]], dtype=float32)>

## gradient (analytic)

In [5]:
@tf.function()
def get_C_term_dev_tf(s, h):
    """Row-summed pairwise derivative term used for the ranknet check.

    For every pair (i, j) this evaluates
    ``(1 - S_ij) / 2 - 1 / (1 + exp(h_i - h_j))`` with
    ``S_ij = sign(s_i - s_j)``, then sums over ``j``.

    Args:
        s: tensor whose second axis has size 1 (it is squeezed on axis 1),
            e.g. shape ``(n, 1)``.
        h: tensor laid out like ``s``.

    Returns:
        float32 tensor with one row sum per ``i``; the reduced axis is kept
        with size 1.
    """
    # Column minus squeezed vector broadcasts to the full pairwise grid.
    pair_sign = tf.cast(tf.math.sign(s - tf.squeeze(s, axis=1)), tf.float32)
    score_gap = tf.cast(h - tf.squeeze(h, axis=1), tf.float32)
    label_part = (1 - pair_sign) / 2
    prob_part = 1 / (1 + tf.math.exp(score_gap))
    return tf.reduce_sum(label_part - prob_part, axis=1, keepdims=True)

In [6]:
# With the factor 2 the analytic result reproduces the tape gradient above
# (up to float32 rounding). NOTE(review): presumably each pair contributes to
# the loss twice, once as (i, j) and once as (j, i) - confirm against nn's loss.
2 * get_C_term_dev_tf(s_tf, h_tf)

<tf.Tensor: shape=(4, 1), dtype=float32, numpy=
array([[ 1.1186054],
       [ 0.6546962],
       [ 1.4386692],
       [-3.2119706]], dtype=float32)>

# loss - lambdarank

## gradient (tape)

In [7]:
# Rebind loss_function (previously the "ranknet" loss) to the callable that
# `nn` returns for "lambdarank".
loss_function = nn.get_loss_function("lambdarank")

In [8]:
# Autodiff reference for the lambdarank loss: record the loss evaluation on a
# tape, then differentiate it with respect to h_tf.
with tf.GradientTape() as g:
    # NOTE(review): s_tf and h_tf are tf.Variable objects, which a tape watches
    # automatically when trainable (the default), so these explicit watch calls
    # look redundant but harmless.
    g.watch(s_tf)
    g.watch(h_tf)
    loss = loss_function(s_tf, h_tf)
# Trailing expression: d(loss)/d(h_tf), shown as the cell output.
g.gradient(loss, h_tf)

<tf.Tensor: shape=(4, 1), dtype=float32, numpy=
array([[ 0.02427023],
       [ 0.0114659 ],
       [ 0.09950132],
       [-0.13523746]], dtype=float32)>

## gradient (analytic)

In [9]:
@tf.function()
def get_C_term_dev_tf1(s, h):
    """Row-summed pairwise derivative term weighted by |delta NDCG|.

    Each pairwise term ``(1 - S_ij) / 2 - 1 / (1 + exp(h_i - h_j))``, with
    ``S_ij = sign(s_i - s_j)``, is multiplied by the absolute value of
    ``nn.get_delta_ndcg(s, h)`` before summing over ``j``.

    Args:
        s: tensor whose second axis has size 1 (it is squeezed on axis 1),
            e.g. shape ``(n, 1)``.
        h: tensor laid out like ``s``.

    Returns:
        float32 tensor with one row sum per ``i``; the reduced axis is kept
        with size 1.
    """
    # Column minus squeezed vector broadcasts to the full pairwise grid.
    pair_sign = tf.cast(tf.math.sign(s - tf.squeeze(s, axis=1)), tf.float32)
    score_gap = tf.cast(h - tf.squeeze(h, axis=1), tf.float32)
    pairwise = (1 - pair_sign) / 2 - 1 / (1 + tf.math.exp(score_gap))
    # presumably get_delta_ndcg returns a pairwise matrix that broadcasts
    # against `pairwise` - TODO confirm against nn.
    weighted = pairwise * tf.abs(nn.get_delta_ndcg(s, h))
    return tf.reduce_sum(weighted, axis=1, keepdims=True)

In [10]:
# The |delta NDCG|-weighted analytic gradient; the displayed values coincide
# with the lambdarank tape gradient above.
get_C_term_dev_tf1(s_tf, h_tf)

<tf.Tensor: shape=(4, 1), dtype=float32, numpy=
array([[ 0.02427023],
       [ 0.0114659 ],
       [ 0.09950132],
       [-0.13523746]], dtype=float32)>

In [12]:
# https://www.cs.cmu.edu/~pinard/Papers/sigirfp092-donmez.pdf
@tf.function()
def get_C_term_dev_tf2(s, h):
    """Row-summed pairwise term ``S_ij * |delta_ndcg / (1 + exp(S_ij * dh_ij))|``.

    ``S_ij = sign(s_i - s_j)`` and ``dh_ij = h_i - h_j``. The delta NDCG
    factor is computed from the function's own arguments, so the result
    depends only on ``s`` and ``h`` and not on notebook-level variables.

    Args:
        s: tensor whose second axis has size 1 (it is squeezed on axis 1),
            e.g. shape ``(n, 1)``.
        h: tensor laid out like ``s``.

    Returns:
        float32 tensor with one row sum per ``i``; the reduced axis is kept
        with size 1.
    """
    # Column minus squeezed vector broadcasts to the full pairwise grid.
    S_ij = tf.cast(tf.math.sign(s - tf.squeeze(s, 1)), tf.float32)
    delta_h = tf.cast(h - tf.squeeze(h, 1), tf.float32)
    # Use the arguments here, not the notebook globals s_tf / h_tf.
    delta_ndcg = nn.get_delta_ndcg(s, h)
    C_term_dev = S_ij * tf.abs(delta_ndcg / (1 + tf.math.exp(S_ij * delta_h)))
    C_term_dev = tf.reduce_sum(C_term_dev, 1, keepdims=True)
    return C_term_dev

In [13]:
# The displayed values have the same magnitudes as the lambdarank tape gradient
# above (up to float32 rounding) but the opposite sign.
get_C_term_dev_tf2(s_tf, h_tf)

<tf.Tensor: shape=(4, 1), dtype=float32, numpy=
array([[-0.02427023],
       [-0.0114659 ],
       [-0.09950131],
       [ 0.13523744]], dtype=float32)>

In [14]:
# https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/lambdarank.pdf
@tf.function()
def get_C_term_dev_tf3(s, h):
    """Row-summed pairwise derivative term weighted by signed delta NDCG.

    Each pairwise term ``(1 - S_ij) / 2 - 1 / (1 + exp(h_i - h_j))``, with
    ``S_ij = sign(s_i - s_j)``, is multiplied by ``nn.get_delta_ndcg(s, h)``
    and summed over ``j``. The delta NDCG factor is computed from the
    function's own arguments rather than from notebook-level variables.

    Args:
        s: tensor whose second axis has size 1 (it is squeezed on axis 1),
            e.g. shape ``(n, 1)``.
        h: tensor laid out like ``s``.

    Returns:
        float32 tensor with one row sum per ``i``; the reduced axis is kept
        with size 1.
    """
    # Column minus squeezed vector broadcasts to the full pairwise grid.
    S_ij = tf.cast(tf.math.sign(s - tf.squeeze(s, 1)), tf.float32)
    delta_h = tf.cast(h - tf.squeeze(h, 1), tf.float32)
    C_term_dev = (1 - S_ij) / 2 - 1 / (1 + tf.math.exp(delta_h))
    # Use the arguments here, not the notebook globals s_tf / h_tf.
    # NOTE(review): unlike get_C_term_dev_tf1, the delta NDCG factor keeps its
    # sign (no tf.abs); the notebook output for this variant does not match the
    # tape gradient - confirm whether that is intentional.
    C_term_dev *= nn.get_delta_ndcg(s, h)
    C_term_dev = tf.reduce_sum(C_term_dev, 1, keepdims=True)
    return C_term_dev

In [15]:
# The displayed values do NOT reproduce the lambdarank tape gradient above:
# rows 0 and 2 have the opposite sign and the last row differs in both sign
# and magnitude.
get_C_term_dev_tf3(s_tf, h_tf)

<tf.Tensor: shape=(4, 1), dtype=float32, numpy=
array([[-0.02427023],
       [ 0.0114659 ],
       [-0.09950131],
       [ 0.11230564]], dtype=float32)>