# TensorFlow Autodiff

* [Introduction to gradients and automatic differentiation](https://www.tensorflow.org/guide/autodiff)

In [2]:
import tensorflow as tf
from tensorflow import keras

# Word2vec sampling score with BoW (Bag of Words)

In [3]:
# --------------------------------------------------------------------------------
# Word embedding vectors on the BoW side
# --------------------------------------------------------------------------------
Win = tf.Variable(tf.random.uniform(shape=(5,5), dtype=tf.float32))
print(f"embedding is {Win}\n")

# --------------------------------------------------------------------------------
# Dummy target word
# T is the label used in the log loss below (T=1 selects the log(sigmoid(score))
# term, T=0 selects the log(1 - sigmoid(score)) term).
# --------------------------------------------------------------------------------
target_word = tf.random.uniform(shape=(1, 5), dtype=tf.float32)
T = tf.constant([1], dtype=tf.float32)
# NOTE(review): eps is not used anywhere in this cell. It is kept because it is a
# notebook-global name that other cells may read -- confirm before deleting.
eps = 1e-7


# --------------------------------------------------------------------------------
# Autodiff
# --------------------------------------------------------------------------------
with tf.GradientTape() as tape:
    # `Win` is a trainable tf.Variable, so the tape records the operations applied
    # to it automatically; an explicit tape.watch(Win) is not required.

    # --------------------------------------------------------------------------------
    # Forward path
    # --------------------------------------------------------------------------------
    # Bag of words (BoW): mean of embedding rows 1 and 3, standing in for the
    # context words of a 3-gram frame around the target word.
    indices = [[1], [3]]
    bow = tf.math.reduce_mean(tf.gather_nd(Win, indices), axis=0, keepdims=True)
    print(f"bow is {bow}\n")

    # --------------------------------------------------------------------------------
    # Score how close the BoW is to the target word with a dot product.
    # (1, 5) x (5, 1) -> (1, 1)
    # --------------------------------------------------------------------------------
    score = tf.linalg.matmul(bow, tf.transpose(target_word))
    print(f"score is {score}\n")

    # --------------------------------------------------------------------------------
    # Sigmoid log loss
    #   L = -[ T * log(sigmoid(score)) + (1-T) * log(1 - sigmoid(score)) ]
    # The second term uses the identity 1 - sigmoid(x) == sigmoid(-x).
    # (The previous code used sigmoid(1 - score), which is not the complement of
    # sigmoid(score); it went unnoticed only because T == 1 zeroes that term.)
    # --------------------------------------------------------------------------------
    L = -1.0 * (
        T * tf.math.log(tf.nn.sigmoid(score)) +
        (1-T) * tf.math.log(tf.nn.sigmoid(-score))
    )
    print(f"Loss is {L}")

# --------------------------------------------------------------------------------
# Backward path/Autodiff
# The gradient is requested after leaving the tape context so that the backward
# computation itself is not recorded on the tape.
# --------------------------------------------------------------------------------
dLdW = tape.gradient(L, Win)
print(f"gradient dL/dW (embedding back prop) is {dLdW}\n")


embedding is <tf.Variable 'Variable:0' shape=(5, 5) dtype=float32, numpy=
array([[0.5330119 , 0.09007072, 0.2937175 , 0.09215891, 0.91912687],
       [0.7878784 , 0.8388704 , 0.23437726, 0.22551596, 0.37300646],
       [0.44537175, 0.3073889 , 0.02727795, 0.12882948, 0.15287817],
       [0.02237642, 0.63794255, 0.32387066, 0.6367707 , 0.48452032],
       [0.53519464, 0.5502291 , 0.48767912, 0.68348444, 0.4968723 ]],
      dtype=float32)>

bow is [[0.4051274  0.7384065  0.27912396 0.43114334 0.4287634 ]]

score is [[0.70276856]]

Loss is [[0.40226826]]
gradient dL/dW (embedding back prop) is IndexedSlices(indices=tf.Tensor([1 3], shape=(2,), dtype=int32), values=tf.Tensor(
[[-0.03461591 -0.03589327 -0.03585096 -0.09037671 -0.06268762]
 [-0.03461591 -0.03589327 -0.03585096 -0.09037671 -0.06268762]], shape=(2, 5), dtype=float32), dense_shape=tf.Tensor([5 5], shape=(2,), dtype=int32))

