### Dependencies

In [1]:
# Fetch the ContextualDecomposition repository and move its `cd` package next to
# the notebook so the `from cd...` imports further down can resolve.
!git clone "https://github.com/suyash/ContextualDecomposition.git" && mv ContextualDecomposition/cd ./cd

Cloning into 'ContextualDecomposition'...
remote: Enumerating objects: 32, done.[K
remote: Counting objects:   3% (1/32)[Kremote: Counting objects:   6% (2/32)[Kremote: Counting objects:   9% (3/32)[Kremote: Counting objects:  12% (4/32)[Kremote: Counting objects:  15% (5/32)[Kremote: Counting objects:  18% (6/32)[Kremote: Counting objects:  21% (7/32)[Kremote: Counting objects:  25% (8/32)[Kremote: Counting objects:  28% (9/32)[Kremote: Counting objects:  31% (10/32)[Kremote: Counting objects:  34% (11/32)[Kremote: Counting objects:  37% (12/32)[Kremote: Counting objects:  40% (13/32)[Kremote: Counting objects:  43% (14/32)[Kremote: Counting objects:  46% (15/32)[Kremote: Counting objects:  50% (16/32)[Kremote: Counting objects:  53% (17/32)[Kremote: Counting objects:  56% (18/32)[Kremote: Counting objects:  59% (19/32)[Kremote: Counting objects:  62% (20/32)[Kremote: Counting objects:  65% (21/32)[Kremote: Counting objects:  68% (22/32)[Kr

In [2]:
# Download the archive holding the trained model and vocabulary, then unpack it
# into ./job_dir.
#   curl -f : exit non-zero on an HTTP error instead of saving the error page as
#             job_dir.zip, so the `&&` stops before unzip runs on a bad file.
#   unzip -o: overwrite existing files without prompting, so the cell can be
#             re-run after a kernel restart.
!curl -fL -o "job_dir.zip" "https://drive.google.com/uc?export=download&id=13Uyub6pPWWS9USmj2WxAilPMoYFQZtCd" && unzip -o -q -d "job_dir" "job_dir.zip"

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   388    0   388    0     0     77      0 --:--:--  0:00:04 --:--:--   102
100 21.6M    0 21.6M    0     0  3960k      0 --:--:--  0:00:05 --:--:--  137M


### Imports

In [3]:
# Line magic that selects the TensorFlow 2.x runtime before TensorFlow is imported.
# NOTE(review): this magic is provided by Google Colab; it is not available in a plain Jupyter kernel.
%tensorflow_version 2.x

TensorFlow 2.x selected.


In [0]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import Model  # pylint: disable=import-error
from tensorflow.keras.layers import Dense, Dropout, Embedding, Input, LSTM  # pylint: disable=import-error
import tensorflow_datasets as tfds

from cd.cd import lstm_decomposition
from cd.lstm import create_table, create_inv_table, preprocess_dataset

In [5]:
# Restore the trained classifier from the SavedModel directory unpacked from job_dir.zip.
model = tf.keras.models.load_model("job_dir/saved_model/best")
# Print the layer stack so the weight layout used later can be read off.
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, None)]            0         
_________________________________________________________________
embedding (Embedding)        (None, None, 128)         1831808   
_________________________________________________________________
lstm (LSTM)                  (None, 128)               131584    
_________________________________________________________________
dense (Dense)                (None, 2)                 258       
Total params: 1,963,650
Trainable params: 1,963,650
Non-trainable params: 0
_________________________________________________________________


In [0]:
# Vocabulary array shipped alongside the saved model.
tokens = np.load("job_dir/tokens.npy")
# Lookup tables built from that vocabulary: `table` is used below to turn token
# strings into integer ids, and `inv_table` to turn ids back into token strings.
table = create_table(tokens)
inv_table = create_inv_table(tokens)

### Process Input

In [0]:
# Example sentence to explain. It is already tokenized: tokens (including
# punctuation and the clitic 's) are separated by spaces so s.split() recovers them.
s = "it 's easy to love robin tunney - she is pretty and she can act - but it gets harder and harder to understand her choices ."

In [8]:
# Map every whitespace-separated token to its vocabulary id, then add a leading
# axis so the result is a batch containing this single sequence.
inp = tf.expand_dims(table.lookup(tf.constant(s.split())), axis=0)
inp

<tf.Tensor: shape=(1, 27), dtype=int32, numpy=
array([[  12,    8,  396,    7,   82, 3814, 5186,  194,  281,    9,  313,
           4,  281,   66,  590,  194,   22,   12,  283, 5449,    4, 5449,
           7,  683,  115, 2049,    6]], dtype=int32)>

### Generate Overall Prediction

In [9]:
# Score the whole sentence with the full model and normalize the two outputs
# with a softmax.
# NOTE(review): this assumes the dense layer emits raw logits (no activation) — confirm.
x = tf.math.softmax(model.predict(inp))
x

<tf.Tensor: shape=(1, 2), dtype=float32, numpy=array([[0.8496722 , 0.15032782]], dtype=float32)>

$P(neg) = 0.85$, $P(pos) = 0.15$

Next, decompose this prediction and compute separate predictions for sub-spans of the sentence.

In [10]:
# Decode the ids back to tokens and number them, so the index ranges handed to
# the decomposition can be read off directly.
list(enumerate(inv_table.lookup(inp[0]).numpy()))

[(0, b'it'),
 (1, b"'s"),
 (2, b'easy'),
 (3, b'to'),
 (4, b'love'),
 (5, b'robin'),
 (6, b'tunney'),
 (7, b'-'),
 (8, b'she'),
 (9, b'is'),
 (10, b'pretty'),
 (11, b'and'),
 (12, b'she'),
 (13, b'can'),
 (14, b'act'),
 (15, b'-'),
 (16, b'but'),
 (17, b'it'),
 (18, b'gets'),
 (19, b'harder'),
 (20, b'and'),
 (21, b'harder'),
 (22, b'to'),
 (23, b'understand'),
 (24, b'her'),
 (25, b'choices'),
 (26, b'.')]

#### Prepare the Embedding generator for the input

In [0]:
def prepare_embedder(embed_dim, vocab_size):
    """Build a standalone model that maps integer token ids to embedding vectors.

    Args:
        embed_dim: Size of each embedding vector.
        vocab_size: Number of rows in the embedding matrix.

    Returns:
        A `tf.keras.Model` that takes an int32 tensor with a variable-length
        sequence axis and returns the output of a single `Embedding` layer.
    """
    token_ids = Input(shape=(None, ), dtype="int32")
    embedding_layer = Embedding(input_dim=vocab_size, output_dim=embed_dim)
    return tf.keras.Model(inputs=token_ids, outputs=embedding_layer(token_ids))

In [0]:
# 128 is the embedding width shown in the model summary. The vocabulary is sized
# as len(tokens) + 2 — presumably two reserved ids (e.g. padding and
# out-of-vocabulary); TODO confirm against cd.lstm.create_table.
embedder = prepare_embedder(128, 2 + len(tokens))

In [0]:
# Copy the trained embedding matrix into the standalone embedder. The first
# entry of model.get_weights() is taken because the embedding layer is the first
# layer with parameters in the model summary.
embedder.set_weights(model.get_weights()[:1])

### Decomposing the prediction into predictions for tokens [0..15] and [16..26]

In [0]:
# Unpack the model's six weight tensors in layer order. The embedding matrix is
# skipped (the embedder already holds it); k, rk and b are the LSTM's kernel,
# recurrent kernel and bias; dw and db are the dense layer's weight and bias.
_, k, rk, b, dw, db = model.weights

In [0]:
# Embed the token ids once; both decomposition calls below reuse this tensor.
embed_inp = embedder(inp)

In [0]:
# Run the contextual decomposition twice over the same embedded sentence, once
# per clause. The last two arguments select the token span by index; the second
# return value is discarded.
# NOTE(review): the section heading and the quoted phrases imply both indices
# are inclusive — confirm against cd.cd.lstm_decomposition.
pred_0_15, _ = lstm_decomposition(embed_inp, k, rk, b, 0, 15)
pred_16_26, _ = lstm_decomposition(embed_inp, k, rk, b, 16, 26)

In [17]:
# Apply the dense layer by hand (pred @ dw + db) to the first span's
# decomposition output and softmax the result to get that span's class scores.
tf.math.softmax(tf.matmul(pred_0_15, dw) + db)

<tf.Tensor: shape=(1, 2), dtype=float32, numpy=array([[0.0050807 , 0.99491924]], dtype=float32)>

In [18]:
# Apply the dense layer by hand (pred @ dw + db) to the second span's
# decomposition output and softmax the result to get that span's class scores.
tf.math.softmax(tf.matmul(pred_16_26, dw) + db)

<tf.Tensor: shape=(1, 2), dtype=float32, numpy=array([[0.99566877, 0.00433128]], dtype=float32)>

decomposed prediction for __"it 's easy to love robin tunney - she is pretty and she can act -"__: $P(neg) = 0.005, P(pos) = 0.995$

decomposed prediction for __"but it gets harder and harder to understand her choices ."__: $P(neg) = 0.996, P(pos) = 0.004$