### Dependencies

In [1]:
!git clone "https://github.com/suyash/ContextualDecomposition.git" && mv ContextualDecomposition/cd ./cd

Cloning into 'ContextualDecomposition'...
remote: Enumerating objects: 59, done.[K
remote: Counting objects: 100% (59/59), done.[K
remote: Compressing objects: 100% (34/34), done.[K
remote: Total 59 (delta 30), reused 52 (delta 23), pack-reused 0[K
Unpacking objects: 100% (59/59), done.


In [2]:
!curl -L -o "job_dir.zip" "https://drive.google.com/uc?export=download&id=1wkygiEOc2T9LFbD4fcO__9tXGVkxu0qu" && unzip -q -d "job_dir" "job_dir.zip"

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   388    0   388    0     0   1366      0 --:--:-- --:--:-- --:--:--  1361
100 11.0M    0 11.0M    0     0  6616k      0 --:--:--  0:00:01 --:--:--  448M


### Imports

In [3]:
# NOTE(review): `%tensorflow_version` is a Google Colab line magic, so this notebook
# presumably targets Colab; on another runtime install TensorFlow 2.x instead. Confirm.
%tensorflow_version 2.x

TensorFlow 2.x selected.


In [0]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import Model  # pylint: disable=import-error

from cd.cd import cnn_net_decomposition
from cd.preprocess import create_table, create_inv_table

In [5]:
# Load the saved model from job_dir/saved_model/best (part of the archive unpacked
# into job_dir above) and print its layer table.
model = tf.keras.models.load_model("job_dir/saved_model/best")
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, None)]       0                                            
__________________________________________________________________________________________________
embedding (Embedding)           (None, None, 64)     915904      input_1[0][0]                    
__________________________________________________________________________________________________
conv1d (Conv1D)                 (None, None, 256)    33024       embedding[0][0]                  
__________________________________________________________________________________________________
conv1d_1 (Conv1D)               (None, None, 128)    24704       embedding[0][0]                  
______________________________________________________________________________________________

In [0]:
# Token array shipped in the job_dir archive (presumably the training vocabulary — confirm).
tokens = np.load("job_dir/tokens.npy")
# Forward table: used below to turn word strings into integer ids.
table = create_table(tokens)
# Inverse table: used below to turn integer ids back into words (byte strings).
inv_table = create_inv_table(tokens)

### Process Input

In [0]:
# Example sentence, already tokenised by whitespace: punctuation and the clitic 's are
# written as separate space-delimited tokens because the next cell uses a plain s.split().
s = "the longer the movie goes , the worse it gets , but it 's actually pretty good in the first few minutes"

In [8]:
# Look up the integer id of every whitespace-separated word, then add a leading axis
# of size 1 so the ids have shape (1, num_tokens).
inp = tf.expand_dims(table.lookup(tf.constant(s.split())), axis=0)
inp

<tf.Tensor: shape=(1, 22), dtype=int32, numpy=
array([[   1, 1752,    1,   19,  336,    2,    1,  552,   12,  283,    2,
          22,   12,    8,  301,  313,   46,   11,    1,  167,  169,  164]],
      dtype=int32)>

### Generate Overall Prediction

In [9]:
# Run the full model on the id sequence and normalise its two outputs with a softmax.
# The displayed row is read below as [P(neg), P(pos)].
x = tf.math.softmax(model.predict(inp))
x

<tf.Tensor: shape=(1, 2), dtype=float32, numpy=array([[0.98572636, 0.01427361]], dtype=float32)>

$P(neg) \approx 0.986$, $P(pos) \approx 0.014$

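Next, decompose this prediction to see how individual sub-phrases contribute to it. The cell below lists every token with its position, which is used to select the sub-phrases.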

In [10]:
# Map the ids back to words (byte strings) and list them with their positions; these
# positions are the start/stop indices passed to cnn_net_decomposition below.
t = inv_table.lookup(inp[0]).numpy()
list(enumerate(t))

[(0, b'the'),
 (1, b'longer'),
 (2, b'the'),
 (3, b'movie'),
 (4, b'goes'),
 (5, b','),
 (6, b'the'),
 (7, b'worse'),
 (8, b'it'),
 (9, b'gets'),
 (10, b','),
 (11, b'but'),
 (12, b'it'),
 (13, b"'s"),
 (14, b'actually'),
 (15, b'pretty'),
 (16, b'good'),
 (17, b'in'),
 (18, b'the'),
 (19, b'first'),
 (20, b'few'),
 (21, b'minutes')]

### Decomposing the prediction into the prediction for [0..11] and [12..21]

In [0]:
# Flat list of the model's weight tensors. The cells below index it as: [0] for the
# embedding lookup, consecutive pairs [2i+1], [2i+2] for the conv layers, and [-2], [-1]
# for the final dense multiply-and-add. Presumably this is Keras layer order with
# kernel before bias — confirm against model.summary().
weights = model.weights

In [0]:
# Embed the token ids by hand using the first weight tensor (assumed to be the embedding
# matrix — confirm), because cnn_net_decomposition is fed embeddings rather than ids.
embed_inp = tf.nn.embedding_lookup(params=weights[0], ids=inp)

In [0]:
# Group the weights that sit between weights[0] (used for the embedding lookup) and
# weights[-2], weights[-1] (used for the final dense layer). Those three account for
# the "- 3"; the remainder is taken two at a time, one [first, second] pair per conv layer.
conv_weights = [
    [weights[2 * k + 1], weights[2 * k + 2]]
    for k in range((len(weights) - 3) // 2)
]

In [0]:
# Decompose the model output for two spans of the token listing above: positions 0..11
# ("the ... but") and positions 12..21 ("it ... minutes"). The bounds appear to be
# inclusive — confirm against cd.cd.cnn_net_decomposition. Only the first of the two
# returned values is kept for each span.
pred_0_11, _ = cnn_net_decomposition(embed_inp, conv_weights, 0, 11)
pred_12_21, _ = cnn_net_decomposition(embed_inp, conv_weights, 12, 21)

In [0]:
# The last two weight tensors, used below as the final dense layer's matrix (dw) and
# additive term (db).
dw, db = weights[-2:]

In [16]:
# Push the positions 0..11 contribution through the final dense layer and a softmax;
# the row is read as [P(neg), P(pos)].
tf.math.softmax(tf.matmul(pred_0_11, dw) + db)

<tf.Tensor: shape=(1, 2), dtype=float32, numpy=array([[9.99986529e-01, 1.34848515e-05]], dtype=float32)>

In [17]:
# Same projection for the positions 12..21 contribution; the row is read as [P(neg), P(pos)].
tf.math.softmax(tf.matmul(pred_12_21, dw) + db)

<tf.Tensor: shape=(1, 2), dtype=float32, numpy=array([[8.0306752e-04, 9.9919695e-01]], dtype=float32)>

Decomposed prediction for __"the longer the movie goes , the worse it gets , but"__: $P(neg) \approx 0.99999, P(pos) \approx 0.00001$

Decomposed prediction for __"it 's actually pretty good in the first few minutes"__: $P(neg) \approx 0.0008, P(pos) \approx 0.9992$

### Individual Word Level Decomposition

In [0]:
# Word-level decomposition: score every single position on its own (start == stop == i)
# and push it through the final dense layer and a softmax, giving one [P(neg), P(pos)]
# row per token.
# Iterate over len(t) instead of a hard-coded 22 so the loop stays in step with the
# token list when the sentence `s` is changed.
preds = []
for i in range(len(t)):
    rel, _ = cnn_net_decomposition(embed_inp, conv_weights, i, i)
    pred = tf.math.softmax(tf.matmul(rel, dw) + db)
    # pred has a single row; keep it as a plain Python list.
    preds.append(pred.numpy().tolist()[0])

In [19]:
# Pair each word with its own [P(neg), P(pos)] row from the loop above.
list(zip(t, preds))

[(b'the', [0.49406933784484863, 0.5059306621551514]),
 (b'longer', [0.49385687708854675, 0.5061431527137756]),
 (b'the', [0.49451950192451477, 0.5054804682731628]),
 (b'movie', [0.49166005849838257, 0.5083400011062622]),
 (b'goes', [0.4903869032859802, 0.509613037109375]),
 (b',', [0.43732041120529175, 0.5626795887947083]),
 (b'the', [0.33485114574432373, 0.6651487946510315]),
 (b'worse', [0.9999983310699463, 1.6490405414515408e-06]),
 (b'it', [0.7699010968208313, 0.2300989329814911]),
 (b'gets', [0.8522055149078369, 0.14779449999332428]),
 (b',', [0.16093102097511292, 0.8390689492225647]),
 (b'but', [0.06107287108898163, 0.938927173614502]),
 (b'it', [0.2142852544784546, 0.7857147455215454]),
 (b"'s", [0.3198399543762207, 0.6801600456237793]),
 (b'actually', [0.18980059027671814, 0.8101994395256042]),
 (b'pretty', [0.14130070805549622, 0.8586993217468262]),
 (b'good', [0.05624309182167053, 0.9437569379806519]),
 (b'in', [0.667020320892334, 0.33297961950302124]),
 (b'the', [0.607681453