In [1]:
# Main impots
from alibi.explainers import IntegratedGradients
from IPython.display import HTML
import numpy
import tensorflow

# Test impots
from tensorflow.keras.datasets import imdb
from tensorflow.keras.layers import Input, Dense, Embedding, Conv1D, GlobalMaxPooling1D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.utils import to_categorical
import matplotlib.pyplot as plt
import os

2024-06-04 16:01:02.479786: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-06-04 16:01:02.491793: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-06-04 16:01:02.546088: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Taken from Alibi docs

def colorize(attrs, cmap='PiYG'):
    """
    Compute hex colors based on the attributions for a single instance.
    Uses a diverging colorscale by default and normalizes and scales
    the colormap so that colors are consistent with the attributions.
    """
    import matplotlib as mpl
    cmap_bound = numpy.abs(attrs).max()
    norm = mpl.colors.Normalize(vmin=-cmap_bound, vmax=cmap_bound)
    cmap = plt.get_cmap(cmap)
    # now compute hex values of colors
    colors = list(map(lambda x: cmap(norm(x)), attrs))
    return colors

def  hlstr(string, color='white'):
    """
    Return HTML markup highlighting text with the desired color.
    """
    return f"<mark style=background-color:{color}>{string} </mark>"

In [3]:
# Params fo integated gradients
n_steps = 50
method = 'gausslegendre'
internal_batch_size = 100

def gen_sent_visual(model, sample, index):
    predictions = model(sample).numpy().argmax(axis=1)
    gradients = IntegratedGradients(model,
                    layer= model.layers[1],
                    n_steps=n_steps,
                    method=method,
                    internal_batch_size=internal_batch_size)
    explanation = gradients.explain(
                    sample,
                    baselines=None,
                    target=predictions,
                    attribute_to_layer_inputs=False
                )

    print(explanation.meta)

    attrs = explanation.attributions[0]

    # probably should be a loop fom here
    i = 1
    x_i = sample[i]
    attrs_i = attrs[i]
    pred = predictions[i]
    pred_dict = {1: 'Positive review', 0: 'Negative review'}

    reverse_index = {value: key for (key, value) in index.items()}
    words = " ".join([reverse_index.get(i - 3, 'UNK') for i in x_i])
    colors = colorize(attrs_i)
    # to here

    HTML("".join(list(map(hlstr, words, colors))))

Test Code Begins

In [4]:
max_features = 10000
maxlen = 100

print('Loading data...')
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
test_labels = y_test.copy()
train_labels = y_train.copy()
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')
y_train, y_test = to_categorical(y_train), to_categorical(y_test)

print('Pad sequences (samples x time)')
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)

Loading data...
25000 train sequences
25000 test sequences
Pad sequences (samples x time)
x_train shape: (25000, 100)
x_test shape: (25000, 100)


In [5]:
batch_size = 32
embedding_dims = 50
filters = 250
kernel_size = 3
hidden_dims = 250

load_model = False
save_model = True

filepath = './model_imdb/'  # change to directory where model is downloaded
if load_model:
    model = tensorflow.keras.models.load_model(os.path.join(filepath, 'model.h5'))
else:
    print('Build model...')

    inputs = Input(shape=(maxlen,), dtype=tensorflow.int32)
    embedded_sequences = Embedding(max_features,
                                   embedding_dims)(inputs)
    out = Conv1D(filters,
                 kernel_size,
                 padding='valid',
                 activation='relu',
                 strides=1)(embedded_sequences)
    out = Dropout(0.4)(out)
    out = GlobalMaxPooling1D()(out)
    out = Dense(hidden_dims,
                activation='relu')(out)
    out = Dropout(0.4)(out)
    outputs = Dense(2, activation='softmax')(out)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    print('Train...')
    model.fit(x_train, y_train,
              batch_size=256,
              epochs=3,
              validation_data=(x_test, y_test))
    if save_model:
        if not os.path.exists(filepath):
            os.makedirs(filepath)
        model.save(os.path.join(filepath, 'model.h5'))

Build model...
Train...
Epoch 1/3


2024-06-04 16:01:09.096124: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-06-04 16:01:09.097029: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2251] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 111ms/step - accuracy: 0.5657 - loss: 0.6673 - val_accuracy: 0.8169 - val_loss: 0.4273
Epoch 2/3
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 112ms/step - accuracy: 0.8442 - loss: 0.3581 - val_accuracy: 0.8533 - val_loss: 0.3527
Epoch 3/3
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 120ms/step - accuracy: 0.9117 - loss: 0.2287 - val_accuracy: 0.8528 - val_loss: 0.3378




In [6]:
nb_samples = 10
temp = gen_sent_visual(model, x_test[:nb_samples], imdb.get_word_index())
type(temp)

2024-06-04 16:01:44.711954: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


{'name': 'IntegratedGradients', 'type': ['whitebox'], 'explanations': ['local'], 'params': {'target_fn': None, 'method': 'gausslegendre', 'n_steps': 50, 'internal_batch_size': 100, 'layer': 1}, 'version': '0.9.6'}


NoneType

In [7]:
import json

print(json.dumps(temp, indent=2))

null


Test Code ends