In [1]:
import tensorflow as tf

import numpy as np

import models
import utils

Matplotlib is building the font cache; this may take a moment.


In [2]:
# Unpack the seven values returned by the synthetic-dataset helper:
# train / validation / test inputs and labels, plus `model_test`.
(
    x_train, y_train,
    x_valid, y_valid,
    x_test, y_test,
    model_test,
) = utils.get_synthetic_coded_dataset()

# Sanity check: show the training labels, then the training input shape.
print(y_train, x_train.shape, sep="\n")

[[1.]
 [1.]
 [0.]
 ...
 [1.]
 [0.]
 [1.]]
(14000, 200, 4)


In [3]:
# Instantiate the project's CNN_ATT model with a single output.
model = models.CNN_ATT(num_out=1)

# Track area under the ROC curve and under the precision-recall curve.
auroc = tf.keras.metrics.AUC(curve='ROC', name='auroc')
aupr = tf.keras.metrics.AUC(curve='PR', name='aupr')
model.compile(tf.keras.optimizers.Adam(0.0005), loss='binary_crossentropy', metrics=[auroc, aupr])

# Both callbacks monitor validation AUPR, where larger is better (mode='max').
# The learning-rate schedule's keyword is `patience`; a misspelled keyword is not
# applied as the patience value, so the spelling matters here.
lr_decay = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_aupr', factor=0.2, patience=5, verbose=1, min_lr=1e-7, mode='max')
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_aupr', patience=15, verbose=1, mode='max')

# Train for two epochs, evaluating on the validation split after each epoch.
model.fit(x_train, y_train, epochs=2, validation_data=(x_valid, y_valid), callbacks=[lr_decay, early_stop], verbose=1)

Epoch 1/2
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0x208ef32d4c0>

In [10]:
# Run the utility that returns three saliency results for the test set.
sal_roc, sal_pr, snr = utils.get_saliency_scores(model, x_test, y_test, model_test)

# Show the dimensions of the three results when stacked into one array.
print(np.shape([sal_roc, sal_pr, snr]))

(3, 500)


In [4]:
# Upper bound on how many test sequences are analyzed
# (kept at 500 because expintgrad takes long).
num_analyze = 500

# Row indices of the test examples whose first label column equals 1.
pos_index = np.nonzero(y_test[:, 0] == 1)[0]

# Keep the first `num_analyze` positives, together with the matching
# entries of `model_test`.
X = x_test[pos_index[:num_analyze]]
X_model = model_test[pos_index[:num_analyze]]

# NOTE(review): `explain` is not among the imports in the notebook's import
# cell -- confirm it is in scope before running from a fresh kernel.
explainer = explain.Explainer(model, class_index=0)

# Saliency attribution maps for the selected sequences.
saliency_scores = explainer.saliency_maps(X)

# Reduce the attribution maps to 1D scores (gradient-times-input helper).
sal_scores = explain.grad_times_input(X, saliency_scores)

In [10]:
# Compare attribution scores at positions with and without motifs.
# NOTE(review): `evaluate` is not among the imports in the notebook's import
# cell -- confirm it is in scope before running from a fresh kernel.
threshold = 0.1
saliency_roc, saliency_pr = evaluate.interpretability_performance(
    sal_scores, X_model, threshold
)

# Report the ROC values as mean +/- standard deviation.
print(
    "%s: %.3f+/-%.3f"
    % ('saliency', np.mean(saliency_roc), np.std(saliency_roc))
)

saliency: 0.742+/-0.068


In [11]:
# Signal and noise statistics of the attribution scores, followed by a
# signal-to-noise score computed from the signal and top-k noise values.
threshold = 0.1
top_k = 10

(
    sal_signal,
    sal_noise_max,
    sal_noise_mean,
    sal_noise_topk,
) = evaluate.signal_noise_stats(sal_scores, X_model, top_k, threshold)

score = evaluate.calculate_snr(sal_signal, sal_noise_topk)

# Report the score as mean +/- standard deviation.
print(
    "%s: %.3f+/-%.3f"
    % ('saliency', np.mean(score), np.std(score))
)

saliency: 0.917+/-0.402
