In [1]:
import tensorflow as tf

import numpy as np
import requests as rq
import os, io, h5py

from tfomics import explain, evaluate

import models

In [2]:
# Fetch the synthetic dataset into memory and stop immediately on an HTTP error status.
data = rq.get('https://www.dropbox.com/s/5iww0ootxkr6e21/synthetic_code_dataset.h5?raw=true')
data.raise_for_status()

# The downloaded bytes are opened as an HDF5 file without touching disk.
# Every 3-D array read below has its last two axes swapped.
# NOTE(review): presumably this turns (N, channels, length) into (N, length, channels) — confirm.
with h5py.File(io.BytesIO(data.content), 'r') as dataset:
    # training split: inputs and labels are both float32
    x_train = np.transpose(np.array(dataset['X_train']).astype(np.float32), (0, 2, 1))
    y_train = np.array(dataset['Y_train']).astype(np.float32)

    # validation split: labels are kept as int32
    x_valid = np.transpose(np.array(dataset['X_valid']).astype(np.float32), (0, 2, 1))
    y_valid = np.array(dataset['Y_valid']).astype(np.int32)

    # test split: labels are kept as int32
    x_test = np.transpose(np.array(dataset['X_test']).astype(np.float32), (0, 2, 1))
    y_test = np.array(dataset['Y_test']).astype(np.int32)

    # per-sequence reference array; later sliced into X_model and handed to the
    # tfomics `evaluate` functions alongside the attribution scores
    model_test = np.transpose(np.array(dataset['model_test']).astype(np.float32), (0, 2, 1))

In [3]:
# Build the network from the project-local `models` module with a single output.
model = models.CNN_ATT(num_out=1)

# Track area under the ROC and precision-recall curves. The metric names chosen
# here give the validation keys ('val_auroc', 'val_aupr') the callbacks monitor.
auroc = tf.keras.metrics.AUC(curve='ROC', name='auroc')
aupr = tf.keras.metrics.AUC(curve='PR', name='aupr')
model.compile(tf.keras.optimizers.Adam(0.0005), loss='binary_crossentropy', metrics=[auroc, aupr])

# Multiply the learning rate by 0.2 once validation AUPR has not improved for 5 epochs.
# The keyword must be `patience`: the earlier spelling `patient=5` was absorbed by the
# callback's **kwargs and ignored, so the default patience was silently used instead.
lr_decay = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_aupr', factor=0.2, patience=5, verbose=1, min_lr=1e-7, mode='max')
# Stop training once validation AUPR has not improved for 15 epochs.
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_aupr', patience=15, verbose=1, mode='max')
# NOTE(review): with epochs=1 neither callback can trigger; raise `epochs` for a real training run.
model.fit(x_train, y_train, epochs=1, validation_data=(x_valid, y_valid), callbacks=[lr_decay, early_stop], verbose=1)



<tensorflow.python.keras.callbacks.History at 0x224cd507070>

In [4]:
# Upper bound on how many positive test sequences are analysed
# (kept at 500 because expintgrad takes long).
num_analyze = 500

# Row indices of the test examples whose first label column equals 1.
pos_index = np.flatnonzero(y_test[:, 0] == 1)

# Take the first `num_analyze` positives and the matching rows of model_test.
keep = pos_index[:num_analyze]
X = x_test[keep]
X_model = model_test[keep]

# Explainer wrapping the trained model, targeting output index 0.
explainer = explain.Explainer(model, class_index=0)

# Saliency attribution maps for the selected sequences.
saliency_scores = explainer.saliency_maps(X)

# Combine the maps with the inputs through tfomics' grad_times_input
# (per the original note, this reduces the maps to 1D scores).
sal_scores = explain.grad_times_input(X, saliency_scores)

In [5]:
# Score the saliency attributions against X_model with tfomics'
# interpretability_performance (original note: compares the distribution of
# attribution scores at positions with and without motifs).
threshold = 0.1
saliency_roc, saliency_pr = evaluate.interpretability_performance(sal_scores, X_model, threshold)

# Summarise the ROC values as mean +/- standard deviation
# (the PR values are returned but not reported here).
roc_mean = np.mean(saliency_roc)
roc_std = np.std(saliency_roc)
print("%s: %.3f+/-%.3f"%('saliency', roc_mean, roc_std))

saliency: 0.599+/-0.050


In [6]:
# Signal/noise statistics of the saliency scores relative to X_model, followed by
# the signal-to-noise ratio computed from the signal and the top-k noise statistic.
threshold = 0.1
top_k = 10

# signal_noise_stats returns four arrays; only the signal and top-k noise are used below.
sal_signal, sal_noise_max, sal_noise_mean, sal_noise_topk = evaluate.signal_noise_stats(
    sal_scores, X_model, top_k, threshold
)

score = evaluate.calculate_snr(sal_signal, sal_noise_topk)

# Report the SNR as mean +/- standard deviation.
snr_mean = np.mean(score)
snr_std = np.std(score)
print("%s: %.3f+/-%.3f"%('saliency', snr_mean, snr_std))

saliency: 0.055+/-0.117
