In [1]:
from __future__ import print_function 
import os, sys, h5py
import numpy as np
from six.moves import cPickle
from collections import OrderedDict

import tensorflow as tf
sys.path.append('..')
from tfomics import neuralnetwork as nn
from tfomics import utils, fit, init, visualize, saliency
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
def load_simulation(filepath):
    """Load the train/valid/test splits of a simulated dataset from an HDF5 file.

    Parameters
    ----------
    filepath : str
        Path to an HDF5 file holding the datasets 'X_train', 'Y_train',
        'X_valid', 'Y_valid', 'X_test' and 'Y_test'.

    Returns
    -------
    tuple of np.ndarray
        (X_train, y_train, X_valid, y_valid, X_test, y_test).
        Each X array is float32; a singleton axis is appended at position 3
        and the axes are permuted with [0, 2, 3, 1], so a stored array of
        shape (d0, d1, d2) is returned as (d0, d2, 1, d1).
        y_train is float32, y_valid and y_test are int32.
        NOTE(review): the float32/int32 split between the label arrays is kept
        exactly as in the original code -- confirm it is intentional.
    """

    def _read(h5file, key, dtype):
        # copy the dataset into memory so it stays valid after the file closes
        return np.array(h5file[key]).astype(dtype)

    def _append_axis_and_reorder(x):
        # (d0, d1, d2) -> (d0, d1, d2, 1) -> (d0, d2, 1, d1)
        return np.expand_dims(x, axis=3).transpose([0, 2, 3, 1])

    # the context manager closes the HDF5 handle even if a read fails;
    # the original left the file open for the lifetime of the kernel
    with h5py.File(filepath, 'r') as trainmat:
        print("loading training data")
        X_train = _read(trainmat, 'X_train', np.float32)
        y_train = _read(trainmat, 'Y_train', np.float32)

        print("loading cross-validation data")
        X_valid = _read(trainmat, 'X_valid', np.float32)
        y_valid = _read(trainmat, 'Y_valid', np.int32)

        print("loading test data")
        X_test = _read(trainmat, 'X_test', np.float32)
        y_test = _read(trainmat, 'Y_test', np.int32)

    X_train = _append_axis_and_reorder(X_train)
    X_valid = _append_axis_and_reorder(X_valid)
    X_test = _append_axis_and_reorder(X_test)

    return X_train, y_train, X_valid, y_valid, X_test, y_test


# Locate the HDF5 dataset (directory first, then file name) and read all
# three splits into module-level arrays.
data_path = '../../real_gneomic_representations/data/synthetic_TF_dataset'
filename = 'Synthetic_TF_dataset_50000.h5'
file_path = os.path.join(data_path, filename)
(X_train, y_train,
 X_valid, y_valid,
 X_test, y_test) = load_simulation(file_path)

loading training data
loading cross-validation data
loading test data


In [3]:

def model(input_shape, output_shape):
    """Describe the network architecture and its optimisation settings.

    Parameters
    ----------
    input_shape : sequence
        Stored unchanged under 'input_shape' in the input-layer spec.
    output_shape : sequence
        Only output_shape[1] is read; it becomes 'num_units' of the final
        dense layer.

    Returns
    -------
    (list of dict, dict)
        The ordered layer specifications (input, two conv1d layers, dense
        sigmoid output) and the optimisation settings. How each key is
        interpreted is up to the code that consumes these dicts.
    """
    input_layer = dict(layer='input', input_shape=input_shape)

    first_conv = dict(layer='conv1d',
                      num_filters=32,
                      filter_size=11,
                      norm='batch',
                      activation='relu',
                      dropout=0.1,
                      padding='SAME',
                      pool_size=40)

    second_conv = dict(layer='conv1d',
                       num_filters=64,
                       filter_size=5,
                       norm='batch',
                       activation='relu',
                       dropout=0.2,
                       padding='VALID')

    output_layer = dict(layer='dense',
                        num_units=output_shape[1],
                        activation='sigmoid')

    # optimisation settings; the regularisation entries stay disabled:
    #   "l2": 1e-6
    #   "l1": 0.1
    settings = dict(objective="binary",
                    optimizer="adam",
                    learning_rate=0.001)

    return [input_layer, first_conv, second_conv, output_layer], settings


# Derive the shapes handed to model() from the training arrays.
output_shape = y_train.shape
num_data, height, width, dim = X_train.shape
# None replaces the first (sample-count) dimension of X_train
input_shape = [None, height, width, dim]

model_layers, optimization = model(input_shape=input_shape,
                                   output_shape=output_shape)

In [4]:
# set output file paths
# utils.make_directory receives the dataset directory and 'tmp'; the checkpoint
# path printed later in this notebook (".../synthetic_TF_dataset/tmp/test_best.ckpt")
# shows the results end up in <data_path>/tmp.
results_path = utils.make_directory(data_path, 'tmp')
output_name = 'test'
# NOTE: this rebinds file_path, which previously held the HDF5 dataset path;
# from here on it is the prefix used for saved model files.
file_path = os.path.join(results_path, output_name)

# build neural network class
# model_layers / optimization are the specs returned by model() above
nnmodel = nn.NeuralNet()
nnmodel.build_layers(model_layers, optimization)
# nnmodel.inspect_layers()

# compile neural trainer
# save='best' together with file_path: a later cell loads "<file_path>_best.ckpt"
nntrainer = nn.NeuralTrainer(nnmodel, save='best', file_path=file_path)

In [5]:
# Create the session object used by every training/evaluation cell below,
# passing the model's placeholders to the tfomics helper.
# NOTE(review): presumably this also initialises the graph variables -- confirm
# in tfomics.utils.initialize_session.
sess = utils.initialize_session(nnmodel.placeholders)

In [6]:
# pair each split's inputs with its targets
train = dict(inputs=X_train, targets=y_train)
valid = dict(inputs=X_valid, targets=y_valid)
test = dict(inputs=X_test, targets=y_test)

In [7]:
# bundle the three splits into one dict and run minibatch training
data = dict(train=train, valid=valid, test=test)
fit.train_minibatch(
    sess, nntrainer, data,
    batch_size=100,
    num_epochs=50,
    patience=20,
    verbose=2,
    shuffle=True,
    save_all=False,
)

Epoch 1 out of 50 

KeyboardInterrupt: 

In [7]:
# Restore the saved "best" parameters into the session; the recorded output
# shows this loading "<file_path>_best.ckpt".
nntrainer.set_best_parameters(sess)

# Evaluate on the held-out test split. `test` is rebuilt here with the same
# contents as in the earlier cell, so this cell does not depend on that cell
# having been run.
test = {'inputs': X_test, 'targets': y_test}
# test_model returns three values, unpacked as the loss plus mean/std of the
# reported metrics (names taken from the unpacking; see the printed summary).
loss, mean_vals, std_vals = nntrainer.test_model(sess, test, batch_size=128, name='test', verbose=1)

loading model from:  ../../real_gneomic_representations/data/synthetic_TF_dataset/tmp/test_best.ckpt
  test  loss:		0.36129
  test  accuracy:	0.84027+/-0.00000
  test  auc-roc:	0.92014+/-0.00000
  test  auc-pr:		0.91335+/-0.00000


In [8]:
# Collect the model's trainable parameters; a later cell displays this as a
# list of tf Variable objects, and the gradient/hessian cells differentiate
# the loss with respect to it.
params = nnmodel.get_trainable_parameters()

In [9]:
# Rebuild the test split dict (same contents as before) and display the
# trainer's current test-time feed dict; the recorded output shows it maps
# placeholders to values (is_training False, keep_prob 1.0, empty inputs).
test = {'inputs': X_test, 'targets': y_test}
nntrainer.test_feed

{<tf.Tensor 'is_training:0' shape=<unknown> dtype=bool>: False,
 <tf.Tensor 'Placeholder:0' shape=<unknown> dtype=float32>: 0.001,
 <tf.Tensor 'keep_prob_0:0' shape=<unknown> dtype=float32>: 1.0,
 <tf.Tensor 'inputs:0' shape=(?, 200, 1, 4) dtype=float32>: [],
 <tf.Tensor 'keep_prob_1:0' shape=<unknown> dtype=float32>: 1.0}

In [10]:
# Gradient of the loss with respect to every trainable parameter, evaluated on
# one minibatch drawn from the test split.
# NOTE: num_data is rebound here (it previously held X_train.shape[0]).
num_data = test['targets'].shape[0]
# NOTE(review): the positional arguments look like (num_data, batch_size=1000,
# shuffle=True) -- confirm against nn.BatchGenerator.
batch_generator = nn.BatchGenerator(num_data, 1000, True)
# Overwrites the trainer's test_feed with the feed dict returned for this minibatch.
nntrainer.test_feed = batch_generator.next_minibatch(test, nntrainer.test_feed, nntrainer.placeholders)

dx = params
dy = nnmodel.loss
# tf.gradients adds new gradient ops to the graph every time this cell is run.
# The cell displays the full gradient arrays (one per parameter).
sess.run(tf.gradients(dy, dx), feed_dict=nntrainer.test_feed)

[array([-0.00350777, -0.00703558,  0.00535235, ..., -0.00426305,
         0.00186531,  0.0053092 ], dtype=float32),
 array([-0.01504638, -0.00255676,  0.00442328,  0.01059202,  0.03387694,
        -0.00236827, -0.01294479,  0.0256978 ,  0.00550709,  0.00934643,
         0.01505037,  0.01004567,  0.03109215,  0.00361257,  0.01900271,
        -0.01999425,  0.00580315, -0.00819865, -0.00436363, -0.0157215 ,
        -0.0064188 ,  0.01047697,  0.01292237,  0.00525506,  0.01601962,
         0.00991549,  0.0055623 ,  0.00213059, -0.00968827, -0.01860135,
         0.0088518 ,  0.00856402], dtype=float32),
 array([-0.0160378 , -0.00695984,  0.00246645,  0.0106992 ,  0.0317835 ,
        -0.00263872, -0.01740194,  0.02314704,  0.00798777,  0.0063303 ,
         0.01554891,  0.0100847 ,  0.02810453,  0.00460645,  0.01817757,
        -0.02146973,  0.00397175, -0.00814741, -0.00380266, -0.01858133,
        -0.00376066,  0.00961024,  0.00779915,  0.00401985,  0.01365141,
         0.00851226,  0.003055

In [11]:
# Same setup as the gradient cell, but requesting second derivatives of the
# loss with respect to each trainable parameter.
# KNOWN FAILURE: the recorded output of this cell is
#   AttributeError: 'NoneType' object has no attribute 'pred'
# NOTE(review): the cause is not visible here; possibly tf.hessians cannot
# differentiate twice through part of this graph -- confirm before relying on
# this cell, and consider removing it or catching the error.
num_data = test['targets'].shape[0]
# NOTE(review): arguments look like (num_data, batch_size=1000, shuffle=True) -- confirm.
batch_generator = nn.BatchGenerator(num_data, 1000, True)
# Overwrites the trainer's test_feed with the feed dict returned for this minibatch.
nntrainer.test_feed = batch_generator.next_minibatch(test, nntrainer.test_feed, nntrainer.placeholders)

dx = params
dy = nnmodel.loss
sess.run(tf.hessians(dy, dx), feed_dict=nntrainer.test_feed)

AttributeError: 'NoneType' object has no attribute 'pred'

In [12]:
# Display the trainable-parameter list (recorded output: eight tf Variable objects).
params

[<tensorflow.python.ops.variables.Variable at 0x121596748>,
 <tensorflow.python.ops.variables.Variable at 0x1215b4f98>,
 <tensorflow.python.ops.variables.Variable at 0x121595fd0>,
 <tensorflow.python.ops.variables.Variable at 0x105b5acf8>,
 <tensorflow.python.ops.variables.Variable at 0x106dbfe80>,
 <tensorflow.python.ops.variables.Variable at 0x128106a58>,
 <tensorflow.python.ops.variables.Variable at 0x106e83438>,
 <tensorflow.python.ops.variables.Variable at 0x106ea1908>]

In [None]:
# Plot the filters of layer 'conv1d_0' as sequence logos.
# get_parameters returns a sequence; its first element is taken as the filter
# weights (NOTE(review): presumably the kernel of the first conv layer -- confirm).
W = nnmodel.get_parameters(sess, layer='conv1d_0')[0]
# move the last axis to the front ([3, 2, 0, 1]) and drop singleton axes
W = np.squeeze(W.transpose([3, 2, 0, 1]))
# Bind the second return value to its own name. Unpacking it into `plt`, as the
# original did, rebinds the notebook-wide matplotlib.pyplot alias that the
# saliency cell below relies on (plt.subplots, plt.subplot, plt.ylabel, ...).
fig, logo_plt = visualize.plot_filter_logos(W, normalize=True, figsize=(100,50), height=25,
                                            nt_width=10, norm=0, alphabet='dna')

In [None]:
# Pick one test sequence and give it a leading axis of size 1.
index = 1
X = np.expand_dims(X_test[index], axis=0)

# Settings passed to every saliency routine below.
# NOTE(review): this rebinds `params`, which earlier held the list of trainable
# variables used by the gradient/hessian cells.
params = dict(genome_model=model,
              input_shape=input_shape,
              output_shape=output_shape,
              model_path=file_path+'_best.ckpt',
              optimization=optimization)

# Three saliency variants for class 0 at the 'output' layer:
# plain backprop, guided backprop, and stochastic guided backprop.
backprop_saliency = saliency.backprop(
    X, layer='output', class_index=0, params=params)
guided_saliency = saliency.guided_backprop(
    X, layer='output', class_index=0, params=params)
stochastic_saliency = saliency.stochastic_guided_backprop(
    X, layer='output', class_index=0, params=params,
    num_average=200, threshold=1.0, stochastic_val=0.5)

# Four stacked panels: one per saliency variant, then the input sequence itself.
fig, ax = plt.subplots(nrows=4, ncols=1, sharex=True, sharey=True, figsize=(15, 3));
for _row, (_label, _saliency_map) in enumerate([(' backprop', backprop_saliency),
                                               (' guided', guided_saliency),
                                               (' stochastic', stochastic_saliency)],
                                              start=1):
    plt.subplot(4, 1, _row)
    visualize.plot_pos_saliency(np.squeeze(_saliency_map[0]).T)
    plt.ylabel(_label, fontsize=6)
plt.subplot(4, 1, 4)
visualize.plot_pos_saliency(np.squeeze(X).T)
plt.axis('off')