In [15]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os, sys
import numpy as np
import helper
import tensorflow as tf
from deepomics import neuralnetwork as nn
from deepomics import utils, fit


all_models = ['DistNet', 'LocalNet', 'DeepBind', 'StandardNet']
num_models = len(all_models)
dropout_status = [True, True, 	False, 	False, 	False, True,  True,  False]
l2_status = 	 [True, False, 	True, 	False, 	False, True,  False, True]
bn_status = 	 [True, False, 	False, 	True, 	False, False, True,  True]
num_reg = len(dropout_status)

Names = []
# loop through models
for t, model_name in enumerate(all_models): 
    #Names[t] = []
    #loop through every regularization type
    modelsreg = []
    for i in range(len(dropout_status)):

        # compile neural trainer
        name = model_name
        if dropout_status[i]:
            name += '_do'
        if l2_status[i]:
            name += '_l2'
        if bn_status[i]:
            name += '_bn'
        modelsreg.append(name)
        #Names[t].append(name)
    Names.append(modelsreg)

In [18]:
import pandas as pd
pd.DataFrame(Names)

Unnamed: 0,0,1,2,3,4,5,6,7
0,DistNet_do_l2_bn,DistNet_do,DistNet_l2,DistNet_bn,DistNet,DistNet_do_l2,DistNet_do_bn,DistNet_l2_bn
1,LocalNet_do_l2_bn,LocalNet_do,LocalNet_l2,LocalNet_bn,LocalNet,LocalNet_do_l2,LocalNet_do_bn,LocalNet_l2_bn
2,DeepBind_do_l2_bn,DeepBind_do,DeepBind_l2,DeepBind_bn,DeepBind,DeepBind_do_l2,DeepBind_do_bn,DeepBind_l2_bn
3,StandardNet_do_l2_bn,StandardNet_do,StandardNet_l2,StandardNet_bn,StandardNet,StandardNet_do_l2,StandardNet_do_bn,StandardNet_l2_bn


In [19]:
analysis_idx = {}
for m, model in enumerate(Names):
    for r, reg in enumerate(model):
        analysis_idx[reg] = [m,r]

In [20]:
analysis_idx

{'DeepBind': [2, 4],
 'DeepBind_bn': [2, 3],
 'DeepBind_do': [2, 1],
 'DeepBind_do_bn': [2, 6],
 'DeepBind_do_l2': [2, 5],
 'DeepBind_do_l2_bn': [2, 0],
 'DeepBind_l2': [2, 2],
 'DeepBind_l2_bn': [2, 7],
 'DistNet': [0, 4],
 'DistNet_bn': [0, 3],
 'DistNet_do': [0, 1],
 'DistNet_do_bn': [0, 6],
 'DistNet_do_l2': [0, 5],
 'DistNet_do_l2_bn': [0, 0],
 'DistNet_l2': [0, 2],
 'DistNet_l2_bn': [0, 7],
 'LocalNet': [1, 4],
 'LocalNet_bn': [1, 3],
 'LocalNet_do': [1, 1],
 'LocalNet_do_bn': [1, 6],
 'LocalNet_do_l2': [1, 5],
 'LocalNet_do_l2_bn': [1, 0],
 'LocalNet_l2': [1, 2],
 'LocalNet_l2_bn': [1, 7],
 'StandardNet': [3, 4],
 'StandardNet_bn': [3, 3],
 'StandardNet_do': [3, 1],
 'StandardNet_do_bn': [3, 6],
 'StandardNet_do_l2': [3, 5],
 'StandardNet_do_l2_bn': [3, 0],
 'StandardNet_l2': [3, 2],
 'StandardNet_l2_bn': [3, 7]}

In [28]:
# save path
results_path = '../results'
params_path = utils.make_directory(results_path, 'model_params')

# dataset path
data_path = '../data/Synthetic_dataset.h5'

# load dataset
train, valid, test = helper.load_synthetic_dataset(data_path)
#Get the indices of correctly labelled sequences
right_index = np.where(train['targets'][:,0]==1)[0]
num_pos = len(right_index)

loading training data
loading cross-validation data
loading test data


In [34]:
np.zeros((2,4))

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [2]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os, sys
import numpy as np
import helper
import tensorflow as tf
from deepomics import neuralnetwork as nn
from deepomics import utils, fit

#------------------------------------------------------------------------------------------------

all_models = ['DistNet', 'LocalNet', 'DeepBind', 'StandardNet']
dropout_status = [True, True, 	False, 	False, 	False, True,  True,  False]
l2_status = 	 [True, False, 	True, 	False, 	False, True,  False, True]
bn_status = 	 [True, False, 	False, 	True, 	False, False, True,  True]

# save path
results_path = '../results'
params_path = utils.make_directory(results_path, 'model_params')

# dataset path
data_path = '../data/Synthetic_dataset.h5'

# load dataset
train, valid, test = helper.load_synthetic_dataset(data_path)

# get data shapes
input_shape = list(train['inputs'].shape)
input_shape[0] = None


for i in range(len(dropout_status)):

	# loop through models
	for model_name in all_models:
		tf.reset_default_graph()
		print('model: ' + model_name)

		# compile neural trainer
		name = model_name
		if dropout_status[i]:
			name += '_do'
		if l2_status[i]:
			name += '_l2'
		if bn_status[i]:
			name += '_bn'
		model_path = utils.make_directory(params_path, model_name)
		file_path = os.path.join(model_path, name)

		# load model parameters
		model_layers, optimization, _ = helper.load_model(model_name, 
														  input_shape,
														  dropout_status[i], 
														  l2_status[i], 
														  bn_status[i])

		# build neural network class
		nnmodel = nn.NeuralNet(seed=247)
		nnmodel.build_layers(model_layers, optimization, supervised=True)

		nntrainer = nn.NeuralTrainer(nnmodel, save='best', file_path=file_path)

		# initialize session
		sess = utils.initialize_session()

		# set data in dictionary
		data = {'train': train, 'valid': valid, 'test': test}
		fit.train_minibatch(sess, nntrainer, data, batch_size=100, num_epochs=100, 
							patience=100, verbose=2, shuffle=True, save_all=False)	

loading training data
loading cross-validation data
loading test data
model: DistNet
Epoch 1 out of 100 
 
  valid loss:		0.71985
  valid accuracy:	0.50133+/-0.00000
  valid auc-roc:	0.50517+/-0.00000
  valid auc-pr:		0.50299+/-0.00000
  lower cross-validation found
  saving model to:  ../results/model_params/DistNet/DistNet_do_l2_bn_best.ckpt
Epoch 2 out of 100 
 
  valid loss:		0.69978
  valid accuracy:	0.50200+/-0.00000
  valid auc-roc:	0.49576+/-0.00000
  valid auc-pr:		0.49182+/-0.00000
  lower cross-validation found
  saving model to:  ../results/model_params/DistNet/DistNet_do_l2_bn_best.ckpt
Epoch 3 out of 100 
 
  valid loss:		0.69660
  valid accuracy:	0.50300+/-0.00000
  valid auc-roc:	0.50205+/-0.00000
  valid auc-pr:		0.49346+/-0.00000
  lower cross-validation found
  saving model to:  ../results/model_params/DistNet/DistNet_do_l2_bn_best.ckpt
Epoch 4 out of 100 
 
  valid loss:		0.69672
  valid accuracy:	0.50267+/-0.00000
  valid auc-roc:	0.49162+/-0.00000
  valid auc-pr:	

In [3]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os, sys
import numpy as np
import helper
import tensorflow as tf
from deepomics import neuralnetwork as nn
from deepomics import utils, fit, visualize, saliency, metrics
import helper
sys.path.append('../..')
import mutagenesisfunctions as mf

#-----------------------------------------------------------------------------------------------

def fom_heatmap(X, layer, alphabet, nntrainer, sess, eps=1e-7):
    
    def mutate(sequence, seq_length, dims):
        num_mutations = seq_length * dims
        hotplot_mutations = np.zeros((num_mutations,seq_length,1,dims)) 

        for position in range(seq_length):
            for nuc in range(dims):
                mut_seq = np.copy(sequence)          
                mut_seq[0, position, 0, :] = np.zeros(dims)
                mut_seq[0, position, 0, nuc] = 1.0

                hotplot_mutations[(position*dims)+nuc] = mut_seq
        return hotplot_mutations

    #first mutate the sequence
    X_mut = mutate(X, X.shape[1], X.shape[3])

    #take all the mutations and assign them into a dict for deepomics
    mutations = {'inputs': X_mut, 'targets': np.ones((X_mut.shape[0], 1))}
    #Get output or logits activations for the mutations
    mut_predictions = nntrainer.get_activations(sess, mutations, layer=layer)

    #take the WT and put it into a dict for deepomics
    WT = {'inputs': X, 'targets': np.ones((X.shape[0], 1))}
    #Get output or logits activations for the WT sequence
    predictions = nntrainer.get_activations(sess, WT, layer=layer)

    #shape the predictions of the mutations into the shape of a heatmap
    heat_mut = mut_predictions.reshape(X.shape[1],4).T
    
    #normalize the heat map rearrangement by minusing it by the true prediction score of that test sequence
    #norm_heat_mut = heat_mut - predictions[0] + eps
    #norm_heat_mut = utils.normalize_pwm(norm_heat_mut, factor=4)

    return (heat_mut)
#-----------------------------------------------------------------------------------------------

'''
This script outputs the First Order Mutagenesis scores for every model and every regularization mode over every positively labelled sequence.
It outputs a single np.array with the shape (num_models, num_reg, num_pos, dims, seqlen)

It will also output an array in the shape (num_models, num_reg) showing the name of each model for ease of indexing into the larger model.

'''
#------------------------------------------------------------------------------------------------

all_models = ['DistNet', 'LocalNet', 'DeepBind', 'StandardNet']
num_models = len(all_models) # number of models
dropout_status = [True, True, 	False, 	False, 	False, True,  True,  False]
l2_status = 	 [True, False, 	True, 	False, 	False, True,  False, True]
bn_status = 	 [True, False, 	False, 	True, 	False, False, True,  True]
num_reg = len(dropout_status) # number of regularization types

# save path
results_path = '../results'
params_path = utils.make_directory(results_path, 'model_params')

# dataset path
data_path = '../data/Synthetic_dataset.h5'

# load dataset
train, valid, test = helper.load_synthetic_dataset(data_path)
#Get the indices of correctly labelled sequences
right_index = np.where(test['targets'][:,0]==1)[0]
num_pos = len(right_index) # number of positively labelled sequences

# get data shapes
input_shape = list(train['inputs'].shape)
input_shape[0] = None

_, seqlen, _, dims = test['inputs'].shape # the length of each sequence and the number of dimensions

#initialize an array to hold the FoM results
full_fom_predictions = np.zeros((num_models, num_reg, num_pos, dims, seqlen))

# loop through models
for m, model_name in enumerate(all_models):

    #loop through every regularization type
    for r in range(len(dropout_status)):
        tf.reset_default_graph()
        print('model: ' + model_name)

        # compile neural trainer
        name = model_name
        if dropout_status[i]:
            name += '_do'
        if l2_status[i]:
            name += '_l2'
        if bn_status[i]:
            name += '_bn'
        model_path = utils.make_directory(params_path, model_name)
        file_path = os.path.join(model_path, name)

        # load model parameters
        model_layers, optimization, _ = helper.load_model(model_name, 
                                                          input_shape,
                                                          dropout_status[i], 
                                                          l2_status[i], 
                                                          bn_status[i])
        # build neural network class
        nnmodel = nn.NeuralNet(seed=247)
        nnmodel.build_layers(model_layers, optimization, supervised=True)

        # compile neural trainers
        nntrainer = nn.NeuralTrainer(nnmodel, save='best', file_path=file_path)

        # initialize session
        sess = utils.initialize_session()

        # load best parameters
        nntrainer.set_best_parameters(sess)
        
        for p, index in enumerate(right_index):
            #Extract a sequence and perform First Order Mutagenesis
            X = np.expand_dims(test['inputs'][index], axis=0)
            
            full_fom_predictions[m, r, p, :, :] = fom_heatmap(X, 'output', 'dna', nntrainer, sess)


        
        
        


loading training data
loading cross-validation data
loading test data
model: DistNet
loading model from:  ../results/model_params/DistNet/DistNet_l2_bn_best.ckpt
INFO:tensorflow:Restoring parameters from ../results/model_params/DistNet/DistNet_l2_bn_best.ckpt
model: DistNet
loading model from:  ../results/model_params/DistNet/DistNet_l2_bn_best.ckpt
INFO:tensorflow:Restoring parameters from ../results/model_params/DistNet/DistNet_l2_bn_best.ckpt
model: DistNet
loading model from:  ../results/model_params/DistNet/DistNet_l2_bn_best.ckpt
INFO:tensorflow:Restoring parameters from ../results/model_params/DistNet/DistNet_l2_bn_best.ckpt
model: DistNet
loading model from:  ../results/model_params/DistNet/DistNet_l2_bn_best.ckpt
INFO:tensorflow:Restoring parameters from ../results/model_params/DistNet/DistNet_l2_bn_best.ckpt
model: DistNet
loading model from:  ../results/model_params/DistNet/DistNet_l2_bn_best.ckpt
INFO:tensorflow:Restoring parameters from ../results/model_params/DistNet/Dist

NameError: name 'h5py' is not defined

In [8]:
import h5py

#save the array
save_path = utils.make_directory(results_path, 'FoM_full_analysis')
hdf5path = os.path.join(save_path, 'full_fom_predictions'+'.hdf5')
with h5py.File(hdf5path, 'w') as f:
    f.create_dataset('full_fom', data=full_fom_predictions.astype(np.float32), compression='gzip')

        

making directory: ../results/FoM_full_analysis


In [9]:
data_path = hdf5path
with h5py.File(data_path, 'r') as dataset:
    full_fom = np.array(dataset['full_fom'])

In [10]:
full_fom.shape

(4, 8, 3030, 4, 200)