In [1]:
import keras 
import keras_genomics
import numpy as np
import keras.layers as k1

from keras import backend as K 
from keras.layers.core import Dropout 
from keras.layers.core import Flatten
from keras.layers import Input
from keras.engine import Layer
from keras.models import Sequential 
from keras.engine.base_layer import InputSpec
from keras.models import Model
from keras.models import load_model
from keras.layers.convolutional import Conv1D
from keras.utils import conv_utils
from matplotlib import pyplot as plt
from scipy.stats import spearmanr

Using TensorFlow backend.


In [16]:
import os 
os.environ['CUDA_VISIBLE_DEVICES'] = '2'

In [17]:
from keras.initializers import Initializer
def _compute_fans(shape, data_format='channels_last'):
    """Computes the number of input and output units for a weight shape.
    # Arguments
        shape: Integer shape tuple.
        data_format: Image data format to use for convolution kernels.
            Note that all kernels in Keras are standardized on the
            `channels_last` ordering (even when inputs are set
            to `channels_first`).
    # Returns
        A tuple of scalars, `(fan_in, fan_out)`.
    # Raises
        ValueError: in case of invalid `data_format` argument.
    """
    if len(shape) == 2:
        fan_in = shape[0]
        fan_out = shape[1]
    elif len(shape) in {3, 4, 5}:
        # Assuming convolution kernels (1D, 2D or 3D).
        # TH kernel shape: (depth, input_depth, ...)
        # TF kernel shape: (..., input_depth, depth)
        if data_format == 'channels_first':
            receptive_field_size = np.prod(shape[2:])
            fan_in = shape[1] * receptive_field_size
            fan_out = shape[0] * receptive_field_size
        elif data_format == 'channels_last':
            receptive_field_size = np.prod(shape[:-2])
            fan_in = shape[-2] * receptive_field_size
            fan_out = shape[-1] * receptive_field_size
        else:
            raise ValueError('Invalid data_format: ' + data_format)
    else:
        # No specific assumptions.
        fan_in = np.sqrt(np.prod(shape))
        fan_out = np.sqrt(np.prod(shape))
    return fan_in, fan_out

class RevcompVarianceScaling(Initializer):
    def __init__(self, scale=1.0,
                 mode='fan_in',
                 distribution='normal',
                 seed=None):
        if scale <= 0.:
            raise ValueError('`scale` must be a positive float. Got:', scale)
        mode = mode.lower()
        if mode not in {'fan_in', 'fan_out', 'fan_avg'}:
            raise ValueError('Invalid `mode` argument: '
                             'expected on of {"fan_in", "fan_out", "fan_avg"} '
                             'but got', mode)
        distribution = distribution.lower()
        if distribution not in {'normal', 'uniform'}:
            raise ValueError('Invalid `distribution` argument: '
                             'expected one of {"normal", "uniform"} '
                             'but got', distribution)
        self.scale = scale
        self.mode = mode
        self.distribution = distribution
        self.seed = seed

    def __call__(self, shape, dtype=None):
        fan_in, fan_out = _compute_fans(shape)
        fan_out = fan_out*2 #revcomp kernel underestimates fan_out
        print("fanin:",fan_in, "fanout:",fan_out, self.scale, self.mode)
        scale = self.scale
        if self.mode == 'fan_in':
            scale /= max(1., fan_in)
        elif self.mode == 'fan_out':
            scale /= max(1., fan_out)
        else:
            scale /= max(1., float(fan_in + fan_out) / 2)
        if self.distribution == 'normal':
            # 0.879... = scipy.stats.truncnorm.std(a=-2, b=2, loc=0., scale=1.)
            stddev = np.sqrt(scale) / .87962566103423978
            return K.truncated_normal(shape, 0., stddev,
                                      dtype=dtype, seed=self.seed)
        else:
            limit = np.sqrt(3. * scale)
            return K.random_uniform(shape, -limit, limit,
                                    dtype=dtype, seed=self.seed)

    def get_config(self):
        return {
            'scale': self.scale,
            'mode': self.mode,
            'distribution': self.distribution,
            'seed': self.seed
        }

In [18]:
from seqdataloader.batchproducers import coordbased
import gzip
import numpy as np

class ColsInBedFile(
    coordbased.coordstovals.core.AbstractSingleNdarrayCoordsToVals):
    def __init__(self, gzipped_bed_file, **kwargs):
        super(ColsInBedFile, self).__init__(**kwargs)
        self.gzipped_bed_file = gzipped_bed_file
        coords_to_vals = {}
        for row in gzip.open(gzipped_bed_file, 'rb'):
            row = row.decode("utf-8").rstrip()
            split_row = row.split("\t")
            chrom_start_end = split_row[0]+":"+split_row[1]+"-"+split_row[2]
            vals = np.array([float(x) for x in split_row[4:]])
            coords_to_vals[chrom_start_end] = vals
        self.coords_to_vals = coords_to_vals
        
    def _get_ndarray(self, coors):
        to_return = []
        for coor in coors:
            chrom_start_end = (coor.chrom+":"
                               +str(coor.start)+"-"+str(coor.end))
            to_return.append(self.coords_to_vals[chrom_start_end])
        return np.array(to_return)
    
    
inputs_coordstovals = coordbased.coordstovals.fasta.PyfaidxCoordsToVals(
  genome_fasta_path= '/mnt/data/annotations/by_release/hg38/GRCh38_no_alt_analysis_set_GCA_000001405.15.fasta',
  center_size_to_use=1000)

targets_coordstovals = ColsInBedFile(
       gzipped_bed_file="summits_with_signal.bed.gz")
            
keras_train_batch_generator = coordbased.core.KerasBatchGenerator(
    coordsbatch_producer=coordbased.coordbatchproducers.SimpleCoordsBatchProducer(
      bed_file="train_summits_with_signal.bed.gz",
      #coord_batch_transformer=coordbased.coordbatchtransformers.ReverseComplementAugmenter(),
      batch_size=64,
      shuffle_before_epoch=True,
      seed=1234
    ),
    inputs_coordstovals=inputs_coordstovals,
    targets_coordstovals=targets_coordstovals
)


keras_valid_batch_generator = coordbased.core.KerasBatchGenerator(
    coordsbatch_producer = coordbased.coordbatchproducers.SimpleCoordsBatchProducer(
        bed_file="valid_summits_with_signal.bed.gz", 
        batch_size=64, 
        shuffle_before_epoch=True, 
        seed=1234
    ),
    inputs_coordstovals=inputs_coordstovals, 
    targets_coordstovals=targets_coordstovals
)

keras_test_batch_generator = coordbased.core.KerasBatchGenerator(
    coordsbatch_producer = coordbased.coordbatchproducers.SimpleCoordsBatchProducer(
        bed_file="test_summits_with_signal.bed.gz", 
        batch_size = 64, 
        shuffle_before_epoch = True, 
        seed = 1234
    ), 
    inputs_coordstovals = inputs_coordstovals, 
    targets_coordstovals = targets_coordstovals
)


keras_train_batch_generator_augment = coordbased.core.KerasBatchGenerator(
    coordsbatch_producer=coordbased.coordbatchproducers.SimpleCoordsBatchProducer(
      bed_file="train_summits_with_signal.bed.gz",
      coord_batch_transformer=coordbased.coordbatchtransformers.ReverseComplementAugmenter(),
      batch_size=128,
      shuffle_before_epoch=True,
      seed=1234
    ),
    inputs_coordstovals=inputs_coordstovals,
    targets_coordstovals=targets_coordstovals
)


keras_valid_batch_generator_augment = coordbased.core.KerasBatchGenerator(
    coordsbatch_producer = coordbased.coordbatchproducers.SimpleCoordsBatchProducer(
        bed_file="valid_summits_with_signal.bed.gz",
        coord_batch_transformer=coordbased.coordbatchtransformers.ReverseComplementAugmenter(),
        batch_size=128, 
        shuffle_before_epoch=True, 
        seed=1234
    ),
    inputs_coordstovals=inputs_coordstovals, 
    targets_coordstovals=targets_coordstovals
)

keras_test_batch_generator_augment = coordbased.core.KerasBatchGenerator(
    coordsbatch_producer = coordbased.coordbatchproducers.SimpleCoordsBatchProducer(
        bed_file="test_summits_with_signal.bed.gz",
        coord_batch_transformer=coordbased.coordbatchtransformers.ReverseComplementAugmenter(),
        batch_size = 128, 
        shuffle_before_epoch = True, 
        seed = 1234
    ), 
    inputs_coordstovals = inputs_coordstovals, 
    targets_coordstovals = targets_coordstovals
)

In [19]:
inputs_coordstovals.ltrdict = {
           'a':[1,0,0,0],'c':[0,1,0,0],'g':[0,0,1,0],'t':[0,0,0,1],
           'n':[0,0,0,0],'A':[1,0,0,0],'C':[0,1,0,0],'G':[0,0,1,0],
           'T':[0,0,0,1],'N':[0,0,0,0],'R': [0.5,0,0.5,0],'Y':[0,0.5,0,0.5]}

In [20]:
y_train = np.array([val for batch in keras_train_batch_generator for val in batch[1]], dtype = 'float32') 

In [21]:
len(y_train)

36436

In [22]:
class RevCompSumPool(Layer): 
    def __init__(self, **kwargs): 
        super(RevCompSumPool, self).__init__(**kwargs)

    def build(self, input_shape):
        self.num_input_chan = input_shape[2]
        super(RevCompSumPool, self).build(input_shape)

    def call(self, inputs): 
        #divide by sqrt 2 for variance preservation
        inputs = (inputs[:,:,:int(self.num_input_chan/2)] + inputs[:,:,int(self.num_input_chan/2):][:,::-1,::-1])/(1.41421356237)
        return inputs
      
    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[1], int(input_shape[2]/2))


In [23]:
class RevCompSpatialDropout1D(Dropout): 
    def __init__(self, rate,**kwargs): 
        super(RevCompSpatialDropout1D, self).__init__(rate, **kwargs)
        self.seed = 3
        self.input_spec = InputSpec(ndim = 3)

    def _get_noise_shape(self, inputs): 
        input_shape = K.shape(inputs)
        noise_shape = (input_shape[0], 1, 1, int(self.num_input_chan/2)) 
        return noise_shape
        
    def build(self, input_shape):
        self.num_input_chan = input_shape[2]
        self.input_len = input_shape[1]
        super(RevCompSpatialDropout1D, self).build(input_shape)

    def call(self, inputs, training=None): 
        inputs_fwdandrevconcat = K.concatenate(
                tensors = [
                    inputs[:,:,None,:int(self.num_input_chan/2)],
                    inputs[:,:,None,int(self.num_input_chan/2):][:,:,:,::-1]],
                axis=2)

        if 0. < self.rate < 1.: 
            noise_shape = self._get_noise_shape(inputs)
            def dropped_inputs(): 
                dropped = K.dropout(inputs_fwdandrevconcat,
                                    self.rate, noise_shape, seed = self.seed)
                dropped = K.reshape(dropped, (-1, int(self.input_len), int(self.num_input_chan)))
                return K.concatenate(
                    tensors = [
                        dropped[:,:,:int(self.num_input_chan/2)],
                        dropped[:,:,int(self.num_input_chan/2):][:,:,::-1]],
                    axis=-1)

            return K.in_train_phase(dropped_inputs, inputs, training = training)

        return inputs

In [24]:
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.ops import array_ops
from tensorflow.python.framework import ops
import numbers
from tensorflow.python.framework import tensor_util
def _get_noise_shape(x, noise_shape):
  # If noise_shape is none return immediately.
  if noise_shape is None:
    return array_ops.shape(x)

  try:
    # Best effort to figure out the intended shape.
    # If not possible, let the op to handle it.
    # In eager mode exception will show up.
    noise_shape_ = tensor_shape.as_shape(noise_shape)
  except (TypeError, ValueError):
    return noise_shape

  if x.shape.dims is not None and len(x.shape.dims) == len(noise_shape_.dims):
    new_dims = []
    for i, dim in enumerate(x.shape.dims):
      if noise_shape_.dims[i].value is None and dim.value is not None:
        new_dims.append(dim.value)
      else:
        new_dims.append(noise_shape_.dims[i].value)
    return tensor_shape.TensorShape(new_dims)

  return noise_shape

class MCRCDropout(Layer):
    """Applies MC Dropout to the input.
       The applied noise vector is symmetric to reverse complement symmetry
       Class structure only slightly adapted 
    Dropout consists in randomly setting
    a fraction `rate` of input units to 0 at each update during training time,
    which helps prevent overfitting.
    Remains active ative at test time so sampling is required
    # Arguments
        rate: float between 0 and 1. Fraction of the input units to drop.
        noise_shape: 1D integer tensor representing the shape of the
            binary dropout mask that will be multiplied with the input.
            For instance, if your inputs have shape
            `(batch_size, timesteps, features)` and
            you want the dropout mask to be the same for all timesteps,
            you can use `noise_shape=(batch_size, 1, features)`.
        seed: A Python integer to use as random seed.
    # References
        - [Dropout: A Simple Way to Prevent Neural Networks from Overfitting](http://www.cs.toronto.edu/~rsalakhu/papers/srivastava14a.pdf)
    """
    def __init__(self, rate, noise_shape=None, seed=None, **kwargs):
        super(MCRCDropout, self).__init__(**kwargs)
        self.rate = min(1., max(0., rate))
        self.noise_shape = noise_shape
        self.seed = seed
        self.supports_masking = True
        
    def build(self, input_shape):
        self.num_input_chan = input_shape[2]
        super(MCRCDropout, self).build(input_shape)

    def _get_noise_shape(self, inputs):
        if self.noise_shape is None:
            return self.noise_shape

        symbolic_shape = K.shape(inputs)
        noise_shape = [symbolic_shape[axis] if shape is None else shape
                       for axis, shape in enumerate(self.noise_shape)]
        return tuple(noise_shape)

    def call(self, inputs, training=None):
        if 0. < self.rate < 1.:
            import numpy as np
            noise_shape = self._get_noise_shape(inputs)
            x = inputs
            seed = self.seed
            keep_prob = 1. - self.rate
            if seed is None:
                seed = np.random.randint(10e6)
            # the dummy 1. works around a TF bug
            # (float32_ref vs. float32 incompatibility)
            x= x*1
            name = None
            with ops.name_scope(name, "dropout", [x]) as name:
                x = ops.convert_to_tensor(x, name="x")
                if not x.dtype.is_floating:
                    raise ValueError("x has to be a floating point tensor since it's going to"
                       " be scaled. Got a %s tensor instead." % x.dtype)
                if isinstance(keep_prob, numbers.Real) and not 0 < keep_prob <= 1:
                    raise ValueError("keep_prob must be a scalar tensor or a float in the "
                       "range (0, 1], got %g" % keep_prob)
                keep_prob = ops.convert_to_tensor(
                             keep_prob, dtype=x.dtype, name="keep_prob")
                keep_prob.get_shape().assert_is_compatible_with(tensor_shape.scalar())

                # Do nothing if we know keep_prob == 1
                if tensor_util.constant_value(keep_prob) == 1:
                    return x

                noise_shape = _get_noise_shape(x, noise_shape)
                # uniform [keep_prob, 1.0 + keep_prob)
                random_tensor = keep_prob
                random_tensor += random_ops.random_uniform(
                noise_shape, seed=seed, dtype=x.dtype)
               
                # 0. if [keep_prob, 1.0) and 1. if [1.0, 1.0 + keep_prob)
                binary_tensor = math_ops.floor(random_tensor)
                dim = binary_tensor.shape[2]//2

                symmetric_binary = K.concatenate(
                    tensors = [
                      binary_tensor[:,:,int(self.num_input_chan/2):], 
                      binary_tensor[:,:,int(self.num_input_chan/2):][::,::-1,::-1]], 
                  axis=2)
                ret = math_ops.div(x, keep_prob) * symmetric_binary
                
                return ret


    def get_config(self):
        config = {'rate': self.rate,
                  'noise_shape': self.noise_shape,
                  'seed': self.seed}
        base_config = super(MCRCDropout, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def compute_output_shape(self, input_shape):
        return input_shape

In [25]:
class RevComp(Layer): 
    def __init__(self, **kwargs): 
      super(RevComp, self).__init__(**kwargs)

    def build(self, input_shape):
      super(RevComp, self).build(input_shape)

    def call(self, inputs): 
      return inputs[:,::-1,::-1]
      
    def compute_output_shape(self, input_shape):
      return input_shape

# custom_objects = {'RevComp':RevComp}
# siamese_model_final = load_model('siamese_1000.h5', custom_objects)

In [26]:
class AveragePool(Initializer): 
    def __call__(self, shape, dtype = None): 
        return K.constant(1/(shape[0]), shape=shape, dtype=dtype)

class WeightDistConv(Conv1D): 
    def __init__(self, filters, 
                kernel_size, 
                strides =1, 
                padding = 'valid', 
                data_format = 'channels_last',
                dilation_rate = 1, 
                activation = None, 
                use_bias = False, 
                kernel_initializer = AveragePool(), 
                bias_initializer = 'zeros', 
                kernel_regularizer = None, 
                bias_regularizer = None, 
                activity_regularizer = None, 
                kernel_constraint = None,
                bias_constraint = None, 
                **kwargs): 
        super(WeightDistConv, self).__init__(
            filters=filters, 
            kernel_size=kernel_size, 
            strides = strides, 
            padding=padding,
            data_format=data_format,
            dilation_rate=dilation_rate,
            activation=activation,
            use_bias=False,
            kernel_initializer=kernel_initializer,
            bias_initializer=bias_initializer,
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer,
            activity_regularizer=activity_regularizer,
            kernel_constraint=kernel_constraint,
            bias_constraint=bias_constraint,
            **kwargs) 


    def build(self, input_shape): 
        self.bias = None
        self.filters = input_shape[-1]
        if self.data_format == 'channels_first':
            channel_axis = 1
        else:
            channel_axis = -1
        if input_shape[channel_axis] is None:
            raise ValueError('The channel dimension of the inputs '
                             'should be defined. Found `None`.')
        input_dim = input_shape[channel_axis]
        kernel_shape = self.kernel_size + (self.filters,)
        self.kernel = self.add_weight(shape=kernel_shape,
                                        initializer = self.kernel_initializer, 
                                        name ='kernel',
                                        regularizer = self.kernel_regularizer, 
                                        constraint = self.kernel_constraint)

        self.input_spec = InputSpec(ndim=3,
                                    axes={channel_axis: input_dim})
        self.num_input_channels = input_shape[1]
        self.built = True
       
      
    #Layer's logic
    def call(self, inputs):
        result = []
        for x in range(self.kernel_size[0]): 
            result.append((self.kernel[x][:,None]*K.eye(self.filters))[None,:,:])

        curr_kernel = K.concatenate(result, axis = 0)
#         print("curr kernel: ", curr_kernel)
        outputs = K.conv1d(inputs, curr_kernel,
                         strides=self.strides[0],
                         padding=self.padding,
                         data_format=self.data_format,
                         dilation_rate=self.dilation_rate[0])

        if (self.activation is not None):
            outputs = self.activation(outputs)

        return outputs
  
    def compute_output_shape(self, input_shape):
        length = conv_utils.conv_output_length(input_length = self.num_input_channels, 
                                               filter_size = self.filters,
                                               padding=self.padding,
                                               stride=self.strides[0])
        return (input_shape[0],length, self.filters)

In [27]:
custom_objects = {'RevCompConv1D':keras_genomics.layers.RevCompConv1D,
                  'RevCompSumPool':RevCompSumPool,
                  'RevcompVarianceScaling':RevcompVarianceScaling, 
                  'MCRCDropout':MCRCDropout,
                  'RevCompSpatialDropout1D': RevCompSpatialDropout1D,
                  'RevComp':RevComp,
                  'AveragePool': AveragePool,
                  'WeightDistConv': WeightDistConv}

In [28]:
def plot_and_spearmanr(y_test, name, test_batch_generator): 
    model = load_model(name, custom_objects)
    y_pred = model.predict_generator(test_batch_generator)
    plt.scatter(y_test, y_pred, alpha = 0.1)
    plt.xlabel("True Labels: %s" % name)
    plt.ylabel("Predicted Labels")
    plt.show()
    print(spearmanr(y_test, y_pred))

# Runs dropout during testing time and takes the average
def predict_with_uncertainty(f, x, no_classes, n_iter=100):
    result = np.zeros((n_iter,) + (x.shape[0], no_classes) )

    for i in range(n_iter):
        result[i,:, :] = f((x, 1))[0]

    prediction = result.mean(axis=0)
    uncertainty = result.std(axis=0)
    return prediction

def spearmanr_all(y_test, name, test_batch_generator): 
    model = load_model(name, custom_objects)
    f2 = K.function([model.layers[0].input, K.learning_phase()],
                    [model.layers[-1].output])
    
    y_pred = np.concatenate(np.array([predict_with_uncertainty(f2, test_batch_generator[i][0], 1) for i in range(len(test_batch_generator))]), axis = 0)
    rho, pval = spearmanr(y_test, y_pred)
    return rho

def get_results(filename, y_test, test_batch_generator): 
    result_arr = []
    for i in range(10): 
        seed_num = seed_nums[i]
        result_arr.append([spearmanr_all(y_test, ('/users/hannahgz/revcomp_experiments/SPI1_Results/%s_' % filename + str(seed_num) + '.h5'),
                                      test_batch_generator), rates[i]])
        print(str(result_arr[i][0]) + " " + str(result_arr[i][1]))
    return result_arr

In [29]:
y_test = np.array([val for batch in keras_test_batch_generator for val in batch[1]], dtype = 'float32') 
y_test_augment = np.array([val for batch in keras_test_batch_generator_augment for val in batch[1]], dtype = 'float32')

In [30]:
import json
from abstention.figure_making_utils import wilcox_srs
from matplotlib import pyplot as plt
from scipy.stats import spearmanr

In [32]:
rc_dropout = []
rc_mc_dropout = []
rc_orig_spatial_dropout = []
rc_rc_spatial_dropout = []
siamese_dropout = []
siamese_spatial_dropout = []
def get_results_from_config(name, seed_num): 
    with open("/users/hannahgz/revcomp_experiments/CTCF_Architecture_Results/CTCF_5_Filters_2/config_%s_5_filters_%s.json" % (name, str(seed_num))) as json_file: 
        data = json.load(json_file)
        print(name)
        print("Correlation: ", data["correlation"])
    return data["correlation"]
        
def get_results_model(name, seed_num):
    rho = spearmanr_all(y_test, ("/users/hannahgz/revcomp_experiments/CTCF_Architecture_Results/CTCF_5_Filters/%s_5_filters_%s.h5" % (name, str(seed_num))), keras_test_batch_generator)
    print(name)
    print("Correlation: ", rho)

get_results_from_config("augment_dropout", 5068)
get_results_from_config("augment_spatial_dropout", 9337)
rc_dropout.append(get_results_from_config("rc_dropout", 8157))
rc_mc_dropout.append(get_results_from_config("rc_mc_dropout", 1976))
rc_orig_spatial_dropout.append(get_results_model("rc_orig_spatial_dropout", 7975))
rc_rc_spatial_dropout.append(get_results_from_config("rc_rc_spatial_dropout", 8011))
get_results_from_config("reg_dropout", 6011)
get_results_from_config("reg_spatial_dropout", 2079)
siamese_dropout.append(get_results_from_config("siamese_dropout", 4563))
siamese_spatial_dropout.append(get_results_from_config("siamese_spatial_dropout", 8844))

augment_dropout
Correlation:  0.579961855577644
augment_spatial_dropout
Correlation:  0.6082566515197448
rc_dropout
Correlation:  0.629177295527509
rc_mc_dropout
Correlation:  0.6104919234604967
rc_orig_spatial_dropout
Correlation:  0.6504447102627249
rc_rc_spatial_dropout
Correlation:  0.6231500735506831
reg_dropout
Correlation:  0.5678962597791517
reg_spatial_dropout
Correlation:  0.5610326067089133
siamese_dropout
Correlation:  0.6218975446824649
siamese_spatial_dropout
Correlation:  0.6098560370986797


In [33]:
def get_results_from_config(name, seed_num): 
    with open("/users/hannahgz/revcomp_experiments/CTCF_Architecture_Results/CTCF_5_Filters_2/config_%s_5_filters_%s.json" % (name, str(seed_num))) as json_file: 
        data = json.load(json_file)
        print(name)
        print("Correlation: ", data["correlation"])
    return data["correlation"]
rc_dropout.append(get_results_from_config("rc_dropout", 5524))
rc_mc_dropout.append(get_results_from_config("rc_mc_dropout", 2958))
rc_orig_spatial_dropout.append(get_results_from_config("rc_orig_spatial_dropout", 5546))
rc_rc_spatial_dropout.append(get_results_from_config("rc_rc_spatial_dropout", 3730))
siamese_dropout.append(get_results_from_config("siamese_dropout", 7197))
siamese_spatial_dropout.append(get_results_from_config("siamese_spatial_dropout", 8944))

rc_dropout
Correlation:  0.6258305461356328
rc_mc_dropout
Correlation:  0.6141984098836304
rc_orig_spatial_dropout
Correlation:  0.6480089182456751
rc_rc_spatial_dropout
Correlation:  0.6341481393791699
siamese_dropout
Correlation:  0.5947071600795995
siamese_spatial_dropout
Correlation:  0.6280957386437396


In [34]:
def get_results_from_config(name, seed_num): 
    with open("/users/hannahgz/revcomp_experiments/CTCF_Architecture_Results/CTCF_5_Filters_3/config_%s_5_filters_%s.json" % (name, str(seed_num))) as json_file: 
        data = json.load(json_file)
        print(name)
        print("Correlation: ", data["correlation"])
    return data["correlation"]
rc_dropout.append(get_results_from_config("rc_dropout", 4675))
rc_mc_dropout.append(get_results_from_config("rc_mc_dropout", 5887))
rc_orig_spatial_dropout.append(get_results_from_config("rc_orig_spatial_dropout", 2451))
rc_rc_spatial_dropout.append(get_results_from_config("rc_rc_spatial_dropout", 9243))
siamese_dropout.append(get_results_from_config("siamese_dropout", 1365))
siamese_spatial_dropout.append(get_results_from_config("siamese_spatial_dropout", 4979))


rc_dropout
Correlation:  0.5971730699330292
rc_mc_dropout
Correlation:  0.6254929606793849
rc_orig_spatial_dropout
Correlation:  0.6040033839985453
rc_rc_spatial_dropout
Correlation:  0.6452457711437067
siamese_dropout
Correlation:  0.619605657740282
siamese_spatial_dropout
Correlation:  0.6461858878005585


In [35]:
def get_results_from_config(name, seed_num): 
    with open("/users/hannahgz/revcomp_experiments/CTCF_Architecture_Results/CTCF_5_Filters_4/config_%s_5_filters_%s.json" % (name, str(seed_num))) as json_file: 
        data = json.load(json_file)
        print(name)
        print("Correlation: ", data["correlation"])
    return data["correlation"]
rc_dropout.append(get_results_from_config("rc_dropout", 2646))
rc_mc_dropout.append(get_results_from_config("rc_mc_dropout", 1438))
rc_orig_spatial_dropout.append(get_results_from_config("rc_orig_spatial_dropout", 1652))
rc_rc_spatial_dropout.append(get_results_from_config("rc_rc_spatial_dropout", 8558))
siamese_dropout.append(get_results_from_config("siamese_dropout", 4677))
siamese_spatial_dropout.append(get_results_from_config("siamese_spatial_dropout", 7438))

rc_dropout
Correlation:  0.6133529275435395
rc_mc_dropout
Correlation:  0.6376209246386088
rc_orig_spatial_dropout
Correlation:  0.5928856304570961
rc_rc_spatial_dropout
Correlation:  0.6424079840878698
siamese_dropout
Correlation:  0.6188316253446644
siamese_spatial_dropout
Correlation:  0.6398062393482135


In [4]:
rc_dropout = []
rc_mc_dropout = []
rc_orig_spatial_dropout = []
rc_rc_spatial_dropout = []
import json
def get_results_from_config(name, seed_num): 
    with open("/users/hannahgz/revcomp_experiments/CTCF_Architecture_Results/CTCF_5_Filters_5/config_%s_5_filters_%s.json" % (name, str(seed_num))) as json_file: 
        data = json.load(json_file)
        print(name)
        print("Correlation: ", data["correlation"])
    return data["correlation"]
rc_dropout.append(get_results_from_config("rc_dropout", 4330))
rc_mc_dropout.append(get_results_from_config("rc_mc_dropout", 6721))
rc_orig_spatial_dropout.append(get_results_from_config("rc_orig_spatial_dropout", 6235))
rc_rc_spatial_dropout.append(get_results_from_config("rc_rc_spatial_dropout", 4305))
# siamese_dropout.append(get_results_from_config("siamese_dropout", 4677))
# siamese_spatial_dropout.append(get_results_from_config("siamese_spatial_dropout", 7438))

rc_dropout
Correlation:  0.6305932362750384
rc_mc_dropout
Correlation:  0.6451284228949195
rc_orig_spatial_dropout
Correlation:  0.647927720166936
rc_rc_spatial_dropout
Correlation:  0.6388134758891323


In [39]:
rc_dropout

[0.629177295527509, 0.6258305461356328, 0.5971730699330292, 0.6133529275435395]

In [40]:
rc_mc_dropout

[0.6104919234604967,
 0.6141984098836304,
 0.6254929606793849,
 0.6376209246386088]

In [41]:
rc_orig_spatial_dropout

[None, 0.6480089182456751, 0.6040033839985453, 0.5928856304570961]

In [42]:
rc_rc_spatial_dropout

[0.6231500735506831,
 0.6341481393791699,
 0.6452457711437067,
 0.6424079840878698]

In [43]:
siamese_dropout

[0.6218975446824649, 0.5947071600795995, 0.619605657740282, 0.6188316253446644]

In [44]:
siamese_spatial_dropout

[0.6098560370986797,
 0.6280957386437396,
 0.6461858878005585,
 0.6398062393482135]

In [31]:
augment_dropout_SPI1 = get_results('augment_dropout/augment_dropout', y_test_augment, keras_test_batch_generator_augment)
augment_spatial_dropout_SPI1 = get_results('augment_spatial_dropout/augment_spatial_dropout', y_test_augment, keras_test_batch_generator_augment)
rc_dropout_SPI1 = get_results('rc_dropout/rc_dropout', y_test, keras_test_batch_generator)
rc_mc_dropout_SPI1 = get_results('rc_mc_dropout/rc_mc_dropout', y_test, keras_test_batch_generator)
rc_orig_spatial_dropout_SPI1 = get_results('rc_orig_spatial_dropout/rc_orig_spatial_dropout', y_test, keras_test_batch_generator)
rc_rc_spatial_droput_SPI1 = get_results('rc_rc_spatial_dropout/rc_rc_spatial_dropout', y_test, keras_test_batch_generator)
reg_dropout_SPI1 = get_results('reg_dropout/reg_dropout', y_test, keras_test_batch_generator)