In [17]:
from __future__ import division, print_function

In [18]:
from seqdataloader.batchproducers import coordbased
import gzip
import numpy as np
class SiameseAugmenter(coordbased.coordbatchtransformers.AbstractCoordBatchTransformer): 
  def __call__(self, coords): 
    return [x.get_revcomp() for x in coords]

In [19]:
class ColsInBedFile(
    coordbased.coordstovals.core.AbstractSingleNdarrayCoordsToVals):
    def __init__(self, gzipped_bed_file, **kwargs):
        super(ColsInBedFile, self).__init__(**kwargs)
        self.gzipped_bed_file = gzipped_bed_file
        coords_to_vals = {}
        for row in gzip.open(gzipped_bed_file, 'rb'):
            row = row.decode("utf-8").rstrip()
            split_row = row.split("\t")
            chrom_start_end = split_row[0]+":"+split_row[1]+"-"+split_row[2]
            vals = np.array([float(x) for x in split_row[4:]])
            coords_to_vals[chrom_start_end] = vals
        self.coords_to_vals = coords_to_vals
        
    def _get_ndarray(self, coors):
        to_return = []
        for coor in coors:
            chrom_start_end = (coor.chrom+":"
                               +str(coor.start)+"-"+str(coor.end))
            to_return.append(self.coords_to_vals[chrom_start_end])
        return np.array(to_return)
    
    
inputs_coordstovals = coordbased.coordstovals.fasta.PyfaidxCoordsToVals(
  genome_fasta_path='/mnt/data/annotations/by_release/hg38/GRCh38_no_alt_analysis_set_GCA_000001405.15.fasta',
  center_size_to_use=1000)

targets_coordstovals = ColsInBedFile(
       gzipped_bed_file="summits_with_signal.bed.gz")
            
keras_train_batch_generator = coordbased.core.KerasBatchGenerator(
    coordsbatch_producer=coordbased.coordbatchproducers.SimpleCoordsBatchProducer(
      bed_file="train_summits_with_signal.bed.gz",
      batch_size=64,
      shuffle_before_epoch=True,
      seed=1234
    ),
    inputs_coordstovals=inputs_coordstovals,
    targets_coordstovals=targets_coordstovals
)

keras_valid_batch_generator = coordbased.core.KerasBatchGenerator(
    coordsbatch_producer = coordbased.coordbatchproducers.SimpleCoordsBatchProducer(
        bed_file="valid_summits_with_signal.bed.gz",
        batch_size=64, 
        shuffle_before_epoch=True, 
        seed=1234
    ),
    inputs_coordstovals=inputs_coordstovals, 
    targets_coordstovals=targets_coordstovals
)

keras_test_batch_generator = coordbased.core.KerasBatchGenerator(
    coordsbatch_producer = coordbased.coordbatchproducers.SimpleCoordsBatchProducer(
        bed_file="test_summits_with_signal.bed.gz", 
        batch_size = 64, 
        shuffle_before_epoch = True, 
        seed = 1234
    ), 
    inputs_coordstovals = inputs_coordstovals, 
    targets_coordstovals = targets_coordstovals
)

In [20]:
y_test = np.array([val for batch in keras_test_batch_generator for val in batch[1]], dtype = 'float32') 

In [21]:
import keras 
import keras_genomics
import numpy as np
import keras.layers as k1

from keras import backend as K 
from keras.layers.core import Dropout 
from keras.layers.core import Flatten
from keras.layers import Input
from keras.engine import Layer
from keras.models import Sequential 
from keras.engine.base_layer import InputSpec
from keras.models import Model
from keras.models import load_model

In [22]:
kernel_size = 15
filters= 15
input_length = 1000

from numpy.random import seed
from tensorflow import set_random_seed
from keras.callbacks import EarlyStopping, History, ModelCheckpoint

seed_num = 6000
seed(seed_num)
set_random_seed(seed_num)

In [23]:
class RevComp(Layer): 
    def __init__(self, **kwargs): 
      super(RevComp, self).__init__(**kwargs)

    def build(self, input_shape):
      super(RevComp, self).build(input_shape)

    def call(self, inputs): 
      return inputs[:,::-1,::-1]
      
    def compute_output_shape(self, input_shape):
      return input_shape

In [24]:
from keras import backend as K 
from keras.layers.core import Dropout 
from keras.layers.core import Flatten
from keras.layers.convolutional import Conv1D
from keras.layers import Input
from keras.engine import Layer
from keras.models import Sequential 
from keras.engine.base_layer import InputSpec
from keras.models import Model
from keras.models import load_model
from keras.initializers import Initializer
from keras.utils import conv_utils
from scipy.stats import spearmanr

In [25]:
import os 
os.environ['CUDA_VISIBLE_DEVICES'] = '2'

In [26]:
class AveragePool(Initializer): 
    def __call__(self, shape, dtype = None): 
        print(shape[0])
        return K.constant(1/(shape[0]), shape=shape, dtype=dtype)

class WeightDistConv(Conv1D): 
    def __init__(self, filters,
                kernel_size, 
                strides = 1, 
                padding = 'valid', 
                data_format = 'channels_last',
                dilation_rate = 1, 
                activation = None, 
                use_bias = False, 
                kernel_initializer = AveragePool(), 
                bias_initializer = 'zeros', 
                kernel_regularizer = None, 
                bias_regularizer = None, 
                activity_regularizer = None, 
                kernel_constraint = None,
                bias_constraint = None, 
                **kwargs): 
        super(WeightDistConv, self).__init__(
            filters=filters, 
            kernel_size=kernel_size, 
            strides = strides, 
            padding=padding,
            data_format=data_format,
            dilation_rate=dilation_rate,
            activation=activation,
            use_bias=False,
            kernel_initializer=kernel_initializer,
            bias_initializer=bias_initializer,
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer,
            activity_regularizer=activity_regularizer,
            kernel_constraint=kernel_constraint,
            bias_constraint=bias_constraint,
            **kwargs) 


    def build(self, input_shape): 
        self.bias = None
        self.filters = input_shape[-1]
        if self.data_format == 'channels_first':
            channel_axis = 1
        else:
            channel_axis = -1
        if input_shape[channel_axis] is None:
            raise ValueError('The channel dimension of the inputs '
                             'should be defined. Found `None`.')
        input_dim = input_shape[channel_axis]
        kernel_shape = self.kernel_size + (self.filters,)
        self.kernel = self.add_weight(shape=kernel_shape,
                                        initializer = self.kernel_initializer, 
                                        name ='kernel',
                                        regularizer = self.kernel_regularizer, 
                                        constraint = self.kernel_constraint)

        self.input_spec = InputSpec(ndim=3,
                                    axes={channel_axis: input_dim})
        self.num_input_channels = input_shape[1]
        self.built = True
       
      
    #Layer's logic
    def call(self, inputs):
        result = []
        for x in range(self.kernel_size[0]): 
            result.append((self.kernel[x][:,None]*K.eye(self.filters))[None,:,:])

        curr_kernel = K.concatenate(result, axis = 0)
        print("curr kernel: ", curr_kernel)
        outputs = K.conv1d(inputs, curr_kernel,
                         strides=self.strides[0],
                         padding=self.padding,
                         data_format=self.data_format,
                         dilation_rate=self.dilation_rate[0])

        if (self.activation is not None):
            outputs = self.activation(outputs)

        return outputs
  
    def compute_output_shape(self, input_shape):
        length = conv_utils.conv_output_length(input_length = self.num_input_channels, 
                                               filter_size = self.filters,
                                               padding=self.padding,
                                               stride=self.strides[0])
        return (input_shape[0],length, self.filters)

In [None]:
s_model = Sequential([
    k1.Conv1D(filters=filters, kernel_size=kernel_size,
            input_shape=keras_train_batch_generator[0][0].shape[1:], padding="same"), 
    k1.core.Activation("relu"),
    k1.Conv1D(filters=filters, kernel_size=kernel_size,
              padding="same"), 
    k1.core.Activation("relu"),
    k1.Conv1D(filters=filters, kernel_size=kernel_size,
              padding="same"), 
    k1.core.Activation("relu"),
    WeightDistConv(kernel_size = 40, strides = 40, input_shape = keras_train_batch_generator[0][0].shape[1:], 
                  padding = "same"),
    k1.pooling.MaxPooling1D(pool_size=40,padding="same",
                                               strides=40), 
    Flatten(), 
    k1.Dense(units = 100, activation = "relu"),
    k1.Dense(units = 1)
], name = "shared_layers")

s_model.summary()
main_input = Input(shape=keras_train_batch_generator[0][0].shape[1:])
rev_input = Input(shape=keras_train_batch_generator[0][0].shape[1:])

rev_input = RevComp()(main_input)

main_output = s_model(main_input)
rev_output = s_model(rev_input)

avg = k1.Average()([main_output, rev_output])
siamese_model = Model(inputs = main_input, outputs = avg)

merged = keras.layers.concatenate([main_output, rev_output])
                                  
siamese_model.compile(optimizer="adam", loss="mean_squared_error")
early_stopping_callback = keras.callbacks.EarlyStopping(
                              monitor='val_loss',
                              patience= 60,
                              restore_best_weights=True)
siamese_model.fit_generator(generator= keras_train_batch_generator, 
                           epochs=300, callbacks=[early_stopping_callback],
                           validation_data=keras_valid_batch_generator)
siamese_model.set_weights(early_stopping_callback.best_weights)  

siamese_filename = ('siamese_%s.h5' % seed_num, str(seed_num))[0]
siamese_model.save(siamese_filename)
custom_objects = {"RevComp":RevComp}
siamese_model_final = load_model(siamese_filename, custom_objects)

W0728 16:38:36.382161 139663289767680 deprecation_wrapper.py:119] From /users/hannahgz/anaconda3/lib/python3.7/site-packages/keras/optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.



_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_4 (Conv1D)            (None, 1000, 15)          915       
_________________________________________________________________
activation_4 (Activation)    (None, 1000, 15)          0         
_________________________________________________________________
conv1d_5 (Conv1D)            (None, 1000, 15)          3390      
_________________________________________________________________
activation_5 (Activation)    (None, 1000, 15)          0         
_________________________________________________________________
conv1d_6 (Conv1D)            (None, 1000, 15)          3390      
_________________________________________________________________
activation_6 (Activation)    (None, 1000, 15)          0         
_________________________________________________________________
max_pooling1d_2 (MaxPooling1 (None, 25, 15)            0         
__________

W0728 16:38:36.697080 139663289767680 deprecation_wrapper.py:119] From /users/hannahgz/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:986: The name tf.assign_add is deprecated. Please use tf.compat.v1.assign_add instead.

W0728 16:38:36.810040 139663289767680 deprecation_wrapper.py:119] From /users/hannahgz/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:973: The name tf.assign is deprecated. Please use tf.compat.v1.assign instead.



Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
 11/570 [..............................] - ETA: 1:11 - loss: 4346.0338

In [None]:
s_model_dropout = Sequential([
    k1.Conv1D(filters=filters, kernel_size=kernel_size,
            input_shape=keras_train_batch_generator[0][0].shape[1:], padding="same"), 
    k1.core.Activation("relu"),
    k1.Dropout(0.2)
    k1.Conv1D(filters=filters, kernel_size=kernel_size,
              padding="same"), 
    k1.core.Activation("relu"),
    k1.Dropout(0.2)
    k1.Conv1D(filters=filters, kernel_size=kernel_size,
              padding="same"), 
    k1.core.Activation("relu"),
    k1.pooling.MaxPooling1D(pool_size=40,padding="same",
                                               strides=40), 
    Flatten(), 
    k1.Dense(units = 100, activation = "relu"),
    k1.Dense(units = 1)
], name = "shared_layers")

main_input = Input(shape=keras_train_batch_generator[0][0].shape[1:])
rev_input = Input(shape=keras_train_batch_generator[0][0].shape[1:])

rev_input = RevComp()(main_input)

main_output = s_model(main_input)
rev_output = s_model(rev_input)

avg = k1.Average()([main_output, rev_output])
siamese_model_dropout = Model(inputs = main_input, outputs = avg)

merged = keras.layers.concatenate([main_output, rev_output])
                                  
siamese_model_dropout.compile(optimizer="adam", loss="mean_squared_error")
early_stopping_callback = keras.callbacks.EarlyStopping(
                              monitor='val_loss',
                              patience= 60,
                              restore_best_weights=True)
siamese_model_dropout.fit_generator(generator= keras_train_batch_generator, 
                           epochs=300, callbacks=[early_stopping_callback],
                           validation_data=keras_valid_batch_generator)
siamese_model_dropout.set_weights(early_stopping_callback.best_weights)  

siamese_filename = ('siamese_dropout_%s.h5' % seed_num, str(seed_num))[0]
siamese_model_dropout.save(siamese_filename)
custom_objects = {"RevComp":RevComp}
siamese_model_final = load_model(siamese_filename, custom_objects)

In [None]:
s_model_spatial_dropout = Sequential([
    k1.Conv1D(filters=filters, kernel_size=kernel_size,
            input_shape=keras_train_batch_generator[0][0].shape[1:], padding="same"), 
    k1.core.Activation("relu"),
    k1.core.SpatialDropout1D(0.2),
    k1.Conv1D(filters=filters, kernel_size=kernel_size,
              padding="same"), 
    k1.core.Activation("relu"),
    k1.core.SpatialDropout1D(0.2),
    k1.Conv1D(filters=filters, kernel_size=kernel_size,
              padding="same"), 
    k1.core.Activation("relu"),
    k1.pooling.MaxPooling1D(pool_size=40,padding="same",
                                               strides=40), 
    Flatten(), 
    k1.Dense(units = 100, activation = "relu"),
    k1.Dense(units = 1)
], name = "shared_layers")

main_input = Input(shape=keras_train_batch_generator[0][0].shape[1:])
rev_input = Input(shape=keras_train_batch_generator[0][0].shape[1:])

rev_input = RevComp()(main_input)

main_output = s_model_spatial_dropout(main_input)
rev_output = s_model_spatial_dropout(rev_input)

avg = k1.Average()([main_output, rev_output])
siamese_model_spatial_dropout = Model(inputs = main_input, outputs = avg)

merged = keras.layers.concatenate([main_output, rev_output])
                                  
siamese_model_spatial_dropout.compile(optimizer="adam", loss="mean_squared_error")
early_stopping_callback = keras.callbacks.EarlyStopping(
                              monitor='val_loss',
                              patience= 60,
                              restore_best_weights=True)
siamese_model_spatial_dropout.fit_generator(generator= keras_train_batch_generator, 
                           epochs=300, callbacks=[early_stopping_callback],
                           validation_data=keras_valid_batch_generator)
siamese_model_spatial_dropout.set_weights(early_stopping_callback.best_weights)  

siamese_filename = ('siamese_spatial_dropout_%s.h5' % seed_num, str(seed_num))[0]
siamese_model_spatial_dropout.save(siamese_filename)

W0730 16:20:48.535155 140274394318592 deprecation_wrapper.py:119] From /users/hannahgz/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0730 16:20:48.538982 140274394318592 deprecation_wrapper.py:119] From /users/hannahgz/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0730 16:20:48.543577 140274394318592 deprecation_wrapper.py:119] From /users/hannahgz/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0730 16:20:48.575086 140274394318592 deprecation_wrapper.py:119] From /users/hannahgz/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:133: The name tf.placeholder_with_default is deprecated. Please use tf.compat.

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300