In [1]:
import os, h5py, pandas as pd, sys
import numpy as np
srcpath = os.path.join(os.getcwd(), '..')
sys.path.append(srcpath)
import tensorflow as tf
from tensorflow import keras as tfk

from paramiko import SSHClient
from scp import SCPClient

from pairwise_conv_1d import PairwiseConv1D, PairwiseFromStdConv1D
from nearest_neighbor_conv_1d import NearestNeighborConv1D, NearestNeighborFromStdConv1D

import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's default theme to all matplotlib figures drawn in this notebook.
sns.set()
# Render matplotlib figures inline in the cell output.
%matplotlib inline
# Reload imported modules before each cell executes (mode 2 = all modules), so
# edits to the local layer files imported above take effect without a restart.
%load_ext autoreload
%autoreload 2

In [2]:
# set up ssh and scp connection
# BAMDEV1 must hold the remote login as "username@hostname". Validate it up
# front so a missing/malformed variable fails with a clear message instead of
# an AttributeError on None.
remote_login = os.getenv('BAMDEV1')
if not remote_login or remote_login.count("@") != 1:
    raise ValueError(
        "Environment variable BAMDEV1 must be set to 'username@hostname', "
        "got {!r}".format(remote_login)
    )
username, hostname = remote_login.split("@")

ssh = SSHClient()
ssh.load_system_host_keys()
ssh.connect(hostname=hostname, username=username)

# exec_command returns (stdin, stdout, stderr); the first stdout line is the
# remote user's home directory.
remote_stdout = ssh.exec_command("echo $HOME")[1]
remotehome = remote_stdout.readlines()[0].strip()
scp = SCPClient(ssh.get_transport())

# Remote paths are joined with "/" explicitly: os.path.join would use the
# *local* separator, which is wrong when the notebook runs on a non-POSIX OS.
REMOTEDATADIR = "/".join([
    remotehome.rstrip("/"),
    "projects/higher_order_convolutions",
    'data',
    'deepbind_encode_chipseq',
])

In [3]:
# download data for a particular tfid
tfid = "POLR2A_K562_Pol2_HudsonAlpha"
print("Downloading data ...")
if not os.path.exists("data.h5"):
    # Remote path uses "/" explicitly; os.path.join would apply the local
    # OS separator to a path that lives on the remote host.
    scp.get(remote_path="/".join([REMOTEDATADIR, tfid, 'data.h5']))
print("Loading data ...")
# The context manager closes the HDF5 handle even if a dataset is missing or
# a read fails part-way through.
with h5py.File('data.h5', 'r') as data:
    x_train = data['X_train'][:]
    y_train = data['Y_train'][:]
    x_test = data['X_test'][:]
    y_test = data['Y_test'][:]
# 1-D label vectors get a trailing axis so they become 2-D column arrays.
if y_train.ndim == 1:
    y_train = y_train[:, None]
if y_test.ndim == 1:
    y_test = y_test[:, None]
# Delete the local copy once it is in memory, so a later run with a different
# tfid does not pick up this file. os.remove replaces the shell-only `!rm`.
os.remove('data.h5')

Downloading data ...
Loading data ...


In [4]:
# Sanity check: push a random batch through a standard Conv1D carrying an L2
# kernel penalty, then show the output shape and the layer's regularization loss.
x = tf.random.normal(shape=(2, 50, 4))
diag_regularizer = tfk.regularizers.l2(1e-6)
conv_kwargs = dict(
    filters=15,
    kernel_size=9,
    padding='same',
    kernel_regularizer=diag_regularizer,
)
stdconv = tfk.layers.Conv1D(**conv_kwargs)
y = stdconv(x)
print(y.shape, stdconv.losses)

(2, 50, 15) [<tf.Tensor: shape=(), dtype=float32, numpy=6.1221967e-06>]


In [None]:
# Build a pairwise layer from the standard conv defined above, with an L1
# penalty passed as the off-diagonal regularizer, and run it on the same batch.
offdiag_regularizer = tfk.regularizers.l1(1e-2)
pconv = PairwiseFromStdConv1D(
    offdiag_regularizer=offdiag_regularizer,
    stdconv=stdconv,
)
y = pconv(x)
print(y.shape, pconv.losses)

In [None]:
# Same check for the nearest-neighbor variant: build it from the standard conv
# with an L1 off-diagonal regularizer and run it on the same batch.
offdiag_regularizer = tfk.regularizers.l1(1e-2)
nnconv = NearestNeighborFromStdConv1D(
    offdiag_regularizer=offdiag_regularizer,
    stdconv=stdconv,
)
y = nnconv(x)
print(y.shape, nnconv.losses)

In [None]:
# Show the diagonal and off-diagonal kernel shapes of both higher-order layers.
for layer in (pconv, nnconv):
    print(layer.diag_kernel.shape, layer.offdiag_kernel.shape)

In [None]:
# Maps the `conv_type` argument of cnn25_model to the layer class used for the
# first convolution.
conv_type_dict = {
    'standard': tfk.layers.Conv1D,
    'pairwise': PairwiseConv1D,
    'nearest_neighbor': NearestNeighborConv1D,
}


def cnn25_model(conv_type='standard', kernel_regularizer=tfk.regularizers.l2(1e-6), input_shape=None):
    """Build the first convolutional stage of the CNN with a selectable conv layer.

    Parameters
    ----------
    conv_type : str
        Key into `conv_type_dict` selecting the layer class of the first
        convolution ('standard', 'pairwise' or 'nearest_neighbor').
    kernel_regularizer : keras regularizer or None
        Passed as `kernel_regularizer` to the first convolution.
    input_shape : tuple or None
        Shape of one input example (without the batch axis). When None, it is
        taken from the notebook-level `x_train` as `x_train.shape[1:]`.

    Returns
    -------
    tfk.Model
        Model mapping the input to the max-pooled first-layer activations.

    Raises
    ------
    AssertionError
        If `conv_type` is not a key of `conv_type_dict`.
    """
    assert conv_type in conv_type_dict, "unknown conv_type: {!r}".format(conv_type)
    if input_shape is None:
        # Fall back to the data loaded earlier in the notebook.
        input_shape = x_train.shape[1:]

    inputs = tfk.layers.Input(shape=input_shape)

    # 1st conv layer
    Conv1D = conv_type_dict[conv_type]
    nn = Conv1D(filters=32, kernel_size=19, use_bias=True, padding='same',
                kernel_regularizer=kernel_regularizer)(inputs)
    nn = tfk.layers.BatchNormalization()(nn)
    nn = tfk.layers.Activation('relu')(nn)
    # The pooling layer must be *called* on the tensor; constructing it alone
    # leaves a Layer object rather than a tensor.
    outputs = tfk.layers.MaxPool1D(pool_size=25)(nn)

    # TODO(review): only the first stage is defined here; the remaining layers
    # and the classification head still need to be added before training.

    # create keras model
    return tfk.Model(inputs=inputs, outputs=outputs)

In [None]:
def model(input_shape, num_labels, activation='relu', pool_size=[25, 4], 
          units=[32, 128, 512], dropout=[0.2, 0.2, 0.5], 
          bn=[True, True, True], l2=None):
    """Build a two-convolution + one-dense-layer Keras classifier.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input example (without the batch axis).
    num_labels : int
        Number of units in the output layer.
    activation : str
        Activation applied after the first convolution only; the second
        convolution and the dense layer always use 'relu'.
    pool_size : sequence of 2 ints
        Max-pool sizes after the first and second convolution.
    units : sequence of 3 ints
        Filters of conv 1, filters of conv 2, units of the dense layer.
    dropout : sequence of 3 floats
        Dropout rates after conv 1, conv 2 and the dense layer.
    bn : sequence of 3 bools
        Whether batch normalization follows conv 1, conv 2 and the dense layer.
    l2 : float or None
        L2 penalty strength for the kernels of both convolutions and the dense
        layer; None disables kernel regularization.

    Returns
    -------
    tfk.Model
        Model mapping inputs to sigmoid outputs, one per label.
    """
    # l2 regularization
    kernel_reg = tfk.regularizers.l2(l2) if l2 is not None else None

    # input layer
    inputs = tfk.layers.Input(shape=input_shape)

    # A layer uses a bias exactly when its batch-norm flag is truthy.
    # NOTE(review): the opposite pairing is more common, because batch norm
    # has its own shift term; confirm this is intended.
    use_bias = [bool(status) for status in bn]

    # layer 1 - convolution
    nn = tfk.layers.Conv1D(filters=units[0],
                           kernel_size=19,
                           strides=1,
                           activation=None,
                           use_bias=use_bias[0],
                           padding='same',
                           kernel_regularizer=kernel_reg,
                           )(inputs)
    if bn[0]:
        nn = tfk.layers.BatchNormalization()(nn)
    nn = tfk.layers.Activation(activation)(nn)
    nn = tfk.layers.MaxPool1D(pool_size=pool_size[0])(nn)
    nn = tfk.layers.Dropout(dropout[0])(nn)

    # layer 2 - convolution
    nn = tfk.layers.Conv1D(filters=units[1],
                           kernel_size=7,
                           strides=1,
                           activation=None,
                           use_bias=use_bias[1],
                           padding='same',
                           kernel_regularizer=kernel_reg,
                           )(nn)
    if bn[1]:
        nn = tfk.layers.BatchNormalization()(nn)
    nn = tfk.layers.Activation('relu')(nn)
    nn = tfk.layers.MaxPool1D(pool_size=pool_size[1])(nn)
    nn = tfk.layers.Dropout(dropout[1])(nn)

    # layer 3 - Fully-connected
    nn = tfk.layers.Flatten()(nn)
    nn = tfk.layers.Dense(units[2],
                          activation=None,
                          use_bias=use_bias[2],
                          kernel_regularizer=kernel_reg,
                          )(nn)
    if bn[2]:
        nn = tfk.layers.BatchNormalization()(nn)
    nn = tfk.layers.Activation('relu')(nn)
    nn = tfk.layers.Dropout(dropout[2])(nn)

    # Output layer: linear logits followed by a separate sigmoid activation.
    logits = tfk.layers.Dense(num_labels, activation='linear', use_bias=True)(nn)
    outputs = tfk.layers.Activation('sigmoid')(logits)

    # create keras model
    return tfk.Model(inputs=inputs, outputs=outputs)