In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import time

In [2]:
import sys
sys.path.append("../")
import fastISM

In [3]:
!nvidia-smi

Mon Jan 31 17:37:20 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.73.01    Driver Version: 460.73.01    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  A100-SXM4-40GB      Off  | 00000000:00:04.0 Off |                    0 |
| N/A   34C    P0    50W / 400W |      0MiB / 40536MiB |      0%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [4]:
!nvidia-smi -L

GPU 0: A100-SXM4-40GB (UUID: GPU-27107bad-326f-92f9-7edd-df3591f33254)


In [5]:
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2021 NVIDIA Corporation
Built on Mon_May__3_19:15:13_PDT_2021
Cuda compilation tools, release 11.3, V11.3.109
Build cuda_11.3.r11.3/compiler.29920130_0


In [11]:
!cat /usr/local/cuda/include/cudnn_version.h | grep CUDNN_MAJOR -A 2

#define CUDNN_MAJOR 8
#define CUDNN_MINOR 2
#define CUDNN_PATCHLEVEL 0
--
#define CUDNN_VERSION (CUDNN_MAJOR * 1000 + CUDNN_MINOR * 100 + CUDNN_PATCHLEVEL)

#endif /* CUDNN_VERSION_H */


In [6]:
# get test seqs (randomly sampled from hg38 chr1-22)
# !wget http://mitra.stanford.edu/kundaje/surag/fastISM/test_long.seq.npy

In [12]:
# Load the test sequences and repeat each one six times along the length
# axis (axis 1); the benchmark later slices the first `seqlen` positions
# out of these lengthened sequences.
seqs = np.load("test_long.seq.npy")
seqs = np.concatenate([seqs] * 6, axis=1)
seqs.shape

(100, 1179648, 4)

In [13]:
def time_ism(ism_model, batch_sizes, seqlen):
    """Benchmark an ISM model on the module-level test sequences ``seqs``.

    For every batch size, one untimed warm-up call is made, followed by four
    timed calls (one per one-hot replacement base). The total time for the
    four calls and the time normalised to 100 sequences are printed; after
    all batch sizes, the best per-100 time is printed. Nothing is returned.

    Args:
        ism_model: ISM wrapper that exposes ``.model`` and is callable as
            ``ism_model(x, replace_with=...)``.
        batch_sizes: iterable of batch sizes to benchmark.
        seqlen: number of leading positions of each sequence to feed in.

    Raises:
        ValueError: if a batch size is not in ``1..seqs.shape[0]`` or
            ``seqlen`` exceeds ``seqs.shape[1]``. Slicing would otherwise
            silently yield a smaller input and a wrong per-100 figure.
    """
    # One-hot replacement bases, in the order in which they are timed.
    replacements = [[0, 0, 0, 1], [0, 0, 1, 0], [0, 1, 0, 0], [1, 0, 0, 0]]

    n_seqs, max_len = seqs.shape[0], seqs.shape[1]
    if seqlen > max_len:
        raise ValueError(
            "seqlen={} exceeds available sequence length {}".format(seqlen, max_len))

    in_dtype = ism_model.model.inputs[0].dtype
    per_100 = []

    for b in batch_sizes:
        if not 0 < b <= n_seqs:
            raise ValueError(
                "batch size {} not in range 1..{}".format(b, n_seqs))

        # Convert once, outside the timed region.
        x = tf.constant(seqs[:b, :seqlen], dtype=in_dtype)

        # dry run -- required as the first batch is slower for setting up,
        #            and variable batch sizes (due to the varying number
        #            of seqs that need to be mutated at a position)
        #            also slow down the first call
        warmup = ism_model(x, replace_with=replacements[0])
        # Release the warm-up output before timing starts.
        del warmup

        # perf_counter is monotonic and high-resolution, unlike time.time().
        start = time.perf_counter()

        # NOTE: computations are only performed at those positions
        # at which the existing base != replace_with
        for base in replacements:
            out = ism_model(x, replace_with=base)
            # Drop each result immediately so outputs do not accumulate.
            del out

        elapsed = time.perf_counter() - start

        secs_per_100 = (elapsed / b) * 100
        per_100.append(secs_per_100)
        print("BATCH: {}\tTIME: {:.2f}\tPER 100: {:.2f}".format(b, elapsed, secs_per_100))

    # Guard against an empty batch_sizes, for which min() would raise.
    if per_100:
        print("BEST PER 100: {:.2f}".format(min(per_100)))

In [14]:
class OneToTwo(keras.layers.Layer):
    ''' Transform 1d to 2d with i,j vectors operated on.

    The input is tiled and reshaped to ``[-1, seq_len, seq_len, features]``,
    so it is expected to hold ``seq_len * features`` values per example
    (i.e. shape ``(batch, seq_len, features)``). Two maps are built:
    ``twod1[b, i, j] = oned[b, j]`` and its transpose
    ``twod2[b, i, j] = oned[b, i]``, which are then combined by ``operation``.

    Args:
        seq_len: length of the 1d sequence axis.
        features: number of channels of the 1d input.
        operation: one of 'concat', 'mean', 'max', 'multiply', 'multiply1'
            (case-insensitive). 'concat' doubles the channel axis; the others
            keep ``features`` channels.
        **kwargs: forwarded to ``keras.layers.Layer`` (e.g. ``name``,
            ``dtype``), which is also what makes ``from_config`` work with
            the dict returned by ``get_config``.
    '''
    def __init__(self, seq_len, features, operation='mean', **kwargs):
        # Forward base-layer options instead of silently discarding them.
        super(OneToTwo, self).__init__(**kwargs)
        self.operation = operation.lower()
        self.seq_len = seq_len
        self.features = features
        # 'multiply' was previously misspelled as 'multipy' here, so the
        # 'multiply' branch in call() could never be reached.
        valid_operations = ['concat', 'mean', 'max', 'multiply', 'multiply1']
        assert self.operation in valid_operations, \
            "operation must be one of {}, got {!r}".format(valid_operations, operation)

    def call(self, oned):
        # Repeat the sequence seq_len times along axis 1, then fold the
        # repeats into a new axis: twod1[b, i, j, :] == oned[b, j, :].
        twod1 = tf.tile(oned, [1, self.seq_len, 1])
        twod1 = tf.reshape(twod1, [-1, self.seq_len, self.seq_len, self.features])
        # Swapping the two sequence axes gives twod2[b, i, j, :] == oned[b, i, :].
        twod2 = tf.transpose(twod1, [0, 2, 1, 3])

        if self.operation == 'concat':
            twod = tf.concat([twod1, twod2], axis=-1)

        elif self.operation == 'multiply':
            twod = tf.multiply(twod1, twod2)

        elif self.operation == 'multiply1':
            twod = tf.multiply(twod1 + 1, twod2 + 1) - 1

        else:
            # 'mean' / 'max': pair the two maps on a new trailing axis and
            # reduce over that axis.
            twod1 = tf.expand_dims(twod1, axis=-1)
            twod2 = tf.expand_dims(twod2, axis=-1)
            twod = tf.concat([twod1, twod2], axis=-1)

            if self.operation == 'mean':
                twod = tf.reduce_mean(twod, axis=-1)

            elif self.operation == 'max':
                twod = tf.reduce_max(twod, axis=-1)

        return twod

    def get_config(self):
        # Base config plus this layer's own constructor arguments.
        config = super().get_config().copy()
        config['operation'] = self.operation
        config['seq_len'] = self.seq_len
        config['features'] = self.features
        return config
    
class Symmetrize2D(keras.layers.Layer):
    '''Take the average of a matrix and its transpose to enforce symmetry.

    The input is a rank-4 tensor; axes 1 and 2 are swapped to form the
    transpose, so those two axes must have equal size for the sum to be
    well defined.

    Args:
        **kwargs: forwarded to ``keras.layers.Layer`` (e.g. ``name``,
            ``dtype``) so that they are honoured rather than discarded.
    '''
    def __init__(self, **kwargs):
        super(Symmetrize2D, self).__init__(**kwargs)

    def call(self, x):
        # Swap the two matrix axes, then average with the original.
        x_t = tf.transpose(x, [0, 2, 1, 3])
        x_sym = (x + x_t) / 2
        return x_sym
    
class UpperTri(tf.keras.layers.Layer):
    ''' Unroll matrix to its upper triangular portion.

    The input is reshaped to ``[-1, seq_len**2, output_dim]`` and the entries
    selected by ``np.triu_indices(seq_len, diagonal_offset)`` are gathered
    along axis 1.

    Args:
        seq_len: side length of the square input matrix.
        output_dim: size of the trailing (channel) axis of the input.
        diagonal_offset: diagonals below this offset are excluded.
        **kwargs: forwarded to ``tf.keras.layers.Layer`` (e.g. ``name``,
            ``dtype``), which also lets ``from_config`` accept the dict
            returned by ``get_config``.
    '''
    def __init__(self, seq_len, output_dim, diagonal_offset=2, **kwargs):
        # Forward base-layer options instead of silently discarding them.
        super(UpperTri, self).__init__(**kwargs)
        self.diagonal_offset = diagonal_offset
        self.seq_len = seq_len
        self.output_dim = output_dim

    def call(self, inputs):
        rows, cols = np.triu_indices(self.seq_len, self.diagonal_offset)
        # NOTE: rows + seq_len*cols is the row-major flat index of (col, row),
        # i.e. the mirrored position of each upper-triangular entry. This
        # equals the upper triangle only when the input is symmetric over
        # axes 1 and 2. Kept as-is to preserve existing outputs.
        triu_index = rows + self.seq_len * cols
        unroll_repr = tf.reshape(inputs, [-1, self.seq_len**2, self.output_dim])
        # The integer index array is passed directly; no Python list needed.
        return tf.gather(unroll_repr, triu_index, axis=1)

    def get_config(self):
        # Base config plus this layer's own constructor arguments.
        config = super().get_config().copy()
        config['diagonal_offset'] = self.diagonal_offset
        config['seq_len'] = self.seq_len
        config['output_dim'] = self.output_dim
        return config


In [33]:
def akita(inlen=2**20, out_dim=5):
    """Build an Akita-like Keras model mapping one-hot DNA to contact-map targets.

    Architecture, in order: a Conv1D stem and ten Conv1D blocks, each followed
    by 2x max pooling (11 poolings in total); eight residual dilated Conv1D
    blocks; a Conv1D to 65 channels; ``OneToTwo`` to form a 2d map; a Conv2D
    and six residual dilated Conv2D blocks, each symmetrized; cropping of 32
    positions per side; ``UpperTri`` unrolling; and a final Dense layer.

    Args:
        inlen: input sequence length. The 2d map side is ``inlen // 2**11``
            and must exceed 64 so that something remains after cropping.
        out_dim: number of output units of the final Dense layer.

    Returns:
        An uncompiled ``keras.Model`` with input shape ``(inlen, 4)``.

    Raises:
        ValueError: if ``inlen`` is too short for the pooling and cropping.
    """
    n_pools = 11   # one pooling after the stem + ten in the conv tower
    crop = 32      # positions removed from each side of the 2d map

    # Each MaxPooling1D(2) halves the length (rounding down), so after
    # n_pools poolings the length is inlen // 2**n_pools.
    seq_len_1d = inlen // 2**n_pools
    seq_len_2d = seq_len_1d - 2 * crop
    if seq_len_2d <= 0:
        raise ValueError(
            "inlen={} is too short: pooled length {} must exceed {}".format(
                inlen, seq_len_1d, 2 * crop))

    inp = keras.layers.Input((inlen, 4))

    # NOTE: layers carry Keras auto-generated names. Other cells refer to
    # 'conv1d_11' (the first dilated conv), so the order in which Conv1D
    # layers are created here must not change.
    x = keras.layers.Conv1D(96, 11, padding='same')(inp)
    x = keras.layers.BatchNormalization()(x)
    x = keras.layers.MaxPooling1D(2)(x)

    for _ in range(n_pools - 1):
        x = keras.layers.ReLU()(x)
        x = keras.layers.Conv1D(96, 5, padding='same')(x)
        x = keras.layers.BatchNormalization()(x)
        x = keras.layers.MaxPooling1D(2)(x)

    # Residual dilated 1d blocks; the dilation rate grows by 1.75x per block
    # and is rounded to an integer.
    r = 1
    for _ in range(8):
        y = keras.layers.ReLU()(x)
        y = keras.layers.Conv1D(48, 3, padding='same', dilation_rate=int(np.round(r)))(y)
        y = keras.layers.BatchNormalization()(y)
        y = keras.layers.ReLU()(y)
        y = keras.layers.Conv1D(96, 1, padding='same')(y)
        y = keras.layers.BatchNormalization()(y)
        x = keras.layers.Add()([x, y])
        r *= 1.75

    # Differs from the original: uses 65 channels instead of 64. The original
    # concatenates an extra layer.
    x = keras.layers.ReLU()(x)
    x = keras.layers.Conv1D(65, 5, padding='same')(x)

    x = OneToTwo(seq_len_1d, 65)(x)

    x = keras.layers.ReLU()(x)
    x = keras.layers.Conv2D(48, 3, padding='same')(x)
    x = Symmetrize2D()(x)

    # Residual dilated 2d blocks, re-symmetrized after every block.
    r = 1
    for _ in range(6):
        y = keras.layers.ReLU()(x)
        y = keras.layers.Conv2D(24, 3, padding='same', dilation_rate=int(np.round(r)))(y)
        y = keras.layers.BatchNormalization()(y)
        y = keras.layers.ReLU()(y)
        y = keras.layers.Conv2D(48, 1, padding='same')(y)
        y = keras.layers.BatchNormalization()(y)
        x = keras.layers.Add()([x, y])
        x = Symmetrize2D()(x)
        r *= 1.75

    x = keras.layers.Cropping2D(crop)(x)
    x = UpperTri(seq_len_2d, 48)(x)
    # out_dim was previously ignored in favour of a hard-coded 5.
    x = keras.layers.Dense(out_dim)(x)

    m = keras.Model(inputs=inp, outputs=x)
    return m

### Full Akita

In [34]:
model = akita()

In [35]:
model.count_params()

754438

In [13]:
# tf.keras.utils.plot_model(model, "./lol.png")

In [27]:
# Random (unseeded) stand-in input used to time plain forward passes of
# `model`; the timing cells that follow only use its first 7/14/21 rows.
loltf = tf.constant(np.random.random((100,2**20,4)), dtype=model.input.dtype)

In [19]:
model(loltf[:21], training=False).numpy().shape

2022-01-31 17:39:26.868528: I tensorflow/stream_executor/cuda/cuda_dnn.cc:366] Loaded cuDNN version 8200
2022-01-31 17:39:28.320158: W tensorflow/core/kernels/gpu_utils.cc:49] Failed to allocate memory for convolution redzone checking; skipping this check. This is benign and only means that we won't check cudnn for out-of-bounds reads and writes. This message will only be printed once.
2022-01-31 17:39:31.247049: I tensorflow/stream_executor/cuda/cuda_blas.cc:1774] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


(21, 99681, 5)

In [21]:
%timeit model(loltf[:7], training=False).numpy().shape

196 ms ± 196 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [18]:
%timeit model(loltf[:14], training=False).numpy().shape

416 ms ± 1.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [19]:
%timeit model(loltf[:21], training=False).numpy().shape

613 ms ± 356 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


**fastISM: scoring the central 100bp**

In [17]:
# fastISM wrapper that mutates only the 100 single-base windows centred on
# the middle of the input sequence.
fast_ism_model = fastISM.FastISM(
    model,
    early_stop_layers=['conv1d_11'],  # stop before first dilated convs
    test_correctness=False,
    change_ranges=[
        (pos, pos + 1)
        for pos in range(model.input_shape[1] // 2 - 50,
                         model.input_shape[1] // 2 + 50)
    ],
)

In [19]:
time_ism(fast_ism_model, [21], 2**20)

BATCH: 21	TIME: 140.00	PER 100: 666.65
BEST PER 100: 666.65


**fastISM: scoring the central 1000bp**

In [17]:
# fastISM wrapper that mutates only the 1000 single-base windows centred on
# the middle of the input sequence.
fast_ism_model = fastISM.FastISM(
    model,
    early_stop_layers=['conv1d_11'],  # stop before first dilated convs
    test_correctness=False,
    change_ranges=[
        (pos, pos + 1)
        for pos in range(model.input_shape[1] // 2 - 500,
                         model.input_shape[1] // 2 + 500)
    ],
)

In [18]:
time_ism(fast_ism_model, [21], 2**20)

BATCH: 21	TIME: 1357.26	PER 100: 6463.16
BEST PER 100: 6463.16


**NaiveISM: scoring the central 100bp**

In [16]:
# Naive ISM baseline over the same 100 single-base windows centred on the
# middle of the input sequence.
naive_ism_model = fastISM.NaiveISM(
    model,
    change_ranges=[
        (pos, pos + 1)
        for pos in range(model.input_shape[1] // 2 - 50,
                         model.input_shape[1] // 2 + 50)
    ],
)

In [17]:
time_ism(naive_ism_model, [8,10,12], 2**20)

BATCH: 8	TIME: 73.84	PER 100: 922.97
BATCH: 10	TIME: 87.95	PER 100: 879.50
BATCH: 12	TIME: 106.67	PER 100: 888.92
BEST PER 100: 879.50


**NaiveISM: scoring the central 1000bp**

In [18]:
# Naive ISM baseline over the 1000 single-base windows centred on the middle
# of the input sequence.
naive_ism_model = fastISM.NaiveISM(
    model,
    change_ranges=[
        (pos, pos + 1)
        for pos in range(model.input_shape[1] // 2 - 500,
                         model.input_shape[1] // 2 + 500)
    ],
)

In [19]:
time_ism(naive_ism_model, [10], 2**20)

BATCH: 10	TIME: 870.05	PER 100: 8700.52
BEST PER 100: 8700.52


---