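# Grad x Input Benchmark

Times gradient × input attribution (the input multiplied by the gradient of the model output with respect to that input) for the Basset, Factorized Basset and BPNet models across batch sizes and sequence lengths.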

In [1]:
import sys
sys.path.append("../")

import fastISM
from fastISM.models.basset import basset_model

from fastISM.models.factorized_basset import factorized_basset_model
from fastISM.models.bpnet import bpnet_model
import tensorflow as tf
import numpy as np
from importlib import reload
import time

In [2]:
# Re-import each fastISM submodule in turn, then the top-level package, so the
# kernel uses the current on-disk code. Attributes are looked up lazily, one
# per iteration, and the final reload is left as the cell's displayed value.
for submodule_name in ("flatten_model", "models", "ism_base",
                       "change_range", "fast_ism_utils"):
    reload(getattr(fastISM, submodule_name))
reload(fastISM)

<module 'fastISM' from '../fastISM/__init__.py'>

In [3]:
# Record the TensorFlow version the timings in this notebook were produced with.
tf.__version__

'2.3.0'

In [4]:
# Snapshot of the GPU (model, driver, memory use, utilisation) before benchmarking.
!nvidia-smi

Tue Sep  8 09:09:35 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.51.05    Driver Version: 450.51.05    CUDA Version: 11.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  On   | 00000000:82:00.0 Off |                    0 |
| N/A   30C    P0    27W / 250W |      0MiB / 16280MiB |      0%   E. Process |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [5]:
# List the GPUs on this machine together with their UUIDs.
!nvidia-smi -L

GPU 0: Tesla P100-PCIE-16GB (UUID: GPU-0d9a859c-ce19-78f3-2f87-aade11d14bae)


In [6]:
# CUDA compiler (toolkit) version; reported separately from the driver-side
# CUDA version that nvidia-smi prints.
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2019 NVIDIA Corporation
Built on Wed_Apr_24_19:10:27_PDT_2019
Cuda compilation tools, release 10.1, V10.1.168


In [7]:
# Count the GPUs that TensorFlow can see.
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))

Num GPUs Available:  1


In [8]:
# Device string: the first GPU when TensorFlow reports at least one,
# otherwise the CPU. The value is displayed as the cell output.
if tf.config.experimental.list_physical_devices('GPU'):
    device = 'GPU:0'
else:
    device = '/device:CPU:0'
device

'GPU:0'

## Benchmark

### Basset/Factorized Basset

In [9]:
# Batch sizes swept by the Basset / Factorized Basset benchmark loop.
BATCH_SIZES = [1,32,64,128,256]

In [10]:
# Grad x input benchmark for Basset and Factorized Basset.
# For every model / sequence length / batch size this reports the mean
# wall-clock time of one attribution call (TIME) and the equivalent time for
# 100 examples (PER 100). An attribution call converts the batch to a tensor,
# runs the model under a GradientTape, and multiplies the input by the
# gradient of the prediction with respect to that input.

# Number of timed iterations averaged per batch size.
NUM_TO_AVG = 100
# Untimed iterations run before measuring each batch size. The dry run below
# is forward-only and never exercises the gradient computation, so without
# these the first timed call may also pay any one-off setup cost.
NUM_WARMUP = 3

for model_type in [basset_model, factorized_basset_model]:
    for seqlen in [1000, 2000]:
        print("\n------------------")
        print("MODEL: {}".format(model_type))
        print("SEQLEN: {}".format(seqlen))
        model = model_type(seqlen=seqlen, num_outputs=1)

        # dry run (forward pass only)
        p = model(np.random.random((10,seqlen,4)))

        times = []
        per_100 = []
        for b in BATCH_SIZES:
            tot = 0
            # Negative indices are warm-up iterations: they run the exact same
            # code path but their duration is not accumulated.
            for i in range(-NUM_WARMUP, NUM_TO_AVG):
                x = np.random.random((b,seqlen,4))

                # perf_counter is the clock intended for measuring short
                # intervals; time.time() follows the system clock.
                t = time.perf_counter()
                x = tf.constant(x)
                with tf.GradientTape() as tape:
                    # x is a constant, not a variable, so it must be watched
                    # explicitly for gradients to be recorded.
                    tape.watch(x)
                    pred = model(x)
                # The conversion to a NumPy array is part of the timed region.
                g = (x*tape.gradient(pred, x)).numpy()
                elapsed = time.perf_counter() - t

                if i >= 0:
                    tot += elapsed

            times.append(tot/NUM_TO_AVG)
            per_100.append((times[-1]/b)*100)
            print("BATCH SIZE: {}\tTIME: {:.2f}\tPER 100: {:.2f}".format(b, times[-1], per_100[-1]))

        print("BEST PER 100: {:.2f}".format(min(per_100)))


------------------
MODEL: <function basset_model at 0x7fe080533b90>
SEQLEN: 1000
BATCH SIZE: 1	TIME: 0.03	PER 100: 2.54
BATCH SIZE: 32	TIME: 0.02	PER 100: 0.06
BATCH SIZE: 64	TIME: 0.03	PER 100: 0.05
BATCH SIZE: 128	TIME: 0.05	PER 100: 0.04
BATCH SIZE: 256	TIME: 0.10	PER 100: 0.04
BEST PER 100: 0.04

------------------
MODEL: <function basset_model at 0x7fe080533b90>
SEQLEN: 2000
BATCH SIZE: 1	TIME: 0.01	PER 100: 1.26
BATCH SIZE: 32	TIME: 0.03	PER 100: 0.09
BATCH SIZE: 64	TIME: 0.05	PER 100: 0.08
BATCH SIZE: 128	TIME: 0.10	PER 100: 0.08
BATCH SIZE: 256	TIME: 0.20	PER 100: 0.08
BEST PER 100: 0.08

------------------
MODEL: <function factorized_basset_model at 0x7fe080533c20>
SEQLEN: 1000
BATCH SIZE: 1	TIME: 0.03	PER 100: 2.78
BATCH SIZE: 32	TIME: 0.04	PER 100: 0.13
BATCH SIZE: 64	TIME: 0.06	PER 100: 0.10
BATCH SIZE: 128	TIME: 0.12	PER 100: 0.09
BATCH SIZE: 256	TIME: 0.23	PER 100: 0.09
BEST PER 100: 0.09

------------------
MODEL: <function factorized_basset_model at 0x7fe080533c20>
SEQ

### BPNet

In [17]:
# Batch sizes swept by the BPNet benchmark loop (rebinds the BATCH_SIZES
# used for the Basset benchmark earlier in the notebook).
BATCH_SIZES = [64, 128]

In [18]:
# Grad x input benchmark for BPNet.
# The model returns two outputs (unpacked below as `prof` and `ct`). One
# grad x input map is computed for each of the `seqlen` single-column slices
# of `prof` plus one for `ct`, i.e. seqlen + 1 backward passes per batch.
# Each batch size is timed once (no averaging).
for seqlen in [1000, 2000]:
    print("\n------------------")
    print("SEQLEN: {}".format(seqlen))
    model = bpnet_model(seqlen=seqlen, num_dilated_convs=9)

    times = []
    per_100 = []

    # dry run (forward pass only)
    p = model(np.random.random((10,seqlen,4)))

    for b in BATCH_SIZES:
        x = np.random.random((b,seqlen,4))

        # perf_counter is the clock intended for measuring intervals;
        # time.time() follows the system clock.
        t = time.perf_counter()
        x = tf.constant(x)
        # NOTE(review): g ends up holding seqlen + 1 arrays, each with the
        # same shape as x, so host memory grows with b * seqlen**2.
        g=[]

        # persistent=True because tape.gradient() is called many times on the
        # same tape below; a non-persistent tape allows only one call.
        with tf.GradientTape(persistent=True) as tape:
            tape.watch(x)
            prof, ct = model(x)
            # Slice inside the tape so each slice is a recorded target.
            # presumably prof is (batch, seqlen) - TODO confirm against bpnet_model
            prof = [prof[:,i:i+1] for i in range(seqlen)]

        for i in range(seqlen):
            g.append((x*tape.gradient(prof[i], x)).numpy())
        g.append((x*tape.gradient(ct, x)).numpy())
        times.append(time.perf_counter()-t)

        # Drop the reference to the persistent tape now that all gradients
        # have been taken, so the resources it holds can be released.
        del tape

        per_100.append((times[-1]/b)*100)
        print("BATCH SIZE: {}\tTIME: {:.2f}\tPER 100: {:.2f}".format(b, times[-1], per_100[-1]))

    print("BEST PER 100: {:.2f}".format(min(per_100)))


------------------
SEQLEN: 1000
BATCH SIZE: 64	TIME: 27.25	PER 100: 42.58
BATCH SIZE: 128	TIME: 54.40	PER 100: 42.50
BEST PER 100: 42.50

------------------
SEQLEN: 2000
BATCH SIZE: 64	TIME: 80.90	PER 100: 126.41
BATCH SIZE: 128	TIME: 164.60	PER 100: 128.60
BEST PER 100: 126.41
