# ISM Benchmark

Benchmark NaiveISM and fastISM.

In [1]:
import sys
sys.path.append("../")

import fastISM
from fastISM.models.basset import basset_model
from fastISM.models.factorized_basset import factorized_basset_model
from fastISM.models.bpnet import bpnet_model
import tensorflow as tf
import numpy as np
from importlib import reload
import time

In [2]:
tf.__version__

'2.3.0'

In [3]:
!nvidia-smi

Tue Sep  8 08:29:05 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.51.05    Driver Version: 450.51.05    CUDA Version: 11.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  On   | 00000000:82:00.0 Off |                    0 |
| N/A   30C    P0    27W / 250W |      0MiB / 16280MiB |      0%   E. Process |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [4]:
!nvidia-smi -L

GPU 0: Tesla P100-PCIE-16GB (UUID: GPU-0d9a859c-ce19-78f3-2f87-aade11d14bae)


In [1]:
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2019 NVIDIA Corporation
Built on Wed_Apr_24_19:10:27_PDT_2019
Cuda compilation tools, release 10.1, V10.1.168


In [6]:
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

Num GPUs Available:  1


In [7]:
# Choose a TensorFlow device string: the first GPU when at least one is
# visible to TensorFlow, otherwise the CPU. The bare `device` expression
# below displays the selected value as the cell output.
# NOTE(review): `device` is not referenced by any other cell shown in this
# notebook -- confirm whether it is still needed.
device = 'GPU:0' if tf.config.experimental.list_physical_devices('GPU') else '/device:CPU:0'
device

'GPU:0'

In [8]:
# Load the benchmark input sequences. `time_ism` reads this notebook-level
# `seqs` array directly (it is not passed as an argument), so this cell must
# run before any benchmark cell. The recorded output of the next cell shows
# shape (5000, 2000, 4).
seqs = np.load("test.seq.npy")

In [9]:
seqs.shape

(5000, 2000, 4)

## Benchmark

Best practice is to restart the kernel after benchmarking each model, then re-run the setup cells above before benchmarking the next one.

In [10]:
def time_ism(ism_model, batch_sizes, seqlen):
    """Benchmark an ISM model over several batch sizes and print the timings.

    For every batch size ``b``, the first ``b`` sequences of the
    notebook-level ``seqs`` array, truncated to their first ``seqlen``
    positions, are converted to a tensor with the dtype of the wrapped
    model's first input. One untimed warm-up call is made, followed by four
    timed calls (one per one-hot ``replace_with`` vector).

    Args:
        ism_model: Callable ISM wrapper. It must expose ``.model.inputs``
            and accept ``ism_model(x, replace_with=...)``.
        batch_sizes: Iterable of positive batch sizes to benchmark.
        seqlen: Number of leading positions of each sequence to use.

    Returns:
        None. One line is printed per batch size (total seconds and seconds
        per 100 sequences), followed by the best per-100 figure. Nothing is
        printed when ``batch_sizes`` is empty.

    Raises:
        ValueError: If ``seqlen`` or any batch size is smaller than 1 or
            larger than what ``seqs`` holds. Slicing would otherwise clip
            silently and the per-100 figure would be computed against the
            wrong number of sequences.
    """
    # Materialise once so generators work and everything can be validated
    # before any (potentially very long) benchmark run starts.
    batch_sizes = list(batch_sizes)
    n_available, max_seqlen = seqs.shape[0], seqs.shape[1]

    if not 1 <= seqlen <= max_seqlen:
        raise ValueError(
            "seqlen must be between 1 and {} (got {})".format(max_seqlen, seqlen))
    for b in batch_sizes:
        if not 1 <= b <= n_available:
            raise ValueError(
                "batch size must be between 1 and {} (got {})".format(n_available, b))

    # Loop-invariant: dtype expected by the wrapped model's first input.
    input_dtype = ism_model.model.inputs[0].dtype

    # One timed ISM pass per replacement base.
    # NOTE: computations are only performed at those positions
    # at which the existing base != replace_with
    one_hot_bases = ([0, 0, 0, 1], [0, 0, 1, 0], [0, 1, 0, 0], [1, 0, 0, 0])

    per_100 = []
    for b in batch_sizes:
        x = tf.constant(seqs[:b, :seqlen], dtype=input_dtype)

        # Dry run -- required because the first call for a batch is slower:
        # it pays for set-up, and for the variable inner batch sizes caused
        # by the varying number of sequences that need mutating at a position.
        ism_model(x, replace_with=one_hot_bases[0])

        # perf_counter is monotonic and high-resolution, unlike time.time(),
        # which can jump if the system clock is adjusted mid-benchmark.
        start = time.perf_counter()
        for base in one_hot_bases:
            ism_model(x, replace_with=base)
        elapsed = time.perf_counter() - start

        seconds_per_100 = (elapsed / b) * 100
        per_100.append(seconds_per_100)
        print("BATCH: {}\tTIME: {:.2f}\tPER 100: {:.2f}".format(b, elapsed, seconds_per_100))

    # min() of an empty list raises, so only summarise when something ran.
    if per_100:
        print("BEST PER 100: {:.2f}".format(min(per_100)))

### Basset (1000)

In [10]:
model = basset_model(seqlen=1000, num_outputs=1)

In [20]:
model_fism = fastISM.FastISM(model, test_correctness=False)

In [21]:
time_ism(model_fism, [64, 256, 2048, 3096, 4096], 1000)

BATCH: 64	TIME: 46.48	PER 100: 72.63
BATCH: 256	TIME: 47.21	PER 100: 18.44
BATCH: 2048	TIME: 64.28	PER 100: 3.14
BATCH: 3096	TIME: 86.50	PER 100: 2.79
BATCH: 4096	TIME: 110.41	PER 100: 2.70
BEST PER 100: 2.70


In [11]:
model_nism = fastISM.NaiveISM(model)

In [12]:
time_ism(model_nism, [128, 256, 512, 1024, 2048], 1000)

BATCH: 128	TIME: 44.07	PER 100: 34.43
BATCH: 256	TIME: 76.95	PER 100: 30.06
BATCH: 512	TIME: 143.60	PER 100: 28.05
BATCH: 1024	TIME: 280.21	PER 100: 27.36
BATCH: 2048	TIME: 562.39	PER 100: 27.46
BEST PER 100: 27.36


### Basset (2000)

In [10]:
model = basset_model(seqlen=2000, num_outputs=1)

In [11]:
model_fism = fastISM.FastISM(model, test_correctness=False)

In [12]:
time_ism(model_fism, [128, 1024, 2048], 2000)

BATCH: 128	TIME: 89.73	PER 100: 70.10
BATCH: 1024	TIME: 98.75	PER 100: 9.64
BATCH: 2048	TIME: 132.92	PER 100: 6.49
BEST PER 100: 6.49


In [11]:
model_nism = fastISM.NaiveISM(model)

In [12]:
time_ism(model_nism, [64, 128, 256, 512, 1024], 2000)

BATCH: 64	TIME: 87.72	PER 100: 137.06
BATCH: 128	TIME: 148.35	PER 100: 115.90
BATCH: 256	TIME: 273.88	PER 100: 106.99
BATCH: 512	TIME: 524.65	PER 100: 102.47
BATCH: 1024	TIME: 1028.54	PER 100: 100.44
BEST PER 100: 100.44


In [13]:
time_ism(model_nism, [2048], 2000)

BATCH: 2048	TIME: 2066.35	PER 100: 100.90
BEST PER 100: 100.90


### Factorized Basset (1000)

In [11]:
model = factorized_basset_model(seqlen=1000, num_outputs=1)

In [12]:
model_fism = fastISM.FastISM(model, test_correctness=False)

In [13]:
time_ism(model_fism, [64, 256, 2048, 3072], 1000)

BATCH: 64	TIME: 104.68	PER 100: 163.56
BATCH: 256	TIME: 105.98	PER 100: 41.40
BATCH: 2048	TIME: 140.13	PER 100: 6.84
BATCH: 3072	TIME: 167.94	PER 100: 5.47
BEST PER 100: 5.47


In [12]:
model_nism = fastISM.NaiveISM(model)

In [13]:
time_ism(model_nism, [64, 128, 256, 512, 1024], 1000)

BATCH: 64	TIME: 67.23	PER 100: 105.05
BATCH: 128	TIME: 105.19	PER 100: 82.18
BATCH: 256	TIME: 187.13	PER 100: 73.10
BATCH: 512	TIME: 355.94	PER 100: 69.52
BATCH: 1024	TIME: 706.25	PER 100: 68.97
BEST PER 100: 68.97


In [14]:
time_ism(model_nism, [2048], 1000)

BATCH: 2048	TIME: 1418.41	PER 100: 69.26
BEST PER 100: 69.26


### Factorized Basset (2000)

In [12]:
model = factorized_basset_model(seqlen=2000, num_outputs=1)

In [11]:
model_fism = fastISM.FastISM(model, test_correctness=False)

In [12]:
time_ism(model_fism, [128, 512, 1024], 2000)

BATCH: 128	TIME: 203.03	PER 100: 158.62
BATCH: 512	TIME: 207.94	PER 100: 40.61
BATCH: 1024	TIME: 218.77	PER 100: 21.36
BEST PER 100: 21.36


In [13]:
time_ism(model_fism, [1280], 2000)

BATCH: 1280	TIME: 230.89	PER 100: 18.04
BEST PER 100: 18.04


In [13]:
model_nism = fastISM.NaiveISM(model)

In [14]:
# FINAL
time_ism(model_nism, [64, 128, 256, 512], 2000)

BATCH: 64	TIME: 210.42	PER 100: 328.78
BATCH: 128	TIME: 366.18	PER 100: 286.08
BATCH: 256	TIME: 689.63	PER 100: 269.39
BATCH: 512	TIME: 1342.65	PER 100: 262.24
BEST PER 100: 262.24


### BPNet (1000)

In [10]:
model = bpnet_model(seqlen=1000, num_dilated_convs=9)

In [11]:
model_fism = fastISM.FastISM(model, test_correctness=False)

In [12]:
# FINAL
time_ism(model_fism, [64, 512, 1280], 1000)

BATCH: 64	TIME: 188.36	PER 100: 294.31
BATCH: 512	TIME: 228.79	PER 100: 44.69
BATCH: 1280	TIME: 370.86	PER 100: 28.97
BEST PER 100: 28.97


In [11]:
model_nism = fastISM.NaiveISM(model)

In [12]:
# FINAL
time_ism(model_nism, [128, 256, 512, 1024], 1000)

BATCH: 128	TIME: 64.59	PER 100: 50.46
BATCH: 256	TIME: 119.39	PER 100: 46.64
BATCH: 512	TIME: 235.98	PER 100: 46.09
BATCH: 1024	TIME: 483.09	PER 100: 47.18
BEST PER 100: 46.09


### BPNet (2000)

In [11]:
model = bpnet_model(seqlen=2000, num_dilated_convs=9)

In [13]:
model_fism = fastISM.FastISM(model, test_correctness=False)

In [16]:
# FINAL
time_ism(model_fism, [64,512,768], 2000)

BATCH: 64	TIME: 378.26	PER 100: 591.03
BATCH: 512	TIME: 507.65	PER 100: 99.15
BATCH: 768	TIME: 626.06	PER 100: 81.52
BEST PER 100: 81.52


In [12]:
model_nism = fastISM.NaiveISM(model)

In [13]:
# FINAL
time_ism(model_nism, [64, 128, 256, 512], 2000)

BATCH: 64	TIME: 126.32	PER 100: 197.38
BATCH: 128	TIME: 231.87	PER 100: 181.15
BATCH: 256	TIME: 445.35	PER 100: 173.96
BATCH: 512	TIME: 902.32	PER 100: 176.23
BEST PER 100: 173.96
