# Vary Parameters

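Starting from a fixed base architecture, vary one parameter at a time (sequence length, convolution width, number of channels, number of convolutional layers) and measure how it affects the speedup of fastISM over naive ISM.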

In [3]:
import sys
sys.path.append("../")

import fastISM
import tensorflow as tf
import numpy as np
from importlib import reload
import time

In [4]:
tf.__version__

'2.3.0'

In [5]:
!nvidia-smi

Tue Jan 25 08:45:43 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.57.02    Driver Version: 470.57.02    CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  On   | 00000000:03:00.0 Off |                    0 |
| N/A   29C    P0    28W / 250W |      0MiB / 16280MiB |      0%   E. Process |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [6]:
!nvidia-smi -L

GPU 0: Tesla P100-PCIE-16GB (UUID: GPU-27db8534-9b2b-8b1a-5889-9c77c0c7be4e)


In [7]:
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2019 NVIDIA Corporation
Built on Wed_Apr_24_19:10:27_PDT_2019
Cuda compilation tools, release 10.1, V10.1.168


In [8]:
# Report how many GPUs TensorFlow can see, using the stable (non-experimental)
# tf.config API.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


In [9]:
# Pick the first GPU when TensorFlow can see one, otherwise fall back to the CPU.
# Uses the stable (non-experimental) tf.config API; the device strings are unchanged.
# NOTE(review): `device` is not referenced by any other cell visible here — confirm
# whether it is still needed.
device = 'GPU:0' if tf.config.list_physical_devices('GPU') else '/device:CPU:0'
device

'GPU:0'

In [10]:
# Sequences used as ISM input; time_ism reads them through the module-level
# name `seqs`. The path is relative to the notebook's working directory.
# The tripled array is reported further down as (5000, 6000, 4), so this file
# presumably holds 5000 sequences of length 2000 with 4 channels — TODO confirm.
seqs = np.load("test.seq.npy")

In [11]:
# Extend the sequence length by concatenating three copies of the loaded array
# along axis 1, so that slices up to seqlen=4096 can be taken by the benchmarks.
# The source array is re-read from disk instead of tripling `seqs` in place, so
# re-running this cell always yields the same array rather than growing it again.
seqs = np.hstack([np.load("test.seq.npy")] * 3)

In [12]:
seqs.shape

(5000, 6000, 4)

## Model

In [13]:
def base_model(seqlen=1000, numchars=4, num_outputs=1, num_channels=256, conv_width=10, num_convs=4):
    """Build the baseline Keras model used by all benchmarks in this notebook.

    Architecture: ``num_convs`` repetitions of
    Conv1D (relu, 'same' padding, stride 1) -> BatchNormalization -> MaxPool1D(2),
    then Flatten, two 1000-unit relu Dense layers ('fc1', 'fc2'), each followed
    by BatchNormalization, and a final Dense layer 'fc3' with ``num_outputs``
    units.

    Args:
        seqlen: length of the input sequence axis.
        numchars: size of the last input axis (channels per position).
        num_outputs: number of units in the output layer.
        num_channels: number of filters in every Conv1D layer.
        conv_width: kernel size of every Conv1D layer.
        num_convs: number of Conv1D/BatchNormalization/MaxPool1D stages.

    Returns:
        An uncompiled ``tf.keras.Model`` mapping an input of shape
        ``(seqlen, numchars)`` to the output of 'fc3'.
    """
    layers = tf.keras.layers
    net_input = tf.keras.Input(shape=(seqlen, numchars))

    # convolutional trunk: each stage halves the sequence axis via pooling
    hidden = net_input
    for _ in range(num_convs):
        hidden = layers.Conv1D(
            filters=num_channels,
            kernel_size=conv_width,
            strides=1,
            padding='same',
            activation='relu',
        )(hidden)
        hidden = layers.BatchNormalization()(hidden)
        hidden = layers.MaxPool1D(pool_size=2)(hidden)

    # fully connected head
    hidden = layers.Flatten()(hidden)
    for fc_name in ('fc1', 'fc2'):
        hidden = layers.Dense(1000, activation='relu', name=fc_name)(hidden)
        hidden = layers.BatchNormalization()(hidden)
    prediction = layers.Dense(num_outputs, name='fc3')(hidden)

    return tf.keras.Model(inputs=net_input, outputs=prediction)

## Benchmark

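Best practice is to restart the kernel after benchmarking each model. This is why the cell execution counts below are not sequential; after a restart, the import, data-loading and function-definition cells above must be re-run first.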

In [14]:
def time_ism(ism_model, batch_sizes, seqlen):
    """Time in-silico mutagenesis (ISM) for one model at each requested batch size.

    For every batch size, a slice of the module-level ``seqs`` array is taken,
    one untimed warm-up call is made, and then four calls (one per one-hot
    ``replace_with`` vector) are timed together. One line is printed per batch
    size, followed by the best per-100-sequence time.

    Args:
        ism_model: ISM wrapper (e.g. ``fastISM.FastISM`` or ``fastISM.NaiveISM``)
            that exposes ``.model.inputs`` and is callable as
            ``ism_model(x, replace_with=...)``.
        batch_sizes: iterable of numbers of sequences to benchmark with.
        seqlen: number of leading positions of each sequence to use.

    Returns:
        ``(times, per_100)``: two lists with one entry per batch size, holding
        the wall-clock seconds for the four timed calls and the same time
        normalised to 100 sequences. Both are empty if ``batch_sizes`` is empty.
    """
    times = []
    per_100 = []
    for b in batch_sizes:
        in_dtype = ism_model.model.inputs[0].dtype

        # x = np.random.random((b,seqlen,4))
        batch = seqs[:b, :seqlen]
        # Slicing silently yields fewer rows when `seqs` holds fewer than b
        # sequences; normalise and report by what is actually benchmarked.
        n_seqs = batch.shape[0]

        # dry run -- required as first batch slower for setting up
        #            and variable batch sizes (due to varying number
        #            of seqs that need to be mutated at a position)
        #            also slows down first call
        x = tf.constant(batch, dtype=in_dtype)
        ism_model(x, [0,0,0,1])

        # perf_counter is monotonic and high-resolution, unlike time.time
        start = time.perf_counter()
        # NOTE(review): x is already a tensor at this point, so this conversion
        # looks redundant; kept so the timed region matches the reported numbers.
        x = tf.constant(x, dtype=in_dtype)

        # NOTE: computations are only performed at those positions
        # at which the existing base != replace_with
        ism_model(x, replace_with=[0,0,0,1])
        ism_model(x, replace_with=[0,0,1,0])
        ism_model(x, replace_with=[0,1,0,0])
        ism_model(x, replace_with=[1,0,0,0])

        elapsed = time.perf_counter() - start
        times.append(elapsed)
        per_100.append((elapsed / n_seqs) * 100)
        print("BATCH: {}\tTIME: {:.2f}\tPER 100: {:.2f}".format(n_seqs, elapsed, per_100[-1]))

    # min() raises on an empty list, so only report a best time if anything ran
    if per_100:
        print("BEST PER 100: {:.2f}".format(min(per_100)))

    return times, per_100

### Varying Sequence Length

In [15]:
# Sweep the input sequence length; every other base_model argument keeps its default.
for seqlen in [64,128,256,512,1024]:
    print("SEQ LEN : {}".format(seqlen))

    model = base_model(seqlen=seqlen)

    # fastISM on this model, timed with a single batch of 4096 sequences
    # (test_correctness=False presumably skips fastISM's own check against a
    # naive run — confirm in the fastISM docs)
    model_fism = fastISM.FastISM(model, test_correctness=False)
    time_ism(model_fism, [4096], seqlen)
    
    # naive ISM baseline on the same model, timed with a smaller batch of 512;
    # the two are comparable through the batch-normalised "PER 100" figure
    model_nism = fastISM.NaiveISM(model)
    time_ism(model_nism, [512], seqlen)
    
    print("----")

SEQ LEN : 64
BATCH: 4096	TIME: 12.68	PER 100: 0.31
BEST PER 100: 0.31
BATCH: 512	TIME: 4.33	PER 100: 0.85
BEST PER 100: 0.85
----
SEQ LEN : 128
BATCH: 4096	TIME: 20.03	PER 100: 0.49
BEST PER 100: 0.49
BATCH: 512	TIME: 5.71	PER 100: 1.12
BEST PER 100: 1.12
----
SEQ LEN : 256
BATCH: 4096	TIME: 40.55	PER 100: 0.99
BEST PER 100: 0.99
BATCH: 512	TIME: 17.78	PER 100: 3.47
BEST PER 100: 3.47
----
SEQ LEN : 512
BATCH: 4096	TIME: 76.25	PER 100: 1.86
BEST PER 100: 1.86
BATCH: 512	TIME: 66.92	PER 100: 13.07
BEST PER 100: 13.07
----
SEQ LEN : 1024
BATCH: 4096	TIME: 182.41	PER 100: 4.45
BEST PER 100: 4.45
BATCH: 512	TIME: 245.77	PER 100: 48.00
BEST PER 100: 48.00
----


In [13]:
# Sequence length 2048, benchmarked in its own cell with smaller batches
# (2048 for fastISM, 256 for naive ISM) than the shorter sequence lengths.
for seqlen in [2048]:
    print("SEQ LEN : {}".format(seqlen))

    model = base_model(seqlen=seqlen)

    # pass the loop variable rather than repeating the literal, so the slice
    # length given to time_ism can never drift from the model's input length
    model_fism = fastISM.FastISM(model, test_correctness=False)
    time_ism(model_fism, [2048], seqlen)
    
    model_nism = fastISM.NaiveISM(model)
    time_ism(model_nism, [256], seqlen)
    
    print("----")

SEQ LEN : 2048
BATCH: 2048	TIME: 267.30	PER 100: 13.05
BEST PER 100: 13.05
BATCH: 256	TIME: 475.71	PER 100: 185.82
BEST PER 100: 185.82
----


In [14]:
# Sequence length 4096, benchmarked in its own cell with smaller batches
# (512 for fastISM, 64 for naive ISM) than the shorter sequence lengths.
for seqlen in [4096]:
    print("SEQ LEN : {}".format(seqlen))

    model = base_model(seqlen=seqlen)

    # pass the loop variable rather than repeating the literal, so the slice
    # length given to time_ism can never drift from the model's input length
    model_fism = fastISM.FastISM(model, test_correctness=False)
    time_ism(model_fism, [512], seqlen)
    
    model_nism = fastISM.NaiveISM(model)
    time_ism(model_nism, [64], seqlen)
    
    print("----")

SEQ LEN : 4096
BATCH: 512	TIME: 319.20	PER 100: 62.34
BEST PER 100: 62.34
BATCH: 64	TIME: 521.99	PER 100: 815.61
BEST PER 100: 815.61
----


### Varying Conv Width

In [25]:
# Conv1D kernel widths swept by the conv-width benchmark loop in the next cell.
CANDIDATE_CONV_WIDTHS = [1,5,10,15,20,30,40,50]

In [26]:
# Sweep the Conv1D kernel width; every other base_model argument keeps its default.
for conv_width in CANDIDATE_CONV_WIDTHS:
    print("CONV WIDTH : {}".format(conv_width))

    model = base_model(conv_width=conv_width)

    # the literal 1000 passed to time_ism is base_model's default seqlen
    # fastISM: one batch of 4096 sequences
    model_fism = fastISM.FastISM(model, test_correctness=False)
    time_ism(model_fism, [4096], 1000)
    
    # naive ISM baseline: one batch of 512 sequences (compare via "PER 100")
    model_nism = fastISM.NaiveISM(model)
    time_ism(model_nism, [512], 1000)
    
    print("----")

CONV WIDTH : 1
BATCH: 4096	TIME: 113.93	PER 100: 2.78
BEST PER 100: 2.78
BATCH: 512	TIME: 123.87	PER 100: 24.19
BEST PER 100: 24.19
----
CONV WIDTH : 5
BATCH: 4096	TIME: 156.98	PER 100: 3.83
BEST PER 100: 3.83
BATCH: 512	TIME: 191.88	PER 100: 37.48
BEST PER 100: 37.48
----
CONV WIDTH : 10
BATCH: 4096	TIME: 196.85	PER 100: 4.81
BEST PER 100: 4.81
BATCH: 512	TIME: 239.44	PER 100: 46.77
BEST PER 100: 46.77
----
CONV WIDTH : 15
BATCH: 4096	TIME: 298.51	PER 100: 7.29
BEST PER 100: 7.29
BATCH: 512	TIME: 234.13	PER 100: 45.73
BEST PER 100: 45.73
----
CONV WIDTH : 20
BATCH: 4096	TIME: 607.98	PER 100: 14.84
BEST PER 100: 14.84
BATCH: 512	TIME: 254.19	PER 100: 49.65
BEST PER 100: 49.65
----
CONV WIDTH : 30
BATCH: 4096	TIME: 689.36	PER 100: 16.83
BEST PER 100: 16.83
BATCH: 512	TIME: 259.03	PER 100: 50.59
BEST PER 100: 50.59
----
CONV WIDTH : 40
BATCH: 4096	TIME: 809.05	PER 100: 19.75
BEST PER 100: 19.75
BATCH: 512	TIME: 269.25	PER 100: 52.59
BEST PER 100: 52.59
----
CONV WIDTH : 50
BATCH: 4096	TI

### Varying Number of Channels

In [14]:
# Sweep the number of Conv1D filters; every other base_model argument keeps its default.
for num_channels in [32,64,128,256]:
    print("NUM CHANNELS : {}".format(num_channels))

    model = base_model(num_channels=num_channels)

    # the literal 1000 passed to time_ism is base_model's default seqlen
    # fastISM: one batch of 4096 sequences
    model_fism = fastISM.FastISM(model, test_correctness=False)
    time_ism(model_fism, [4096], 1000)
    
    # naive ISM baseline: one batch of 512 sequences (compare via "PER 100")
    model_nism = fastISM.NaiveISM(model)
    time_ism(model_nism, [512], 1000)
    
    print("----")

NUM CHANNELS : 32
BATCH: 4096	TIME: 62.42	PER 100: 1.52
BEST PER 100: 1.52
BATCH: 512	TIME: 30.33	PER 100: 5.92
BEST PER 100: 5.92
----
NUM CHANNELS : 64
BATCH: 4096	TIME: 75.61	PER 100: 1.85
BEST PER 100: 1.85
BATCH: 512	TIME: 54.83	PER 100: 10.71
BEST PER 100: 10.71
----
NUM CHANNELS : 128
BATCH: 4096	TIME: 116.60	PER 100: 2.85
BEST PER 100: 2.85
BATCH: 512	TIME: 107.96	PER 100: 21.09
BEST PER 100: 21.09
----
NUM CHANNELS : 256
BATCH: 4096	TIME: 194.74	PER 100: 4.75
BEST PER 100: 4.75
BATCH: 512	TIME: 237.54	PER 100: 46.39
BEST PER 100: 46.39
----


In [13]:
# 512 filters, benchmarked in its own cell with batch sizes halved relative to
# the 32-256 sweep (presumably to fit GPU memory — TODO confirm).
for num_channels in [512]:
    print("NUM CHANNELS : {}".format(num_channels))

    model = base_model(num_channels=num_channels)

    # the literal 1000 passed to time_ism is base_model's default seqlen
    # fastISM: one batch of 2048 sequences
    model_fism = fastISM.FastISM(model, test_correctness=False)
    time_ism(model_fism, [2048], 1000)
    
    # naive ISM baseline: one batch of 256 sequences (compare via "PER 100")
    model_nism = fastISM.NaiveISM(model)
    time_ism(model_nism, [256], 1000)
    
    print("----")

NUM CHANNELS : 512
BATCH: 2048	TIME: 217.55	PER 100: 10.62
BEST PER 100: 10.62
BATCH: 256	TIME: 296.11	PER 100: 115.67
BEST PER 100: 115.67
----


In [12]:
# 1024 filters, benchmarked in its own cell with batch sizes a quarter of those
# used for the 32-256 sweep (presumably to fit GPU memory — TODO confirm).
for num_channels in [1024]:
    print("NUM CHANNELS : {}".format(num_channels))

    model = base_model(num_channels=num_channels)

    # the literal 1000 passed to time_ism is base_model's default seqlen
    # fastISM: one batch of 1024 sequences
    model_fism = fastISM.FastISM(model, test_correctness=False)
    time_ism(model_fism, [1024], 1000)
    
    # naive ISM baseline: one batch of 128 sequences (compare via "PER 100")
    model_nism = fastISM.NaiveISM(model)
    time_ism(model_nism, [128], 1000)
    
    print("----")

NUM CHANNELS : 1024
BATCH: 1024	TIME: 290.17	PER 100: 28.34
BEST PER 100: 28.34
BATCH: 128	TIME: 423.25	PER 100: 330.67
BEST PER 100: 330.67
----


### Varying Number of Conv Layers

In [12]:
# Sweep the number of Conv1D/BatchNormalization/MaxPool1D stages (1-3);
# every other base_model argument keeps its default.
for num_convs in [1,2,3]:
    print("NUM CONVS : {}".format(num_convs))

    model = base_model(num_convs=num_convs)

    # the literal 1000 passed to time_ism is base_model's default seqlen
    # fastISM: one batch of 4096 sequences
    model_fism = fastISM.FastISM(model, test_correctness=False)
    time_ism(model_fism, [4096], 1000)
    
    # naive ISM baseline: one batch of 512 sequences (compare via "PER 100")
    model_nism = fastISM.NaiveISM(model)
    time_ism(model_nism, [512], 1000)
    
    print("----")

NUM CONVS : 1
BATCH: 4096	TIME: 542.97	PER 100: 13.26
BEST PER 100: 13.26
BATCH: 512	TIME: 129.08	PER 100: 25.21
BEST PER 100: 25.21
----
NUM CONVS : 2
BATCH: 4096	TIME: 314.00	PER 100: 7.67
BEST PER 100: 7.67
BATCH: 512	TIME: 185.16	PER 100: 36.16
BEST PER 100: 36.16
----
NUM CONVS : 3
BATCH: 4096	TIME: 214.54	PER 100: 5.24
BEST PER 100: 5.24
BATCH: 512	TIME: 218.98	PER 100: 42.77
BEST PER 100: 42.77
----


In [14]:
# Continue the conv-stage sweep for 4-6 stages, with the same batch sizes as the
# 1-3 cell; every other base_model argument keeps its default.
for num_convs in [4,5,6]:
    print("NUM CONVS : {}".format(num_convs))

    model = base_model(num_convs=num_convs)

    # the literal 1000 passed to time_ism is base_model's default seqlen
    # fastISM: one batch of 4096 sequences
    model_fism = fastISM.FastISM(model, test_correctness=False)
    time_ism(model_fism, [4096], 1000)
    
    # naive ISM baseline: one batch of 512 sequences (compare via "PER 100")
    model_nism = fastISM.NaiveISM(model)
    time_ism(model_nism, [512], 1000)
    
    print("----")

NUM CONVS : 4
BATCH: 4096	TIME: 194.86	PER 100: 4.76
BEST PER 100: 4.76
BATCH: 512	TIME: 237.65	PER 100: 46.42
BEST PER 100: 46.42
----
NUM CONVS : 5
BATCH: 4096	TIME: 172.98	PER 100: 4.22
BEST PER 100: 4.22
BATCH: 512	TIME: 240.04	PER 100: 46.88
BEST PER 100: 46.88
----
NUM CONVS : 6
BATCH: 4096	TIME: 188.00	PER 100: 4.59
BEST PER 100: 4.59
BATCH: 512	TIME: 244.23	PER 100: 47.70
BEST PER 100: 47.70
----
