[View in Colaboratory](https://colab.research.google.com/github/lukasheinrich/pyhf-benchmarks/blob/master/colab/GPU_standalone.ipynb)

# GPU pyhf interpolation

In [1]:
import tensorflow as tf
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

Found GPU at: /device:GPU:0


In [2]:
import numpy as np
import timeit

def setup(N,float_t):
    def _hfinterp_code1(histogramssets, alphasets):
        allset_allhisto_deltas_up = tf.divide(histogramssets[:,:,2], histogramssets[:,:,1])
        allset_allhisto_deltas_dn = tf.divide(histogramssets[:,:,0], histogramssets[:,:,1])

        def ones(shape):
            return tf.ones(shape, dtype = float_t)
        def zeros(shape):
            return tf.zeros(shape, dtype = float_t)
        def where(mask,a,b):
            mask = tf.cast(mask, float_t)
            inv_mask = tf.cast(1-mask, float_t)
            return mask * a + inv_mask * b
        
        allsets_allhistos_masks = where(alphasets > 0, ones(alphasets.shape), zeros(alphasets.shape))
        bases_up = tf.einsum('sa,shb->shab', ones(alphasets.shape), allset_allhisto_deltas_up)
        bases_dn = tf.einsum('sa,shb->shab', ones(alphasets.shape), allset_allhisto_deltas_dn)
        exponents = tf.einsum('sa,shb->shab', tf.abs(alphasets), ones(allset_allhisto_deltas_up.shape))
        masks = tf.einsum('sa,shb->shab', allsets_allhistos_masks, ones(allset_allhisto_deltas_up.shape))

        bases = where(masks, bases_up, bases_dn)
        return tf.pow(bases, exponents)

    
    a_shape = (100,1)
    h_shape = (100,100,3,N)

    with tf.device('/gpu:0'):
      a = tf.placeholder(float_t, a_shape)
      h = tf.placeholder(float_t, h_shape)
      gpu_op = _hfinterp_code1(h,a)

    return [gpu_op], [a,h], [a_shape,h_shape]

def run_it(ops,args,shapes):
    r = session.run(ops, {args[0]: np.random.uniform(-1,1, size = shapes[0]), args[1]: np.random.uniform(-1,1, size = shapes[1])})
    return r

results = []
for n in np.linspace(500,7000,14):
    n = int(n)
    session = tf.Session()
    try:
        print('Running ops')
        exec_time = timeit.timeit('run_it(o,a,s)', number=10, setup="from __main__ import run_it, setup; import tensorflow as tf; o,a,s = setup({},tf.float16)".format(n))
        results.append((n,exec_time))
        print('N: {} time: {}'.format(n,exec_time))
    finally:
        # For now, TPU sessions must be shutdown separately from
        # closing the session.
        session.close()


Running ops
N: 500 time: 4.00114458400094
Running ops
N: 1000 time: 7.1576597379989835
Running ops
N: 1500 time: 10.509401482999237
Running ops
N: 2000 time: 13.962642597000013
Running ops
N: 2500 time: 17.150409201000002
Running ops
N: 3000 time: 20.761605435998717
Running ops
N: 3500 time: 24.125610609999057
Running ops
N: 4000 time: 27.18831246099944
Running ops
N: 4500 time: 30.785486556998876
Running ops
N: 5000 time: 33.70743698700062
Running ops
N: 5500 time: 37.93001072200059
Running ops
N: 6000 time: 41.75870220199977
Running ops
N: 6500 time: 45.12628070900064
Running ops
N: 7000 time: 48.2928322709995


In [3]:
results

[(500, 4.00114458400094),
 (1000, 7.1576597379989835),
 (1500, 10.509401482999237),
 (2000, 13.962642597000013),
 (2500, 17.150409201000002),
 (3000, 20.761605435998717),
 (3500, 24.125610609999057),
 (4000, 27.18831246099944),
 (4500, 30.785486556998876),
 (5000, 33.70743698700062),
 (5500, 37.93001072200059),
 (6000, 41.75870220199977),
 (6500, 45.12628070900064),
 (7000, 48.2928322709995)]

In [16]:
62.98668317800002

62.98668317800002

In [7]:
tf.bfloat16==tf.float16

False