In [1]:
import os

import tensorflow as tf
import numpy as np
from scipy.integrate import odeint
import time

os.environ["CUDA_VISIBLE_DEVICES"] = "0"

Simulation to test nUser varied from 1 to 100, expensive matrix multiplication (250 x 250 matrix, run 100 times) is inside tensorflow graph which will be called by odeint solver each dt step. <br>
<br>
In this time, each 250 x 250 small matrix is extracted by slicing from big matrix created in shape: (250 * nUser, 250)  <br>
Each user has different 250 x 250 matrix to begin with. <br>
<br>
To test if GPU operation can be scaled according to nUser.  <br>
<br>
Tensorflow has to run `n for-loop` and return outputls which is list of output from each user

In [2]:
def run_simulation(nUser):
    '''
    Given nUser, create graph for running nUser for-loop, while computing 250 x 250 matrix multiplication for 100
    times in each User for-loop. 
    '''

    tf.reset_default_graph()
    start        = time.time()

    with tf.device('/device:GPU:0'):

        bigMatrix = tf.Variable(np.random.random(size=(250 * nUser, 250)), dtype=tf.float32, name='bigMatrix')
        
        def func(bigMatrix, user):
            with tf.variable_scope('user{:05}'.format(user)):
                with tf.variable_scope('matrix_multiplication'):
                    smallMatrix = bigMatrix[(250*user):(250*user+250), :]
                    
                    for i in range(100):
                        if i == 0:
                            output = tf.sigmoid(smallMatrix * smallMatrix)
                        else:
                            output = tf.sigmoid(output * smallMatrix)
            
            return output

        outputls = [func(bigMatrix, user) for user in range(nUser)]
        result   = tf.reduce_mean(tf.concat(outputls, axis=0), axis=0)
        init     = tf.global_variables_initializer()
    
    Graphtime = time.time() - start
    config    = tf.ConfigProto(gpu_options={'allow_growth':True})
    sess      = tf.Session(config=config)
    sess.run( init )
    tfwriter  = tf.summary.FileWriter('./tensorlog/simulation_slice/nUser{}/'.format(nUser), sess.graph)
    tfwriter.close()

    def rhs(y, t):

        _ = sess.run(result)
        
        return -y

    y0        = 1.0
    tspan     = np.linspace(0, 100, 101)

    start     = time.time()
    y         = odeint(rhs, y0, tspan)
    ODEtime   = time.time() - start
    
    sess.close()

    return (Graphtime, ODEtime)

In [3]:
nUser_list = [1, 10, 1000]

for n in nUser_list:
    Graphtime, ODEtime = run_simulation(n)
    print('{}, {}, {}'.format(n, Graphtime, ODEtime))

1, 0.1308605670928955, 0.4203524589538574
10, 1.254892349243164, 2.8384456634521484
1000, 123.94565439224243, 284.97401118278503
