In [1]:
from numba import cuda
import numpy as np
import sympy as sp

import CudaExpression as ce

### Evaluating a symbolic expression on a GPU

First the expression is defined as a string and parameter values are created.

In [4]:
# Symbolic expression in six symbols (H0, H1, k0, k1, k2, k3), given as a string.
expression = 'H0*k1/(k2 + k3) - H1*k3/(k1 - k2) - k0'
# Number of parameter sets to evaluate.
n = 1_000_000
# One row per parameter set, one column per symbol. A seeded generator is used
# so that Restart & Run All reproduces exactly the same inputs.
R = np.random.default_rng(0).uniform(0, 1, (n, 6))

To evaluate the expression on a set of parameters, the eval method can be used:

In [5]:
# Build the GPU expression object from the expression string, then time one
# call of its eval method on the parameter array R.
# NOTE(review): if eval compiles GPU code on first use, the timing below
# includes that compilation - confirm against CudaExpression.
gpu_expr = ce.GPUExpression(expression)
%time gpu_results = gpu_expr.eval(R)

CPU times: total: 547 ms
Wall time: 585 ms


To evaluate the expression from within a custom kernel, pass its tensor representation (obtained from `toTensor`) to the `CudaExpression.eval_inline` function.

In [6]:
# Evaluate the expression from inside a user-written kernel, using its tensor
# representation.
@cuda.jit
def kernel(params,expression_tensor,eval_buffer,results):
    """Evaluate the expression for one row of ``params`` per thread.

    The thread with 1-D grid index ``row`` passes ``expression_tensor``,
    ``params[row]`` and ``eval_buffer[row]`` to ``ce.eval_inline`` and stores
    the returned value in ``results[row]``. Threads whose index is not a
    valid row of ``params`` do nothing.

    NOTE(review): ``eval_buffer[row]`` is presumably per-thread scratch space
    for ``eval_inline`` - confirm against CudaExpression.
    """
    row = cuda.grid(1)
    # Surplus threads (grid larger than the data) exit without touching memory.
    if row >= params.shape[0]:
        return
    results[row] = ce.eval_inline(expression_tensor, params[row], eval_buffer[row])

In [8]:
%%time
cuTensor = gpu_expr.toTensor(on_device=True)

cuR = cuda.to_device(R)
buffer = np.zeros((n,cuTensor.shape[0]))
cuBuffer = cuda.to_device(buffer)
results = np.zeros(n)
cuResults = cuda.to_device(results)

kernel[1000,1000](cuR,cuTensor,cuBuffer,cuResults)
cuda.synchronize()
results = cuResults.copy_to_host()

CPU times: total: 156 ms
Wall time: 164 ms


The CUDA version runs much faster than the NumPy version on the CPU but delivers the same results.

In [9]:
# Evaluate the expression on the example parameters using NumPy to verify the results.
function_numpy = gpu_expr.toNumpy(on_array=True)
%time results_numpy = function_numpy(R)
# Last expression of the cell: displays whether the kernel results and the
# NumPy results agree within np.allclose's default tolerances.
np.allclose( results, results_numpy )

CPU times: total: 1.53 s
Wall time: 1.53 s


True

### Vectorial expressions

A set of expressions (a vector function) can be evaluated together using the `GPUExpressionVector` class. The class
* returns a vector
* exploits sparsity of the vector
* exploits repeated expressions shared between the components

In [10]:
# Define a vectorial expression: one string per component, over the four
# symbols k1..k4.
vector_expression = ['k4/(k1*k2)','k1/k4+k3/k2','k1+k2/(k1-k3)','k4 + k2']
# Generate example parameters: one row per parameter set, one column per
# symbol. A seeded generator is used so that Restart & Run All reproduces
# exactly the same inputs.
n = 1_000_000
R2 = np.random.default_rng(1).uniform(0, 1, (n, 4))

In [13]:
# Build the vector expression object from the list of component strings and
# time one call of its eval method on R2.
# Note: this rebinds `results`, which the scalar-kernel cell above also uses.
gpu_vec = ce.GPUExpressionVector(vector_expression)
%time results = gpu_vec.eval(R2)

CPU times: total: 312 ms
Wall time: 314 ms


In [12]:
# NumPy reference implementation of the vector expression, timed on the same
# R2 input.
vector_expr_numpy = gpu_vec.toNumpy()
%time results_numpy = vector_expr_numpy(R2)
# Reshape the NumPy output to one row per parameter set before comparing it
# with `results`; the last expression displays whether they agree within
# np.allclose's default tolerances.
np.allclose(results_numpy.reshape([R2.shape[0],-1]),results)

CPU times: total: 2.78 s
Wall time: 2.78 s


True