In [1]:
from numba import cuda
import numpy as np
import sympy as sp

import CudaExpression as ce

### Evaluating a symbolic expression on a GPU

First, the expression is defined as a string, and random parameter values are generated.

In [2]:
# Expression to evaluate, given as a string. It references six symbols
# (H0, H1, k0, k1, k2, k3), and R below has six columns; how columns are
# mapped to symbols is decided by CudaExpression — TODO confirm the ordering.
expression = 'H0*k1/(k2 + k3) - H1*k3/(k1 - k2) - k0'
n = 1000000
# Seeded generator: the sampled parameter sets (and every timing/comparison
# that depends on them) are identical after Restart Kernel -> Run All.
rng = np.random.default_rng(0)
# n parameter sets, uniformly sampled from [0, 1).
R = rng.uniform(0, 1, (n, 6))

To evaluate the expression on a set of parameters, use the `eval` method:

In [3]:
# Wrap the expression string in a GPUExpression object.
gpu_expr = ce.GPUExpression(expression)
# Evaluate the expression on the parameter array R; %time reports how long the
# call takes. Presumably one result per row of R — TODO confirm against
# CudaExpression.
%time gpu_results = gpu_expr.eval(R)

CPU times: total: 500 ms
Wall time: 589 ms


To use the expression from within a kernel, the tensor representation of the expression can be passed to the `CudaExpression.eval_inline` function.

In [4]:
# Evaluate the expression from inside a CUDA kernel via its tensor representation.
@cuda.jit
def kernel(expression_tensor,params,vars,eval_buffer,results):
    """Evaluate the tensor-encoded expression for one row per thread.

    Each thread takes its absolute 1-D grid index, and — if that index is a
    valid row of ``params`` — stores the value returned by ``ce.eval_inline``
    for that row in ``results``.

    Parameters
    ----------
    expression_tensor : tensor representation of the expression, forwarded
        unchanged to ``ce.eval_inline``.
    params : 2-D array; row ``i`` is passed as the parameter set of thread ``i``.
    vars : array indexed by row; row ``i`` is passed alongside ``params[i]``.
        (The name shadows the ``vars`` builtin; it is kept for interface
        stability.)
    eval_buffer : array indexed by row; row ``i`` is handed to
        ``ce.eval_inline`` — presumably as per-thread scratch space, TODO confirm.
    results : 1-D output array; element ``i`` receives the evaluated value.
    """
    row = cuda.grid(1)
    # Threads beyond the last row of params have nothing to do.
    if row >= params.shape[0]:
        return
    results[row] = ce.eval_inline(expression_tensor, params[row], vars[row], eval_buffer[row])

In [5]:
%%time
cuTensor = gpu_expr.toTensor(on_device=True)

cuR = cuda.to_device(R)
X = np.zeros((n,0))
cuX = cuda.to_device(X)
buffer = np.zeros((n,cuTensor.shape[0]))
cuBuffer = cuda.to_device(buffer)
results = np.zeros(n)
cuResults = cuda.to_device(results)

kernel[1000,1000](cuTensor,cuR,cuX,cuBuffer,cuResults)
cuda.synchronize()
results = cuResults.copy_to_host()

CPU times: total: 156 ms
Wall time: 155 ms


The CUDA version runs much faster than the NumPy version on the CPU but delivers the same results.

In [6]:
# Cross-check: evaluate the same expression on the example parameters with
# NumPy and compare against the values copied back from the kernel.
function_numpy = gpu_expr.toNumpy(on_array=True)
results_numpy = function_numpy(R)
np.allclose(results, results_numpy)

True

### Vectorial expressions

A set of expressions (a vector function) can be created with `GPUExpressionVector`. The class
* returns a vector
* exploits sparsity of the vector
* exploits repetitive expressions between the components

In [7]:
# defining a vectorial expression: four components over the symbols k1..k4
vector_expression = ['k4/(k1*k2)','k1/k4+k3/k2','k1+k2/(k1-k3)','k4 + k2']
# generating example parameters from a seeded generator, so that the
# GPU-vs-NumPy comparison further down is evaluated on exactly the same
# samples on every run and any mismatch can be reproduced
n = 1000000
rng = np.random.default_rng(1)
# n parameter sets with four columns each, uniformly sampled from [0, 1)
R2 = rng.uniform(0, 1, (n, 4))

In [8]:
# Build a vector-valued expression object from the list of component strings.
gpu_vec = ce.GPUExpressionVector(vector_expression)
# Evaluate all components on R2 and time the call. The second argument is an
# empty list — presumably the (unused) variables input, TODO confirm against
# CudaExpression. Note that this rebinds `results`.
%time results = gpu_vec.eval(R2,[])

CPU times: total: 594 ms
Wall time: 597 ms


In [9]:
vector_expr_numpy = gpu_vec.toNumpy()
%time results_numpy = vector_expr_numpy(R2)
np.allclose(results_numpy.reshape([R2.shape[0],-1]),results)

CPU times: total: 2.72 s
Wall time: 2.72 s


False

## Parameters vs. Variables

Both parameters and variables can be defined. They are passed to `eval` as two separate input arrays.

In [10]:
# Seeded generator so the sampled inputs are reproducible across kernel restarts.
rng = np.random.default_rng(2)
# 10000 parameter sets with four columns each, uniformly sampled from [0, 1).
R = rng.uniform(0, 1, (10000, 4))
# 1000 variable sets with two columns each, uniformly sampled from [1, 100).
X = rng.uniform(1, 100, (1000, 2))

In [11]:
# Declare which symbols of the expression are parameters (a1..a4) and which
# are variables (x, y). The given orders presumably fix the column order
# expected in the parameter and variable arrays — TODO confirm against
# CudaExpression. Note that this rebinds `gpu_expr`.
gpu_expr = ce.GPUExpression('a1*x + a2*x**2 + a3*y + a4*y**2',
                            params_order=['a1','a2','a3','a4'],
                            variables_order=['x','y'])

# Parameters (R) and variables (X) are passed as two separate arrays.
# NOTE(review): R has 10000 rows while X has 1000 rows; whether eval pairs rows
# or evaluates every parameter set at every variable point is not visible
# here — confirm the expected shapes and the shape of gpu_results.
gpu_results = gpu_expr.eval(R,X)

