In [1]:
import time

import numba
import numpy as np

# ExaFMM-T
import exafmm.laplace as laplace

# PyExaFMM
from fmm import Fmm
import fmm.kernel as kernel

In [7]:
# Rebuild the test database from scratch. `rm -f` succeeds even when test.hdf5
# does not exist yet, so the `&&` chain still runs on a fresh checkout.
! rm -f test.hdf5 && fmm generate-test-data -c test && fmm compute-operators -c test

Generating random sources & targets
Computing operators
Computing octree
Computing Inner Surface of Order 4
Computing Outer Surface of Order 4
Computing Inverse of Check To Equivalent Gram Matrix
Computing M2M & L2L Operators
Computed (1/8) M2M/L2L operators
Computed (2/8) M2M/L2L operators
Computed (3/8) M2M/L2L operators
Computed (4/8) M2M/L2L operators
Computed (5/8) M2M/L2L operators
Computed (6/8) M2M/L2L operators
Computed (7/8) M2M/L2L operators
Computed (8/8) M2M/L2L operators
Computed operators for (1/4) M2L Levels
Computed operators for (2/4) M2L Levels
Computed operators for (3/4) M2L Levels
Computed operators for (4/4) M2L Levels
Total time elapsed 0 minutes and 12 seconds
[0m

# Test the speed of kernel evaluation

In [8]:
# Prefactor 1 / (4*pi) applied to 1 / |x - y| in the kernel below.
M_INV_4PI = 1.0 / (4*np.pi)


def laplace_cpu_non_optimized(x, y):
    """
    Plain-Python (not JIT-compiled) Laplace kernel for a pair of 3D points.

    Evaluates 1 / (4*pi*|x - y|). The expression is symmetric in its two
    arguments, so it does not matter which one is the source and which one
    is the target.

    Parameters:
    -----------
    x : indexable with at least three numeric components
        First point; only x[0], x[1] and x[2] are read.
    y : indexable with at least three numeric components
        Second point; only y[0], y[1] and y[2] are read.

    Returns:
    --------
    float
        Kernel value, or 0. when the squared distance is zero or NaN
        (coincident points contribute nothing).
    """
    dist_sq = (x[0]-y[0])**2+(x[1]-y[1])**2+(x[2]-y[2])**2

    # Handle coincident points (and NaN input) up front. Letting them reach
    # np.reciprocal(0.) gives the same 0. result but emits a divide-by-zero
    # RuntimeWarning for every self-interaction.
    if not dist_sq > 0:
        return 0.

    value = np.reciprocal(np.sqrt(dist_sq))*M_INV_4PI

    # Retained guard: never hand a non-finite value back to the caller.
    return value if value < np.inf else 0.


@numba.njit(cache=True, parallel=True, fastmath=True, error_model="numpy")
def laplace_p2p(sources, targets, source_densities):
    """
    JIT-compiled direct (P2P) summation using kernel.laplace_cpu.

    For each target, sums kernel.laplace_cpu(target, source) multiplied by the
    matching source density over all sources. The loop over targets is a
    numba.prange loop.

    Parameters:
    -----------
    sources : sequence indexed along its first axis
        One entry per source point.
    targets : sequence indexed along its first axis
        One entry per target point.
    source_densities : sequence
        One weight per source, indexed like `sources`.

    Returns:
    --------
    np.array(shape=(ntargets), dtype=np.float32)
        Accumulated value at each target.
    """
    n_src = len(sources)
    n_tgt = len(targets)

    potentials = np.zeros(shape=(n_tgt), dtype=np.float32)

    for t in numba.prange(n_tgt):
        point = targets[t]
        acc = 0
        for s in range(n_src):
            acc += kernel.laplace_cpu(point, sources[s])*source_densities[s]

        potentials[t] = acc

    return potentials


def laplace_p2p_non_optimized(sources, targets, source_densities):
    """
    Pure-Python direct (P2P) summation built on laplace_cpu_non_optimized.

    Parameters:
    -----------
    sources : sequence indexed along its first axis
        One entry per source point.
    targets : sequence indexed along its first axis
        One entry per target point.
    source_densities : sequence
        One weight per source, indexed like `sources`.

    Returns:
    --------
    np.array(shape=(ntargets), dtype=np.float32)
        Accumulated value at each target.
    """
    n_src = len(sources)
    potentials = np.zeros(shape=(len(targets)), dtype=np.float32)

    for t, point in enumerate(targets):
        acc = 0
        for s in range(n_src):
            weight = source_densities[s]
            acc += laplace_cpu_non_optimized(point, sources[s])*weight

        potentials[t] = acc

    return potentials

### Set Up ExaFMM-T Experiment

In [9]:
# PyExaFMM experiment object; its points, densities and config feed ExaFMM-T below.
e = Fmm('test')

# ExaFMM-T source and target instances built from the PyExaFMM data.
sources = laplace.init_sources(e.sources, e.source_densities)
targets = laplace.init_targets(e.targets)

# Same expansion order and leaf capacity as the PyExaFMM configuration.
p = e.config['order_equivalent']
fmm = laplace.LaplaceFmm(p=p, ncrit=e.config['max_points'], filename=f'C{p}E{p}.dat')

# Build the tree, then run the ExaFMM-T evaluation on it.
tree = laplace.setup(sources, targets, fmm)
result = laplace.evaluate(tree, fmm)

### Run kernel evaluations

For ExaFMM-T, the source code of the `verify` function has been edited to remove the error and gradient calculations.

In [12]:
# NOTE(review): unexplained scratch arithmetic (4.71 * 1000 / 60 = 78.5). The
# meaning and units of these numbers are not stated in the notebook; confirm
# what they represent or remove the cell.
(4.71*1000)/60

78.5

In [5]:
# ExaFMM-T: time fmm.verify on the leaves of the tree built above.
# NOTE(review): presumably sample=False makes verify use every target rather
# than a sub-sample; confirm against the ExaFMM-T bindings.
%timeit r = fmm.verify(tree.leafs, sample=False)

23.3 ms ± 1.04 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [6]:
# Numba kernel.
# Call once before timing so that JIT compilation (or loading the on-disk
# cache) does not skew %timeit's loop-count calibration or its first run.
laplace_p2p(e.sources, e.targets, e.source_densities)
%timeit laplace_p2p(e.sources, e.targets, e.source_densities)

43.7 ms ± 1.21 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
