In [12]:
import numba
import numpy as np
from fmm import Fmm

In [13]:
# Numba Threading Layer Configuration

## (Intel CPUs Only) Sets lifetime of OpenMP threads to 0ms
## As computation contains large serial (Python) components
! export KMP_BLOCKTIME=0

## Limit number of threads created by BLAS/LAPACK functions
## Called by Numpy
! export OMP_NUM_THREADS=1

## Define 'places' at which threads are assigned
! export OMP_PLACES=cores

## Makes thread assignment go succesively through available
## places. In our case, through each core.
! export OMP_PROC_BIND=close

## Select OpenMP as threading layer for Numba, the uniformity
## of FMM operators makes it preferable to TBB
! export NUMBA_THREADING_LAYER='omp'

In [14]:
# Generate test data (optional)
# ! fmm generate-test-data -c test_config

In [15]:
# Run operator and tree pre-computations
# ! fmm compute-operators -c test_config

In [16]:
# Build the Fmm object from the 'test_config' configuration name; every
# later cell reads its tree, point and expansion data through `e`.
e = Fmm('test_config', verbose=True)

In [17]:
e.check_surface.shape

(152, 3)

In [18]:
# First evaluation; the per-operator timings are printed in the output.
# NOTE(review): these timings presumably include Numba JIT compilation,
# which would explain why most are larger than in the second run — confirm.
e.run()

P2M: 3.335664987564087
M2M: 0.662036657333374
L2L: 0.5579426288604736
M2L: 15.78796100616455
L2T: 1.6008579730987549
M2T: 2.0403101444244385
S2L: 2.1673386096954346
P2P: 2.3011441230773926


In [19]:
e.target_potentials

array([[ 48807.39427834,  39154.28962847,  41700.33143874,
         43362.26889026],
       [ 50649.72737242,  48893.43492212,  48965.56770777,
         35361.40255406],
       [ 49043.70692062,  44094.72243262,  41416.28542653,
         40510.08719101],
       ...,
       [ 48949.69436782, -41555.64031006, -39893.9999803 ,
        -43953.823858  ],
       [ 48578.08066193, -38310.12512956, -41557.00835126,
        -43491.38561761],
       [ 49259.94644903, -39362.08272263, -42017.09630178,
        -48214.08700834]])

In [20]:
# Second evaluation on the same object, used for the steady-state timings.
# NOTE(review): e.clear() is not called between the two runs, and the
# target_potentials displayed after this run differ from those after the
# first run. Confirm whether state carries over between run() calls and
# whether e.clear() should be called before re-running.
e.run()

P2M: 0.4088256359100342
M2M: 0.25632715225219727
L2L: 0.352445125579834
M2L: 12.961425304412842
L2T: 0.3796539306640625
M2T: 0.006421089172363281
S2L: 0.41264843940734863
P2P: 0.39309120178222656


In [21]:
e.target_potentials

array([[ 454185.34772359,  323603.23578108,  335454.21787825,
         342571.33442522],
       [ 468867.28058628,  365567.93760551,  368077.92547722,
         339777.29983884],
       [ 456053.12436632,  346895.0476667 ,  331924.28626897,
         333181.45585609],
       ...,
       [ 455029.38403143, -335231.91650552, -327929.56278   ,
        -343495.06312685],
       [ 452050.33700497, -319560.16742083, -331925.60667243,
        -340130.23705584],
       [ 457659.67606269, -327148.75827426, -336924.85721975,
        -360723.18015431]])

In [22]:
e.clear()

# Functions without Cache-Local Optimizations

In [8]:
import adaptoctree.morton as morton


import fmm.kernel as kernel
import fmm.surface as surface

## L2T

In [23]:
@numba.njit(cache=True, parallel=True)
def u_l2t(
    leaves, 
    nleaves,
    targets,
    x0,
    r0,
    key_to_leaf_index,
    target_index_pointer,
    equivalent_surface,
    alpha_outer,
    key_to_index,
    nequivalent_points,
    local_expansions,
):
    """
    Timing-only variant of the L2T step, parallelised over leaves.

    For each of the first `nleaves` entries of `leaves`, the equivalent
    surface is scaled to the leaf and used as the source points, the leaf's
    slice of `local_expansions` is used as the source densities, and both the
    Laplace potential and its gradient are evaluated at the leaf's targets.

    The kernel results are NOT stored anywhere: this function returns None
    and writes to none of its arguments, so it only measures evaluation cost.

    Parameters:
    -----------
    leaves : indexable of node keys, one per leaf.
    nleaves : int
        Number of leaves to process (extent of the parallel loop).
    targets : array sliced per leaf via `target_index_pointer`.
    x0, r0 : passed through to `morton.find_physical_center_from_key`;
        `r0` is also passed to `surface.scale_surface`.
        NOTE(review): presumably the root-box centre and half side length.
    key_to_leaf_index : mapping from node key to leaf position.
    target_index_pointer : indexable such that entries `k` and `k+1` bound
        the targets of leaf `k`.
    equivalent_surface, alpha_outer : passed to `surface.scale_surface`.
    key_to_index : mapping from node key to its block in `local_expansions`.
    nequivalent_points : int
        Length of one node's block in `local_expansions`.
    local_expansions : flat array of per-node expansion coefficients.
    """
    for leaf_pos in numba.prange(nleaves):
        key = leaves[leaf_pos]
        leaf_level = morton.find_level(key)
        leaf_center = morton.find_physical_center_from_key(key, x0, r0)

        # Targets owned by this leaf: a contiguous slice of `targets`.
        leaf_idx = key_to_leaf_index[key]
        trg_begin = target_index_pointer[leaf_idx]
        trg_end = target_index_pointer[leaf_idx+1]
        leaf_targets = targets[trg_begin:trg_end]

        # Equivalent surface scaled to this leaf acts as the source points.
        scaled_surface = surface.scale_surface(
            equivalent_surface, r0, leaf_level, leaf_center, alpha_outer
        )

        # This node's block of local expansion coefficients.
        exp_begin = key_to_index[key]*nequivalent_points
        exp_end = exp_begin+nequivalent_points
        leaf_expansion = local_expansions[exp_begin:exp_end]

        # Potential, then gradient; both results are deliberately dropped.
        kernel.laplace_p2p_serial(scaled_surface, leaf_targets, leaf_expansion)
        kernel.laplace_gradient(scaled_surface, leaf_targets, leaf_expansion)

In [24]:
%%timeit
# Time u_l2t on the data held by `e`.
# The cell importing `morton`, `kernel` and `surface` must have been run in
# the current kernel first; otherwise Numba fails at typing time with a
# NameError such as "name 'morton' is not defined".
u_l2t(
    e.leaves,
    e.nleaves,
    e.targets,
    e.x0,
    e.r0,
    e.key_to_leaf_index,
    e.target_index_pointer,
    e.equivalent_surface,
    e.alpha_outer,
    e.key_to_index,
    e.nequivalent_points,
    e.local_expansions
)

TypingError: Failed in nopython mode pipeline (step: nopython frontend)
NameError: name 'morton' is not defined

In [11]:
# Relative difference of 406 over 374, in percent.
# NOTE(review): presumably two timing measurements being compared — record
# where these two numbers came from and in which units.
(406/374 - 1)*100

8.55614973262031

## Near Field

In [18]:
@numba.njit(cache=True, parallel=True)
def u_near_field(
    leaves,
    nleaves,
    key_to_leaf_index,
    key_to_index,
    targets,
    u_lists,
    target_index_pointer,
    sources,
    source_densities,
    source_index_pointer,
    target_potentials,
):
    """
    Accumulate near-field (U-list and self) contributions for every leaf.

    The outer loop over leaves runs in parallel; the loop over a leaf's
    U list is serial. For each leaf, the potential and gradient due to the
    sources of every U-list neighbour, and then due to the leaf's own
    sources, are added in place to the leaf's rows of `target_potentials`:
    column 0 receives the potential, columns 1: receive the gradient.

    Parameters:
    -----------
    leaves : indexable of node keys, one per leaf.
    nleaves : int
        Number of leaves to process (extent of the parallel loop).
    key_to_leaf_index : mapping from node key to leaf position, used to
        index `target_index_pointer` and `source_index_pointer`.
    key_to_index : mapping from node key to its row in `u_lists`.
    targets : array sliced per leaf via `target_index_pointer`.
    u_lists : 2D array; row `k` holds the keys of the U-list neighbours of
        node `k`, with unused slots set to -1.
    target_index_pointer : indexable such that entries `k` and `k+1` bound
        the targets (and rows of `target_potentials`) of leaf `k`.
    sources, source_densities : arrays sliced per leaf via
        `source_index_pointer`.
    source_index_pointer : indexable such that entries `k` and `k+1` bound
        the sources of leaf `k`.
    target_potentials : 2D array, modified in place.

    Returns:
    --------
    None
    """
    for i in numba.prange(nleaves):
        target = leaves[i]
        target_leaf_index = key_to_leaf_index[target]
        target_index = key_to_index[target]

        # Bounds of this leaf's targets; reused for every write below.
        target_lidx = target_index_pointer[target_leaf_index]
        target_ridx = target_index_pointer[target_leaf_index+1]
        targets_at_node = targets[target_lidx:target_ridx]

        # Drop the -1 padding from this node's U list.
        u_list = u_lists[target_index]
        u_list = u_list[u_list != -1]

        # Serial loop over the U list.
        for j in range(len(u_list)):
            source = u_list[j]
            source_leaf_index = key_to_leaf_index[source]

            source_lidx = source_index_pointer[source_leaf_index]
            source_ridx = source_index_pointer[source_leaf_index+1]
            sources_at_node = sources[source_lidx:source_ridx]
            source_densities_at_node = source_densities[source_lidx:source_ridx]

            # Each kernel is evaluated exactly once per neighbour. The
            # original evaluated both an extra time and threw the results
            # away, doubling the work.
            # NOTE(review): this assumes the kernels have no side effects
            # beyond their return value — confirm against fmm.kernel.
            target_potentials[target_lidx:target_ridx, 1:] += \
                kernel.laplace_gradient(sources_at_node, targets_at_node, source_densities_at_node)
            target_potentials[target_lidx:target_ridx, 0] += \
                kernel.laplace_p2p_serial(sources_at_node, targets_at_node, source_densities_at_node)

        # Now compute contribution due to sources in the node itself
        source_lidx = source_index_pointer[target_leaf_index]
        source_ridx = source_index_pointer[target_leaf_index+1]
        sources_at_node = sources[source_lidx:source_ridx]
        source_densities_at_node = source_densities[source_lidx:source_ridx]

        target_potentials[target_lidx:target_ridx, 1:] += \
            kernel.laplace_gradient(sources_at_node, targets_at_node, source_densities_at_node)
        target_potentials[target_lidx:target_ridx, 0] += \
            kernel.laplace_p2p_serial(sources_at_node, targets_at_node, source_densities_at_node)
        

In [19]:
# Zero-filled buffer with the shape and dtype of e.target_potentials.
# u_near_field adds into its `target_potentials` argument in place, so
# re-create this buffer before every call to avoid accumulating results.
trgs = np.zeros_like(e.target_potentials)

In [20]:

# Add the near-field contributions for all leaves into `trgs`, in place.
u_near_field(
    e.leaves,
    e.nleaves,
    e.key_to_leaf_index,
    e.key_to_index,
    e.targets,
    e.u_lists,
    e.target_index_pointer,
    e.sources,
    e.source_densities,
    e.source_index_pointer,
    trgs
)

In [21]:
trgs

array([[  223.14999342,  1197.1537977 ,  2317.36129308,  3181.22905736],
       [  326.76690775,  6263.60121144,  5813.42888359, -7594.6074319 ],
       [  240.48383728,  3316.92081524,  2664.30640447,  1087.49962281],
       ...,
       [  283.60594509, -2173.29907485, -1333.2120276 , -3890.04652794],
       [  261.23388083,  -822.89158094, -2864.96064596, -3950.8588662 ],
       [  283.23126976,  -729.31850595, -2497.08182184, -6316.22367678]])