In [1]:
import reaction_parallel_cython
import numpy as np

In [2]:
from numba import jit, prange

In [3]:
N = 10000

cellsize = 100

Particle_dtype = np.dtype([
    ('pos', np.float64, (3,)),
    ('vel', np.float64, (3,)),
    ('E', np.float64),
    ('id', np.int32)
], align=True)  # 添加 align=True

Cell_dtype = np.dtype([
    ('id', np.int32),
    ('index', np.int32, (3,)),
    ('film', np.int32, (5,)),
    ('normal', np.float64, (3,))
], align=True)  # 添加 align=True

particle = np.zeros(N, dtype=Particle_dtype)
# cell = np.zeros((cellsize, cellsize, cellsize), dtype=CELLDTYPE)

pos = np.random.rand(N, 3)*99
vel = np.random.rand(N, 3)
vel = vel/np.linalg.norm(vel)

# 填充粒子数据
particle['pos'] = pos
particle['vel'] = vel
particle['E'] = 100
particle['id'] = 1

cell = np.zeros((cellsize, cellsize, cellsize), dtype=Cell_dtype)

nn = np.array([5, 6, 5])
nn = nn/np.linalg.norm(nn)

cell['id'] = 0
cell['index'] = np.array([1,2,3])
cell['film'] = np.array([10,2,3,0,0])
cell['normal'] = nn

cell[:,:,50:]['id'] = 1
print(Particle_dtype.itemsize)
print(Cell_dtype.itemsize)

64
64


In [4]:
a = reaction_parallel_cython.particle_parallel(particle, cell)

In [7]:
print(a.shape)
print(a[:100])

(10000,)
[0 0 0 2 0 2 0 0 0 1 0 0 1 0 0 0 0 0 1 0 0 0 1 0 0 1 0 0 0 0 0 1 0 0 0 1 0
 0 0 0 2 0 0 0 2 0 2 2 0 0 2 0 0 0 1 2 2 0 0 2 2 2 2 2 0 0 2 0 0 0 1 2 0 1
 0 2 2 2 0 2 0 2 0 1 0 0 0 0 0 0 0 2 0 0 2 2 2 2 2 2]


In [54]:
%timeit reaction_parallel_cython.particle_parallel(particle, cell)

471 μs ± 12.4 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [23]:
print(a)

[0.         0.01286841 0.02126602 ... 0.         0.         0.        ]


In [50]:
@jit(nopython=True, parallel=True)
def particle_parallel_numba(particles, cell):

    dot_product = np.zeros(particles.shape[0], dtype=np.double)

    for i in prange(particles.shape[0]):
        cellijk = particles[i]['pos'].astype(np.int32)

        if cell[cellijk[0],cellijk[1], cellijk[2]]['id'] == 1:
            dot_product[i] = np.dot(particles[i]['vel'], cell[cellijk[0],cellijk[1], cellijk[2]]['normal'])

    return dot_product

In [55]:
np.arange(0, np.pi/2, 0.01, dtype=np.double).shape

(158,)

In [58]:
np.arange(0, np.pi/2, 0.01, dtype=np.double)

array([0.  , 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1 ,
       0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2 , 0.21,
       0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3 , 0.31, 0.32,
       0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4 , 0.41, 0.42, 0.43,
       0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5 , 0.51, 0.52, 0.53, 0.54,
       0.55, 0.56, 0.57, 0.58, 0.59, 0.6 , 0.61, 0.62, 0.63, 0.64, 0.65,
       0.66, 0.67, 0.68, 0.69, 0.7 , 0.71, 0.72, 0.73, 0.74, 0.75, 0.76,
       0.77, 0.78, 0.79, 0.8 , 0.81, 0.82, 0.83, 0.84, 0.85, 0.86, 0.87,
       0.88, 0.89, 0.9 , 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98,
       0.99, 1.  , 1.01, 1.02, 1.03, 1.04, 1.05, 1.06, 1.07, 1.08, 1.09,
       1.1 , 1.11, 1.12, 1.13, 1.14, 1.15, 1.16, 1.17, 1.18, 1.19, 1.2 ,
       1.21, 1.22, 1.23, 1.24, 1.25, 1.26, 1.27, 1.28, 1.29, 1.3 , 1.31,
       1.32, 1.33, 1.34, 1.35, 1.36, 1.37, 1.38, 1.39, 1.4 , 1.41, 1.42,
       1.43, 1.44, 1.45, 1.46, 1.47, 1.48, 1.49, 1.

In [57]:
np.linspace(0, np.pi/2, 180, dtype=np.double)

array([0.        , 0.0087754 , 0.0175508 , 0.0263262 , 0.03510159,
       0.04387699, 0.05265239, 0.06142779, 0.07020319, 0.07897859,
       0.08775398, 0.09652938, 0.10530478, 0.11408018, 0.12285558,
       0.13163098, 0.14040638, 0.14918177, 0.15795717, 0.16673257,
       0.17550797, 0.18428337, 0.19305877, 0.20183416, 0.21060956,
       0.21938496, 0.22816036, 0.23693576, 0.24571116, 0.25448656,
       0.26326195, 0.27203735, 0.28081275, 0.28958815, 0.29836355,
       0.30713895, 0.31591435, 0.32468974, 0.33346514, 0.34224054,
       0.35101594, 0.35979134, 0.36856674, 0.37734213, 0.38611753,
       0.39489293, 0.40366833, 0.41244373, 0.42121913, 0.42999453,
       0.43876992, 0.44754532, 0.45632072, 0.46509612, 0.47387152,
       0.48264692, 0.49142231, 0.50019771, 0.50897311, 0.51774851,
       0.52652391, 0.53529931, 0.54407471, 0.5528501 , 0.5616255 ,
       0.5704009 , 0.5791763 , 0.5879517 , 0.5967271 , 0.60550249,
       0.61427789, 0.62305329, 0.63182869, 0.64060409, 0.64937

In [56]:
np.linspace(0, np.pi/2, 180, dtype=np.double).shape

(180,)

In [38]:

def particle_parallel(particles, cell):

    dot_product = np.zeros(particles.shape[0], dtype=np.double)

    for i in range(particles.shape[0]):
        celli = particles[i]['pos'][0].astype(np.int32)
        cellj = particles[i]['pos'][1].astype(np.int32)
        cellk = particles[i]['pos'][2].astype(np.int32)

        if cell[celli, cellj, cellk]['id'] == 1:
            dot_product[i] = np.dot(particles[i]['vel'], cell[celli, cellj, cellk]['normal'])

    return dot_product

In [53]:
%timeit particle_parallel_numba(particle, cell)

470 μs ± 2.53 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [48]:
%timeit particle_parallel(particle, cell)

94.7 ms ± 316 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)
