In [17]:
%pylab inline
import openpmd_api as io
import numpy as np
from numba import cuda, jit
import math
import scipy.constants

Populating the interactive namespace from numpy and matplotlib


In [25]:
path = "/home/spreng88/runs/BunchInit2/simOutput/checkpoints/checkpoint_%T.h5"

series = io.Series( path, io.Access.read_write)

In [28]:
# add field to iteration 0
iteration = series.iterations[0] 

cell_depth = iteration.get_attribute("cell_depth")          # z
cell_height = iteration.get_attribute("cell_height")        # y
cell_width = iteration.get_attribute("cell_width")          # x

unit_efield = iteration.get_attribute("unit_efield")
unit_bfield = iteration.get_attribute("unit_bfield")
unit_charge = iteration.get_attribute("unit_charge")
unit_mass = iteration.get_attribute("unit_mass")
unit_speed = iteration.get_attribute("unit_speed")
unit_length = iteration.get_attribute("unit_length")
unit_time = iteration.get_attribute("unit_time")

pi = scipy.constants.pi
c = scipy.constants.c / unit_speed
eps0 = iteration.get_attribute("eps0")
mue0 = iteration.get_attribute("mue0")

In [29]:

@cuda.jit(device=True)
def particleV(px, py, pz, mass):
    """
    calculate particle speed from momentum
    """
    
    m2p2 = math.sqrt( (mass)**2 + px**2 + py**2 + pz**2)
    
    vx = px / m2p2 * c
    vy = py / m2p2 * c
    vz = pz / m2p2 * c
    
    return vx, vy, vz

@cuda.jit(device=True)
def param1(rx,ry,rz, rqx,rqy,rqz, vx,vy,vz):
    """
    calculate some parameters that could be reused
    r: position of field calculation
    rq: position of charge q at time t
    rq_tr: position of charge at retarded time tr
    """
    
    #
    # solution equation:
    # dt = t - tr
    # r where we want the field
    # rq position of charge
    # (c^2 - |v|^2) * dt^2 - 2(r*v - rq*v) * dt + 2 * r*rq - |r|^2 - |rq|^2
    #
    a = (c**2 - (vx**2 + vy**2 + vz**2))
    b = -2 * ((rx-rqx)*vx + (ry-rqy)*vy + (rz-rqz)*vz)
    d = 2 * (rx*rqx + ry*rqy + rz*rqz) - (rx**2 + ry**2 + rz**2) - (rqx**2 + rqy**2 + rqz**2)
    dt = (-b + math.sqrt(b**2 - 4*a*d)) / (2*a)
    
    rq_trx = rqx - vx * dt
    rq_try = rqy - vy * dt
    rq_trz = rqz - vz * dt

    dvx = rx - rq_trx
    dvy = ry - rq_try
    dvz = rz - rq_trz
    
    distance = math.sqrt(dvx**2 + dvy**2 + dvz**2)
    
    if distance == 0:
        distance = -1
    
    #n =  distanceVec / distance
    nx = dvx / distance
    ny = dvy / distance
    nz = dvz / distance
    
    return nx, ny, nz, distance, dvx, dvy, dvz #n, distance, distanceVec

@cuda.jit(device=True)
def retardedEFieldParallel(q, nx, ny, nz, distance, dvx, dvy, dvz, vx, vy, vz):
    """
    field for one position
    q: charge
    nx, ny, nz: unit vector to retarded position
    distance: distance to retarded position
    dvx, dvy, dvz: distance Vector to retarded position
    vx, vy, vz: velocity vector of the charge
    """
    
    if distance == -1:
        nx = 1
        ny = 0
        xz = 0
    
    factor = q / (4 * pi * eps0)
    
    ux = c * nx - vx
    uy = c * ny - vy
    uz = c * nz - vz
    

    scalar = factor * distance / (dvx*ux + dvy*uy + dvz*uz)**3 * (c**2 - (vx**2 + vy**2 + vz**2))
    
    return  scalar * ux, scalar * uy, scalar * uz



In [30]:
@cuda.jit
def particleParallel(Ex,Ey,Ez, Bx,By,Bz, q_, rqx_,rqy_,rqz_, px_,py_,pz_, mass_, weighting_, xdim, ydim, zdim):
    """
    Ex,Ey,Ez E field where new field is added
    Bx,By,Bz B -"-
    
    _ is on all input arrays read from checkpoint 
    q_ array from checkpoint with charges
    rq_ position data of particles
    p_ momentum of particles
    
    xdim, ydim, zdim length of dimension of Ex,Bx, ...
    Ex/Bx need to be 1D for atomic add
    """
    tix = cuda.threadIdx.x
    bix = cuda.blockIdx.x
    bdx = cuda.blockDim.x
    
    index = tix + bix * bdx    # particle index
    
    q = q_[index] * weighting_[index]
    rqx = rqx_[index] * cell_width
    rqy = rqy_[index] * cell_height
    rqz = rqz_[index] * cell_depth
    px = px_[index]
    py = py_[index]
    pz = pz_[index]
    mass = mass_[index] * weighting_[index]
    
    for x in range(xdim):
        
        rx = x * cell_width
        rex = (x + 0.5) * cell_width
        
        for y in range(ydim):
            
            ry = y * cell_height
            rey = (y + 0.5) * cell_height
            
            for z in range(zdim):    
                
                rz = z * cell_depth
                rez = (z + 0.5) * cell_depth

                vx, vy, vz = particleV(px, py, pz, mass)

                # x
                nxx, nxy, nxz, distancex, dvxx, dvxy, dvxz = param1(rex, ry, rz, rqx, rqy, rqz, vx,vy,vz)
                ex, _, _ = retardedEFieldParallel(q, nxx, nxy, nxz, distancex, dvxx, dvxy, dvxz, vx, vy, vz)

                # y
                nyx, nyy, nyz, distancey, dvyx, dvyy, dvyz = param1(rx, rey, rz, rqx, rqy, rqz, vx,vy,vz)
                _, ey, _ = retardedEFieldParallel(q, nyx, nyy, nyz, distancey, dvyx, dvyy, dvyz, vx, vy, vz)

                # z
                nzx, nzy, nzz, distancez, dvzx, dvzy, dvzz = param1(rx, ry, rez, rqx, rqy, rqz, vx,vy,vz)
                _, _, ez = retardedEFieldParallel(q, nzx, nzy, nzz, distancez, dvzx, dvzy, dvzz , vx, vy, vz)
                
                fieldIndex = x + y * xdim + z * xdim * ydim
                
                cuda.atomic.add(Ex, fieldIndex, ex)
                cuda.atomic.add(Ey, fieldIndex, ey)
                cuda.atomic.add(Ez, fieldIndex, ez)
                
                bx = ( nxy * ez - nxz * ey ) / c
                by = ( nxz * ex - nxx * ez ) / c
                bz = ( nxx * ey - nxy * ex ) / c

                cuda.atomic.add(Bx, fieldIndex, bx)
                cuda.atomic.add(By, fieldIndex, by)
                cuda.atomic.add(Bz, fieldIndex, bz)

In [35]:
species = "e"

xpos_incell = iteration.particles[species]["position"]["x"][:]
ypos_incell = iteration.particles[species]["position"]["y"][:]
zpos_incell = iteration.particles[species]["position"]["z"][:]
xpos_offset = iteration.particles[species]["positionOffset"]["x"][:]
ypos_offset = iteration.particles[species]["positionOffset"]["y"][:]
zpos_offset = iteration.particles[species]["positionOffset"]["z"][:]
momentumx = iteration.particles[species]["momentum"]["x"][:]
momentumy = iteration.particles[species]["momentum"]["y"][:]
momentumz = iteration.particles[species]["momentum"]["z"][:]
weightings = iteration.particles[species]["weighting"][io.Record_Component.SCALAR][:]
charge = iteration.particles[species]["charge"][io.Record_Component.SCALAR][:]
mass = iteration.particles[species]["mass"][io.Record_Component.SCALAR][:]

series.flush()

xpos = xpos_incell + np.float32(xpos_offset)
ypos = ypos_incell + np.float32(ypos_offset)
zpos = zpos_incell + np.float32(zpos_offset)

In [32]:
xdim = 128
ydim = 768
zdim = 128
shape = xdim * ydim * zdim
ex = np.zeros(shape=shape, dtype=np.float32)
ey = np.zeros(shape=shape, dtype=np.float32)
ez = np.zeros(shape=shape, dtype=np.float32)
bx = np.zeros(shape=shape, dtype=np.float32)
by = np.zeros(shape=shape, dtype=np.float32)
bz = np.zeros(shape=shape, dtype=np.float32)

starttime = time.time()
particleParallel[80, 32](ex, ey, ez, bx, by, bz, charge, xpos, ypos, zpos, momentumx, momentumy, momentumz, mass, weightings, xdim, ydim, zdim)
print(time.time()-starttime)



CudaAPIError: [700] Call to cuMemcpyDtoH results in UNKNOWN_CUDA_ERROR

In [None]:
ef = np.sqrt(ex**2+ey**2+ez**2)
bf = np.sqrt(bx**2+by**2+bz**2)

In [None]:
figsize(25, 16)
#imshow((ex[64, 375:425, 40:96].T))
#imshow((ex[2, :, :].T))
imshow((ef.reshape(128, 768, 128)[3, :300:4, :].T))
colorbar()

In [39]:
for i in iteration.particles:
    print(i)
    for j in iteration.particles[i]:
        print("    ", j)

e
     charge
     mass
     momentum
     position
     positionOffset
     weighting
i
     charge
     mass
     momentum
     position
     positionOffset
     weighting
