In [2]:
%pylab inline
import math
import time

import numpy as np
import openpmd_api as io
import scipy.constants
from numba import cuda, jit

Populating the interactive namespace from numpy and matplotlib


In [None]:
# Drop a Series handle left over from an earlier run of this notebook.
# On a fresh kernel the name does not exist yet, so only delete it when present.
if "series" in globals():
    del series

In [4]:
# Input checkpoint files; "%T" in the file name is the openPMD placeholder for the iteration.
path = "/bigdata/hplsim/scratch/spreng88/runs/bigFieldWithoutInit2/simOutput/checkpoints/checkpoint_%T.h5"
# alternative input kept for reference:
#   "/bigdata/hplsim/scratch/spreng88/runs/restart/checkpoint_%T.h5"

# NOTE(review): the series is opened read-only; the store_chunk cells further
# down presumably need a writable Series -- confirm before writing.
series = io.Series(
    path,
    io.Access.read_only,
)

In [5]:
nth = 1          # the particle kernel evaluates every nth particle and scales its field by nth
write = False    # when True, the run cell stores the computed fields into the checkpoint

# the fields are added to iteration 0
iteration = series.iterations[0]

# cell extents: depth is z, height is y, width is x
cell_depth, cell_height, cell_width = (
    iteration.get_attribute(key)
    for key in ("cell_depth", "cell_height", "cell_width")
)

# unit conversion factors stored with the iteration
(unit_efield, unit_bfield, unit_charge, unit_mass,
 unit_speed, unit_length, unit_time) = (
    iteration.get_attribute(key)
    for key in ("unit_efield", "unit_bfield", "unit_charge", "unit_mass",
                "unit_speed", "unit_length", "unit_time")
)

# physical constants; c is divided by unit_speed, eps0/mue0 are taken from the checkpoint
pi = scipy.constants.pi
c = scipy.constants.c / unit_speed
eps0, mue0 = iteration.get_attribute("eps0"), iteration.get_attribute("mue0")

In [6]:

@cuda.jit(device=True)
def particleV(px, py, pz, mass):
    """
    calculate particle velocity from relativistic momentum

    v = p * c / sqrt((m*c)^2 + |p|^2)

    px, py, pz: momentum components
    mass: mass that belongs to this momentum (callers pass mass * weighting)
    returns: vx, vy, vz

    c is the speed of light from the notebook namespace.
    """

    # sqrt((m c)^2 + |p|^2) equals gamma * m * c; the mass term needs the factor
    # c so that the expression stays correct when c is not exactly 1
    gamma_mc = math.sqrt((mass * c)**2 + px**2 + py**2 + pz**2)

    vx = px / gamma_mc * c
    vy = py / gamma_mc * c
    vz = pz / gamma_mc * c

    return vx, vy, vz

@cuda.jit(device=True)
def param1(rx,ry,rz, rqx,rqy,rqz, vx,vy,vz):
    """
    calculate the retarded position of a charge moving with constant velocity
    and the geometry between that position and the field point

    rx, ry, rz: position r where the field is wanted
    rqx, rqy, rqz: position rq of the charge at time t
    vx, vy, vz: velocity of the charge

    returns: nx, ny, nz, distance, dvx, dvy, dvz
        dvx, dvy, dvz: vector from the retarded charge position rq_tr to r
        distance: length of that vector, or -1 if it is exactly zero
        nx, ny, nz: that vector divided by distance
    """

    # vector from the present charge position to the field point
    sx = rx - rqx
    sy = ry - rqy
    sz = rz - rqz

    #
    # solution equation for dt = t - tr:
    # |r - rq + v * dt|^2 = (c * dt)^2
    # (c^2 - |v|^2) * dt^2 - 2 * (r - rq)*v * dt - |r - rq|^2 = 0
    #
    a = (c**2 - (vx**2 + vy**2 + vz**2))
    b = -2 * (sx*vx + sy*vy + sz*vz)
    # negated sum of squares: never positive, so rounding cannot push the
    # discriminant below zero when r is very close to rq
    d = -(sx**2 + sy**2 + sz**2)
    dt = (-b + math.sqrt(b**2 - 4*a*d)) / (2*a)

    # charge position at the retarded time
    rq_trx = rqx - vx * dt
    rq_try = rqy - vy * dt
    rq_trz = rqz - vz * dt

    dvx = rx - rq_trx
    dvy = ry - rq_try
    dvz = rz - rq_trz

    distance = math.sqrt(dvx**2 + dvy**2 + dvz**2)

    # sentinel for "field point sits on the charge"; also avoids 0/0 below
    if distance == 0:
        distance = -1

    nx = dvx / distance
    ny = dvy / distance
    nz = dvz / distance

    return nx, ny, nz, distance, dvx, dvy, dvz

@cuda.jit(device=True)
def retardedEFieldParallel(q, nx, ny, nz, distance, dvx, dvy, dvz, vx, vy, vz):
    """
    E field of a uniformly moving charge at one position

    E = q / (4 pi eps0) * distance * (c^2 - |v|^2) * u / (dv * u)^3,  u = c * n - v

    q: charge
    nx, ny, nz: unit vector from the retarded position to the field point
    distance: distance to retarded position (-1 marks a field point on the charge)
    dvx, dvy, dvz: distance vector to retarded position
    vx, vy, vz: velocity vector of the charge

    returns: ex, ey, ez
    """

    # On the charge itself dv is zero and the expression below divides by zero.
    # The previous handling set a default direction (with a typo, `xz` instead
    # of `nz`) and still produced inf/nan; such a point now contributes nothing.
    if distance == -1:
        return 0.0, 0.0, 0.0

    factor = q / (4 * pi * eps0)

    ux = c * nx - vx
    uy = c * ny - vy
    uz = c * nz - vz

    scalar = factor * distance / (dvx*ux + dvy*uy + dvz*uz)**3 * (c**2 - (vx**2 + vy**2 + vz**2))

    return  scalar * ux, scalar * uy, scalar * uz



In [7]:
@cuda.jit
def particleParallel(Ex,Ey,Ez, Bx,By,Bz, q_, rqx_,rqy_,rqz_, px_,py_,pz_, mass_, weighting_, xdim, ydim, zdim, particleCount):
    """
    Add the retarded E and B field of every nth particle to the field arrays.

    One thread handles one particle and loops over all xdim*ydim*zdim cells.
    Several threads therefore update the same cell, which is why every update
    is an atomic add.

    Ex,Ey,Ez E field where new field is added
    Bx,By,Bz B -"-
        all six are 1D; cell (x, y, z) is stored at x + y*xdim + z*xdim*ydim

    _ is on all input arrays read from checkpoint (one entry per particle)
    q_ charges
    rqx_,rqy_,rqz_ positions in units of cells
    px_,py_,pz_ momenta
    mass_ masses
    weighting_ weightings, multiplied onto charge and mass

    xdim, ydim, zdim length of dimension of Ex,Bx, ...
    particleCount number of entries in the particle arrays

    Only particles 0, nth, 2*nth, ... are evaluated; E and B of each are
    multiplied by nth to stand in for the skipped particles.
    """
    # thread index -> index of the particle this thread evaluates
    index = (cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x) * nth

    if index < particleCount:

        q = q_[index] * weighting_[index]
        rqx = rqx_[index] * cell_width
        rqy = rqy_[index] * cell_height
        rqz = rqz_[index] * cell_depth
        mass = mass_[index] * weighting_[index]

        # the velocity depends on the particle only, not on the cell
        vx, vy, vz = particleV(px_[index], py_[index], pz_[index], mass)

        for x in range(xdim):

            rx = x * cell_width

            for y in range(ydim):

                ry = y * cell_height

                for z in range(zdim):

                    rz = z * cell_depth

                    # NOTE(review): all components are evaluated at the cell
                    # origin (x, y, z). Earlier half-cell shifted evaluations
                    # were overwritten by this one before being used and have
                    # been dropped -- confirm whether a staggered grid is wanted.
                    nx, ny, nz, distance, dvx, dvy, dvz = param1(rx, ry, rz, rqx, rqy, rqz, vx, vy, vz)
                    ex, ey, ez = retardedEFieldParallel(q, nx, ny, nz, distance, dvx, dvy, dvz, vx, vy, vz)

                    # B = n x E / c
                    bx = ( ny * ez - nz * ey ) / c
                    by = ( nz * ex - nx * ez ) / c
                    bz = ( nx * ey - ny * ex ) / c

                    fieldIndex = x + y * xdim + z * xdim * ydim

                    cuda.atomic.add(Ex, fieldIndex, ex * nth)
                    cuda.atomic.add(Ey, fieldIndex, ey * nth)
                    cuda.atomic.add(Ez, fieldIndex, ez * nth)

                    # B gets the same nth scaling as E
                    cuda.atomic.add(Bx, fieldIndex, bx * nth)
                    cuda.atomic.add(By, fieldIndex, by * nth)
                    cuda.atomic.add(Bz, fieldIndex, bz * nth)

In [8]:
@cuda.jit
def FieldParallel(Ex,Ey,Ez, Bx,By,Bz, q_, rqx_,rqy_,rqz_, px_,py_,pz_, mass_, weighting_, xdim, ydim, zdim, particleCount):
    """
    Add the retarded E and B field of the particles to the field arrays.

    One thread handles one cell and sums over the particles 0, nth, 2*nth, ...
    below particleCount; each contribution is multiplied by nth.

    Ex,Ey,Ez E field where new field is added
    Bx,By,Bz B -"-
        all six are 1D; cell (x, y, z) is stored at x + y*xdim + z*xdim*ydim

    _ is on all input arrays (one entry per particle)
    q_ charges
    rqx_,rqy_,rqz_ positions in units of cells
    px_,py_,pz_ momenta
    mass_ masses
    weighting_ weightings, multiplied onto charge and mass

    xdim, ydim, zdim length of dimension of Ex,Bx, ...
    particleCount number of entries in the particle arrays
    """
    # thread index -> index of the cell this thread evaluates
    fieldIndex = cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x

    if fieldIndex < xdim*ydim*zdim:

        # invert fieldIndex = x + y * xdim + z * xdim * ydim with integer arithmetic
        plane = xdim * ydim
        z = fieldIndex // plane
        y = (fieldIndex - z * plane) // xdim
        x = fieldIndex - z * plane - y * xdim

        # NOTE(review): all components are evaluated at the cell origin.
        # Earlier half-cell shifted evaluations were overwritten before being
        # used and have been dropped -- confirm whether a staggered grid is wanted.
        rx = x * cell_width
        ry = y * cell_height
        rz = z * cell_depth

        sum_ex = 0.0
        sum_ey = 0.0
        sum_ez = 0.0
        sum_bx = 0.0
        sum_by = 0.0
        sum_bz = 0.0

        # previously only particle 0 was evaluated and particleCount was ignored
        for index in range(0, particleCount, nth):

            q = q_[index] * weighting_[index]
            rqx = rqx_[index] * cell_width
            rqy = rqy_[index] * cell_height
            rqz = rqz_[index] * cell_depth
            mass = mass_[index] * weighting_[index]

            vx, vy, vz = particleV(px_[index], py_[index], pz_[index], mass)

            nx, ny, nz, distance, dvx, dvy, dvz = param1(rx, ry, rz, rqx, rqy, rqz, vx, vy, vz)
            ex, ey, ez = retardedEFieldParallel(q, nx, ny, nz, distance, dvx, dvy, dvz, vx, vy, vz)

            sum_ex += ex * nth
            sum_ey += ey * nth
            sum_ez += ez * nth

            # B = n x E / c, scaled like E
            sum_bx += ( ny * ez - nz * ey ) / c * nth
            sum_by += ( nz * ex - nx * ez ) / c * nth
            sum_bz += ( nx * ey - ny * ex ) / c * nth

        cuda.atomic.add(Ex, fieldIndex, sum_ex)
        cuda.atomic.add(Ey, fieldIndex, sum_ey)
        cuda.atomic.add(Ez, fieldIndex, sum_ez)

        cuda.atomic.add(Bx, fieldIndex, sum_bx)
        cuda.atomic.add(By, fieldIndex, sum_by)
        cuda.atomic.add(Bz, fieldIndex, sum_bz)
            

In [9]:
species = "b"

# request the particle records; the requests are issued in the order
# position, positionOffset, momentum, weighting, charge, mass
xpos_incell, ypos_incell, zpos_incell = (
    iteration.particles[species]["position"][axis][:] for axis in "xyz"
)
xpos_offset, ypos_offset, zpos_offset = (
    iteration.particles[species]["positionOffset"][axis][:] for axis in "xyz"
)
momentumx, momentumy, momentumz = (
    iteration.particles[species]["momentum"][axis][:] for axis in "xyz"
)
weightings, charge, mass = (
    iteration.particles[species][record][io.Record_Component.SCALAR][:]
    for record in ("weighting", "charge", "mass")
)

series.flush()

particleCount = len(mass)

# position = in-cell position + cell offset (offset converted to float32)
xpos, ypos, zpos = (
    incell + np.float32(offset)
    for incell, offset in (
        (xpos_incell, xpos_offset),
        (ypos_incell, ypos_offset),
        (zpos_incell, zpos_offset),
    )
)

# free some memory
del xpos_incell, ypos_incell, zpos_incell, xpos_offset, ypos_offset, zpos_offset

In [11]:
print(particleCount)
# read the extent from the record component instead of slicing with [:],
# which would request the complete field only to look at its shape
print(tuple(iteration.meshes["E"]["x"].shape))

4308593
(192, 512, 192)


In [None]:
# extent of the stored field, indexed (z, y, x); taken from the record
# component so that no field data has to be requested for it
fieldShape = tuple(iteration.meshes["E"]["x"].shape)
xdim = fieldShape[2]
ydim = fieldShape[1]
zdim = fieldShape[0]

print("Cells (x, y, z, total): (", xdim, ydim, zdim, xdim*ydim*zdim, ")")

blockdim = 32#256                                        # number of threads per block (multiple 32 for optimal speed)
# number of blocks in the grid: one thread per nth particle, rounded up with
# integer arithmetic
griddim = -(-particleCount // (blockdim * nth))

# for test on a smaller number of particles
#blockdim = 1
#griddim = 1

print("particles to be processed:", blockdim*griddim)

# flat result arrays, one value per cell
shape = xdim * ydim * zdim
ex = np.zeros(shape=shape, dtype=np.float32)
ey = np.zeros(shape=shape, dtype=np.float32)
ez = np.zeros(shape=shape, dtype=np.float32)
bx = np.zeros(shape=shape, dtype=np.float32)
by = np.zeros(shape=shape, dtype=np.float32)
bz = np.zeros(shape=shape, dtype=np.float32)

print("start time:", time.ctime())
starttime = time.time()
particleParallel[griddim, blockdim](ex, ey, ez, bx, by, bz, charge, xpos, ypos, zpos, momentumx, momentumy, momentumz, mass, weightings, xdim, ydim, zdim, particleCount)
exeTime = time.time()-starttime
print("time: ", exeTime, "s")
print("avgTime per particle per cell:", exeTime / (blockdim*griddim) / (xdim*ydim*zdim))

# NOTE(review): storing requires a Series that was opened writable -- confirm.
if write:
    iteration.meshes["E"]["x"].store_chunk(ex.reshape(zdim, ydim, xdim))
    iteration.meshes["E"]["y"].store_chunk(ey.reshape(zdim, ydim, xdim))
    iteration.meshes["E"]["z"].store_chunk(ez.reshape(zdim, ydim, xdim))
    iteration.meshes["B"]["x"].store_chunk(bx.reshape(zdim, ydim, xdim))
    iteration.meshes["B"]["y"].store_chunk(by.reshape(zdim, ydim, xdim))
    iteration.meshes["B"]["z"].store_chunk(bz.reshape(zdim, ydim, xdim))
    series.flush()
    print("wrote to checkpoint")
else:
    print("no data was written to the checkpoint")

In [None]:
# manual write: store the fields by hand when the run cell did not write them
if not write:
    _grid = (zdim, ydim, xdim)
    # same order as in the run cell: E x/y/z first, then B x/y/z
    for _record, _components in (("E", (ex, ey, ez)), ("B", (bx, by, bz))):
        for _component, _values in zip("xyz", _components):
            iteration.meshes[_record][_component].store_chunk(_values.reshape(_grid))
    series.flush()
    print("data written")

In [None]:
# Remove the notebook's name for the Series.
# NOTE(review): presumably meant to close the file; later cells that call
# series.flush() need the Series to be opened again -- confirm.
del series

In [None]:
# magnitude of the E and B field per cell
ef = np.sqrt(np.square(ex) + np.square(ey) + np.square(ez))
bf = np.sqrt(np.square(bx) + np.square(by) + np.square(bz))

In [None]:
# show the E field magnitude array
ef

In [None]:
# Plot one plane of |E|: the flat array is viewed as (z, y, x), plane z = 650
# is selected and transposed for display.
# NOTE(review): index 650 is outside the (192, 512, 192) grid printed above and
# only fits the 768-cell test grid -- confirm which run this cell belongs to.
figsize(10, 10)
#for i in np.arange(100,251,25, dtype=np.int32):
imshow((ef.reshape(zdim, ydim, xdim)[650, :, :].T))#, vmin=0.000, vmax=0.001)
colorbar()
show()

In [None]:
# one hand-made test particle; every array has a single entry
q = np.array([1.0])
weighting = np.array([3e3])
mass = np.array([5.72455701e-06])

# position in cells
rqx, rqy, rqz = np.array([384.0]), np.array([450.0]), np.array([384.0])

# momentum: only the y component is non-zero
px, py, pz = np.array([0.0]), np.array([8.42e-2]), np.array([0.0])

In [None]:
#FieldParallel(Ex,Ey,Ez, Bx,By,Bz, q_, rqx_,rqy_,rqz_, px_,py_,pz_, mass_, weighting_, xdim, ydim, zdim, particleCount)
# extent of the stored field, indexed (z, y, x); taken from the record
# component so that no field data has to be requested for it
fieldShape = tuple(iteration.meshes["E"]["x"].shape)
xdim = fieldShape[2]
ydim = fieldShape[1]
zdim = fieldShape[0]

shape = xdim*ydim*zdim
dtype = np.float32

ex = np.zeros(shape, dtype=dtype)
ey = np.zeros(shape, dtype=dtype)
ez = np.zeros(shape, dtype=dtype)
bx = np.zeros(shape, dtype=dtype)
by = np.zeros(shape, dtype=dtype)
bz = np.zeros(shape, dtype=dtype)

ex_d = cuda.to_device(ex)
ey_d = cuda.to_device(ey)
ez_d = cuda.to_device(ez)
bx_d = cuda.to_device(bx)
by_d = cuda.to_device(by)
bz_d = cuda.to_device(bz)

particleCount = 1

# one thread per cell, rounded up to whole blocks
threads = 256
blocks = (shape + threads - 1) // threads

print(blocks, threads)
print(time.ctime())
starttime = time.time()

# launch with the computed grid; the kernel ignores threads beyond the last
# cell, so a larger hard-coded grid only adds idle threads
FieldParallel[blocks, threads](ex_d,ey_d,ez_d, bx_d,by_d,bz_d, q, rqx,rqy,rqz, px,py,pz, mass, weighting, xdim,ydim,zdim, particleCount)
# make sure the kernel has finished before the timer is read
cuda.synchronize()
print(time.time()-starttime,"s")

In [None]:
# current wall-clock time
time.ctime()

In [None]:
# extent of the stored field, kept for reference; the grid below is set by hand
fieldShape = tuple(iteration.meshes["E"]["x"].shape)
xdim = 768#fieldShape[2]
ydim = 1024#fieldShape[1]
zdim = 768#fieldShape[0]
shape = xdim*ydim*zdim

dtype = np.float32

ex = np.zeros(shape, dtype=dtype)
ey = np.zeros(shape, dtype=dtype)
ez = np.zeros(shape, dtype=dtype)
bx = np.zeros(shape, dtype=dtype)
by = np.zeros(shape, dtype=dtype)
bz = np.zeros(shape, dtype=dtype)

ex_d = cuda.to_device(ex)
ey_d = cuda.to_device(ey)
ez_d = cuda.to_device(ez)
bx_d = cuda.to_device(bx)
by_d = cuda.to_device(by)
bz_d = cuda.to_device(bz)

particleCount = 1

# one thread per cell, rounded up to whole blocks
threads = 256
blocks = (shape + threads - 1) // threads

starttime = time.time()
FieldParallel[blocks, threads](ex_d,ey_d,ez_d, bx_d,by_d,bz_d, q, rqx,rqy,rqz, px,py,pz, mass, weighting, xdim,ydim,zdim, particleCount)
# make sure the kernel has finished before the timer is read
cuda.synchronize()
print("{:.4} s".format(time.time()-starttime))


In [None]:
## if the kernel dies here, start the jupyter session with more memory
# the six host field arrays need at least shape * 6 * floatSize bytes,
# with floatSize = 4 for float32 and 8 for float64
# copy each device array back into its host array
ex_d.copy_to_host(ex)
ey_d.copy_to_host(ey)
ez_d.copy_to_host(ez)
bx_d.copy_to_host(bx)
by_d.copy_to_host(by)
bz_d.copy_to_host(bz)


In [None]:
# show the device array object for Ex
ex_d

In [None]:
# drop the names of the six device field arrays
del ex_d, ey_d, ez_d, bx_d, by_d, bz_d

In [None]:
# drop the names of the six host field arrays
# NOTE(review): the magnitude cell that follows reads these arrays -- confirm
# the intended execution order.
del ex, ey, ez, bx, by, bz

In [None]:
# magnitude of the E and B field per cell
ef = np.sqrt(np.square(ex) + np.square(ey) + np.square(ez))
bf = np.sqrt(np.square(bx) + np.square(by) + np.square(bz))

In [None]:
# Plot log|B| on one plane: the flat array is viewed as (z, y, x), transposed
# to (x, y, z), and the plane y = 440 is selected.
# Cells where |B| is zero give -inf under np.log.
figsize(15,15)
imshow(np.log((bf.reshape(zdim,ydim,xdim).T[:,440])))#, vmin = 0, vmax = 1)
colorbar()

In [None]:
# Attempt to overwrite the records of species "b" with the single test particle
# defined above (in-cell position 0, cell offset = rq, momentum, weighting,
# charge, mass) and flush.
# NOTE(review): the Series is opened read-only at the top of the notebook, and
# the arrays assigned here are float64 -- confirm access mode and dtypes match
# the datasets before relying on this cell.
species = "b"

iteration.particles[species]["position"]["x"][:] = np.array([0.0])
iteration.particles[species]["position"]["y"][:] = np.array([0.0])
iteration.particles[species]["position"]["z"][:] = np.array([0.0])
iteration.particles[species]["positionOffset"]["x"][:] = rqx
iteration.particles[species]["positionOffset"]["y"][:] = rqy
iteration.particles[species]["positionOffset"]["z"][:] = rqz
iteration.particles[species]["momentum"]["x"][:] = px
iteration.particles[species]["momentum"]["y"][:] = py
iteration.particles[species]["momentum"]["z"][:] = pz
iteration.particles[species]["weighting"][io.Record_Component.SCALAR][:] = weighting
iteration.particles[species]["charge"][io.Record_Component.SCALAR][:] = q
iteration.particles[species]["mass"][io.Record_Component.SCALAR][:] = mass
series.flush()

In [None]:
# same write as above for position/x only, this time with an explicit float32 array
iteration.particles[species]["position"]["x"].store_chunk(np.array([0.0],dtype=np.float32))

In [None]:
# read position/x back to check what is stored; flush before looking at the values
test = iteration.particles[species]["position"]["x"][:]
series.flush()

In [None]:
# show the values read back
test

In [None]:
# NOTE(review): 'x' is accessed as a component of "position" elsewhere in this
# notebook; delete_attribute presumably does not remove it -- confirm.
iteration.particles[species]['position'].delete_attribute('x')

In [None]:
# NOTE(review): 'position' is accessed as a record of the species elsewhere in
# this notebook; delete_attribute presumably does not remove it -- confirm.
iteration.particles[species].delete_attribute('position')

In [None]:
# list the members of the ParticleSpecies class (API exploration)
dir(io.ParticleSpecies)

In [None]:
# print every attribute name of the species, one per line
for i in iteration.particles[species].attributes:
    print(i)

In [None]:
# contour plot of |E| on the plane z = 150 (flat array viewed as (z, y, x), transposed for display)
figsize(10,10)
contour(ef.reshape(zdim,ydim,xdim)[150].T)

In [None]:
# histogram of the particle x positions (in cells)
hist(xpos)