In [1]:
from pycuda import gpuarray
import pycuda.autoinit
import pycuda.driver as drv
from pycuda.compiler import SourceModule
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import clear_output
%load_ext autoreload
%autoreload 1

In [73]:
ker = SourceModule("""
#define _x  ( threadIdx.x + blockIdx.x * blockDim.x )
#define _y  ( threadIdx.y + blockIdx.y * blockDim.y )
#define _z  ( threadIdx.z + blockIdx.z * blockDim.z )
#define _width  ( blockDim.x * gridDim.x )
#define _height ( blockDim.y * gridDim.y  )
#define _depth  ( blockDim.z * gridDim.z  )
#define _xm(x)  ( (x + _width) % _width )
#define _ym(y)  ( (y + _height) % _height )
#define _zm(z)  ( (z + _depth) % _depth )
#define _index(x,y,z)  ( _zm(z)  + _depth * (_ym(y) + _xm(x) * _height) )

__global__ void poisson(float* V, int iters, int imax, int jmax, int kmax) {
    //int x = _x, y = _y, z = _z;
    for (int kk=0; kk<iters; kk++) {
        for (int x = _x; x < imax+1; x += _width) {
            for (int y = _y; y < jmax+1; y += _height) {
                for (int z = _z; z < kmax-1; z += _depth) {
                    float r = 0;
                    if (x >= 1  && y >= 1 && z >= 1 ) {
                            r = V[_index(x+1,y,z)]
                                     +V[_index(x-1,y,z)]
                                     +V[_index(x,y+1,z)]
                                     +V[_index(x,y-1,z)]
                                     +V[_index(x,y,z+1)]
                                     +V[_index(x,y,z-1)];
                            r = r/6.0-V[_index(x,y,z)]/6.0;
                    }
                    __syncthreads();
                    V[_index(x,y,z)]=r;                   
                }
            }
        }
    }
}
__global__ void test(float* V) {
    int x = _x, y = _y, z = _z;
    V[_index(x,y,z)] = _index(x,y,z);
}
""")


poisson_ker = ker.get_function("poisson")
test_ker    = ker.get_function("test")

In [74]:
v = np.zeros((8, 8, 8)).astype(np.float32)
v_gpu = gpuarray.to_gpu(v)
test_ker(v_gpu, grid=(1,1,1), block=(8,8,8))
v = v_gpu.get()
for i in range(511):
    unravelled_index = np.unravel_index(i, v.shape)
    assert i == int(v[unravelled_index])

In [75]:
def python_poisson(V, iters, imax, jmax, kmax):
    R = V.copy()
    for kk in range(iters):
        for x in range(1, imax+1):
            for y in range(1, jmax+1):
                for z in range(1, kmax-1):
                    r = V[x+1,y,z]\
                             +V[x-1,y,z]\
                             +V[x,y+1,z]\
                             +V[x,y-1,z]\
                             +V[x,y,z+1]\
                             +V[x,y,z-1]
                    r = r/6.0-V[x,y,z]/6.0;
                    R[x,y,z]=r
        V[:] = R[:]
    return R

In [76]:
imax = jmax = kmax = 30
V_orig = np.random.rand(imax+2, jmax+2, kmax+2).astype(np.float32) * 10
V_orig[0] = 0
V_orig[:,0,:] = 0
V_orig[:,:,0] = 0
V_orig[-1] = 0
V_orig[:,-1,:] = 0
V_orig[:,:,-1] = 0
g = np.random.rand(imax+2, jmax+2, kmax+2).astype(np.float32) * 10

In [77]:
V = V_orig.copy()
V = python_poisson(V, 2, imax, jmax, kmax)

In [78]:
V_gpu = gpuarray.to_gpu(V_orig.copy())
g_gpu = gpuarray.to_gpu(g)
poisson_ker(V_gpu, np.int32(2), np.int32(imax), np.int32(jmax), np.int32(kmax), grid=(1,1,32), block=(32, 32, 1))

In [79]:
max_index = np.argmax(np.abs(V_gpu.get() - V))
unravelled_index = np.unravel_index(max_index, V.shape)

In [80]:
print(V[unravelled_index])
print(V_gpu.get()[unravelled_index])
np.allclose(V_gpu.get(), V)

1.3628291
9.867972


False

In [15]:
unravelled_index

(27, 15, 16)

In [173]:
V_gpu.get()[unravelled_index]

4.425444

In [175]:
V[unravelled_index]

1.9709917

In [181]:
len(np.where(np.abs(V_gpu.get() - V) > 1)[0])

739