In [1]:
from pycuda import gpuarray
import pycuda.autoinit
import pycuda.driver as drv
from pycuda.compiler import SourceModule
import numpy as np
import matplotlib.pyplot as plt
import time
from IPython.display import clear_output
%load_ext autoreload
%autoreload 1

In [2]:
# Kernel arguments need an exact, known binary size, so every scalar below is
# stored as an explicitly sized NumPy value (np.int32 / np.float32) instead of a
# plain Python int or float.

# Grid limits used by the solver's range guards.
imax = np.int32(16)
jmax = np.int32(16)
kmax = np.int32(16)
# Allocated extent per axis: three more points than imax / jmax / kmax.
n1 = np.int32(imax+3)
n2 = np.int32(jmax+3)
n3 = np.int32(kmax+3)
qi = np.float32(1.6E-19)
qe = np.float32(-1.6E-19)
kr = np.float32(0)
alpha = np.float32(0)
q     = np.float32(1.6E-19)
pie   = np.float32(3.14159)

# NOTE(review): the values below look like SI physical constants and plasma
# parameters (temperatures scaled by 11604.5) - confirm units before reuse.
Kb    = np.float32(1.38E-23)
B     = np.float32(0.5)
Te    = np.float32(2.5*11604.5)
Ti    = np.float32(0.025*11604.5)
me    = np.float32(9.109E-31)
mi    = np.float32(6.633E-26)
dt    = np.float32(1.0E-14)
h     = np.float32(4.0E-4)
eps0  = np.float32(8.854E-12)
si    = np.float32(0.0)
sf    = np.float32(0.0)

# Total number of flat grid entries, sweep count for the solver comparison, and
# the step limit. Each name is now assigned exactly once; earlier placeholder
# assignments of ki, si and tmax were overwritten before any use and are gone.
N = np.int32(n1*n2*n3)
iterations = np.int32(40)
tmax = np.int32(1000)

nn  =np.float32(10.0/(Kb*Ti)) #neutral density=p/(Kb.T)
nue =np.float32(nn*1.1E-19*np.sqrt(2.55*Kb*Te/me)) # electron collision frequency= neutral density * sigma_e*Vth_e
nui =np.float32(nn*4.4E-19*np.sqrt(2.55*Kb*Ti/mi)) # same expression with the ion temperature, mass and 4.4E-19
wce =np.float32(q*B/me)
wci =np.float32(q*B/mi)
mue =np.float32(q/(me*nue))
mui =np.float32(q/(mi*nui))
dife=np.float32(Kb*Te/(me*nue))
difi=np.float32(Kb*Ti/(mi*nui))
ki=np.float32(0.00002/(nn*dt))
denominator_e= np.float32((1+wce*wce/(nue*nue)))
denominator_i= np.float32(1+wci*wci/(nui*nui))
# Ta and w are constants needed by the iterative method used to solve the Poisson equation.
# w is the weight applied to each point's correction in the solver sweeps.
Ta=np.float32(np.arccos((np.cos(pie/imax)+np.cos(pie/jmax)+np.cos(pie/kmax))/3.0))# needs to be float checked
w=np.float32(2.0/(1.0+np.sin(Ta)))
# -----------------------------------------------------------------------------------------------
#Density initialization
# To add multiple Gaussian sources, just simply use the density_initialization function at the (x,y) points that you want
x_position = 15
y_position = 15
z_position = 15



In [3]:
# Compile the odd/even relaxation kernel.
#
# Each thread handles one flat index I = threadIdx.x + blockDim.x * blockIdx.x and
# decodes it into (i, j, k) with k varying fastest, i.e. I = k + n3 * (j + n2 * i).
# A thread returns without touching memory when any of i, j, k is 0, when
# i >= imax-1, j >= jmax-1 or k >= kmax-1, or when (i+j+k) % 2 equals `oddEven`.
# Every remaining point is updated in place: V += w * r, where r is the mean of
# the six axis neighbours of V minus V minus h*h*g/6 at that point.
#
# Despite the "out_of_place" in the Python name, the kernel writes to V itself.
# The parameters R and n1 are accepted but never read, and N only appears in the
# commented-out loop, so the kernel body has no explicit `I < N` test.
new_ker = SourceModule("""
__global__ void poisson_solve_1it_cu(float *V, float *g, float *R, int imax, int jmax, int kmax, int n1, int n2, int n3, int N, float w, float h, int oddEven) {
    int index_x = threadIdx.x + blockDim.x * blockIdx.x;
    //int stride_x = blockDim.x * gridDim.x;
    int I = index_x;
        //for (int I = index_x; I < N; I +=stride_x) {
            int k = I % n3;
            int s1 = (I - k) / n3;
            int j = s1 % n2;
            int i = (s1 - j) / n2;
            if (i * j * k == 0 || i >= imax-1 || j >= jmax-1 || k >= kmax-1) return;
            if ((i+j+k) % 2 == oddEven) return;
            float r =
                (V[k + n3 * (j + n2 * (i+1))]+
                     V[k + n3 * (j + n2 * (i-1))]+
                     V[k + n3 * (j+1 + n2 * (i))]+
                     V[k + n3 * (j-1 + n2 * (i))]+
                     V[k+1 + n3 * (j + n2 * (i))]+
                     V[k-1 + n3 * (j + n2 * (i))]
                 ) / 6.0 - V[k + n3 * (j + n2 * (i))]- (h*h)*g[k + n3 * (j + n2 * (i))]/6.0;
            V[k + n3 * (j + n2 * (i))] = V[k + n3 * (j + n2 * (i))] + w*r;
        //}
}
""")
# Python handle used to launch the kernel; callers pass the parity as the last
# positional argument.
gpu_poisson_out_of_place_odd_even = new_ker.get_function("poisson_solve_1it_cu")

In [3]:
# Compile the variant of the relaxation kernel that has NO parity argument.
#
# This cell used to rebind both `new_ker` and `gpu_poisson_out_of_place_odd_even`,
# silently replacing the 13-argument odd/even kernel with this 12-argument one
# even though the GPU run cell still passes a trailing parity value. The module
# and the launch handle now get their own names so the two kernels can coexist
# and a top-to-bottom run launches the kernel the caller expects.
#
# Index decoding and the update formula are the same as in the odd/even kernel:
# flat index I = k + n3 * (j + n2 * i); threads with a zero index or with
# i >= imax-1, j >= jmax-1, k >= kmax-1 return early; V is updated in place as
# V += w * r. R, n1 and N are accepted but not read by the kernel body.
#
# NOTE(review): every interior point is updated in the same launch while its
# neighbours are read from the same V array and the kernel contains no
# synchronisation, so a thread may see a neighbour either before or after that
# neighbour's update. Results can therefore depend on thread scheduling and need
# not match a sequential CPU sweep.
plain_ker = SourceModule("""
__global__ void poisson_solve_1it_cu(float *V, float *g, float *R, int imax, int jmax, int kmax, int n1, int n2, int n3, int N, float w, float h) {
    int index_x = threadIdx.x + blockDim.x * blockIdx.x;
    //int stride_x = blockDim.x * gridDim.x;
    int I = index_x;
        //for (int I = index_x; I < N; I +=stride_x) {
            int k = I % n3;
            int s1 = (I - k) / n3;
            int j = s1 % n2;
            int i = (s1 - j) / n2;
            if (i * j * k == 0 || i >= imax-1 || j >= jmax-1 || k >= kmax-1) return;
            float r =
                (V[k + n3 * (j + n2 * (i+1))]+
                     V[k + n3 * (j + n2 * (i-1))]+
                     V[k + n3 * (j+1 + n2 * (i))]+
                     V[k + n3 * (j-1 + n2 * (i))]+
                     V[k+1 + n3 * (j + n2 * (i))]+
                     V[k-1 + n3 * (j + n2 * (i))]
                 ) / 6.0 - V[k + n3 * (j + n2 * (i))]- (h*h)*g[k + n3 * (j + n2 * (i))]/6.0;
            V[k + n3 * (j + n2 * (i))] = V[k + n3 * (j + n2 * (i))] + w*r;
        //}
}
""")
# Launch handle for the parity-free kernel (12 positional arguments).
gpu_poisson_out_of_place = plain_ker.get_function("poisson_solve_1it_cu")

In [4]:
def python_poisson_out_of_place_odd_even(V, g, R, imax, jmax, kmax, n1, n2, n3, N, w, h, oddEven):
    """Run one CPU relaxation sweep over the interior points of one parity.

    The grid is stored flat with ``k`` varying fastest: point ``(i, j, k)``
    lives at index ``k + n3 * (j + n2 * i)``. Each visited point is updated
    in place as ``V += w * r``, where ``r`` is the mean of the six axis
    neighbours of ``V`` minus ``V`` minus ``h*h*g/6`` at that point. Despite
    the name, nothing is written anywhere except ``V``.

    Parameters
    ----------
    V : mutable indexable
        Values being relaxed; overwritten in place.
    g : indexable
        Per-point term that enters the update as ``h*h*g/6``; only read.
    R : any
        Unused. Presumably kept so the argument list mirrors the GPU kernel
        launch - confirm before removing.
    imax, jmax, kmax : int
        Points with ``i >= imax-1``, ``j >= jmax-1`` or ``k >= kmax-1`` are
        skipped, as are points where any of ``i``, ``j``, ``k`` is 0.
    n1 : int
        Unused.
    n2, n3 : int
        Extents of the ``j`` and ``k`` axes in the flat layout.
    N : int
        Number of flat indices to visit (``0 .. N-1``).
    w : float
        Weight applied to ``r`` in the update.
    h : float
        Only ``h*h`` is used, as the factor on ``g``.
    oddEven : int
        Points whose ``(i + j + k) % 2`` equals this value are skipped.

    Returns
    -------
    None
    """
    k_step = 1
    j_step = n3
    i_step = n2 * n3
    for I in range(N):
        # Decode the flat index; I itself is the index of the centre point.
        s1, k = divmod(I, n3)
        i, j = divmod(s1, n2)
        # Explicit zero tests instead of `i * j * k == 0`, which can overflow
        # a fixed-width integer product on large grids.
        if i == 0 or j == 0 or k == 0:
            continue
        if i >= imax-1 or j >= jmax-1 or k >= kmax-1:
            continue
        if (i + j + k) % 2 == oddEven:
            continue
        # Neighbour order (i+1, i-1, j+1, j-1, k+1, k-1) is kept so the
        # floating-point sum is evaluated exactly as before.
        r = (V[I + i_step] +
             V[I - i_step] +
             V[I + j_step] +
             V[I - j_step] +
             V[I + k_step] +
             V[I - k_step]
             ) / 6.0 - V[I] - (h*h)*g[I]/6.0
        V[I] = V[I] + w*r

In [9]:
def python_poisson_out_of_place(V, g, R, imax, jmax, kmax, n1, n2, n3, N, w, h):
    """Run one CPU relaxation sweep over every interior point, in index order.

    The grid is stored flat with ``k`` varying fastest: point ``(i, j, k)``
    lives at index ``k + n3 * (j + n2 * i)``. Points are visited in
    increasing flat index and each one is updated in place as
    ``V += w * r``, where ``r`` is the mean of the six axis neighbours of
    ``V`` minus ``V`` minus ``h*h*g/6`` at that point. Because ``V`` is
    modified while it is being read, later points see the already-updated
    values of earlier neighbours. Despite the name, nothing is written
    anywhere except ``V``.

    Parameters
    ----------
    V : mutable indexable
        Values being relaxed; overwritten in place.
    g : indexable
        Per-point term that enters the update as ``h*h*g/6``; only read.
    R : any
        Unused. Presumably kept so the argument list mirrors the GPU kernel
        launch - confirm before removing.
    imax, jmax, kmax : int
        Points with ``i >= imax-1``, ``j >= jmax-1`` or ``k >= kmax-1`` are
        skipped, as are points where any of ``i``, ``j``, ``k`` is 0.
    n1 : int
        Unused.
    n2, n3 : int
        Extents of the ``j`` and ``k`` axes in the flat layout.
    N : int
        Number of flat indices to visit (``0 .. N-1``).
    w : float
        Weight applied to ``r`` in the update.
    h : float
        Only ``h*h`` is used, as the factor on ``g``.

    Returns
    -------
    None
    """
    k_step = 1
    j_step = n3
    i_step = n2 * n3
    for I in range(N):
        # Decode the flat index; I itself is the index of the centre point.
        s1, k = divmod(I, n3)
        i, j = divmod(s1, n2)
        # Explicit zero tests instead of `i * j * k == 0`, which can overflow
        # a fixed-width integer product on large grids.
        if i == 0 or j == 0 or k == 0:
            continue
        if i >= imax-1 or j >= jmax-1 or k >= kmax-1:
            continue
        # Neighbour order (i+1, i-1, j+1, j-1, k+1, k-1) is kept so the
        # floating-point sum is evaluated exactly as before.
        r = (V[I + i_step] +
             V[I - i_step] +
             V[I + j_step] +
             V[I - j_step] +
             V[I + k_step] +
             V[I - k_step]
             ) / 6.0 - V[I] - (h*h)*g[I]/6.0
        V[I] = V[I] + w*r

In [5]:
# Random float32 test inputs of length N. A seeded generator is used so the
# CPU/GPU comparison runs on the same data after every kernel restart.
rng = np.random.default_rng(0)
original_g = rng.random(int(N)).astype(np.float32) * 10
original_V = rng.random(int(N)).astype(np.float32) * 10
original_R = rng.random(int(N)).astype(np.float32)

In [6]:
# CPU reference for the odd/even scheme: start from private copies of the
# inputs, then on every iteration run the sweep called with oddEven=0 followed
# by the sweep called with oddEven=1.
cpu_g, cpu_V, cpu_R = (arr.copy() for arr in (original_g, original_V, original_R))
for sweep in range(iterations):
    for parity in (0, 1):
        python_poisson_out_of_place_odd_even(
            cpu_V, cpu_g, cpu_R,
            imax, jmax, kmax,
            n1, n2, n3, N,
            w, h, parity,
        )

In [7]:
# Upload the inputs to the device and run the odd/even kernel: on every
# iteration one launch with parity argument 0 followed by one with 1.
# The kernel launched here must be the variant that takes a trailing `oddEven`
# argument (13 positional arguments in total).
gpu_g = gpuarray.to_gpu(original_g)
gpu_V = gpuarray.to_gpu(original_V)
gpu_R = gpuarray.to_gpu(original_R)

threads_per_block = 512
# Round the block count UP. The previous `int(N)//512` rounded down, which left
# the last N % 512 flat indices without a thread and produced a zero-sized grid
# whenever N < 512. Threads past N decode to i >= n1 and leave through the
# kernel's `i >= imax-1` guard before touching memory - re-check this if n1 is
# ever made smaller than imax-1.
num_blocks = (int(N) + threads_per_block - 1) // threads_per_block

for sweep in range(iterations):
    for parity in (np.int32(0), np.int32(1)):
        gpu_poisson_out_of_place_odd_even(
            gpu_V, gpu_g, gpu_R, imax, jmax, kmax, n1, n2, n3, N, w, h, parity,
            grid=(num_blocks, 1, 1), block=(threads_per_block, 1, 1))

In [8]:
# Compare the CPU odd/even result with the GPU result copied back to the host
# by gpu_V.get(); True means they agree within np.allclose's default tolerances.
np.allclose(cpu_V, gpu_V.get())

True

In [11]:
# CPU run of the single-pass sweep (no parity split), restarted from fresh
# copies of the original inputs so earlier runs do not leak into it.
cpu_g = np.copy(original_g)
cpu_V = np.copy(original_V)
cpu_R = np.copy(original_R)
solver_args = (cpu_V, cpu_g, cpu_R, imax, jmax, kmax, n1, n2, n3, N, w, h)
for sweep in range(iterations):
    python_poisson_out_of_place(*solver_args)

In [12]:
# NOTE(review): cpu_V now holds the single-pass (index-order) CPU result, while
# gpu_V still holds the odd/even GPU result from the earlier GPU cell - no GPU
# run of a parity-free kernel happens in between. This therefore compares two
# different update orders after the same number of iterations, so a mismatch
# here does not by itself indicate a kernel bug.
np.allclose(cpu_V, gpu_V.get())

False