In [1]:
from pycuda import gpuarray
import pycuda.autoinit
import pycuda.driver as drv
from pycuda.compiler import SourceModule
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import clear_output
%load_ext autoreload
%autoreload 1

In [38]:
ker = SourceModule("""
#define _x (threadIdx.x + blockIdx.x * blockDim.x)
#define _y (threadIdx.y + blockIdx.y * blockDim.y)

#define _width (blockDim.x * gridDim.x) //total number of threads in dimension 0
#define _height (blockDim.y * gridDim.y) //total number of threads in dimension 1

#define _xm(x) ( (x + _width) % _width )
#define _ym(y) ( (y + _height) % _height )

#define _index(x, y) (_xm(x) + _ym(y) * _width)

__device__ int nbrs(int x, int y, int * in) {
    return in[_index(x-1, y-1)] + in[_index(x-1, y)] + in[_index(x-1, y+1)] \
           + in[_index(x, y-1)] + in[_index(x, y+1)] \
           + in[_index(x+1, y-1)] + in[_index(x+1, y)] + in[_index(x+1, y+1)];
}

__global__ void conway_ker(int * lattice, int iters) {
    int x = _x, y = _y;
    for (int i = 0; i < iters; i++) {
        int n = nbrs(x, y, lattice);
        int cell_value;
        if (lattice[_index(x, y)] == 1)
            switch (n) {
                case 2:
                case 3: cell_value = 1;
                        break;
                default: cell_value = 0;
            }
        else if (lattice[_index(x, y)] == 0 )
            switch(n) {
                case 3: cell_value = 1;
                        break;
                default: cell_value = 0;
            }
        __syncthreads();
        lattice[_index(x, y)] = cell_value;
        __syncthreads();
    }
}
""")
conway_ker = ker.get_function("conway_ker")

In [43]:
def update_gpu(lattice_gpu, N):
    iters = int(300)
    conway_ker(lattice_gpu, iters, grid=(N//32, N//32, 1), block=(32, 32, 1))
    plt.imshow(lattice_gpu.get())
    plt.show()

In [44]:
N = int(32 * 32)
lattice = np.random.choice([1, 0], N*N, p=[.25, .75]).reshape(N, N).astype('int32')
lattice_gpu = gpuarray.to_gpu(lattice)
update_gpu(lattice_gpu, N)

TypeError: invalid type on parameter #1 (0-based)