In [68]:
import numpy as np
import pycuda.driver as drv
import pycuda.autoinit
from pycuda.compiler import SourceModule
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import matplotlib as mpl

In [None]:
# CUDA C source for the `render` kernel; it is handed to SourceModule as text.
# All floating-point literals carry an `f` suffix and the math calls use the
# single-precision variants so that no expression is silently promoted to
# double precision on the device.
GPUkernel = """
#include <math.h>

// Renders one RGB frame of an animated two-wave interference pattern.
//   disp  : output buffer holding img_w * img_h * 3 floats (R, G, B per pixel)
//   img_w : frame width in pixels
//   img_h : frame height in pixels
//   time  : animation time; moves both wave centres and shifts their phase
// Each thread that lands inside the image writes exactly one pixel.
__global__ void render(float *disp, int img_w, int img_h, float time)
{
    int row = blockIdx.y * blockDim.y + threadIdx.y;
    int col = blockIdx.x * blockDim.x + threadIdx.x;

    // Threads that fall outside the image do nothing.
    if(row < img_h && col < img_w)
    {
        // Three consecutive floats per pixel, rows stored one after another.
        int pix_location = row * img_w * 3 + col * 3;

        // Pixel position scaled by the frame size.
        float x = float(col) / img_w;
        float y = float(row) / img_h;

        // First wave: sine of the distance to a centre that moves with time.
        float dx1 = x - sinf(time*0.5f)*0.5f;
        float dy1 = y - cosf(time*0.3f)*0.5f;
        float wave1 = sinf(sqrtf(dx1*dx1 + dy1*dy1)*20.0f - time*5.0f);

        // Second wave: mirrored offsets, different frequency and speed.
        float dx2 = x + sinf(time*0.7f)*0.5f;
        float dy2 = y + cosf(time*0.4f)*0.5f;
        float wave2 = sinf(sqrtf(dx2*dx2 + dy2*dy2)*18.0f - time*6.0f);

        // Average of two sines, so the value stays within [-1, 1].
        float interference = (wave1 + wave2) * 0.5f;

        // Every channel below evaluates to a value within [0, 1].
        disp[pix_location + 0] = interference*interference;
        disp[pix_location + 1] = fabsf(interference);
        disp[pix_location + 2] = 0.5f + 0.5f*sinf(interference*3.14f + time);
    }
}

"""

In [70]:
# Compile the CUDA source and fetch a handle to the `render` kernel.
module = SourceModule(GPUkernel)
render = module.get_function("render")

# Frame size in pixels.
img_w, img_h = 1920, 1080
n_pix = img_h * img_w

# Flat host buffer: three float32 values per pixel.
disp = np.zeros(n_pix * 3, dtype=np.float32)

# 16x16 threads per block; the grid is rounded up so that the blocks cover
# the whole frame even when a dimension is not a multiple of the block size.
threads = (16, 16, 1)
blocks = (
    (img_w + threads[0] - 1) // threads[0],
    (img_h + threads[1] - 1) // threads[1],
)

In [None]:
# One figure with a single axes. The image starts as an all-zero RGB frame of
# the final size, so animate() only has to swap in new pixel data.
fig, ax = plt.subplots(figsize=(8, 6))
img_plot = ax.imshow(np.zeros(shape=(img_h, img_w, 3)))

def animate(frame):
    """Render animation frame number `frame` on the GPU and show it.

    The kernel is launched with a time value of ``frame * 0.1``. Its result
    ends up in the host array `disp`, which is viewed as an
    ``(img_h, img_w, 3)`` image and handed to the existing image artist.

    Returns a list containing the updated artist.
    """
    t = np.float32(frame * 0.1)
    render(
        drv.Out(disp),
        np.int32(img_w),
        np.int32(img_h),
        t,
        block=threads,
        grid=blocks,
    )
    img_plot.set_array(disp.reshape(img_h, img_w, 3))
    return [img_plot]

# 50 frames, 50 ms between frames; animate() returns the artists to redraw.
res = FuncAnimation(
    fig,
    animate,
    frames=50,
    interval=50,
    blit=True,
)

In [72]:
# Write the animation to a GIF with the Pillow writer at 20 frames per second.
# NOTE(review): the target is an absolute, machine-specific path - confirm it
# exists before running this cell on another machine.
res.save(
    '/mnt/d/CUDA/day35/cuda_animation.gif',
    writer='pillow',
    fps=20,
)