# Element-wise update

<a href="https://colab.research.google.com/github/mark-hobbs/articles/blob/main/cuda/element-wise-update.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Update particle positions

In [1]:
!uv pip install -q --system numba-cuda==0.15.0

import numpy as np
from numba import  njit, prange, config
config.CUDA_ENABLE_PYNVJITLINK = 1

try:
    import google.colab
    !git clone https://github.com/mark-hobbs/articles.git
    import os
    os.chdir('articles/cuda')  # Navigate to the cuda subdirectory
except ImportError:
    pass  # Already local, no need to clone

import utils

In [2]:
# Fixed seed so the random force field is identical on every run.
np.random.seed(42)

n_particles = 1_500_000
n_dimensions = 2

# f holds the random forcing; u, v and a are the arrays the integrators
# update in place. All share f's (n_particles, n_dimensions) shape and dtype.
f = np.random.randn(n_particles, n_dimensions)
u = np.zeros_like(f)
v = np.zeros_like(f)
a = np.zeros_like(f)

# Every entry is flagged, so the integrators overwrite each u entry with
# bc_magnitude * bc_unit_vector after the update step.
bc_flag = np.ones_like(f)
bc_unit_vector = np.ones_like(f)
bc_magnitude = 1

# Scalar parameters passed to every integrator.
density = 1.0
damping = 1.0
dt = 1.0

In [3]:
@utils.profile(runs=10)
def euler_cromer_a(
    f, u, v, a, density, bc_flag, bc_magnitude, bc_unit_vector, damping, dt
):
    """
    Euler-Cromer update written as explicit Python loops.

    For every (row, column) entry of the 2D arrays:

    1. a = (f - damping * v) / density
    2. v += a * dt
    3. u += v * dt   (uses the velocity just updated in step 2)
    4. where bc_flag is non-zero, u is overwritten with
       bc_magnitude * bc_unit_vector

    The arrays `a`, `v` and `u` are modified in place.

    Returns
    -------
    tuple
        The updated `u` and `v` arrays (the same objects that were passed in).
    """
    n_rows = f.shape[0]
    n_cols = f.shape[1]

    for i in range(n_rows):
        for j in range(n_cols):
            a[i, j] = (f[i, j] - damping * v[i, j]) / density
            v[i, j] += a[i, j] * dt
            u[i, j] += v[i, j] * dt

            # Unflagged entries keep the integrated value
            if bc_flag[i, j] == 0:
                continue
            u[i, j] = bc_magnitude * bc_unit_vector[i, j]

    return u, v

In [4]:
@utils.profile(runs=10)
def euler_cromer_b(
    f, u, v, a, density, bc_flag, bc_magnitude, bc_unit_vector, damping, dt
):
    """
    Euler-Cromer update expressed with whole-array NumPy operations.

    Performs the same steps as the loop-based version, but on entire arrays:
    `a` is refilled from the forces and current velocities, `v` is advanced
    using the new `a`, and `u` is advanced using the new `v`. Entries whose
    `bc_flag` is non-zero are then overwritten with
    bc_magnitude * bc_unit_vector.

    The arrays `a`, `v` and `u` are modified in place.

    Returns
    -------
    tuple
        The updated `u` and `v` arrays (the same objects that were passed in).
    """
    # Write into the existing buffer rather than rebinding the local name
    a[...] = (f - damping * v) / density

    v += dt * a
    u += dt * v

    constrained = bc_flag != 0
    u[constrained] = bc_magnitude * bc_unit_vector[constrained]

    return u, v

In [5]:
@utils.profile(runs=10)
@njit(parallel=True)
def euler_cromer_c(
    f, u, v, a, density, bc_flag, bc_magnitude, bc_unit_vector, damping, dt
):
    """
    Euler-Cromer update compiled with Numba, parallelised over rows.

    The outer loop over rows uses `prange`; the inner loop over columns is a
    plain `range`. Each entry goes through the same steps as the loop-based
    version: compute `a`, advance `v`, advance `u`, then overwrite `u` with
    bc_magnitude * bc_unit_vector where `bc_flag` is non-zero.

    The arrays `a`, `v` and `u` are modified in place.

    Returns
    -------
    tuple
        The updated `u` and `v` arrays.
    """
    n_rows = f.shape[0]
    n_cols = f.shape[1]

    for i in prange(n_rows):
        for j in range(n_cols):
            a[i, j] = (f[i, j] - damping * v[i, j]) / density
            v[i, j] += dt * a[i, j]
            u[i, j] += dt * v[i, j]

            if bc_flag[i, j] != 0:
                u[i, j] = bc_magnitude * bc_unit_vector[i, j]

    return u, v

In [6]:
# Profile the three CPU implementations back to back.
# NOTE: every call updates u, v and a in place, so each function (and each of
# its profiled runs) starts from the state left behind by the previous one.
# The trailing semicolons suppress the returned (u, v) tuple in the cell output.
euler_cromer_a(f, u, v, a, density, bc_flag, bc_magnitude, bc_unit_vector, damping, dt);
euler_cromer_b(f, u, v, a, density, bc_flag, bc_magnitude, bc_unit_vector, damping, dt);
euler_cromer_c(f, u, v, a, density, bc_flag, bc_magnitude, bc_unit_vector, damping, dt);

Function 'euler_cromer_a' executed 10 time(s)
Average execution time: 2.8272 seconds
Min: 2.7299s, Max: 2.8974s

Function 'euler_cromer_b' executed 10 time(s)
Average execution time: 0.0159 seconds
Min: 0.0155s, Max: 0.0174s

Function 'euler_cromer_c' executed 10 time(s)
Average execution time: 0.0301 seconds
Min: 0.0014s, Max: 0.2879s



## Numba CUDA

In [7]:
from numba import cuda, float32

In [8]:
# Query the CUDA device through the project's utils helper. On a machine
# without a CUDA driver this reports an error instead, and the GPU cells below
# cannot run.
utils.get_cuda_device_info()

Error retrieving CUDA device information: Error at driver init: 

CUDA driver library cannot be found.
If you are sure that a CUDA driver is installed,
try setting environment variable NUMBA_CUDA_DRIVER
with the file path of the CUDA driver shared library.
:


In [9]:
@utils.profile(runs=10)
def euler_cromer_gpu(
    f, u, v, a, density, bc_flag, bc_magnitude, bc_unit_vector, damping, dt
):
    """
    Update particle positions using an Euler-Cromer time integration scheme

    This function is a wrapper for the CUDA kernel `euler_cromer_kernel`,
    which assigns one thread to each (node, dof) entry of the 2D arrays. The
    launch grid is therefore sized from the total number of entries
    (`f.shape[0] * f.shape[1]`), rounded up to a whole number of blocks.

    The arrays `a`, `v` and `u` are updated by the kernel; nothing is
    returned.
    """
    THREADS_PER_BLOCK = 256

    n_entries = f.shape[0] * f.shape[1]
    if n_entries == 0:
        # Nothing to update, and a zero-sized grid is not a valid launch
        return

    # Ceiling division: just enough blocks to give every entry one thread.
    # (One block per node launched far more threads than needed and left
    # entries unprocessed whenever n_dimensions exceeded THREADS_PER_BLOCK.)
    BLOCKS_PER_GRID = (n_entries + THREADS_PER_BLOCK - 1) // THREADS_PER_BLOCK

    euler_cromer_kernel[BLOCKS_PER_GRID, THREADS_PER_BLOCK](
        f, u, v, a, density, bc_flag, bc_magnitude, bc_unit_vector, damping, dt
    )

    # Kernel launches can return before the device has finished; wait here so
    # the profiler measures the full update rather than only the launch.
    cuda.synchronize()


@cuda.jit
def euler_cromer_kernel(
    f, u, v, a, density, bc_flag, bc_magnitude, bc_unit_vector, damping, dt
):
    """
    CUDA kernel for Euler-Cromer time integration scheme

    Each thread takes its 1D grid index and converts it to a (row, column)
    position in the 2D arrays, then updates that single entry: compute `a`,
    advance `v`, advance `u`, and overwrite `u` with
    bc_magnitude * bc_unit_vector where `bc_flag` is non-zero. Threads whose
    index falls beyond the last entry do nothing.
    """
    n_cols = f.shape[1]
    n_entries = f.shape[0] * n_cols

    tid = cuda.grid(1)

    # Surplus threads in the final block have no entry to work on
    if tid >= n_entries:
        return

    # Row-major unflattening of the 1D thread index
    row = tid // n_cols
    col = tid % n_cols

    a[row, col] = (f[row, col] - damping * v[row, col]) / density
    v[row, col] += dt * a[row, col]
    u[row, col] += dt * v[row, col]

    if bc_flag[row, col] != 0:
        u[row, col] = bc_magnitude * bc_unit_vector[row, col]

In [10]:
# TODO: cuda.to_device()
# The host NumPy arrays are currently passed straight to the kernel wrapper;
# moving them to the device explicitly beforehand is still outstanding.
# Running this cell requires a working CUDA driver.

euler_cromer_gpu(f, u, v, a, density, bc_flag, bc_magnitude, bc_unit_vector, damping, dt)

CudaSupportError: Error at driver init: 

CUDA driver library cannot be found.
If you are sure that a CUDA driver is installed,
try setting environment variable NUMBA_CUDA_DRIVER
with the file path of the CUDA driver shared library.
: