In [1]:
using EtherArrays
using StaticArrays
using KernelAbstractions
using oneAPI

In [2]:
# KernelAbstractions backend object on which kernels are instantiated and synchronized.
const kBackend = oneAPI.oneAPIBackend()
# Device array type used to move host arrays onto the GPU.
const Tcontainer = oneAPI.oneArray
# Floating-point element type of the particle data.
const Tf = Float32

Float32

In [3]:
# Column map of the particle table: each field gives the first column of that
# quantity in a row of `data`. The gaps between consecutive indices suggest
# pos/vel/acc span 2 columns each and stress spans 4 — presumably matching the
# component count of the EtherArrays views built on them; TODO confirm.
const nt = (
    mass = 1,
    rho = 2,
    pos = 3,
    vel = 5,
    acc = 7,
    stress = 9,
    pressure = 13
)
# Host-side table: 4 rows (one per particle) by 13 columns, filled with normally
# distributed random values of type `Tf`.
data_cpu = randn(Tf, 4, 13)
# Device-side copy of the same table; this is what the kernel mutates.
data_gpu = Tcontainer(data_cpu)

4×13 oneArray{Float32, 2, oneAPI.oneL0.DeviceBuffer}:
 -0.318786    0.243488  -1.41303    …  -0.254535   -0.638911   0.4998
 -3.40059    -0.706039   1.93431       -0.514646   -0.868073  -0.744127
  1.22627     0.920235   1.18372       -1.61642    -0.96342   -0.313851
 -0.0893994  -1.11116    0.0689773      0.0419635   0.397212  -0.0322764

In [4]:
"""
    test_vis_force(mass, rho, x, u)

Return the test "viscous" acceleration contribution on the first particle of a pair.

With `rvec = x[1] - x[2]`, `r = norm(rvec)` and `vij = u[1] - u[2]`, the result is the
element-wise expression `(mass[2] / rho[2]) * vij / (r^2 + 0.01) * rvec`.

# Arguments
- `mass`: masses of the two particles; only `mass[2]` is used.
- `rho`: densities of the two particles; only `rho[2]` is used.
- `x`: positions of the two particles as 2-component static vectors.
- `u`: velocities of the two particles as 2-component static vectors.

# Returns
A 2-component vector whose element type follows the inputs (Float32 inputs give a
Float32 result).
"""
@inline function test_vis_force(
    mass::NTuple{2, <:Real},
    rho::NTuple{2, <:Real},
    x::Tuple{<:StaticVector{2, <:Real}, <:StaticVector{2, <:Real}},
    u::Tuple{<:StaticVector{2, <:Real}, <:StaticVector{2, <:Real}}
)
    rvec = x[1] .- x[2]
    r = StaticArrays.norm(rvec)
    vij = u[1] .- u[2]
    # Convert the softening constant to the precision of `r`. A bare `0.01` is a
    # Float64 literal and would promote the whole expression to Float64, which is
    # unwanted inside a Float32 GPU kernel.
    softening = oftype(r, 0.01)
    volume = mass[2] / rho[2]
    ai = volume .* (vij ./ (r^2 + softening)) .* rvec
    return ai
end

test_vis_force (generic function with 1 method)

In [5]:
# Pairwise test kernel: work item `idx` pairs particle `idx` with particle `2 * idx`
# and accumulates the result of `test_vis_force` into the acceleration entry of
# particle `idx`.
#
# `data` is the particle table (rows = particles, columns laid out per `nt`).
# Only the `nt.acc` view of row `idx` is written; mass, density, position and
# velocity of both rows are only read.
# NOTE(review): this assumes `E2Vector{2}(row, col, data)` is a view into `data`
# so that `a1 .+= ai` writes through to the table — confirm against EtherArrays.
@kernel function par_ker!(data)
    idx::Int = @index(Global)
    jdx::Int = 2 * idx
    # Skip work items whose partner row does not exist; the loads below are
    # `@inbounds`, so an oversized `ndrange` would otherwise read out of bounds.
    # (An `if` is used rather than an early `return`, which kernels do not allow.)
    if jdx <= size(data, 1)
        @inbounds m1 = data[idx, nt.mass]
        @inbounds rho1 = data[idx, nt.rho]
        x1 = E2Vector{2}(idx, nt.pos, data)
        u1 = E2Vector{2}(idx, nt.vel, data)
        a1 = E2Vector{2}(idx, nt.acc, data)

        @inbounds m2 = data[jdx, nt.mass]
        @inbounds rho2 = data[jdx, nt.rho]
        x2 = E2Vector{2}(jdx, nt.pos, data)
        u2 = E2Vector{2}(jdx, nt.vel, data)

        ai = test_vis_force((m1, m2), (rho1, rho2), (x1, x2), (u1, u2))
        a1 .+= ai
    end
end

par_ker! (generic function with 4 methods)

In [6]:
# Instantiate the kernel for `kBackend` with a workgroup size of 2 and launch it
# over 2 work items, so idx = 1, 2 pair with rows 2 and 4 of the 4-row table.
par_ker!(kBackend, 2)(data_gpu, ndrange=(2,))
# Block until the kernel has finished before reading results back.
KernelAbstractions.synchronize(kBackend)

In [7]:
# Copy the device table back to the host and show columns 7:8, which start at
# `nt.acc` (7) — presumably the two acceleration components; rows 1 and 2 are the
# ones the launch above updates.
Array(data_gpu)[:, 7:8]

4×2 Matrix{Float32}:
  1.85308   -1.43986
  1.8451    -1.61682
 -0.413922   0.11575
 -0.488144   1.01706

In [8]:
# Same columns from the untouched host copy, for comparison with the device
# result above.
data_cpu[:, 7:8]

4×2 Matrix{Float32}:
  1.22507   -0.846849
  1.86993   -1.67354
 -0.413922   0.11575
 -0.488144   1.01706