Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change scope of PS parameters to module and add unit tests for using ParallelStencil with extensions #130

Merged
merged 36 commits into from
Dec 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
c593840
add extensions unit tests
omlins Dec 12, 2023
9c28831
add extensions unit tests
omlins Dec 12, 2023
d934bc3
rename extensions in unit test project
omlins Dec 13, 2023
adecfdb
add minimal extensions unit test
omlins Dec 13, 2023
eac88ad
update local test package directory
omlins Dec 14, 2023
0dc4395
move to per module PS parameters
omlins Dec 18, 2023
c7d9b55
move to per module PS parameters
omlins Dec 18, 2023
3924c80
move to per module PS parameters
omlins Dec 18, 2023
43fd2c0
move to per module PS parameters
omlins Dec 18, 2023
f0ccecc
move to per module PS parameters
omlins Dec 18, 2023
3132bb1
move to per module PS parameters
omlins Dec 18, 2023
4e52b07
move to per module PS parameters
omlins Dec 18, 2023
68b2fec
move to per module PS parameters
omlins Dec 18, 2023
0b3cb2f
move to per module PS parameters
omlins Dec 18, 2023
2b7be8a
move to per module PS parameters
omlins Dec 18, 2023
3828377
fix formatting
omlins Dec 18, 2023
d7a4790
adjust unit tests to per module PS parameters
omlins Dec 18, 2023
38ee711
adjust unit tests to per module PS parameters
omlins Dec 18, 2023
69832c1
adjust unit tests to per module PS parameters
omlins Dec 18, 2023
f0dbc01
adjust unit tests to per module PS parameters
omlins Dec 18, 2023
586187f
add parameter-complete test project
omlins Dec 18, 2023
03fe210
change function name
omlins Dec 20, 2023
a868a12
improve error message
omlins Dec 20, 2023
afce17c
enable overriding number of dimensions in each kernel definition
omlins Dec 20, 2023
5ada577
enable overwriting ndims in each kernel definition
omlins Dec 20, 2023
a51db22
improve error message
omlins Dec 20, 2023
338d884
add unit tests for ndims omission
omlins Dec 20, 2023
a23dc88
add unit test for defining ndims in kernel
omlins Dec 20, 2023
58fd371
update test project diffusion
omlins Dec 20, 2023
d14b157
update test project diffusion
omlins Dec 20, 2023
4d50a55
update test project diffusion
omlins Dec 20, 2023
be7d79b
update test project diffusion
omlins Dec 20, 2023
bc83ed1
update test project diffusion
omlins Dec 20, 2023
2a52248
update test project diffusion
omlins Dec 20, 2023
751b96e
Correct ParallelStencil compatibility in diffusion test project
omlins Dec 20, 2023
82a10cf
activate minimal extension test project
omlins Dec 20, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 53 additions & 53 deletions src/ParallelKernel/allocators.jl

Large diffs are not rendered by default.

12 changes: 6 additions & 6 deletions src/ParallelKernel/hide_communication.jl
Original file line number Diff line number Diff line change
Expand Up @@ -88,10 +88,10 @@ Hide the communication behind the computation within the code `block`.
See also: [`@parallel`](@ref)
"""
@doc HIDE_COMMUNICATION_DOC
macro hide_communication(args...) check_initialized(__module__); checkargs_hide_communication(args...); esc(hide_communication(__module__, args...)); end # pass __module__ so per-module PS parameters are used

macro get_priority_stream(args...) check_initialized(__module__); checkargs_get_stream(args...); esc(get_priority_stream(__module__, args...)); end # priority stream for the GPU package selected in the caller module
macro get_stream(args...)          check_initialized(__module__); checkargs_get_stream(args...); esc(get_stream(__module__, args...)); end          # regular stream for the GPU package selected in the caller module


## ARGUMENT CHECKS
Expand All @@ -109,7 +109,7 @@ end

## GATEWAY FUNCTIONS

function hide_communication(caller::Module, args::Union{Integer,Symbol,Expr}...; package::Symbol=get_package())
function hide_communication(caller::Module, args::Union{Integer,Symbol,Expr}...; package::Symbol=get_package(caller))
posargs, kwargs_expr = split_args(args)
kwargs, ~ = extract_kwargs(caller, kwargs_expr, (:computation_calls,), "@hide_communication", false; eval_args=(:computation_calls,))
if isgpu(package) hide_communication_gpu(posargs...; kwargs...)
Expand All @@ -118,14 +118,14 @@ function hide_communication(caller::Module, args::Union{Integer,Symbol,Expr}...;
end
end

## Return the expression retrieving a priority stream for the GPU package selected in `caller`
## (overridable via the `package` keyword; throws for non-GPU packages).
function get_priority_stream(caller::Module, args::Union{Integer,Symbol,Expr}...; package::Symbol=get_package(caller))
    if     (package == PKG_CUDA)   get_priority_stream_cuda(args...)
    elseif (package == PKG_AMDGPU) get_priority_stream_amdgpu(args...)
    else @ArgumentError("unsupported GPU package (obtained: $package).")
    end
end

function get_stream(args::Union{Integer,Symbol,Expr}...; package::Symbol=get_package())
function get_stream(caller::Module, args::Union{Integer,Symbol,Expr}...; package::Symbol=get_package(caller))
if (package == PKG_CUDA) get_stream_cuda(args...)
elseif (package == PKG_AMDGPU) get_stream_amdgpu(args...)
else @ArgumentError("unsupported GPU package (obtained: $package).")
Expand Down
50 changes: 25 additions & 25 deletions src/ParallelKernel/init_parallel_kernel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ Initialize the package ParallelKernel, giving access to its main functionality.
See also: [`Data`](@ref)
"""
macro init_parallel_kernel(args...)
check_already_initialized()
check_already_initialized(__module__)
posargs, kwargs_expr = split_args(args)
if (length(args) > 3) @ArgumentError("too many arguments.")
elseif (0 < length(posargs) < 2) @ArgumentError("there must be either two or zero positional arguments.")
Expand All @@ -20,7 +20,7 @@ macro init_parallel_kernel(args...)
if (length(posargs) == 2) package, numbertype_val = extract_posargs_init(__module__, posargs...)
else package, numbertype_val = extract_kwargs_init(__module__, kwargs)
end
inbounds_val = extract_kwargs_optional(__module__, kwargs)
inbounds_val = extract_kwargs_nopos(__module__, kwargs)
if (package == PKG_NONE) @ArgumentError("the package argument cannot be ommited.") end #TODO: this error message will disappear, once the package can be defined at runtime.
esc(init_parallel_kernel(__module__, package, numbertype_val, inbounds_val))
end
Expand Down Expand Up @@ -55,34 +55,34 @@ function init_parallel_kernel(caller::Module, package::Symbol, numbertype::DataT
end
@eval(caller, $pkg_import_cmd)
@eval(caller, $ad_import_cmd)
set_package(package)
set_numbertype(numbertype)
set_inbounds(inbounds)
set_initialized(true)
set_package(caller, package)
set_numbertype(caller, numbertype)
set_inbounds(caller, inbounds)
set_initialized(caller, true)
return nothing
end


macro is_initialized() is_initialized(__module__) end # query whether @init_parallel_kernel was called in the calling module
macro get_package()    get_package(__module__)    end # package selected for the calling module
macro get_numbertype() get_numbertype(__module__) end # number type selected for the calling module
macro get_inbounds()   get_inbounds(__module__)   end # inbounds setting of the calling module
let
    global is_initialized, set_initialized, set_package, get_package, set_numbertype, get_numbertype, set_inbounds, get_inbounds, check_initialized, check_already_initialized
    # Per-module parameter storage: each module that calls @init_parallel_kernel gets its own
    # entry, so independent modules (e.g. package extensions) can use ParallelKernel concurrently
    # without clobbering each other's settings.
    _is_initialized::Dict{Module, Bool}   = Dict{Module, Bool}()
    package::Dict{Module, Symbol}         = Dict{Module, Symbol}()
    numbertype::Dict{Module, DataType}    = Dict{Module, DataType}()
    inbounds::Dict{Module, Bool}          = Dict{Module, Bool}()
    set_initialized(caller::Module, flag::Bool) = (_is_initialized[caller] = flag)
    is_initialized(caller::Module)              = haskey(_is_initialized, caller) && _is_initialized[caller] # missing key means "never initialized"
    set_package(caller::Module, pkg::Symbol)    = (package[caller] = pkg)
    get_package(caller::Module)                 = package[caller]
    set_numbertype(caller::Module, T::DataType) = (numbertype[caller] = T)
    get_numbertype(caller::Module)              = numbertype[caller]
    set_inbounds(caller::Module, flag::Bool)    = (inbounds[caller] = flag)
    get_inbounds(caller::Module)                = inbounds[caller]
    check_initialized(caller::Module)           = if !is_initialized(caller) @NotInitializedError("no ParallelKernel macro or function can be called before @init_parallel_kernel in each module (missing call in $caller).") end
    check_already_initialized(caller::Module)   = if is_initialized(caller) @IncoherentCallError("ParallelKernel has already been initialized for the module $caller.") end
end

function extract_posargs_init(caller::Module, package, numbertype) # NOTE: this function takes not only symbols: numbertype can be anything that evaluates to a type in the caller and for package will be checked wether it is a symbol in check_package and a proper error message given if not.
Expand All @@ -102,7 +102,7 @@ function extract_kwargs_init(caller::Module, kwargs::Dict)
return package, numbertype_val
end

function extract_kwargs_optional(caller::Module, kwargs::Dict)
function extract_kwargs_nopos(caller::Module, kwargs::Dict)
if (:inbounds in keys(kwargs)) inbounds_val = eval_arg(caller, kwargs[:inbounds]); check_inbounds(inbounds_val)
else inbounds_val = false
end
Expand Down
36 changes: 18 additions & 18 deletions src/ParallelKernel/kernel_language.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ const GRIDDIM_DOC = """
Return the grid size (or "dimension") in x, y and z dimension. The grid size in a specific dimension is commonly retrieved directly as in this example in x dimension: `@gridDim().x`.
"""
@doc GRIDDIM_DOC
macro gridDim(args...) check_initialized(__module__); checknoargs(args...); esc(gridDim(__module__, args...)); end # dispatch on the package selected in the calling module


##
Expand All @@ -19,7 +19,7 @@ const BLOCKIDX_DOC = """
Return the block ID in x, y and z dimension within the grid. The block ID in a specific dimension is commonly retrieved directly as in this example in x dimension: `@blockIdx().x`.
"""
@doc BLOCKIDX_DOC
macro blockIdx(args...) check_initialized(__module__); checknoargs(args...); esc(blockIdx(__module__, args...)); end # dispatch on the package selected in the calling module


##
Expand All @@ -29,7 +29,7 @@ const BLOCKDIM_DOC = """
Return the block size (or "dimension") in x, y and z dimension. The block size in a specific dimension is commonly retrieved directly as in this example in x dimension: `@blockDim().x`.
"""
@doc BLOCKDIM_DOC
macro blockDim(args...) check_initialized(__module__); checknoargs(args...); esc(blockDim(__module__, args...)); end # dispatch on the package selected in the calling module


##
Expand All @@ -39,7 +39,7 @@ const THREADIDX_DOC = """
Return the thread ID in x, y and z dimension within the block. The thread ID in a specific dimension is commonly retrieved directly as in this example in x dimension: `@threadIdx().x`.
"""
@doc THREADIDX_DOC
macro threadIdx(args...) check_initialized(__module__); checknoargs(args...); esc(threadIdx(__module__, args...)); end # dispatch on the package selected in the calling module


##
Expand All @@ -49,7 +49,7 @@ const SYNCTHREADS_DOC = """
Synchronize the threads of the block: wait until all threads in the block have reached this point and all global and shared memory accesses made by these threads prior to the `sync_threads()` call are visible to all threads in the block.
"""
@doc SYNCTHREADS_DOC
macro sync_threads(args...) check_initialized(__module__); checknoargs(args...); esc(sync_threads(__module__, args...)); end # dispatch on the package selected in the calling module


##
Expand All @@ -63,7 +63,7 @@ When multiple shared memory arrays are created within a kernel, then all arrays
The amount of shared memory needs to be specified when launching the kernel (keyword argument `shmem`).
"""
@doc SHAREDMEM_DOC
macro sharedMem(args...) check_initialized(__module__); checkargs_sharedMem(args...); esc(sharedMem(__module__, args...)); end # dispatch on the package selected in the calling module


##
Expand All @@ -73,7 +73,7 @@ const PKSHOW_DOC = """
Call a macro analogue to `Base.@show`, compatible with the package for parallelization selected with [`@init_parallel_kernel`](@ref) (Base.@show for Threads and CUDA.@cushow for CUDA).
"""
@doc PKSHOW_DOC
macro pk_show(args...) check_initialized(__module__); esc(pk_show(__module__, args...)); end # package-aware analogue of Base.@show


##
Expand All @@ -83,15 +83,15 @@ const PKPRINTLN_DOC = """
Call a macro analogue to `Base.@println`, compatible with the package for parallelization selected with [`@init_parallel_kernel`](@ref) (Base.@println for Threads and CUDA.@cuprintln for CUDA).
"""
@doc PKPRINTLN_DOC
macro pk_println(args...) check_initialized(__module__); esc(pk_println(__module__, args...)); end # package-aware analogue of Base.println


##
macro return_value(args...) check_initialized(__module__); checksinglearg(args...); esc(return_value(args...)); end # NOTE: return_value itself is package-independent and takes no caller module


##
macro return_nothing(args...) check_initialized(__module__); checknoargs(args...); esc(return_nothing(args...)); end # NOTE: return_nothing itself is package-independent and takes no caller module


## ARGUMENT CHECKS
Expand All @@ -111,31 +111,31 @@ end

## FUNCTIONS FOR INDEXING AND DIMENSIONS

## Return the expression querying the grid size for the package selected in `caller`
## (overridable via the `package` keyword).
function gridDim(caller::Module, args...; package::Symbol=get_package(caller))
    if     (package == PKG_CUDA)    return :(CUDA.gridDim($(args...)))
    elseif (package == PKG_AMDGPU)  return :(AMDGPU.gridGroupDim($(args...)))
    elseif (package == PKG_THREADS) return :(ParallelStencil.ParallelKernel.@gridDim_cpu($(args...)))
    else @KeywordArgumentError("$ERRMSG_UNSUPPORTED_PACKAGE (obtained: $package).")
    end
end

## Return the expression querying the block ID for the package selected in `caller`
## (overridable via the `package` keyword).
function blockIdx(caller::Module, args...; package::Symbol=get_package(caller)) #NOTE: the CPU implementation relies on the fact that ranges are always of type UnitRange. If this changes, then this function needs to be adapted.
    if     (package == PKG_CUDA)    return :(CUDA.blockIdx($(args...)))
    elseif (package == PKG_AMDGPU)  return :(AMDGPU.workgroupIdx($(args...)))
    elseif (package == PKG_THREADS) return :(ParallelStencil.ParallelKernel.@blockIdx_cpu($(args...)))
    else @KeywordArgumentError("$ERRMSG_UNSUPPORTED_PACKAGE (obtained: $package).")
    end
end

## Return the expression querying the block size for the package selected in `caller`
## (overridable via the `package` keyword).
function blockDim(caller::Module, args...; package::Symbol=get_package(caller)) #NOTE: the CPU implementation follows the model that no threads are grouped into blocks, i.e. that each block contains only 1 thread (with thread ID 1). The parallelization happens only over the blocks.
    if     (package == PKG_CUDA)    return :(CUDA.blockDim($(args...)))
    elseif (package == PKG_AMDGPU)  return :(AMDGPU.workgroupDim($(args...)))
    elseif (package == PKG_THREADS) return :(ParallelStencil.ParallelKernel.@blockDim_cpu($(args...)))
    else @KeywordArgumentError("$ERRMSG_UNSUPPORTED_PACKAGE (obtained: $package).")
    end
end

function threadIdx(args...; package::Symbol=get_package()) #NOTE: the CPU implementation follows the model that no threads are grouped into blocks, i.e. that each block contains only 1 thread (with thread ID 1). The parallelization happens only over the blocks.
function threadIdx(caller::Module, args...; package::Symbol=get_package(caller)) #NOTE: the CPU implementation follows the model that no threads are grouped into blocks, i.e. that each block contains only 1 thread (with thread ID 1). The parallelization happens only over the blocks.
if (package == PKG_CUDA) return :(CUDA.threadIdx($(args...)))
elseif (package == PKG_AMDGPU) return :(AMDGPU.workitemIdx($(args...)))
elseif (package == PKG_THREADS) return :(ParallelStencil.ParallelKernel.@threadIdx_cpu($(args...)))
Expand All @@ -146,7 +146,7 @@ end

## FUNCTIONS FOR SYNCHRONIZATION

function sync_threads(args...; package::Symbol=get_package()) #NOTE: the CPU implementation follows the model that no threads are grouped into blocks, i.e. that each block contains only 1 thread (with thread ID 1). The parallelization happens only over the blocks. Synchronization within a block is therefore not needed (as it contains only one thread).
function sync_threads(caller::Module, args...; package::Symbol=get_package(caller)) #NOTE: the CPU implementation follows the model that no threads are grouped into blocks, i.e. that each block contains only 1 thread (with thread ID 1). The parallelization happens only over the blocks. Synchronization within a block is therefore not needed (as it contains only one thread).
if (package == PKG_CUDA) return :(CUDA.sync_threads($(args...)))
elseif (package == PKG_AMDGPU) return :(AMDGPU.sync_workgroup($(args...)))
elseif (package == PKG_THREADS) return :(ParallelStencil.ParallelKernel.@sync_threads_cpu($(args...)))
Expand All @@ -157,7 +157,7 @@ end

## FUNCTIONS FOR SHARED MEMORY ALLOCATION

function sharedMem(args...; package::Symbol=get_package())
function sharedMem(caller::Module, args...; package::Symbol=get_package(caller))
if (package == PKG_CUDA) return :(CUDA.@cuDynamicSharedMem($(args...)))
elseif (package == PKG_AMDGPU) return :(ParallelStencil.ParallelKernel.@sharedMem_amdgpu($(args...)))
elseif (package == PKG_THREADS) return :(ParallelStencil.ParallelKernel.@sharedMem_cpu($(args...)))
Expand All @@ -172,15 +172,15 @@ macro sharedMem_amdgpu(T, dims, offset) esc(:(ParallelStencil.ParallelKernel.@sh

## FUNCTIONS FOR PRINTING

## Return the expression for a `@show`-like call matching the package selected in `caller`
## (overridable via the `package` keyword; not yet available for AMDGPU).
function pk_show(caller::Module, args...; package::Symbol=get_package(caller))
    if     (package == PKG_CUDA)    return :(CUDA.@cushow($(args...)))
    elseif (package == PKG_AMDGPU)  @KeywordArgumentError("this functionality is not yet supported in AMDGPU.jl.")
    elseif (package == PKG_THREADS) return :(Base.@show($(args...)))
    else @KeywordArgumentError("$ERRMSG_UNSUPPORTED_PACKAGE (obtained: $package).")
    end
end

function pk_println(args...; package::Symbol=get_package())
function pk_println(caller::Module, args...; package::Symbol=get_package(caller))
if (package == PKG_CUDA) return :(CUDA.@cuprintln($(args...)))
elseif (package == PKG_AMDGPU) return :(AMDGPU.@rocprintln($(args...)))
elseif (package == PKG_THREADS) return :(Base.println($(args...)))
Expand Down
Loading
Loading