-
Notifications
You must be signed in to change notification settings - Fork 81
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add non-blocking stream creation (#498)
- Add creating a non-blocking stream based on a stream property value "nonblocking" - Current implementation applies to cuda and hip modes only - An example is added to demonstrate the usage
- Loading branch information
1 parent
5db15e0
commit 5f5ec0f
Showing
11 changed files
with
172 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
main | ||
main.o | ||
main_c |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
# Register the example executable through the project's helper macro.
compile_cpp_example_with_modes(nonblocking_streams main.cpp)

# Always-built helper target that copies the OKL kernel source to the relative
# path `powerOfPi.okl`, which is the path main.cpp passes to occa::buildKernel.
add_custom_target(cpp_example_nonblocking_streams_okl ALL
  COMMAND ${CMAKE_COMMAND} -E copy
          ${CMAKE_CURRENT_SOURCE_DIR}/powerOfPi.okl
          powerOfPi.okl)

# Make sure the kernel copy happens whenever the example itself is built.
add_dependencies(examples_cpp_nonblocking_streams cpp_example_nonblocking_streams_okl)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
|
||
# Directory containing this Makefile (as returned by $(dir ...), i.e. with a
# trailing slash).
PROJ_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))

# Pull in the shared OCCA build settings: from the source tree when OCCA_DIR
# is not set, otherwise from the given OCCA_DIR.
ifndef OCCA_DIR
  include $(PROJ_DIR)/../../../scripts/build/Makefile
else
  include ${OCCA_DIR}/scripts/build/Makefile
endif

#---[ COMPILATION ]-------------------------------
headers = $(wildcard $(incPath)/*.hpp) $(wildcard $(incPath)/*.tpp)
sources = $(wildcard $(srcPath)/*.cpp)

objects = $(subst $(srcPath)/,$(objPath)/,$(sources:.cpp=.o))

# `executables` and `clean` name actions, not files: declare them phony so a
# stray file with either name cannot make the rule look up to date.
.PHONY: executables clean

executables: ${PROJ_DIR}/main

# Link the example binary from main.cpp plus any objects built from $(srcPath).
${PROJ_DIR}/main: $(objects) $(headers) ${PROJ_DIR}/main.cpp
	$(compiler) $(compilerFlags) -o ${PROJ_DIR}/main $(flags) $(objects) ${PROJ_DIR}/main.cpp $(paths) $(linkerFlags)

# Compile each source file under $(srcPath) into an object under $(objPath).
$(objPath)/%.o:$(srcPath)/%.cpp $(wildcard $(subst $(srcPath)/,$(incPath)/,$(<:.cpp=.hpp))) $(wildcard $(subst $(srcPath)/,$(incPath)/,$(<:.cpp=.tpp)))
	$(compiler) $(compilerFlags) -o $@ $(flags) -c $(paths) $<

# Remove built objects and the example binary.
clean:
	rm -f $(objPath)/*;
	rm -f ${PROJ_DIR}/main;
#=================================================
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# Example: Non-blocking Streams | ||
|
||
GPU devices introduce `streams`, which potentially allow parallel queueing of instructions | ||
|
||
In particular, operations queued in streams that were created as non-blocking do not implicitly synchronize with the default stream | ||
|
||
This example shows how to set up `occa::stream` objects with the non-blocking property | ||
|
||
# Compiling the Example | ||
|
||
```bash | ||
make | ||
``` | ||
|
||
## Usage | ||
|
||
``` | ||
> ./main --help | ||
Usage: ./main [OPTIONS] | ||
Example showing the use of multiple non-blocking streams in a device | ||
Options: | ||
-d, --device Device properties (default: "{mode: 'CUDA', device_id: 0}") | ||
-h, --help Print usage | ||
-v, --verbose Compile kernels in verbose mode | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
#include <iostream> | ||
|
||
#include <occa.hpp> | ||
|
||
//---[ Internal Tools ]----------------- | ||
// Note: These headers are not officially supported | ||
// Please don't rely on them outside of the occa examples | ||
#include <occa/internal/utils/cli.hpp> | ||
//====================================== | ||
|
||
|
||
occa::json parseArgs(int argc, const char **argv); | ||
|
||
int main(int argc, const char **argv) { | ||
occa::json args = parseArgs(argc, argv); | ||
|
||
occa::setDevice(occa::json::parse(args["options/device"])); | ||
|
||
const int n_streams = 8; | ||
int entries = 1<<20; | ||
int block = 64; | ||
int group = 1; | ||
|
||
occa::memory o_x[n_streams]; | ||
occa::memory o_x_d = occa::malloc<float>(1); | ||
|
||
occa::json kernelProps({ | ||
{"defines/block", block}, | ||
{"defines/group", group}, | ||
}); | ||
occa::kernel powerOfPi = occa::buildKernel("powerOfPi.okl", | ||
"powerOfPi", | ||
kernelProps); | ||
|
||
occa::stream streams[n_streams]; | ||
occa::json streamProps({ | ||
{"nonblocking", true}, | ||
}); | ||
occa::stream default_stream = occa::getStream(); | ||
|
||
for (auto i = 0; i < n_streams; i++) { | ||
streams[i] = occa::createStream(streamProps); | ||
|
||
o_x[i] = occa::malloc<float>(entries); | ||
|
||
occa::setStream(streams[i]); | ||
|
||
powerOfPi(o_x[i], entries); | ||
|
||
occa::setStream(default_stream); | ||
|
||
powerOfPi(o_x_d, 1); | ||
} | ||
} | ||
|
||
/**
 * Parses the example's command-line options.
 *
 * Accepted options:
 *   -d, --device   Device properties (takes an argument; defaults to CUDA device 0)
 *   -v, --verbose  Compile kernels in verbose mode
 *
 * As a side effect, the parsed verbose option is stored in the global
 * `kernel/verbose` OCCA setting.
 *
 * @param argc Number of command-line arguments.
 * @param argv Command-line arguments.
 * @return The parsed arguments as produced by occa::cli::parser::parseArgs.
 */
occa::json parseArgs(int argc, const char **argv) {
  occa::cli::parser parser;
  parser
    .withDescription(
      // Kept in sync with the usage text shown in this example's README.
      "Example showing the use of multiple non-blocking streams in a device"
    )
    .addOption(
      occa::cli::option('d', "device",
                        "Device properties (default: \"{mode: 'CUDA', device_id: 0}\")")
      .withArg()
      .withDefaultValue("{mode: 'CUDA', device_id: 0}")
    )
    .addOption(
      occa::cli::option('v', "verbose",
                        "Compile kernels in verbose mode")
    );

  occa::json args = parser.parseArgs(argc, argv);
  occa::settings()["kernel/verbose"] = args["options/verbose"];

  return args;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
// Fills x[idx] with sqrt(pow(3.14159, idx)) for every idx in [0, entries).
// `group` and `block` are not parameters: they must be provided as
// compile-time defines when the kernel is built.
@kernel void powerOfPi(float* x,
                       int entries) {
  for (int groupId = 0; groupId < group; groupId++; @outer) {
    for (int lane = 0; lane < block; ++lane; @inner) {
      // Each (groupId, lane) pair starts at its own offset and strides over
      // the array by the total number of pairs.
      const int first  = lane + groupId * block;
      const int stride = block * group;
      for (int idx = first; idx < entries; idx += stride) {
        x[idx] = sqrt(pow(3.14159, idx));
      }
    }
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters