Skip to content

Commit

Permalink
Add non-blocking stream creation (#498)
Browse files Browse the repository at this point in the history
- Add creating a non-blocking stream based on a stream property value "nonblocking"
- Current implementation applies to cuda and hip modes only
- An example is added to demonstrate the usage
  • Loading branch information
deukhyun-cha committed Dec 6, 2021
1 parent 5db15e0 commit 5f5ec0f
Show file tree
Hide file tree
Showing 11 changed files with 172 additions and 7 deletions.
3 changes: 3 additions & 0 deletions examples/cpp/18_nonblocking_streams/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
main
main.o
main_c
4 changes: 4 additions & 0 deletions examples/cpp/18_nonblocking_streams/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Register main.cpp as the "nonblocking_streams" example.
# NOTE(review): compile_cpp_example_with_modes is defined elsewhere in the project;
# presumably it creates the examples_cpp_nonblocking_streams target used below - confirm.
compile_cpp_example_with_modes(nonblocking_streams main.cpp)

# Part of the default build (ALL): copy powerOfPi.okl from this source directory
# to a relative "powerOfPi.okl" path, since main.cpp loads the kernel by that
# relative file name.
add_custom_target(cpp_example_nonblocking_streams_okl ALL COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/powerOfPi.okl powerOfPi.okl)
# Ensure the kernel source has been copied before the example target is built.
add_dependencies(examples_cpp_nonblocking_streams cpp_example_nonblocking_streams_okl)
27 changes: 27 additions & 0 deletions examples/cpp/18_nonblocking_streams/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@

# Directory containing this Makefile, derived from the last entry of MAKEFILE_LIST.
PROJ_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))

# Include the shared OCCA build Makefile: taken from OCCA_DIR when that variable
# is defined, otherwise from the scripts/build directory three levels above
# this example.
ifndef OCCA_DIR
include $(PROJ_DIR)/../../../scripts/build/Makefile
else
include ${OCCA_DIR}/scripts/build/Makefile
endif

#---[ COMPILATION ]-------------------------------
# NOTE(review): incPath, srcPath, objPath, compiler, compilerFlags, flags, paths
# and linkerFlags are not set in this file; presumably they are provided by the
# included build Makefile - confirm.
headers = $(wildcard $(incPath)/*.hpp) $(wildcard $(incPath)/*.tpp)
sources = $(wildcard $(srcPath)/*.cpp)

# One object file under objPath for every .cpp found under srcPath.
objects = $(subst $(srcPath)/,$(objPath)/,$(sources:.cpp=.o))

# First (default) goal: build the example binary.
executables: ${PROJ_DIR}/main

# Compile main.cpp together with the objects into the example executable.
${PROJ_DIR}/main: $(objects) $(headers) ${PROJ_DIR}/main.cpp
$(compiler) $(compilerFlags) -o ${PROJ_DIR}/main $(flags) $(objects) ${PROJ_DIR}/main.cpp $(paths) $(linkerFlags)

# Pattern rule: build each object from its .cpp source; matching .hpp/.tpp
# headers are added as prerequisites when they exist.
$(objPath)/%.o:$(srcPath)/%.cpp $(wildcard $(subst $(srcPath)/,$(incPath)/,$(<:.cpp=.hpp))) $(wildcard $(subst $(srcPath)/,$(incPath)/,$(<:.cpp=.tpp)))
$(compiler) $(compilerFlags) -o $@ $(flags) -c $(paths) $<

# Remove everything under objPath and the example executable.
clean:
rm -f $(objPath)/*;
rm -f ${PROJ_DIR}/main;
#=================================================
28 changes: 28 additions & 0 deletions examples/cpp/18_nonblocking_streams/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Example: Non-blocking Streams

GPU devices introduce `streams`, which potentially allow parallel queueing of instructions

In particular, operations queued in streams that were created as non-blocking are not implicitly synchronized with the default stream.

This example shows how to set up `occa::stream` objects with the `nonblocking` property.

# Compiling the Example

```bash
make
```

## Usage

```
> ./main --help
Usage: ./main [OPTIONS]
Example showing the use of multiple non-blocking streams in a device
Options:
-d, --device Device properties (default: "{mode: 'CUDA', device_id: 0}")
-h, --help Print usage
-v, --verbose Compile kernels in verbose mode
```
77 changes: 77 additions & 0 deletions examples/cpp/18_nonblocking_streams/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#include <iostream>

#include <occa.hpp>

//---[ Internal Tools ]-----------------
// Note: These headers are not officially supported
// Please don't rely on it outside of the occa examples
#include <occa/internal/utils/cli.hpp>
//======================================


occa::json parseArgs(int argc, const char **argv);

/**
 * Demonstrates non-blocking streams.
 *
 * Creates `n_streams` streams with the "nonblocking" property, launches the
 * powerOfPi kernel once on each of them, and interleaves a small launch on the
 * default stream after every one. Before returning, every stream that received
 * work is synchronized explicitly so the program never exits with kernels still
 * in flight.
 *
 * @param argc Number of command-line arguments.
 * @param argv Command-line arguments (see parseArgs for the supported options).
 * @return 0 on success.
 */
int main(int argc, const char **argv) {
  occa::json args = parseArgs(argc, argv);

  occa::setDevice(occa::json::parse(args["options/device"]));

  const int n_streams = 8;
  const int entries = 1<<20;
  // block and group are forwarded to the kernel as compile-time defines
  const int block = 64;
  const int group = 1;

  // One buffer per non-blocking stream, plus a single-entry buffer that is
  // written by the launches on the default stream
  occa::memory o_x[n_streams];
  occa::memory o_x_d = occa::malloc<float>(1);

  occa::json kernelProps({
    {"defines/block", block},
    {"defines/group", group},
  });
  occa::kernel powerOfPi = occa::buildKernel("powerOfPi.okl",
                                             "powerOfPi",
                                             kernelProps);

  occa::stream streams[n_streams];
  occa::json streamProps({
    {"nonblocking", true},
  });
  // Remember the stream that was current on entry so it can be re-selected
  occa::stream default_stream = occa::getStream();

  for (int i = 0; i < n_streams; ++i) {
    streams[i] = occa::createStream(streamProps);

    o_x[i] = occa::malloc<float>(entries);

    // Large launch on the i-th non-blocking stream
    occa::setStream(streams[i]);

    powerOfPi(o_x[i], entries);

    // Small launch on the default stream, queued right after it
    occa::setStream(default_stream);

    powerOfPi(o_x_d, 1);
  }

  // Non-blocking streams do not synchronize implicitly with the default
  // stream, so wait on each of them in turn.
  // NOTE(review): assumes occa::finish() waits on the currently selected
  // stream only - confirm against the backend in use.
  for (int i = 0; i < n_streams; ++i) {
    occa::setStream(streams[i]);
    occa::finish();
  }
  occa::setStream(default_stream);
  occa::finish();

  return 0;
}

/**
 * Parses the example's command-line options.
 *
 * Supported options:
 *   -d, --device   Device properties as a JSON-like string
 *                  (default: "{mode: 'CUDA', device_id: 0}")
 *   -v, --verbose  Compile kernels in verbose mode
 *
 * As a side effect, the "kernel/verbose" OCCA setting is set from the
 * --verbose option.
 *
 * @param argc Number of command-line arguments.
 * @param argv Command-line arguments.
 * @return The parsed arguments; option values are stored under "options/<name>".
 */
occa::json parseArgs(int argc, const char **argv) {
  occa::cli::parser parser;
  parser
    .withDescription(
      // Kept in sync with the usage text shown in this example's README
      "Example showing the use of multiple non-blocking streams in a device"
    )
    .addOption(
      occa::cli::option('d', "device",
                        "Device properties (default: \"{mode: 'CUDA', device_id: 0}\")")
      .withArg()
      .withDefaultValue("{mode: 'CUDA', device_id: 0}")
    )
    .addOption(
      occa::cli::option('v', "verbose",
                        "Compile kernels in verbose mode")
    );

  occa::json args = parser.parseArgs(argc, argv);
  occa::settings()["kernel/verbose"] = args["options/verbose"];

  return args;
}
10 changes: 10 additions & 0 deletions examples/cpp/18_nonblocking_streams/powerOfPi.okl
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// Fills x[j] with sqrt(pow(3.14159, j)) for every j in [0, entries).
// `group` and `block` are not parameters: they must be supplied as defines
// when the kernel is built. Each (outer, inner) pair starts at its own offset
// and advances by block*group until `entries` is reached.
@kernel void powerOfPi(float* x,
                       int entries) {
  for (int outerId = 0; outerId < group; ++outerId; @outer) {
    for (int innerId = 0; innerId < block; ++innerId; @inner) {
      const int first = innerId + outerId * block;
      const int stride = block * group;
      for (int idx = first; idx < entries; idx += stride) {
        x[idx] = sqrt(pow(3.14159, idx));
      }
    }
  }
}
6 changes: 3 additions & 3 deletions examples/cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ add_subdirectory(12_native_opencl_kernels)
add_subdirectory(13_openmp_interop)
add_subdirectory(14_cuda_interop)

add_subdirectory(18_nonblocking_streams)
add_subdirectory(20_native_dpcpp_kernel)

# Don't force-compile OpenGL examples
# add_subdirectory(15_finite_difference)
# add_subdirectory(16_mandelbulb)

# add_subdirectory(16_finite_difference)
# add_subdirectory(17_mandelbulb)
9 changes: 7 additions & 2 deletions src/occa/internal/modes/cuda/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,13 @@ namespace occa {

setCudaContext();

OCCA_CUDA_ERROR("Device: createStream",
cuStreamCreate(&cuStream, CU_STREAM_DEFAULT));
if (props.get<bool>("nonblocking", false)) {
OCCA_CUDA_ERROR("Device: createStream - NonBlocking",
cuStreamCreate(&cuStream, CU_STREAM_NON_BLOCKING));
} else {
OCCA_CUDA_ERROR("Device: createStream",
cuStreamCreate(&cuStream, CU_STREAM_DEFAULT));
}

return new stream(this, props, cuStream);
}
Expand Down
1 change: 1 addition & 0 deletions src/occa/internal/modes/cuda/polyfill.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ namespace occa {
static const int CU_MEM_ATTACH_GLOBAL = 0;
static const int CU_MEM_ATTACH_HOST = 0;
static const int CU_STREAM_DEFAULT = 0;
static const int CU_STREAM_NON_BLOCKING = 0;
static const int CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 0;
static const int CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 0;

Expand Down
9 changes: 7 additions & 2 deletions src/occa/internal/modes/hip/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,13 @@ namespace occa {

OCCA_HIP_ERROR("Device: Setting Device",
hipSetDevice(deviceID));
OCCA_HIP_ERROR("Device: createStream",
hipStreamCreate(&hipStream));
if (props.get<bool>("nonblocking", false)) {
OCCA_HIP_ERROR("Device: createStream - NonBlocking",
hipStreamCreateWithFlag(&hipStream, hipStreamNonBlocking));
} else {
OCCA_HIP_ERROR("Device: createStream",
hipStreamCreate(&hipStream));
}

return new stream(this, props, hipStream);
}
Expand Down
5 changes: 5 additions & 0 deletions src/occa/internal/modes/hip/polyfill.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ namespace occa {
static const int HIP_LAUNCH_PARAM_BUFFER_POINTER = 0;
static const int HIP_LAUNCH_PARAM_BUFFER_SIZE = 0;
static const int HIP_LAUNCH_PARAM_END = 0;
static const int hipStreamNonBlocking = 0;

class hipDeviceProp_t {
public:
Expand Down Expand Up @@ -274,6 +275,10 @@ namespace occa {
return OCCA_HIP_IS_NOT_ENABLED;
}

// Stub matching the name of the real HIP runtime call, hipStreamCreateWithFlags.
// Ignores its arguments and always reports that HIP is not enabled.
inline hipError_t hipStreamCreateWithFlags(hipStream_t *phStream, unsigned int flags) {
  return OCCA_HIP_IS_NOT_ENABLED;
}

// Backward-compatible alias for callers that use the name without the trailing
// "s". The HIP runtime itself only provides hipStreamCreateWithFlags, so new
// code should call that name instead.
inline hipError_t hipStreamCreateWithFlag(hipStream_t *phStream, unsigned int flags) {
  return hipStreamCreateWithFlags(phStream, flags);
}

// Stub for hipStreamDestroy: ignores hStream and always returns
// OCCA_HIP_IS_NOT_ENABLED.
// NOTE(review): presumably only compiled when OCCA is built without HIP - confirm.
inline hipError_t hipStreamDestroy(hipStream_t hStream) {
return OCCA_HIP_IS_NOT_ENABLED;
}
Expand Down

0 comments on commit 5f5ec0f

Please sign in to comment.