From 3612b5da05b56a4daebe814f690c12bf1c54624e Mon Sep 17 00:00:00 2001 From: Deukhyun Cha <55857168+deukhyun-cha@users.noreply.github.com> Date: Wed, 6 Dec 2023 12:18:27 -0600 Subject: [PATCH] Update cmake to generate kernel-arguments-related codes only if necessary. Add an option to only generate jit kernel related code for the new number of kernel arguments and leave others to the default value for compile time efficiency (#725) --- CMakeLists.txt | 20 ++-------- Makefile | 8 ++++ cmake/CodeGen.cmake | 37 +++++++++++++++++++ include/occa/core/kernel.hpp | 2 +- include/occa/defines/macros.hpp | 2 +- scripts/build/compiledDefinesTemplate.hpp.in | 3 ++ .../codegen/kernelOperators.cpp_codegen.in | 0 .../codegen/kernelOperators.hpp_codegen.in | 0 .../codegen/macros.hpp_codegen.in | 0 .../codegen/runFunction.cpp_codegen.in | 0 scripts/codegen/setup_kernel_operators.py | 18 +++++---- src/core/kernel.cpp | 2 +- src/occa/internal/utils/sys.cpp | 2 +- 13 files changed, 67 insertions(+), 27 deletions(-) create mode 100644 cmake/CodeGen.cmake rename src/core/kernelOperators.cpp_codegen => scripts/codegen/kernelOperators.cpp_codegen.in (100%) rename include/occa/core/kernelOperators.hpp_codegen => scripts/codegen/kernelOperators.hpp_codegen.in (100%) rename include/occa/defines/macros.hpp_codegen => scripts/codegen/macros.hpp_codegen.in (100%) rename src/occa/internal/utils/runFunction.cpp_codegen => scripts/codegen/runFunction.cpp_codegen.in (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index a0710002e..0415f6168 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -44,29 +44,17 @@ endif() option(ENABLE_SHARABLE_DEVICE "Enable sharable device by multiple threads" OFF) if (ENABLE_SHARABLE_DEVICE) - add_compile_definitions(OCCA_THREAD_SHARABLE_ENABLED=1) + set(OCCA_THREAD_SHARABLE_ENABLED 1) message("-- OCCA sharable by multi-threads : Enabled") else() - add_compile_definitions(OCCA_THREAD_SHARABLE_ENABLED=0) + set(OCCA_THREAD_SHARABLE_ENABLED 0) endif() 
-set(MAX_NUM_KERNEL_ARGS_DEFAULT "128") -set(MAX_NUM_KERNEL_ARGS ${MAX_NUM_KERNEL_ARGS_DEFAULT} CACHE STRING "The maximum number of allowed kernel arguments") -if (${MAX_NUM_KERNEL_ARGS} GREATER ${MAX_NUM_KERNEL_ARGS_DEFAULT}) - execute_process(COMMAND python --version OUTPUT_VARIABLE python_version) - string(REGEX MATCH "[0-9.]\+" python_version ${python_version}) - if ("${python_version}" VERSION_LESS "3.7.2") - message(WARNING "-- Failed to set the maximum number of kernel arguments to ${MAX_NUM_KERNEL_ARGS}, required minimum python version 3.7.2. The default value ${MAX_NUM_KERNEL_ARGS_DEFAULT} will be used.") - else() - message("-- Codegen for the maximum number of kernel arguments : ${MAX_NUM_KERNEL_ARGS}") - execute_process(COMMAND ${CMAKE_COMMAND} -E env OCCA_DIR=${CMAKE_CURRENT_SOURCE_DIR} python ${CMAKE_CURRENT_SOURCE_DIR}/scripts/codegen/setup_kernel_operators.py -N ${MAX_NUM_KERNEL_ARGS}) - endif() -endif() -add_compile_definitions(OCCA_MAX_ARGS=${MAX_NUM_KERNEL_ARGS}) - set(OCCA_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) set(OCCA_BUILD_DIR ${CMAKE_BINARY_DIR}) +include(CodeGen) + set(OCCA_USING_VS OFF) set(OCCA_UNSAFE OFF) diff --git a/Makefile b/Makefile index 447f708c3..d2ae06869 100644 --- a/Makefile +++ b/Makefile @@ -80,6 +80,14 @@ else endif #================================================= +PLACE_GENERATED_CODES := $(shell mkdir -p $(OCCA_DIR)/include/occa/core/codegen | \ + mkdir -p $(OCCA_DIR)/src/core/codegen | \ + mkdir -p $(OCCA_DIR)/src/occa/internal/utils/codegen | \ + mkdir -p $(OCCA_DIR)/include/occa/defines/codegen) +PLACE_GENERATED_CODES := $(shell cp $(OCCA_DIR)/scripts/codegen/kernelOperators.hpp_codegen.in $(OCCA_DIR)/include/occa/core/codegen/kernelOperators.hpp_codegen | \ + cp $(OCCA_DIR)/scripts/codegen/kernelOperators.cpp_codegen.in $(OCCA_DIR)/src/core/codegen/kernelOperators.cpp_codegen | \ + cp $(OCCA_DIR)/scripts/codegen/runFunction.cpp_codegen.in $(OCCA_DIR)/src/occa/internal/utils/codegen/runFunction.cpp_codegen | \ + cp 
$(OCCA_DIR)/scripts/codegen/macros.hpp_codegen.in $(OCCA_DIR)/include/occa/defines/codegen/macros.hpp_codegen) #---[ Compile Library ]--------------------------- # Setup compiled defines and force rebuild if defines changed diff --git a/cmake/CodeGen.cmake b/cmake/CodeGen.cmake new file mode 100644 index 000000000..a771fc55b --- /dev/null +++ b/cmake/CodeGen.cmake @@ -0,0 +1,37 @@ +## Generated code for handling kernel arguments +# Configure default/pre-generated files +# Check if those files need to be re-generated (NOTE(review): the old-python warning branch below runs no configure_file copy of the defaults; confirm the build still finds the codegen files) +option(KEEP_DEFAULT_MAX_NUM_INLINE_KERNEL_ARGS "Skip code generations for the given maximum number of arguments on inline kernels, use default instead" OFF) +set(MAX_NUM_KERNEL_ARGS_DEFAULT "128") +set(MAX_NUM_KERNEL_ARGS ${MAX_NUM_KERNEL_ARGS_DEFAULT} CACHE STRING "The maximum number of allowed kernel arguments") +set(OCCA_MAX_ARGS ${MAX_NUM_KERNEL_ARGS_DEFAULT} CACHE STRING "The maximum number of allowed kernel arguments stored to use, do not change this value directly, update MAX_NUM_KERNEL_ARGS instead") +if (NOT ${MAX_NUM_KERNEL_ARGS} EQUAL ${OCCA_MAX_ARGS}) + execute_process(COMMAND python --version OUTPUT_VARIABLE python_version) + string(REGEX MATCH "[0-9.]\+" python_version ${python_version}) + if ("${python_version}" VERSION_LESS "3.7.2") + message(WARNING "-- Failed to set the maximum number of kernel arguments to ${MAX_NUM_KERNEL_ARGS}, required minimum python version 3.7.2. 
The default value ${MAX_NUM_KERNEL_ARGS_DEFAULT} will be used.") + else() + message("-- Codegen for the maximum number of kernel arguments : ${MAX_NUM_KERNEL_ARGS}") + if (KEEP_DEFAULT_MAX_NUM_INLINE_KERNEL_ARGS AND ${MAX_NUM_KERNEL_ARGS} GREATER ${MAX_NUM_KERNEL_ARGS_DEFAULT}) + execute_process(COMMAND ${CMAKE_COMMAND} -E env OCCA_DIR=${OCCA_BUILD_DIR} python ${OCCA_SOURCE_DIR}/scripts/codegen/setup_kernel_operators.py -N ${MAX_NUM_KERNEL_ARGS} --skipInline) + else() + execute_process(COMMAND ${CMAKE_COMMAND} -E env OCCA_DIR=${OCCA_BUILD_DIR} python ${OCCA_SOURCE_DIR}/scripts/codegen/setup_kernel_operators.py -N ${MAX_NUM_KERNEL_ARGS}) + endif() + set(OCCA_MAX_ARGS ${MAX_NUM_KERNEL_ARGS} CACHE STRING "The maximum number of allowed kernel arguments stored to use, do not change this value directly, update MAX_NUM_KERNEL_ARGS instead" FORCE) + endif() +else() + if (${OCCA_MAX_ARGS} EQUAL ${MAX_NUM_KERNEL_ARGS_DEFAULT}) + configure_file(${OCCA_SOURCE_DIR}/scripts/codegen/kernelOperators.hpp_codegen.in ${OCCA_BUILD_DIR}/include/codegen/kernelOperators.hpp_codegen COPYONLY) + configure_file(${OCCA_SOURCE_DIR}/scripts/codegen/kernelOperators.cpp_codegen.in ${OCCA_BUILD_DIR}/include/codegen/kernelOperators.cpp_codegen COPYONLY) + configure_file(${OCCA_SOURCE_DIR}/scripts/codegen/runFunction.cpp_codegen.in ${OCCA_BUILD_DIR}/include/codegen/runFunction.cpp_codegen COPYONLY) + configure_file(${OCCA_SOURCE_DIR}/scripts/codegen/macros.hpp_codegen.in ${OCCA_BUILD_DIR}/include/codegen/macros.hpp_codegen COPYONLY) + endif() +endif() + +# Install the generated files that the public headers kernel.hpp and macros.hpp include +install( + FILES ${OCCA_BUILD_DIR}/include/codegen/kernelOperators.hpp_codegen + DESTINATION include/occa/core/codegen) +install( + FILES ${OCCA_BUILD_DIR}/include/codegen/macros.hpp_codegen + DESTINATION include/occa/defines/codegen) diff --git a/include/occa/core/kernel.hpp b/include/occa/core/kernel.hpp index 8085462bc..7f9e9511e 100644 --- 
a/include/occa/core/kernel.hpp +++ 
b/include/occa/core/kernel.hpp @@ -296,7 +296,7 @@ namespace occa { */ void operator () () const; -#include "kernelOperators.hpp_codegen" +#include "codegen/kernelOperators.hpp_codegen" /** * @startDoc{free} diff --git a/include/occa/defines/macros.hpp b/include/occa/defines/macros.hpp index 8a5acd2a5..8dbd88f54 100644 --- a/include/occa/defines/macros.hpp +++ b/include/occa/defines/macros.hpp @@ -24,7 +24,7 @@ // Just in case someone wants to run with an older format than C99 #ifndef OCCA_DISABLE_VARIADIC_MACROS -#include "macros.hpp_codegen" +#include "codegen/macros.hpp_codegen" #endif // OCCA_DISABLE_VARIADIC_MACROS diff --git a/scripts/build/compiledDefinesTemplate.hpp.in b/scripts/build/compiledDefinesTemplate.hpp.in index 25c9ceb9d..75be10df7 100644 --- a/scripts/build/compiledDefinesTemplate.hpp.in +++ b/scripts/build/compiledDefinesTemplate.hpp.in @@ -28,6 +28,9 @@ #cmakedefine01 OCCA_METAL_ENABLED #cmakedefine01 OCCA_DPCPP_ENABLED +#cmakedefine01 OCCA_THREAD_SHARABLE_ENABLED +#cmakedefine OCCA_MAX_ARGS @OCCA_MAX_ARGS@ + #cmakedefine OCCA_SOURCE_DIR "@OCCA_SOURCE_DIR@" #cmakedefine OCCA_BUILD_DIR "@OCCA_BUILD_DIR@" diff --git a/src/core/kernelOperators.cpp_codegen b/scripts/codegen/kernelOperators.cpp_codegen.in similarity index 100% rename from src/core/kernelOperators.cpp_codegen rename to scripts/codegen/kernelOperators.cpp_codegen.in diff --git a/include/occa/core/kernelOperators.hpp_codegen b/scripts/codegen/kernelOperators.hpp_codegen.in similarity index 100% rename from include/occa/core/kernelOperators.hpp_codegen rename to scripts/codegen/kernelOperators.hpp_codegen.in diff --git a/include/occa/defines/macros.hpp_codegen b/scripts/codegen/macros.hpp_codegen.in similarity index 100% rename from include/occa/defines/macros.hpp_codegen rename to scripts/codegen/macros.hpp_codegen.in diff --git a/src/occa/internal/utils/runFunction.cpp_codegen b/scripts/codegen/runFunction.cpp_codegen.in similarity index 100% rename from 
src/occa/internal/utils/runFunction.cpp_codegen rename to scripts/codegen/runFunction.cpp_codegen.in diff --git a/scripts/codegen/setup_kernel_operators.py b/scripts/codegen/setup_kernel_operators.py index 4d2af6911..41ec7f8e3 100755 --- a/scripts/codegen/setup_kernel_operators.py +++ b/scripts/codegen/setup_kernel_operators.py @@ -31,7 +31,10 @@ def to_file(filename): def inner_to_file(func): @functools.wraps(func) def cached_func(*args, **kwargs): - with open(OCCA_DIR + '/' + filename, 'w') as f: + filepath = OCCA_DIR + '/' + filename + dirpath = os.path.dirname(os.path.abspath(filepath)) + os.makedirs(dirpath, exist_ok=True) + with open(filepath, 'w') as f: content = func(*args, **kwargs) f.write(EDIT_WARNING + '\n\n'); f.write(content + '\n') @@ -74,7 +77,7 @@ def array_args(N, indent): return content -@to_file('src/occa/internal/utils/runFunction.cpp_codegen') +@to_file('include/codegen/runFunction.cpp_codegen') def run_function_from_arguments(N): content = '\nswitch (argc) {\n' for n in range(N + 1): @@ -101,7 +104,7 @@ def run_function_from_argument(N): return content -@to_file('include/occa/core/kernelOperators.hpp_codegen') +@to_file('include/codegen/kernelOperators.hpp_codegen') def operator_declarations(N): # We manually define the 0-argument kernel for documentation purposes return '\n\n'.join( @@ -118,7 +121,7 @@ def operator_declaration(N): return content -@to_file('src/core/kernelOperators.cpp_codegen') +@to_file('include/codegen/kernelOperators.cpp_codegen') def operator_definitions(N): return '\n'.join( operator_definition(n) for n in range(N + 1) @@ -173,7 +176,7 @@ def macro_count(N): content += '0)\n' return content -@to_file('include/occa/defines/macros.hpp_codegen') +@to_file('include/codegen/macros.hpp_codegen') def macro_declarations(N): return ''.join( macro_count2(N) + '\n' + macro_count(N) @@ -182,10 +185,11 @@ def macro_declarations(N): if __name__ == '__main__': parser = argparse.ArgumentParser(usage=__doc__) 
parser.add_argument("-N","--NargsMax", type=int, default=MAX_ARGS) + parser.add_argument("--skipInline", action='store_true') args = parser.parse_args() - MAX_ARGS = args.NargsMax - run_function_from_arguments(MAX_ARGS) + run_function_from_arguments(args.NargsMax) + MAX_ARGS = MAX_ARGS if args.skipInline else args.NargsMax operator_declarations(MAX_ARGS) operator_definitions(MAX_ARGS) macro_declarations(MAX_ARGS) diff --git a/src/core/kernel.cpp b/src/core/kernel.cpp index dd0fc7cab..c03578eb4 100644 --- a/src/core/kernel.cpp +++ b/src/core/kernel.cpp @@ -189,7 +189,7 @@ namespace occa { self.run(); } -#include "kernelOperators.cpp_codegen" +#include "codegen/kernelOperators.cpp_codegen" void kernel::free() { // ~modeKernel_t NULLs all wrappers diff --git a/src/occa/internal/utils/sys.cpp b/src/occa/internal/utils/sys.cpp index 01d6b0edb..346a37024 100644 --- a/src/occa/internal/utils/sys.cpp +++ b/src/occa/internal/utils/sys.cpp @@ -992,7 +992,7 @@ namespace occa { } void runFunction(functionPtr_t f, const int argc, void **args) { -#include "runFunction.cpp_codegen" +#include "codegen/runFunction.cpp_codegen" } std::string stacktrace(const int frameStart,