diff --git a/Publications/GPU-Opt-Guide/CMakeLists.txt b/Publications/GPU-Opt-Guide/CMakeLists.txt index dc97dd645b..9a5653bfef 100644 --- a/Publications/GPU-Opt-Guide/CMakeLists.txt +++ b/Publications/GPU-Opt-Guide/CMakeLists.txt @@ -2,29 +2,33 @@ cmake_minimum_required(VERSION 3.21) option(BUILD_FORTRAN_EXAMPLES "Whether to build fortran examples" ON) set(CMAKE_C_COMPILER icx) set(CMAKE_CXX_COMPILER icpx) +set(_languages C CXX) + if (BUILD_FORTRAN_EXAMPLES) + set(_languages ${_languages} Fortran) set(CMAKE_Fortran_COMPILER ifx) endif() -project(GPUOptGuide) -set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) -include(CheckLanguage) -enable_testing() +project(GPUOptGuide + LANGUAGES ${_languages} + DESCRIPTION "Code examples from Intel GPU Optimization guide") -find_package(IntelDPCPP REQUIRED) +set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) -if (BUILD_FOTRAN_EXAMPLES) - check_language(Fortran) - if(CMAKE_Fortran_COMPILER) - enable_language(Fortran) - else() - message(FATAL_ERROR "No Fortran support detected, but Fortran tests were requested. Install oneAPI HPC Toolkit.") - endif() -endif() +find_package(IntelSYCL REQUIRED) set(MKL_THREADING tbb_thread) +set(MKL_INTERFACE "ilp64") set(DPCPP_COMPILER ON) + +set(MKL_VERSION_2024 FALSE) +find_package(MKL QUIET) +if(MKL_FOUND) + if(MKL_VERSION VERSION_GREATER_EQUAL "2024.0.0") + set(MKL_VERSION_2024 TRUE) + endif() +endif() find_package(MKL REQUIRED) string(CONCAT WARNING_CXX_FLAGS_STR @@ -39,49 +43,77 @@ string(CONCAT WARNING_CXX_FLAGS_STR string(REPLACE " " ";" COMMON_CXX_FLAGS "${WARNING_CXX_FLAGS_STR}") function(add_example_with_mkl name) - add_executable(${name} ${name}.cpp) + set(_sources ${name}.cpp) + add_executable(${name} ${_sources}) + add_sycl_to_target(TARGET ${name} SOURCES ${_sources}) target_compile_options(${name} PRIVATE ${COMMON_CXX_FLAGS}) - target_compile_options(${name} PRIVATE -fiopenmp -fopenmp-targets=spir64 -qmkl) - target_link_libraries(${name} PRIVATE MKL::MKL_DPCPP) - target_link_options(${name} PRIVATE -fiopenmp -fopenmp-targets=spir64 -qmkl -lOpenCL) + if (MKL_VERSION_2024) + target_link_libraries(${name} PUBLIC MKL::MKL_SYCL) + else() + target_link_libraries(${name} PUBLIC MKL::MKL_DPCPP) + endif() add_test(NAME ${name} COMMAND ${name} ${ARGN}) endfunction(add_example_with_mkl) function(add_fortran_example_with_mkl name) if(CMAKE_Fortran_COMPILER) - add_executable(${name} ${name}.f) + set(_sources ${name}.f) + add_executable(${name} ${_sources}) + add_sycl_to_target(TARGET ${name} SOURCES ${_sources}) target_compile_options(${name} PRIVATE -warn all) - target_compile_options(${name} PRIVATE -fiopenmp -fopenmp-targets=spir64 -qmkl -fpp -free) + target_compile_options(${name} PRIVATE -fpp -free) set_target_properties(${name} PROPERTIES LINKER_LANGUAGE Fortran) - target_link_libraries(${name} PUBLIC MKL::MKL_DPCPP) - target_link_options(${name} PRIVATE -fiopenmp -fopenmp-targets=spir64 -qmkl -lOpenCL) + if (MKL_VERSION_2024) + target_link_libraries(${name} PUBLIC MKL::MKL_SYCL) + else() + target_link_libraries(${name} PUBLIC MKL::MKL_DPCPP) + endif() add_test(NAME ${name} COMMAND ${name} ${ARGN}) endif() endfunction(add_fortran_example_with_mkl) function(add_example name) - add_executable(${name} ${name}.cpp) + set(_sources ${name}.cpp) + add_executable(${name} ${_sources}) + add_sycl_to_target(TARGET ${name} SOURCES ${_sources}) target_compile_options(${name} PRIVATE ${COMMON_CXX_FLAGS}) target_link_options(${name} PRIVATE -fsycl-device-code-split=per_kernel) add_test(NAME ${name} COMMAND ${name} ${ARGN}) endfunction(add_example) +function(add_openmp_example name) + set(_sources ${name}.cpp) + add_executable(${name} ${_sources}) + target_compile_options(${name} PRIVATE ${COMMON_CXX_FLAGS}) + add_test(NAME ${name} COMMAND ${name} ${ARGN}) +endfunction(add_openmp_example) + function(add_fortran_example name) if(CMAKE_Fortran_COMPILER) - add_executable(${name} ${name}.f90) + set(_sources ${name}.f90) + add_executable(${name} ${_sources}) + add_sycl_to_target(TARGET ${name} SOURCES ${_sources}) target_compile_options(${name} PRIVATE -warn all) - target_compile_options(${name} PRIVATE -fiopenmp -fopenmp-targets=spir64) set_target_properties(${name} PROPERTIES LINKER_LANGUAGE Fortran) - target_link_options(${name} PRIVATE -fiopenmp -fopenmp-targets=spir64) add_test(NAME ${name} COMMAND ${name} ${ARGN}) endif() endfunction(add_fortran_example) +function(add_fixed_fortran_example name) + if(CMAKE_Fortran_COMPILER) + set(_sources ${name}.f) + add_executable(${name} ${_sources}) + target_compile_options(${name} PRIVATE -warn all) + set_target_properties(${name} PROPERTIES LINKER_LANGUAGE Fortran) + add_test(NAME ${name} COMMAND ${name} ${ARGN}) + endif() +endfunction(add_fixed_fortran_example) + function(add_mpi_example name) if(MPI_FOUND) - add_executable(${name} ${name}.cpp) - target_compile_options(${name} PRIVATE -O3 -fiopenmp -fopenmp-targets=spir64) - target_link_options(${name} PRIVATE -O3 -fiopenmp -fopenmp-targets=spir64) + set(_sources ${name}.cpp) + add_executable(${name} ${_sources}) + add_sycl_to_target(TARGET ${name} SOURCES ${_sources}) target_link_libraries(${name} PRIVATE MPI::MPI_CXX) add_test(NAME ${name} COMMAND ${name} ${ARGN}) endif() diff --git a/Publications/GPU-Opt-Guide/MPI/01_omp_mpich/CMakeLists.txt b/Publications/GPU-Opt-Guide/MPI/01_omp_mpich/CMakeLists.txt index 6d0880c167..3c704971d9 100644 --- a/Publications/GPU-Opt-Guide/MPI/01_omp_mpich/CMakeLists.txt +++ b/Publications/GPU-Opt-Guide/MPI/01_omp_mpich/CMakeLists.txt @@ -1 +1,3 @@ add_mpi_example(omp_mpich) +target_compile_options(omp_mpich PRIVATE -fiopenmp) +target_link_options(omp_mpich PRIVATE -fiopenmp) diff --git a/Publications/GPU-Opt-Guide/OpenMP/26_omp_prefetch/c/CMakeLists.txt b/Publications/GPU-Opt-Guide/OpenMP/26_omp_prefetch/c/CMakeLists.txt index 1d36e7ae22..457b3cf855 100644 --- a/Publications/GPU-Opt-Guide/OpenMP/26_omp_prefetch/c/CMakeLists.txt +++ b/Publications/GPU-Opt-Guide/OpenMP/26_omp_prefetch/c/CMakeLists.txt @@ -1 +1 @@ -add_example(nbody_c) +add_openmp_example(nbody_c) diff --git a/Publications/GPU-Opt-Guide/OpenMP/26_omp_prefetch/c/nbody_c.cpp b/Publications/GPU-Opt-Guide/OpenMP/26_omp_prefetch/c/nbody_c.cpp index 156a4f451e..94324b0a14 100644 --- a/Publications/GPU-Opt-Guide/OpenMP/26_omp_prefetch/c/nbody_c.cpp +++ b/Publications/GPU-Opt-Guide/OpenMP/26_omp_prefetch/c/nbody_c.cpp @@ -71,7 +71,7 @@ void nbody_1d_cpu(float *c, float *a, float *b, int n1, int n2) { void clean_cache_gpu(double *d, int n) { #pragma omp target teams distribute parallel for thread_limit(1024) - for (unsigned i = 0; i < n; ++i) + for (int i = 0; i < n; ++i) d[i] = i; return; diff --git a/Publications/GPU-Opt-Guide/OpenMP/26_omp_prefetch/c_simd/CMakeLists.txt b/Publications/GPU-Opt-Guide/OpenMP/26_omp_prefetch/c_simd/CMakeLists.txt index 6ec8e627ac..a0063c70e4 100644 --- a/Publications/GPU-Opt-Guide/OpenMP/26_omp_prefetch/c_simd/CMakeLists.txt +++ b/Publications/GPU-Opt-Guide/OpenMP/26_omp_prefetch/c_simd/CMakeLists.txt @@ -1,2 +1,2 @@ add_compile_options(-fopenmp-target-simd) -add_example(nbody_c_simd) +add_openmp_example(nbody_c_simd) diff --git a/Publications/GPU-Opt-Guide/OpenMP/26_omp_prefetch/c_simd/nbody_c_simd.cpp b/Publications/GPU-Opt-Guide/OpenMP/26_omp_prefetch/c_simd/nbody_c_simd.cpp index fc6cffffd8..66da1ba4d6 100644 --- a/Publications/GPU-Opt-Guide/OpenMP/26_omp_prefetch/c_simd/nbody_c_simd.cpp +++ b/Publications/GPU-Opt-Guide/OpenMP/26_omp_prefetch/c_simd/nbody_c_simd.cpp @@ -90,7 +90,7 @@ void nbody_1d_cpu(float *c, float *a, float *b, int n1, int n2) { void clean_cache_gpu(double *d, int n) { #pragma omp target teams distribute parallel for thread_limit(1024) - for (unsigned i = 0; i < n; ++i) + for (int i = 0; i < n; ++i) d[i] = i; return; diff --git a/Publications/GPU-Opt-Guide/OpenMP/26_omp_prefetch/fortran/CMakeLists.txt b/Publications/GPU-Opt-Guide/OpenMP/26_omp_prefetch/fortran/CMakeLists.txt index e9106a7dda..9642f146aa 100644 --- a/Publications/GPU-Opt-Guide/OpenMP/26_omp_prefetch/fortran/CMakeLists.txt +++ b/Publications/GPU-Opt-Guide/OpenMP/26_omp_prefetch/fortran/CMakeLists.txt @@ -1,2 +1,2 @@ -add_compile_options(-fpconstant -fpp -ffast-math -fno-sycl-instrument-device-code) -add_fortran_example_with_mkl(nbody_f) +add_compile_options(-fpconstant -fpp -ffast-math) +add_fixed_fortran_example(nbody_f) diff --git a/Publications/GPU-Opt-Guide/OpenMP/26_omp_prefetch/fortran/nbody_f.f b/Publications/GPU-Opt-Guide/OpenMP/26_omp_prefetch/fortran/nbody_f.f index a893aa7d8c..74934260c4 100644 --- a/Publications/GPU-Opt-Guide/OpenMP/26_omp_prefetch/fortran/nbody_f.f +++ b/Publications/GPU-Opt-Guide/OpenMP/26_omp_prefetch/fortran/nbody_f.f @@ -7,6 +7,8 @@ #define PREFETCH_HINT 4 ! 4 = prefetch to L1 and L3; 2 = prefetch to L3 #define TILE_SIZE 64 + module gpu_kernels + contains subroutine nbody_1d_gpu(c, a, b, n1, n2) implicit none integer n1, n2 @@ -53,7 +55,7 @@ subroutine nbody_1d_cpu(c, a, b, n1, n2) implicit none integer n1, n2 real a(0:n1), b(0:n2), c(0:n1) - real dx, bb(0:TILE_SIZE), delta, r2, s0, s1, f + real dx, delta, r2, s0, s1, f integer i,j real ma0, ma1, ma2, ma3, ma4, ma5, eps parameter (ma0=0.269327, ma1=-0.0750978, ma2=0.0114808) @@ -86,7 +88,10 @@ subroutine clean_cache_gpu(d,n) !$omp end target teams distribute parallel do end subroutine + end module gpu_kernels + program nbody + use gpu_kernels implicit none include 'omp_lib.h' diff --git a/Publications/GPU-Opt-Guide/matrix/CMakeLists.txt b/Publications/GPU-Opt-Guide/matrix/CMakeLists.txt index c0f5f72773..3bc7acb8df 100644 --- a/Publications/GPU-Opt-Guide/matrix/CMakeLists.txt +++ b/Publications/GPU-Opt-Guide/matrix/CMakeLists.txt @@ -1 +1,3 @@ -add_executable(matrix matrix.cpp multiply.cpp) +set(_sources matrix.cpp multiply.cpp) +add_executable(matrix ${_sources}) +add_sycl_to_target(TARGET matrix SOURCES ${_sources})