Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Commit #243

Open
wants to merge 22 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
6dbd3d4
added dft interface and implementation stubs for mklgpu backend
t4c1 Sep 27, 2022
32ddffe
format
t4c1 Sep 27, 2022
e481041
addressed comments from internal review
t4c1 Sep 28, 2022
bccc24c
Merge branch 'dft_interface' of https://github.com/t4c1/oneMKL into s…
anantsrivastava30 Oct 6, 2022
a48b583
pimpl descriptor class skeleton
anantsrivastava30 Oct 10, 2022
47af09e
push descriptor outside of device specific impl + desc class w/ prec …
anantsrivastava30 Oct 11, 2022
0c62927
initial implementation of set_value and corresponding desc_impl class…
anantsrivastava30 Oct 12, 2022
d5f824b
before refactoring the desc_impl class
anantsrivastava30 Oct 12, 2022
710a4e1
refactor and move desc class outside of pimpl & constructor generaliz…
anantsrivastava30 Oct 13, 2022
3f9d0b8
enable setting strides
anantsrivastava30 Oct 13, 2022
ba84e0e
cpu commit+set_value
anantsrivastava30 Oct 31, 2022
90dba9e
address reviews
anantsrivastava30 Nov 15, 2022
1852b13
adress commit handle and error handling
anantsrivastava30 Nov 21, 2022
c4ad8d8
remove the use of error enums
anantsrivastava30 Nov 21, 2022
6cf88ba
generalize descriptor passing+extra forward decl
anantsrivastava30 Nov 23, 2022
6de0cdd
revert testing
anantsrivastava30 Nov 23, 2022
9385c86
fix linking issues; update backend table
anantsrivastava30 Nov 23, 2022
96ac5de
revert some old comments
anantsrivastava30 Nov 23, 2022
04ed345
further comments
anantsrivastava30 Nov 30, 2022
820118e
refactor descriptor source
anantsrivastava30 Dec 5, 2022
9ed1f64
efficient resource handle
anantsrivastava30 Dec 5, 2022
aa22374
create_commit explicit instance
anantsrivastava30 Dec 6, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,10 @@ if(ENABLE_MKLCPU_BACKEND
OR ENABLE_CURAND_BACKEND)
list(APPEND DOMAINS_LIST "rng")
endif()
# Register the "dft" domain when the MKLCPU or the MKLGPU backend is enabled.
if(ENABLE_MKLCPU_BACKEND OR ENABLE_MKLGPU_BACKEND)
  list(APPEND DOMAINS_LIST "dft")
endif()

# Define required CXX compilers before project
if(CMAKE_CXX_COMPILER OR NOT ONEMKL_SYCL_IMPLEMENTATION STREQUAL "dpc++")
Expand Down
27 changes: 27 additions & 0 deletions examples/dft/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#===============================================================================
# Copyright 2022 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions
# and limitations under the License.
#
#
# SPDX-License-Identifier: Apache-2.0
#===============================================================================

# The compile-time dispatching example is written against the MKLCPU backend;
# the CMake file in that sub-directory only adds it when ENABLE_MKLCPU_BACKEND
# is set.
add_subdirectory(compile_time_dispatching)

# The run-time dispatching examples are only added for shared-library builds.
if(BUILD_SHARED_LIBS)
  add_subdirectory(run_time_dispatching)
endif()
49 changes: 49 additions & 0 deletions examples/dft/compile_time_dispatching/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#===============================================================================
# Copyright 2022 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions
# and limitations under the License.
#
#
# SPDX-License-Identifier: Apache-2.0
#===============================================================================

# Compile-time dispatching examples for the DFT domain.
# Each entry is a source file name without its .cpp extension; one executable
# and one ctest entry is created per entry.
set(DFTI_CT_SOURCES "")
if(ENABLE_MKLCPU_BACKEND)
  list(APPEND DFTI_CT_SOURCES "complex_fwd_usm_mklcpu")
endif()

# Resolve the MKLCPU link requirements once, outside the per-example loop, so
# that the backend library is not appended again for every example.
if(domain STREQUAL "dft" AND ENABLE_MKLCPU_BACKEND)
  find_library(OPENCL_LIBRARY NAMES OpenCL)
  if(OPENCL_LIBRARY)
    message(STATUS "Found OpenCL: ${OPENCL_LIBRARY}")
  else()
    # Do not report a NOTFOUND value as "found", and do not hand it to the linker.
    message(WARNING "OpenCL library not found; DFT compile-time examples will be linked without it")
  endif()
  list(APPEND ONEMKL_LIBRARIES_${domain} onemkl_${domain}_mklcpu)
endif()

foreach(dfti_ct_source IN LISTS DFTI_CT_SOURCES)
  set(dfti_ct_target example_${domain}_${dfti_ct_source})

  add_executable(${dfti_ct_target} ${dfti_ct_source}.cpp)
  target_include_directories(${dfti_ct_target}
    PUBLIC ${PROJECT_SOURCE_DIR}/examples/include
    PUBLIC ${PROJECT_SOURCE_DIR}/include
    PUBLIC ${CMAKE_BINARY_DIR}/bin
  )

  if(domain STREQUAL "dft" AND ENABLE_MKLCPU_BACKEND)
    # Make sure the backend library is built before the example.
    add_dependencies(${dfti_ct_target} onemkl_${domain}_mklcpu)
    if(OPENCL_LIBRARY)
      target_link_libraries(${dfti_ct_target} PUBLIC ${OPENCL_LIBRARY})
    endif()
  endif()

  target_link_libraries(${dfti_ct_target} PUBLIC
    ${ONEMKL_LIBRARIES_${domain}}
    ONEMKL::SYCL::SYCL
  )

  # Register example as ctest
  add_test(NAME ${domain}/EXAMPLE/CT/${dfti_ct_source} COMMAND ${dfti_ct_target})
endforeach()
155 changes: 155 additions & 0 deletions examples/dft/compile_time_dispatching/complex_fwd_usm_mklcpu.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
/*******************************************************************************
* Copyright 2022 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions
* and limitations under the License.
*
*
* SPDX-License-Identifier: Apache-2.0
*******************************************************************************/

/*
*
* Content:
* This example demonstrates use of oneapi::mkl::dft::descriptor: it creates
* a descriptor, configures it with set_value and commits it to the MKLCPU
* backend, selected at compile time, on a cpu device.
*
* This example uses a double precision complex descriptor
* (precision::DOUBLE, domain::COMPLEX)
*
*******************************************************************************/

// STL includes
#include <algorithm>
#include <cstdlib>
#include <iostream>
#include <vector>

// oneMKL/SYCL includes
#if __has_include(<sycl/sycl.hpp>)
#include <sycl/sycl.hpp>
#else
#include <CL/sycl.hpp>
#endif
#include "oneapi/mkl.hpp"

// local includes
#include "example_helper.hpp"

// Runs the DFT descriptor example on the given device: creates a
// double-precision complex descriptor of length N, sets its PLACEMENT to
// NOT_INPLACE and commits it to the MKLCPU backend chosen at compile time.
//
// cpu_device: SYCL device the queue used for the commit is created on.
//
// Throws sycl::exception if the USM allocation fails; exceptions thrown by the
// descriptor calls propagate to the caller after the USM buffer is released.
// Asynchronous SYCL exceptions are printed to std::cerr and end the process
// with exit code 2.
void run_getrs_example(const sycl::device& cpu_device) {
    // Length of the transform
    constexpr std::size_t N = 10;

    // Catch asynchronous exceptions raised on the cpu queue
    auto cpu_error_handler = [](sycl::exception_list exceptions) {
        for (auto const& pending : exceptions) {
            try {
                std::rethrow_exception(pending);
            }
            catch (sycl::exception const& e) {
                // Handle not dft related exceptions that happened during asynchronous call
                std::cerr << "Caught asynchronous SYCL exception on cpu device during DFT example:"
                          << std::endl;
                std::cerr << "\t" << e.what() << std::endl;
            }
        }
        std::exit(2);
    };

    std::cout << "DFTI example" << std::endl;

    //
    // Preparation on cpu
    //
    sycl::queue cpu_queue(cpu_device, cpu_error_handler);

    // 2 * N doubles; presumably one (real, imaginary) pair per complex value - TODO confirm
    double* x_usm = sycl::malloc_shared<double>(2 * N, cpu_queue);
    if (x_usm == nullptr) {
        throw sycl::exception(sycl::make_error_code(sycl::errc::memory_allocation),
                              "Failed to allocate shared USM memory for the DFT example");
    }

    try {
        // 1. create descriptor
        oneapi::mkl::dft::descriptor<oneapi::mkl::dft::precision::DOUBLE,
                                     oneapi::mkl::dft::domain::COMPLEX>
            desc(N);

        // 2. variadic set_value
        desc.set_value(oneapi::mkl::dft::config_param::PLACEMENT,
                       oneapi::mkl::dft::config_value::NOT_INPLACE);

        // 3. commit_descriptor (compile_time CPU)
        desc.commit(oneapi::mkl::backend_selector<oneapi::mkl::backend::mklcpu>{ cpu_queue });

        // 4. compute_forward / compute_backward (CPU) is not enabled in this example yet:
        // oneapi::mkl::dft::compute_forward(desc, x_usm);
    }
    catch (...) {
        // Release the USM buffer before handing the exception to the caller
        sycl::free(x_usm, cpu_queue);
        throw;
    }

    sycl::free(x_usm, cpu_queue);
}

//
// Description of example setup, apis used and supported floating point type precisions
//

// Prints a banner to std::cout describing what this example does: the kind of
// transform set up, the APIs exercised, the data type and the device/backend
// used.
void print_example_banner() {
    // Horizontal rule framing the banner
    const char* const rule =
        "########################################################################";

    std::cout << "" << std::endl;
    std::cout << rule << std::endl;
    std::cout << "# DFT complex out-of-place forward transform USM API example: " << std::endl;
    std::cout << "# " << std::endl;
    std::cout << "# Using APIs:" << std::endl;
    std::cout << "#   descriptor, set_value, commit" << std::endl;
    std::cout << "# " << std::endl;
    std::cout << "# Using double precision (double) complex data type" << std::endl;
    std::cout << "# " << std::endl;
    std::cout << "# Running on a cpu device with the MKLCPU backend selected at compile time"
              << std::endl;
    std::cout << "# " << std::endl;
    std::cout << rule << std::endl;
    std::cout << std::endl;
}

//
// Main entry point for example.
//
// Program entry point: prints the banner, selects a cpu device and runs the
// example on it.
//
// Returns 0 on success and 1 when a synchronous SYCL or standard exception is
// caught.
int main(int argc, char** argv) {
    // Command-line arguments are not used by this example
    (void)argc;
    (void)argv;

    print_example_banner();

    try {
        // The extra parentheses keep this from being parsed as a function declaration
        sycl::device cpu_dev((sycl::cpu_selector_v));
        std::cout << "Running DFT Complex forward out-of-place USM example" << std::endl;
        std::cout << "Running with double precision complex data type on:" << std::endl;
        std::cout << "\tcpu device :" << cpu_dev.get_info<sycl::info::device::name>()
                  << std::endl;

        run_getrs_example(cpu_dev);
        std::cout << "DFT Complex USM example ran OK on MKLcpu" << std::endl;
    }
    catch (sycl::exception const& e) {
        // Handle not dft related exceptions that happened during synchronous call
        std::cerr << "Caught synchronous SYCL exception:" << std::endl;
        std::cerr << "\t" << e.what() << std::endl;
        std::cerr << "\tSYCL error code: " << e.code().value() << std::endl;
        return 1;
    }
    catch (std::exception const& e) {
        // Handle not SYCL related exceptions that happened during synchronous call
        std::cerr << "Caught synchronous std::exception:" << std::endl;
        std::cerr << "\t" << e.what() << std::endl;
        return 1;
    }

    return 0;
}
67 changes: 67 additions & 0 deletions examples/dft/run_time_dispatching/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#===============================================================================
# Copyright 2022 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions
# and limitations under the License.
#
#
# SPDX-License-Identifier: Apache-2.0
#===============================================================================

# NOTE: user needs to set env var SYCL_DEVICE_FILTER to use runtime example (no need to specify backend when building with CMake)

# Run-time dispatching examples for the DFT domain.
# Each entry is a source file name without its .cpp extension; one executable is
# created per entry and one ctest entry per (example, device filter) pair.
set(DFT_RT_SOURCES "complex_fwd_usm")

# Set up for the right backend for run-time dispatching examples
# If users build more than one backend (i.e. mklcpu and mklgpu, or mklcpu and CUDA), they may need to
# overwrite SYCL_DEVICE_FILTER in their environment to run on the desired backend
set(DEVICE_FILTERS "")
if(ENABLE_MKLCPU_BACKEND)
  list(APPEND DEVICE_FILTERS "cpu")
endif()
# The "gpu" filter is only registered on UNIX hosts with the MKLGPU backend enabled
if(UNIX AND ENABLE_MKLGPU_BACKEND)
  list(APPEND DEVICE_FILTERS "gpu")
endif()

message(STATUS "SYCL_DEVICE_FILTER will be set to the following value(s): [${DEVICE_FILTERS}] for run-time dispatching examples")

foreach(dft_rt_source IN LISTS DFT_RT_SOURCES)
  set(dft_rt_target example_${domain}_${dft_rt_source})

  add_executable(${dft_rt_target} ${dft_rt_source}.cpp)
  target_include_directories(${dft_rt_target}
    PUBLIC ${PROJECT_SOURCE_DIR}/examples/include
    PUBLIC ${PROJECT_SOURCE_DIR}/include
    PUBLIC ${CMAKE_BINARY_DIR}/bin
  )

  add_dependencies(${dft_rt_target} onemkl)

  if(USE_ADD_SYCL_TO_TARGET_INTEGRATION)
    # Pass the source file of this target, not the list of example base names
    add_sycl_to_target(TARGET ${dft_rt_target} SOURCES ${dft_rt_source}.cpp)
  endif()

  target_link_libraries(${dft_rt_target} PUBLIC
    onemkl
    ONEMKL::SYCL::SYCL
    ${CMAKE_DL_LIBS}
  )

  # Register example as ctest, once per device filter
  foreach(device_filter IN LISTS DEVICE_FILTERS)
    set(dft_rt_test ${domain}/EXAMPLE/RT/${dft_rt_source}/${device_filter})
    add_test(NAME ${dft_rt_test} COMMAND ${dft_rt_target})
    # ENVIRONMENT is a single list-valued property: name it once and give every
    # NAME=VALUE entry as a separate value.
    set_property(TEST ${dft_rt_test} PROPERTY ENVIRONMENT
      "LD_LIBRARY_PATH=${CMAKE_BINARY_DIR}/lib:$ENV{LD_LIBRARY_PATH}"
      "SYCL_DEVICE_FILTER=${device_filter}"
    )
  endforeach()
endforeach()