diff --git a/cpp/oneapi/dal/algo/dbscan/common.hpp b/cpp/oneapi/dal/algo/dbscan/common.hpp index d688419761c..3566779b9c3 100644 --- a/cpp/oneapi/dal/algo/dbscan/common.hpp +++ b/cpp/oneapi/dal/algo/dbscan/common.hpp @@ -155,7 +155,7 @@ class descriptor : public detail::descriptor_base { using method_t = Method; using task_t = Task; - /// Creates a new instance of the class with the given :literals:`epsilon`, `min_observations` + /// Creates a new instance of the class with the given :literal:`epsilon`, :literal:`min_observations` explicit descriptor(double epsilon, std::int64_t min_observations) { set_min_observations(min_observations); set_epsilon(epsilon); diff --git a/cpp/oneapi/dal/algo/dbscan/compute_types.hpp b/cpp/oneapi/dal/algo/dbscan/compute_types.hpp index 757696e86d1..2954b4e56c4 100644 --- a/cpp/oneapi/dal/algo/dbscan/compute_types.hpp +++ b/cpp/oneapi/dal/algo/dbscan/compute_types.hpp @@ -116,7 +116,7 @@ class compute_result { } /// An $m \\times 1$ table with the indices of core observations in - /// the input data. $m is a number of core observations + /// the input data. $m$ is a number of core observations const table& get_core_observation_indices() const; auto& set_core_observation_indices(const table& value) { @@ -125,7 +125,7 @@ class compute_result { } /// An $m \\times p$ table with the core observations in - /// the input data. $m is a number of core observations + /// the input data. $m$ is a number of core observations const table& get_core_observations() const; auto& set_core_observations(const table& value) { diff --git a/docs/source/api/algorithms/clustering/dbscan.rst b/docs/source/api/algorithms/clustering/dbscan.rst new file mode 100644 index 00000000000..9cd02d20698 --- /dev/null +++ b/docs/source/api/algorithms/clustering/dbscan.rst @@ -0,0 +1,96 @@ +.. ****************************************************************************** +.. * Copyright 2021 Intel Corporation +.. * +.. 
* Licensed under the Apache License, Version 2.0 (the "License"); +.. * you may not use this file except in compliance with the License. +.. * You may obtain a copy of the License at +.. * +.. * http://www.apache.org/licenses/LICENSE-2.0 +.. * +.. * Unless required by applicable law or agreed to in writing, software +.. * distributed under the License is distributed on an "AS IS" BASIS, +.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. * See the License for the specific language governing permissions and +.. * limitations under the License. +.. *******************************************************************************/ + +.. highlight:: cpp +.. default-domain:: cpp + +.. _api_dbscan: + +====== +DBSCAN +====== + +.. include:: ../../../includes/clustering/dbscan-introduction.rst + +------------------------ +Mathematical formulation +------------------------ + +Refer to :ref:`Developer Guide: DBSCAN <alg_dbscan>`. + +--------------------- +Programming Interface +--------------------- + +All types and functions in this section are declared in the +``oneapi::dal::dbscan`` namespace and are available via inclusion of the +``oneapi/dal/algo/dbscan.hpp`` header file. + +Descriptor +---------- +.. onedal_class:: oneapi::dal::dbscan::descriptor + +Method tags +~~~~~~~~~~~ +.. onedal_tags_namespace:: oneapi::dal::dbscan::method + +Task tags +~~~~~~~~~ +.. onedal_tags_namespace:: oneapi::dal::dbscan::task + + +.. _dbscan_c_api: + +Computation :cpp:expr:`compute(...)` +------------------------------------ + +.. _dbscan_c_api_input: + +Input +~~~~~ +.. onedal_class:: oneapi::dal::dbscan::compute_input + +.. _dbscan_c_api_result: + +Result +~~~~~~ +.. onedal_class:: oneapi::dal::dbscan::compute_result + +Operation +~~~~~~~~~ + +.. 
function:: template <typename Descriptor> \ + dbscan::compute_result compute(const Descriptor& desc, \ + const dbscan::compute_input& input) + + :param desc: DBSCAN algorithm descriptor :expr:`dbscan::descriptor` + :param input: Input data for the compute operation + + Preconditions + | :expr:`input.data.has_data == true` + | :expr:`!input.weights.has_data || input.weights.row_count == input.data.row_count && input.weights.column_count == 1` + +------------- +Usage example +------------- + +.. include:: ../../../includes/clustering/dbscan-usage-examples.rst + +-------- +Examples +-------- + +.. include:: ../../../includes/clustering/dbscan-examples.rst diff --git a/docs/source/api/algorithms/clustering/index.rst b/docs/source/api/algorithms/clustering/index.rst index ef966d80373..c09bd00c387 100644 --- a/docs/source/api/algorithms/clustering/index.rst +++ b/docs/source/api/algorithms/clustering/index.rst @@ -23,5 +23,6 @@ This chapter describes programming interfaces of the clustering algorithms imple .. toctree:: :titlesonly: + dbscan.rst kmeans.rst kmeans-init.rst diff --git a/docs/source/api/index.rst b/docs/source/api/index.rst index 44e7c916639..62792b4f032 100644 --- a/docs/source/api/index.rst +++ b/docs/source/api/index.rst @@ -23,4 +23,5 @@ C++ API data-management/index.rst algorithms/index.rst + spmd/index.rst diff --git a/docs/source/api/spmd/communicator.rst b/docs/source/api/spmd/communicator.rst new file mode 100644 index 00000000000..64d9a96656d --- /dev/null +++ b/docs/source/api/spmd/communicator.rst @@ -0,0 +1,84 @@ +.. ****************************************************************************** +.. * Copyright 2021 Intel Corporation +.. * +.. * Licensed under the Apache License, Version 2.0 (the "License"); +.. * you may not use this file except in compliance with the License. +.. * You may obtain a copy of the License at +.. * +.. * http://www.apache.org/licenses/LICENSE-2.0 +.. * +.. * Unless required by applicable law or agreed to in writing, software +.. 
* distributed under the License is distributed on an "AS IS" BASIS, +.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. * See the License for the specific language governing permissions and +.. * limitations under the License. +.. *******************************************************************************/ + +.. highlight:: cpp + +.. _api_communicator: + +============= +Communicators +============= + +.. _communicator_programming_interface: + +--------------------- +Programming interface +--------------------- + +All types and functions in this section are declared in the +``oneapi::dal::spmd::preview`` namespace and are available via inclusion of the +header file from the specified backend. + +Communicator +------------ + +A base implementation of the :term:`communicator` concept. +The :term:`communicator` type and all of its subtypes are :term:`reference-counted <Reference-counted object>`: + +1. The instance stores a pointer to the communicator implementation that holds all + property values and data. + +2. The reference count indicates how many communicator objects refer to the same implementation. + +3. The communicator increments the reference count + for it to be equal to the number of communicator objects sharing the same implementation. + +4. The communicator decrements the reference count when the + communicator goes out of scope. If the reference count is zero, the communicator + frees its implementation. + +USM and non-USM memory usage +---------------------------- + +There are two types of memory access: + +- USM memory access (both USM and non-USM pointers can be used) +- Host, or non-USM, memory access (only non-USM pointers can be used) + +Use one of the following tags to select a memory access type: + +device_memory_access::none + Assumes only non-USM pointers are used for a collective operation. + +device_memory_access::usm + Both USM and non-USM can be used. 
Pointer type is controlled by + the use of a ``sycl::queue`` object as the first parameter for collective + operations. The use of a ``sycl::queue`` object is obligatory for USM + pointers. + +Request +------- + +Request is an object to control asynchronous communication. + +Reduction operations +-------------------- + +The following reduction operations are supported: + +- Max +- Min +- Sum diff --git a/docs/source/api/spmd/index.rst b/docs/source/api/spmd/index.rst new file mode 100644 index 00000000000..f74833382fe --- /dev/null +++ b/docs/source/api/spmd/index.rst @@ -0,0 +1,26 @@ +.. ****************************************************************************** +.. * Copyright 2021 Intel Corporation +.. * +.. * Licensed under the Apache License, Version 2.0 (the "License"); +.. * you may not use this file except in compliance with the License. +.. * You may obtain a copy of the License at +.. * +.. * http://www.apache.org/licenses/LICENSE-2.0 +.. * +.. * Unless required by applicable law or agreed to in writing, software +.. * distributed under the License is distributed on an "AS IS" BASIS, +.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. * See the License for the specific language governing permissions and +.. * limitations under the License. +.. *******************************************************************************/ + +=============================================== +Distributed Model: Single Process Multiple Data +=============================================== + +Refer to :ref:`Developer Guide: SPMD distributed model <dg_spmd>`. + +.. toctree:: + + overview.rst + communicator.rst diff --git a/docs/source/api/spmd/overview.rst b/docs/source/api/spmd/overview.rst new file mode 100644 index 00000000000..d20163a5133 --- /dev/null +++ b/docs/source/api/spmd/overview.rst @@ -0,0 +1,59 @@ +.. ****************************************************************************** +.. * Copyright 2021 Intel Corporation +.. * +.. 
* Licensed under the Apache License, Version 2.0 (the "License"); +.. * you may not use this file except in compliance with the License. +.. * You may obtain a copy of the License at +.. * +.. * http://www.apache.org/licenses/LICENSE-2.0 +.. * +.. * Unless required by applicable law or agreed to in writing, software +.. * distributed under the License is distributed on an "AS IS" BASIS, +.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. * See the License for the specific language governing permissions and +.. * limitations under the License. +.. *******************************************************************************/ + +.. highlight:: cpp +.. default-domain:: cpp + +.. _api_spmd: + +====================== +Distributed SPMD model +====================== + +Refer to :ref:`Developer Guide: SPMD <dg_spmd>`. + +--------------------- +Programming interface +--------------------- + +All types and functions in this section are declared in the +``oneapi::dal::spmd::preview`` namespace and are available via inclusion of the +header file from the specified backend. + +The :term:`SPMD` distributed model consists of the following components: + +1. Additional ``train``, ``infer``, and ``compute`` methods that accept a ``communicator`` object + as the first parameter. Those methods are expected to be called on all ranks to start + distributed simulations. + +2. The :term:`communicator` class that contains methods to perform collective operations among all ranks. + +3. Free functions to create a :term:`communicator` using a specified :term:`communicator backend`. Available backends are ``ccl`` + and ``mpi``. + +------------- +Usage example +------------- + +The following listings provide a brief introduction on how to create a particular communicator. + +.. rubric:: MPI backend + +.. include:: ../../includes/spmd/mpi-comm-usage-example.rst + +.. rubric:: CCL backend + +.. 
include:: ../../includes/spmd/ccl-comm-usage-example.rst diff --git a/docs/source/conf.py b/docs/source/conf.py index b6128ade77e..0c22fdd7c2d 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -281,6 +281,18 @@ ('cpp:identifier', 'oneapi::dal::basic_statistics::method'), ('cpp:identifier', 'oneapi::dal::basic_statistics::method::v1'), ('cpp:identifier', 'oneapi::dal::basic_statistics::method::v1::dense'), + # dbscan + ('cpp:identifier', 'dbscan'), + ('cpp:identifier', 'dbscan::descriptor'), + ('cpp:identifier', 'dbscan::compute_result'), + ('cpp:identifier', 'dbscan::compute_input'), + ('cpp:identifier', 'oneapi::dal::dbscan'), + ('cpp:identifier', 'oneapi::dal::dbscan::task'), + ('cpp:identifier', 'oneapi::dal::dbscan::task::v1'), + ('cpp:identifier', 'oneapi::dal::dbscan::task::v1::compute'), + ('cpp:identifier', 'oneapi::dal::dbscan::method'), + ('cpp:identifier', 'oneapi::dal::dbscan::method::v1'), + ('cpp:identifier', 'oneapi::dal::dbscan::method::v1::brute_force'), # common for algorithms ('cpp:identifier', 'result'), # common for result options diff --git a/docs/source/includes/clustering/dbscan-examples.rst b/docs/source/includes/clustering/dbscan-examples.rst new file mode 100644 index 00000000000..e0a1d7c5837 --- /dev/null +++ b/docs/source/includes/clustering/dbscan-examples.rst @@ -0,0 +1,35 @@ +.. ****************************************************************************** +.. * Copyright 2021 Intel Corporation +.. * +.. * Licensed under the Apache License, Version 2.0 (the "License"); +.. * you may not use this file except in compliance with the License. +.. * You may obtain a copy of the License at +.. * +.. * http://www.apache.org/licenses/LICENSE-2.0 +.. * +.. * Unless required by applicable law or agreed to in writing, software +.. * distributed under the License is distributed on an "AS IS" BASIS, +.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. 
* See the License for the specific language governing permissions and +.. * limitations under the License. +.. *******************************************************************************/ + +.. tabs:: + + .. group-tab:: oneAPI DPC++ + + Batch Processing: + + - :ref:`dpc_dbscan_brute_force_batch.cpp` + + .. group-tab:: oneAPI C++ + + Batch Processing: + + - :ref:`cpp_dbscan_brute_force_batch.cpp` + + .. group-tab:: Python* with DPC++ support + + Batch Processing: + + - :daal4py_sycl_example:`dbscan_batch.py` diff --git a/docs/source/includes/clustering/dbscan-introduction.rst b/docs/source/includes/clustering/dbscan-introduction.rst new file mode 100644 index 00000000000..d19e8129a0d --- /dev/null +++ b/docs/source/includes/clustering/dbscan-introduction.rst @@ -0,0 +1,31 @@ +.. ****************************************************************************** +.. * Copyright 2021 Intel Corporation +.. * +.. * Licensed under the Apache License, Version 2.0 (the "License"); +.. * you may not use this file except in compliance with the License. +.. * You may obtain a copy of the License at +.. * +.. * http://www.apache.org/licenses/LICENSE-2.0 +.. * +.. * Unless required by applicable law or agreed to in writing, software +.. * distributed under the License is distributed on an "AS IS" BASIS, +.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. * See the License for the specific language governing permissions and +.. * limitations under the License. +.. *******************************************************************************/ + +Density-based spatial clustering of applications with noise (DBSCAN) is a data clustering algorithm proposed in [Ester96]_. 
+It is a non-parametric density-based clustering algorithm: given a set of observations in some space, +it groups together observations that are closely packed together (observations with many nearby neighbors), +marking as outliers observations that lie alone in low-density regions (whose nearest neighbors are too far away). + +.. |c_math| replace:: :ref:`Compute <dbscan_c_math>` +.. |c_input| replace:: :ref:`compute_input <dbscan_c_api_input>` +.. |c_result| replace:: :ref:`compute_result <dbscan_c_api_result>` +.. |c_op| replace:: :ref:`compute(...) <dbscan_c_api>` + +=============== =========================== ======== =========== ============ + **Operation** **Computational methods** **Programming Interface** +--------------- --------------------------- --------------------------------- + |c_math| Default method |c_op| |c_input| |c_result| +=============== =========================== ======== =========== ============ diff --git a/docs/source/includes/clustering/dbscan-usage-examples.rst b/docs/source/includes/clustering/dbscan-usage-examples.rst new file mode 100644 index 00000000000..af6be8ecbe3 --- /dev/null +++ b/docs/source/includes/clustering/dbscan-usage-examples.rst @@ -0,0 +1,32 @@ +.. ****************************************************************************** +.. * Copyright 2021 Intel Corporation +.. * +.. * Licensed under the Apache License, Version 2.0 (the "License"); +.. * you may not use this file except in compliance with the License. +.. * You may obtain a copy of the License at +.. * +.. * http://www.apache.org/licenses/LICENSE-2.0 +.. * +.. * Unless required by applicable law or agreed to in writing, software +.. * distributed under the License is distributed on an "AS IS" BASIS, +.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. * See the License for the specific language governing permissions and +.. * limitations under the License. +.. 
*******************************************************************************/ + +Compute +------- + +:: + + void run_compute(const table& data, + const table& weights) { + double epsilon = 1.0; + std::int64_t min_observations = 5; + const auto dbscan_desc = dbscan::descriptor{epsilon, min_observations} + .set_result_options(dal::dbscan::result_options::responses); + + const auto result = compute(dbscan_desc, data, weights); + + print_table("responses", result.get_responses()); + } diff --git a/docs/source/includes/spmd/ccl-comm-usage-example.rst b/docs/source/includes/spmd/ccl-comm-usage-example.rst new file mode 100644 index 00000000000..dc5464e1387 --- /dev/null +++ b/docs/source/includes/spmd/ccl-comm-usage-example.rst @@ -0,0 +1,46 @@ +.. ****************************************************************************** +.. * Copyright 2021 Intel Corporation +.. * +.. * Licensed under the Apache License, Version 2.0 (the "License"); +.. * you may not use this file except in compliance with the License. +.. * You may obtain a copy of the License at +.. * +.. * http://www.apache.org/licenses/LICENSE-2.0 +.. * +.. * Unless required by applicable law or agreed to in writing, software +.. * distributed under the License is distributed on an "AS IS" BASIS, +.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. * See the License for the specific language governing permissions and +.. * limitations under the License. +.. *******************************************************************************/ + +.. highlight:: cpp +.. 
default-domain:: cpp + +:: + + #ifndef ONEDAL_DATA_PARALLEL + #define ONEDAL_DATA_PARALLEL + #endif + + #include "oneapi/dal/algo/kmeans.hpp" + #include "oneapi/dal/spmd/ccl/communicator.hpp" + + kmeans::model<> run_training(const table& data, + const table& initial_centroids) { + const auto kmeans_desc = kmeans::descriptor{} + .set_cluster_count(10) + .set_max_iteration_count(50) + .set_accuracy_threshold(1e-4); + + auto comm = dal::preview::spmd::make_communicator<dal::preview::spmd::backend::ccl>(queue); + auto rank_id = comm.get_rank(); + + const auto result_train = dal::preview::train(comm, kmeans_desc, local_input); + + if(rank_id == 0) { + print_table("centroids", result_train.get_model().get_centroids()); + print_value("objective", result_train.get_objective_function_value()); + } + return result_train.get_model(); + } \ No newline at end of file diff --git a/docs/source/includes/spmd/mpi-comm-usage-example.rst b/docs/source/includes/spmd/mpi-comm-usage-example.rst new file mode 100644 index 00000000000..ca1cc197dc6 --- /dev/null +++ b/docs/source/includes/spmd/mpi-comm-usage-example.rst @@ -0,0 +1,46 @@ +.. ****************************************************************************** +.. * Copyright 2021 Intel Corporation +.. * +.. * Licensed under the Apache License, Version 2.0 (the "License"); +.. * you may not use this file except in compliance with the License. +.. * You may obtain a copy of the License at +.. * +.. * http://www.apache.org/licenses/LICENSE-2.0 +.. * +.. * Unless required by applicable law or agreed to in writing, software +.. * distributed under the License is distributed on an "AS IS" BASIS, +.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. * See the License for the specific language governing permissions and +.. * limitations under the License. +.. *******************************************************************************/ + +.. highlight:: cpp +.. 
default-domain:: cpp + +:: + + #ifndef ONEDAL_DATA_PARALLEL + #define ONEDAL_DATA_PARALLEL + #endif + + #include "oneapi/dal/algo/kmeans.hpp" + #include "oneapi/dal/spmd/mpi/communicator.hpp" + + kmeans::model<> run_training(const table& data, + const table& initial_centroids) { + const auto kmeans_desc = kmeans::descriptor{} + .set_cluster_count(10) + .set_max_iteration_count(50) + .set_accuracy_threshold(1e-4); + + auto comm = dal::preview::spmd::make_communicator<dal::preview::spmd::backend::mpi>(queue); + auto rank_id = comm.get_rank(); + + const auto result_train = dal::preview::train(comm, kmeans_desc, local_input); + + if(rank_id == 0) { + print_table("centroids", result_train.get_model().get_centroids()); + print_value("objective", result_train.get_objective_function_value()); + } + return result_train.get_model(); + } \ No newline at end of file diff --git a/docs/source/oneapi-interfaces.rst b/docs/source/oneapi-interfaces.rst index 0fdafb54a69..573edeaaa1f 100644 --- a/docs/source/oneapi-interfaces.rst +++ b/docs/source/oneapi-interfaces.rst @@ -32,5 +32,6 @@ Refer to |daal-docs|_ official page for general information. onedal/programming-model/computational-modes.rst onedal/data-management/index.rst onedal/algorithms/index.rst + onedal/spmd/index.rst examples.rst onedal/appendix/index.rst diff --git a/docs/source/onedal/algorithms/clustering/dbscan.rst b/docs/source/onedal/algorithms/clustering/dbscan.rst new file mode 100644 index 00000000000..8e380f54b90 --- /dev/null +++ b/docs/source/onedal/algorithms/clustering/dbscan.rst @@ -0,0 +1,73 @@ +.. ****************************************************************************** +.. * Copyright 2021 Intel Corporation +.. * +.. * Licensed under the Apache License, Version 2.0 (the "License"); +.. * you may not use this file except in compliance with the License. +.. * You may obtain a copy of the License at +.. * +.. * http://www.apache.org/licenses/LICENSE-2.0 +.. * +.. * Unless required by applicable law or agreed to in writing, software +.. 
* distributed under the License is distributed on an "AS IS" BASIS, +.. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. * See the License for the specific language governing permissions and +.. * limitations under the License. +.. *******************************************************************************/ + +.. highlight:: cpp +.. default-domain:: cpp + +.. re-use for math equations: +.. |x| replace:: :math:`x` +.. |y| replace:: :math:`y` + +.. _alg_dbscan: + +====== +DBSCAN +====== + +.. include:: ../../../includes/clustering/dbscan-introduction.rst + +------------------------ +Mathematical formulation +------------------------ + +.. _dbscan_c_math: + +Computation +----------- +Given the set :math:`X = \{x_1 = (x_{11}, \ldots, x_{1p}), \ldots, x_n = (x_{n1}, \ldots, x_{np})\}` +of :math:`n` :math:`p`-dimensional feature vectors (further referred to as observations), +a positive floating-point number ``epsilon`` and a positive integer ``minObservations``, +the problem is to get clustering assignments for each input observation, based on the definitions below [Ester96]_: +two observations |x| and |y| are considered to be in the same cluster if there is a :term:`core observation` :math:`z`, +and |x| and |y| are both :term:`reachable` from :math:`z`. + +Each cluster gets a unique identifier, an integer number from :math:`0` to :math:`\text{total number of clusters } - 1`. +Each observation is assigned an identifier of the :term:`cluster` it belongs to, +or :math:`-1` if the observation is considered to be a :term:`noise observation`. + +--------------------- +Programming Interface +--------------------- + +Refer to :ref:`API Reference: DBSCAN <api_dbscan>`. + +---------------- +Distributed mode +---------------- + +The algorithm supports distributed execution in SPMD mode (only on GPU). + +------------- +Usage example +------------- + +.. include:: ../../../includes/clustering/dbscan-usage-examples.rst + +-------- +Examples +-------- + +.. 
include:: ../../../includes/clustering/dbscan-examples.rst diff --git a/docs/source/onedal/algorithms/clustering/index.rst b/docs/source/onedal/algorithms/clustering/index.rst index 45ab5860dba..196f4e5b0bc 100644 --- a/docs/source/onedal/algorithms/clustering/index.rst +++ b/docs/source/onedal/algorithms/clustering/index.rst @@ -23,9 +23,14 @@ This chapter describes clustering algorithms implemented in |short_name|: .. toctree:: :titlesonly: + dbscan.rst kmeans.rst kmeans-init.rst +.. rubric:: Examples: DBSCAN + +.. include:: ../../../includes/clustering/dbscan-examples.rst + .. rubric:: Examples: K-Means .. include:: ../../../includes/clustering/kmeans-examples.rst diff --git a/docs/source/onedal/algorithms/ensembles/decision-forest.rst b/docs/source/onedal/algorithms/ensembles/decision-forest.rst index 3d0edaefa4f..1afaefcd801 100644 --- a/docs/source/onedal/algorithms/ensembles/decision-forest.rst +++ b/docs/source/onedal/algorithms/ensembles/decision-forest.rst @@ -343,3 +343,10 @@ Programming Interface --------------------- Refer to :ref:`API Reference: Decision Forest Classification and Regression `. + +---------------- +Distributed mode +---------------- + +The algorithm supports distributed execution in SPMD mode (only on GPU). + diff --git a/docs/source/onedal/algorithms/statistics/basic-statistics.rst b/docs/source/onedal/algorithms/statistics/basic-statistics.rst index bf671f51991..c8378e69c86 100755 --- a/docs/source/onedal/algorithms/statistics/basic-statistics.rst +++ b/docs/source/onedal/algorithms/statistics/basic-statistics.rst @@ -77,3 +77,9 @@ Programming Interface Refer to :ref:`API Reference: Basic statistics `. +---------------- +Distributed mode +---------------- + +The algorithm supports distributed execution in SPMD mode (only on GPU). 
+ diff --git a/docs/source/onedal/glossary.rst b/docs/source/onedal/glossary.rst index c0a1884f95e..e1b700cde79 100644 --- a/docs/source/onedal/glossary.rst +++ b/docs/source/onedal/glossary.rst @@ -437,3 +437,23 @@ Common oneAPI terms SYCL(TM) [SYCLSpec]_ --- high-level programming model for OpenCL(TM) that enables code for heterogeneous processors to be written in a "single-source" style using completely standard C++. + +Distributed computational mode terms +==================================== + +.. glossary:: + :sorted: + + SPMD + Single Program, Multiple Data (SPMD) is a technique employed to achieve parallelism. + In the SPMD model, multiple autonomous processors simultaneously execute the same program at independent points. + + Communicator + A |short_name| concept for an object that is used to perform inter-process collective + operations. + + Communicator backend + A particular library providing collective operations. + + **Examples:** oneCCL, oneMPI + diff --git a/docs/source/onedal/spmd/_static/spmd_flow.png b/docs/source/onedal/spmd/_static/spmd_flow.png new file mode 100755 index 00000000000..65497b9a67b Binary files /dev/null and b/docs/source/onedal/spmd/_static/spmd_flow.png differ diff --git a/docs/source/onedal/spmd/index.rst b/docs/source/onedal/spmd/index.rst new file mode 100644 index 00000000000..c6fe7154a5d --- /dev/null +++ b/docs/source/onedal/spmd/index.rst @@ -0,0 +1,72 @@ +.. ****************************************************************************** +.. * Copyright 2021 Intel Corporation +.. * +.. * Licensed under the Apache License, Version 2.0 (the "License"); +.. * you may not use this file except in compliance with the License. +.. * You may obtain a copy of the License at +.. * +.. * http://www.apache.org/licenses/LICENSE-2.0 +.. * +.. * Unless required by applicable law or agreed to in writing, software +.. * distributed under the License is distributed on an "AS IS" BASIS, +.. 
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +.. * See the License for the specific language governing permissions and +.. * limitations under the License. +.. *******************************************************************************/ + +.. _dg_spmd: + +============================ +Single Program Multiple Data +============================ + +This section includes concepts and descriptions of objects that support +distributed computations using the :capterm:`SPMD` model. + +Distributed computation using SPMD model +---------------------------------------- + +In a typical usage scenario, a user provides a :capterm:`communicator` object as the first parameter of +a free function to indicate that the algorithm can process data simultaneously. All internal inter-process +communications at sync points are hidden from the user. + +The general expectation is that the input dataset is distributed among processes. Results are distributed in +accordance with the input. + + +.. _example_spmd_flow: + +.. figure:: _static/spmd_flow.png + :width: 800 + :alt: Typical SPMD flow + + Example of SPMD Flow in oneDAL + +.. _communicator_operations: + +Supported Collective Operations +------------------------------- + +The following collective operations are supported: + +- ``bcast`` + Broadcasts data from the specified process. + +- ``allreduce`` + Reduces data among all processes. + +- ``allgatherv`` + Gathers data from all processes and shares the result among all processes. + +.. _backend_restrictions: + +Backend-specific restrictions +----------------------------- + +- oneCCL: + Allgatherv does not support arbitrary displacements. The result is expected to + be closely packed without gaps. + +- oneMPI: + Collective operations in oneMPI do not support asynchronous executions. They block the + process until completion.