Skip to content

Commit

Permalink
[pstl] Initial implementation of OpenMP backend, on behalf of Christo…
Browse files Browse the repository at this point in the history
…pher Nelson nadiasvertex@gmail.com

Phabricator Review:
https://reviews.llvm.org/D99836

A couple of parallel patterns still remain serial - "Parallel partial sort" and "Parallel transform scan" - there are //TODOs in the code.
  • Loading branch information
MikeDvorskiy committed Oct 15, 2021
1 parent e741890 commit 6069a6a
Show file tree
Hide file tree
Showing 19 changed files with 1,057 additions and 6 deletions.
2 changes: 1 addition & 1 deletion libcxx/cmake/Modules/DefineLinkerScript.cmake
Expand Up @@ -31,7 +31,7 @@ function(define_linker_script target)
set(link_libraries)
if (interface_libs)
foreach(lib IN LISTS interface_libs)
if ("${lib}" STREQUAL "cxx-headers")
if ("${lib}" MATCHES "cxx-headers|ParallelSTL")
continue()
endif()
# If ${lib} is not a target, we use a dummy target which we know will
Expand Down
6 changes: 5 additions & 1 deletion pstl/CMakeLists.txt
Expand Up @@ -16,7 +16,7 @@ math(EXPR VERSION_PATCH "(${PARALLELSTL_VERSION_SOURCE} % 10)")

project(ParallelSTL VERSION ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH} LANGUAGES CXX)

set(PSTL_PARALLEL_BACKEND "serial" CACHE STRING "Threading backend to use. Valid choices are 'serial' and 'tbb'. The default is 'serial'.")
set(PSTL_PARALLEL_BACKEND "serial" CACHE STRING "Threading backend to use. Valid choices are 'serial', 'omp', and 'tbb'. The default is 'serial'.")
set(PSTL_HIDE_FROM_ABI_PER_TU OFF CACHE BOOL "Whether to constrain ABI-unstable symbols to each translation unit (basically, mark them with C's static keyword).")
set(_PSTL_HIDE_FROM_ABI_PER_TU ${PSTL_HIDE_FROM_ABI_PER_TU}) # For __pstl_config_site

Expand All @@ -43,6 +43,10 @@ elseif (PSTL_PARALLEL_BACKEND STREQUAL "tbb")
message(STATUS "Parallel STL uses TBB ${TBB_VERSION} (interface version: ${TBB_INTERFACE_VERSION})")
target_link_libraries(ParallelSTL INTERFACE TBB::tbb)
set(_PSTL_PAR_BACKEND_TBB ON)
elseif (PSTL_PARALLEL_BACKEND STREQUAL "omp")
message(STATUS "Parallel STL uses the omp backend")
target_compile_options(ParallelSTL INTERFACE "-fopenmp=libomp")
set(_PSTL_PAR_BACKEND_OPENMP ON)
else()
message(FATAL_ERROR "Requested unknown Parallel STL backend '${PSTL_PARALLEL_BACKEND}'.")
endif()
Expand Down
4 changes: 4 additions & 0 deletions pstl/CREDITS.txt
Expand Up @@ -15,3 +15,7 @@ D: Created the initial implementation.
N: Thomas Rodgers
E: trodgers@redhat.com
D: Identifier name transformation for inclusion in a Standard C++ library.

N: Christopher Nelson
E: nadiasvertex@gmail.com
D: Add support for an OpenMP backend.
1 change: 1 addition & 0 deletions pstl/include/__pstl_config_site.in
Expand Up @@ -11,6 +11,7 @@

#cmakedefine _PSTL_PAR_BACKEND_SERIAL
#cmakedefine _PSTL_PAR_BACKEND_TBB
#cmakedefine _PSTL_PAR_BACKEND_OPENMP
#cmakedefine _PSTL_HIDE_FROM_ABI_PER_TU

#endif // __PSTL_CONFIG_SITE
64 changes: 64 additions & 0 deletions pstl/include/pstl/internal/omp/parallel_for.h
@@ -0,0 +1,64 @@
// -*- C++ -*-
// -*-===----------------------------------------------------------------------===//
//
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
//
//===----------------------------------------------------------------------===//

#ifndef _PSTL_INTERNAL_OMP_PARALLEL_FOR_H
#define _PSTL_INTERNAL_OMP_PARALLEL_FOR_H

#include <cstddef>

#include "util.h"

namespace __pstl
{
namespace __omp_backend
{

// Task-generating part of the parallel for.
//
// Partitions [__first, __last) with __chunk_partitioner and passes every chunk
// index, together with the brick __f, to __process_chunk. No parallel region is
// opened here; the function only emits a taskloop.
template <class _Index, class _Fp>
void
__parallel_for_body(_Index __first, _Index __last, _Fp __f)
{
    // Chunk layout for the whole iteration space.
    auto __partition = __omp_backend::__chunk_partitioner(__first, __last);

    // A taskloop (instead of a nested parallel region) avoids over-subscription
    // when this is reached from nested parallelism.
    _PSTL_PRAGMA(omp taskloop untied mergeable)
    for (std::size_t __i = 0; __i < __partition.__n_chunks; ++__i)
    {
        __omp_backend::__process_chunk(__partition, __first, __i, __f);
    }
}

//------------------------------------------------------------------------
// Notation:
// Evaluation of brick f[i,j) for each subrange [i,j) of [first, last)
//------------------------------------------------------------------------

// Entry point of the parallel for: evaluates the brick __f over [__first, __last).
//
// The execution policy argument is accepted but not used. When no parallel
// region is active, one is opened and a single thread generates the tasks;
// otherwise the tasks are generated directly in the current region.
template <class _ExecutionPolicy, class _Index, class _Fp>
void
__parallel_for(_ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f)
{
    if (!omp_in_parallel())
    {
        // Not inside a parallel region yet: create exactly one, and let only
        // one thread create the set of tasks.
        _PSTL_PRAGMA(omp parallel)
        _PSTL_PRAGMA(omp single nowait)
        {
            __pstl::__omp_backend::__parallel_for_body(__first, __last, __f);
        }
        return;
    }

    // Already inside a parallel region: do not nest another one, just create
    // tasks.
    __pstl::__omp_backend::__parallel_for_body(__first, __last, __f);
}

} // namespace __omp_backend
} // namespace __pstl
#endif // _PSTL_INTERNAL_OMP_PARALLEL_FOR_H
59 changes: 59 additions & 0 deletions pstl/include/pstl/internal/omp/parallel_for_each.h
@@ -0,0 +1,59 @@
// -*- C++ -*-
// -*-===----------------------------------------------------------------------===//
//
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
//
//===----------------------------------------------------------------------===//

#ifndef _PSTL_INTERNAL_OMP_PARALLEL_FOR_EACH_H
#define _PSTL_INTERNAL_OMP_PARALLEL_FOR_EACH_H

#include "util.h"

namespace __pstl
{
namespace __omp_backend
{

// Task-generating part of the parallel for_each.
//
// Calls __f on every element of [__first, __last). The range length is measured
// once with std::distance, and each taskloop iteration reaches its element by
// advancing a copy of __first with std::next.
template <class _ForwardIterator, class _Fp>
void
__parallel_for_each_body(_ForwardIterator __first, _ForwardIterator __last, _Fp __f)
{
    using _Distance = typename std::iterator_traits<_ForwardIterator>::difference_type;
    // TODO: Think of an approach to remove the std::distance call
    auto __count = std::distance(__first, __last);

    _PSTL_PRAGMA(omp taskloop untied mergeable)
    for (_Distance __pos = 0; __pos < __count; ++__pos)
    {
        // TODO: Think of an approach to remove the increment here each time.
        __f(*std::next(__first, __pos));
    }
}

// Entry point of the parallel for_each: applies __f to each element of
// [__first, __last).
//
// The execution policy argument is accepted but not used. When no parallel
// region is active, one is opened and a single thread generates the tasks;
// otherwise the tasks are generated directly in the current region.
template <class _ExecutionPolicy, class _ForwardIterator, class _Fp>
void
__parallel_for_each(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Fp __f)
{
    if (!omp_in_parallel())
    {
        // Not inside a parallel region yet: create exactly one, and let only
        // one thread create the set of tasks.
        _PSTL_PRAGMA(omp parallel)
        _PSTL_PRAGMA(omp single nowait)
        {
            __pstl::__omp_backend::__parallel_for_each_body(__first, __last, __f);
        }
        return;
    }

    // Already inside a parallel region: do not nest another one, just create
    // tasks.
    __pstl::__omp_backend::__parallel_for_each_body(__first, __last, __f);
}

} // namespace __omp_backend
} // namespace __pstl
#endif // _PSTL_INTERNAL_OMP_PARALLEL_FOR_EACH_H
50 changes: 50 additions & 0 deletions pstl/include/pstl/internal/omp/parallel_invoke.h
@@ -0,0 +1,50 @@
// -*- C++ -*-
// -*-===----------------------------------------------------------------------===//
//
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
//
//===----------------------------------------------------------------------===//

#ifndef _PSTL_INTERNAL_OMP_PARALLEL_INVOKE_H
#define _PSTL_INTERNAL_OMP_PARALLEL_INVOKE_H

#include "util.h"

namespace __pstl
{
namespace __omp_backend
{

// Runs the two callables __f1 and __f2 as two separate OpenMP tasks.
//
// Both callables are invoked with no arguments, through std::forward so that
// their value category is preserved. The function opens no parallel region of
// its own; it only creates tasks inside a taskgroup.
template <typename _F1, typename _F2>
void
__parallel_invoke_body(_F1&& __f1, _F2&& __f2)
{
// The taskgroup does not end until the tasks created inside it have completed,
// so both callables have finished when this function returns.
_PSTL_PRAGMA(omp taskgroup)
{
_PSTL_PRAGMA(omp task untied mergeable) { std::forward<_F1>(__f1)(); }
_PSTL_PRAGMA(omp task untied mergeable) { std::forward<_F2>(__f2)(); }
}
}

// Entry point of the parallel invoke: runs __f1 and __f2 through
// __parallel_invoke_body.
//
// The execution policy argument is accepted but not used. When no parallel
// region is active, one is opened and a single thread creates the tasks;
// otherwise the tasks are created directly in the current region.
template <class _ExecutionPolicy, typename _F1, typename _F2>
void
__parallel_invoke(_ExecutionPolicy&&, _F1&& __f1, _F2&& __f2)
{
    if (!omp_in_parallel())
    {
        _PSTL_PRAGMA(omp parallel)
        _PSTL_PRAGMA(omp single nowait)
        {
            __parallel_invoke_body(std::forward<_F1>(__f1), std::forward<_F2>(__f2));
        }
        return;
    }

    __parallel_invoke_body(std::forward<_F1>(__f1), std::forward<_F2>(__f2));
}

} // namespace __omp_backend
} // namespace __pstl
#endif // _PSTL_INTERNAL_OMP_PARALLEL_INVOKE_H
94 changes: 94 additions & 0 deletions pstl/include/pstl/internal/omp/parallel_merge.h
@@ -0,0 +1,94 @@
// -*- C++ -*-
// -*-===----------------------------------------------------------------------===//
//
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
//
//===----------------------------------------------------------------------===//

#ifndef _PSTL_INTERNAL_OMP_PARALLEL_MERGE_H
#define _PSTL_INTERNAL_OMP_PARALLEL_MERGE_H

#include "util.h"

namespace __pstl
{
namespace __omp_backend
{

// Recursive, task-based merge of [__xs, __xe) and [__ys, __ye) into the output
// that starts at __zs.
//
// __size_x, __size_y - lengths of the two input ranges, supplied by the caller
// __comp             - comparator handed to the binary searches and to the leaf
// __leaf_merge       - serial merge, called as
//                      __leaf_merge(__xs, __xe, __ys, __ye, __zs, __comp)
//
// NOTE(review): std::upper_bound / std::lower_bound require the searched range
// to be partitioned with respect to __comp, so both inputs are presumably
// sorted by __comp - confirm against callers.
template <typename _RandomAccessIterator1, typename _RandomAccessIterator2, typename _RandomAccessIterator3,
typename _Compare, typename _LeafMerge>
void
__parallel_merge_body(std::size_t __size_x, std::size_t __size_y, _RandomAccessIterator1 __xs,
_RandomAccessIterator1 __xe, _RandomAccessIterator2 __ys, _RandomAccessIterator2 __ye,
_RandomAccessIterator3 __zs, _Compare __comp, _LeafMerge __leaf_merge)
{

// Base case: the combined input is small enough, merge it serially.
if (__size_x + __size_y <= __omp_backend::__default_chunk_size)
{
__leaf_merge(__xs, __xe, __ys, __ye, __zs, __comp);
return;
}

// Split points inside the x and y ranges.
_RandomAccessIterator1 __xm;
_RandomAccessIterator2 __ym;

// Cut the longer input at its midpoint and locate the matching cut in the
// other input with a binary search on the midpoint element.
if (__size_x < __size_y)
{
// y is strictly longer: halve y, search x.
__ym = __ys + (__size_y / 2);
__xm = std::upper_bound(__xs, __xe, *__ym, __comp);
}
else
{
// x is at least as long as y: halve x, search y.
__xm = __xs + (__size_x / 2);
__ym = std::lower_bound(__ys, __ye, *__xm, __comp);
}

// Output position of the second half: everything left of both cuts comes first.
auto __zm = __zs + (__xm - __xs) + (__ym - __ys);

// First task: merge the parts left of the cuts into [__zs, __zm).
_PSTL_PRAGMA(omp task untied mergeable default(none)
firstprivate(__xs, __xm, __ys, __ym, __zs, __comp, __leaf_merge))
__parallel_merge_body(__xm - __xs, __ym - __ys, __xs, __xm, __ys, __ym, __zs, __comp, __leaf_merge);

// Second task: merge the parts right of the cuts into the output from __zm on.
_PSTL_PRAGMA(omp task untied mergeable default(none)
firstprivate(__xm, __xe, __ym, __ye, __zm, __comp, __leaf_merge))
__parallel_merge_body(__xe - __xm, __ye - __ym, __xm, __xe, __ym, __ye, __zm, __comp, __leaf_merge);

// Wait for both child tasks before returning to the caller.
_PSTL_PRAGMA(omp taskwait)
}

// Entry point of the parallel merge: merges [__xs, __xe) and [__ys, __ye) into
// the output starting at __zs, using __comp for comparisons and __leaf_merge as
// the serial merge applied to small pieces.
//
// The execution policy argument is accepted but not used. The lengths of both
// inputs are computed once here and passed on to __parallel_merge_body, which
// does the actual splitting into tasks.
template <class _ExecutionPolicy, typename _RandomAccessIterator1, typename _RandomAccessIterator2,
          typename _RandomAccessIterator3, typename _Compare, typename _LeafMerge>
void
__parallel_merge(_ExecutionPolicy&& /*__exec*/, _RandomAccessIterator1 __xs, _RandomAccessIterator1 __xe,
                 _RandomAccessIterator2 __ys, _RandomAccessIterator2 __ye, _RandomAccessIterator3 __zs, _Compare __comp,
                 _LeafMerge __leaf_merge)
{
    std::size_t __size_x = __xe - __xs;
    std::size_t __size_y = __ye - __ys;

    if (!omp_in_parallel())
    {
        // No active parallel region: open one and let a single thread start
        // the recursive merge, which spawns the tasks.
        _PSTL_PRAGMA(omp parallel)
        {
            _PSTL_PRAGMA(omp single nowait)
            __parallel_merge_body(__size_x, __size_y, __xs, __xe, __ys, __ye, __zs, __comp, __leaf_merge);
        }
        return;
    }

    // Already inside a parallel region: only create tasks.
    __parallel_merge_body(__size_x, __size_y, __xs, __xe, __ys, __ye, __zs, __comp, __leaf_merge);
}

} // namespace __omp_backend
} // namespace __pstl
#endif // _PSTL_INTERNAL_OMP_PARALLEL_MERGE_H
68 changes: 68 additions & 0 deletions pstl/include/pstl/internal/omp/parallel_reduce.h
@@ -0,0 +1,68 @@
// -*- C++ -*-
// -*-===----------------------------------------------------------------------===//
//
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
//
//===----------------------------------------------------------------------===//

#ifndef _PSTL_INTERNAL_OMP_PARALLEL_REDUCE_H
#define _PSTL_INTERNAL_OMP_PARALLEL_REDUCE_H

#include "util.h"

namespace __pstl
{
namespace __omp_backend
{

// Recursive, task-based reduction over [__first, __last).
//
// __identity  - value used to seed every partial result
// __real_body - serial reduction of a sub-range, called as
//               __real_body(first, last, init)
// __reduce    - combines two partial results
//
// Returns the reduction of the whole range.
//
// A range that is small enough to run serially is reduced directly by
// __real_body. That base case is what terminates the recursion: without it the
// function would keep halving the range forever and never evaluate __real_body.
// Larger ranges are halved, both halves are reduced as tasks through
// __parallel_invoke_body, and the two partial results are combined by __reduce.
template <class _RandomAccessIterator, class _Value, typename _RealBody, typename _Reduction>
_Value
__parallel_reduce_body(_RandomAccessIterator __first, _RandomAccessIterator __last, _Value __identity,
                       _RealBody __real_body, _Reduction __reduce)
{
    // Base case: serial cut-off. __should_run_serial is the backend's helper
    // from util.h that decides when a range is too small to be worth splitting.
    if (__should_run_serial(__first, __last))
    {
        return __real_body(__first, __last, __identity);
    }

    auto __middle = __first + ((__last - __first) / 2);
    _Value __v1(__identity), __v2(__identity);

    // The lambdas capture by reference; this is safe because
    // __parallel_invoke_body does not return before both have run.
    __parallel_invoke_body(
        [&]() { __v1 = __parallel_reduce_body(__first, __middle, __identity, __real_body, __reduce); },
        [&]() { __v2 = __parallel_reduce_body(__middle, __last, __identity, __real_body, __reduce); });

    return __reduce(__v1, __v2);
}

//------------------------------------------------------------------------
// Notation:
// r(i,j,init) returns reduction of init with reduction over [i,j)
// c(x,y) combines values x and y that were the result of r
//------------------------------------------------------------------------

// Entry point of the parallel reduce over [__first, __last).
//
// The execution policy argument is accepted but not used. __identity,
// __real_body and __reduction are passed through unchanged to
// __parallel_reduce_body, whose result is returned.
template <class _ExecutionPolicy, class _RandomAccessIterator, class _Value, typename _RealBody, typename _Reduction>
_Value
__parallel_reduce(_ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, _Value __identity,
                  _RealBody __real_body, _Reduction __reduction)
{
    if (!omp_in_parallel())
    {
        // No active parallel region: create exactly one, and let only one
        // thread create the set of tasks. The result is written into a
        // variable that outlives the region.
        _Value __result = __identity;

        _PSTL_PRAGMA(omp parallel)
        _PSTL_PRAGMA(omp single nowait)
        {
            __result =
                __pstl::__omp_backend::__parallel_reduce_body(__first, __last, __identity, __real_body, __reduction);
        }

        return __result;
    }

    // Already inside a parallel region: do not nest another one, just create
    // tasks.
    return __pstl::__omp_backend::__parallel_reduce_body(__first, __last, __identity, __real_body, __reduction);
}

} // namespace __omp_backend
} // namespace __pstl
#endif // _PSTL_INTERNAL_OMP_PARALLEL_REDUCE_H

0 comments on commit 6069a6a

Please sign in to comment.