Skip to content

Commit

Permalink
[libomptarget][nfc] Extract function from data_sharing, move to common
Browse files Browse the repository at this point in the history
Summary:
[libomptarget][nfc] Extract function from data_sharing, move to common

Finding the first active thread in the warp is different on nvptx and amdgcn,
mostly due to warp size and the desire for efficiency.

Reviewers: ABataev, jdoerfert, grokos

Reviewed By: jdoerfert

Subscribers: jvesely, mgorny, openmp-commits

Tags: #openmp

Differential Revision: https://reviews.llvm.org/D71643
  • Loading branch information
JonChesterfield committed Dec 18, 2019
1 parent 9d38fd8 commit 8adae60
Show file tree
Hide file tree
Showing 5 changed files with 21 additions and 17 deletions.
5 changes: 3 additions & 2 deletions openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
Expand Up @@ -57,12 +57,13 @@ get_filename_component(devicertl_base_directory
# Source files assigned to cuda_sources; every entry lives under
# ${devicertl_base_directory}/common/src.
# NOTE(review): loop.cu and reduction.cu each appear twice below — this looks
# like removed and added diff lines shown without +/- markers; confirm against
# the real CMakeLists.txt before relying on this list.
set(cuda_sources
${devicertl_base_directory}/common/src/cancel.cu
${devicertl_base_directory}/common/src/critical.cu
${devicertl_base_directory}/common/src/loop.cu
${devicertl_base_directory}/common/src/data_sharing.cu
${devicertl_base_directory}/common/src/libcall.cu
${devicertl_base_directory}/common/src/reduction.cu
${devicertl_base_directory}/common/src/loop.cu
${devicertl_base_directory}/common/src/omp_data.cu
${devicertl_base_directory}/common/src/omptarget.cu
${devicertl_base_directory}/common/src/parallel.cu
${devicertl_base_directory}/common/src/reduction.cu
${devicertl_base_directory}/common/src/sync.cu
${devicertl_base_directory}/common/src/task.cu)

Expand Down
2 changes: 2 additions & 0 deletions openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
Expand Up @@ -101,6 +101,8 @@ INLINE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_gt() {
return __lanemask_gt();
}

// Returns true if the calling thread is the first active thread in its warp.
// Declaration only: the definition is not visible in this hunk and is
// presumably supplied elsewhere for this target — confirm where it lives.
EXTERN bool __kmpc_impl_is_first_active_thread();

// Returns the value reported by __smid() as a 32-bit unsigned integer.
INLINE uint32_t __kmpc_impl_smid() {
  const uint32_t SmId = __smid();
  return SmId;
}
Expand Down
@@ -1,26 +1,18 @@
//===----- data_sharing.cu - NVPTX OpenMP debug utilities -------- CUDA -*-===//
//===----- data_sharing.cu - OpenMP GPU data sharing ------------- CUDA -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of data sharing environments/
// This file contains the implementation of data sharing environments
//
//===----------------------------------------------------------------------===//
#include "common/omptarget.h"
#include "target_impl.h"
#include <stdio.h>

// Returns true when the calling thread is the first active thread in its warp.
//
// The active-lane mask is widened to 64 bits and shifted left by
// (WARPSIZE - lane index). The low 32 bits of the result are then checked
// for zero. Assuming WARPSIZE is 32 (TODO confirm), those low bits hold
// exactly the mask bits of the lanes below this one, so a zero value means
// no lower-numbered lane is active.
INLINE static bool IsWarpMasterActiveThread() {
  const unsigned long long ActiveLanes = __kmpc_impl_activemask();
  const unsigned long long ShiftAmount =
      WARPSIZE - (GetThreadIdInBlock() % WARPSIZE);
  // Keep only the 32 lower bits of the shifted mask before testing it.
  const unsigned LowBits = static_cast<unsigned>(ActiveLanes << ShiftAmount);
  return LowBits == 0;
}
// Return true if this is the master thread.
INLINE static bool IsMasterThread(bool isSPMDExecutionMode) {
return !isSPMDExecutionMode && GetMasterThreadID() == GetThreadIdInBlock();
Expand Down Expand Up @@ -128,7 +120,7 @@ EXTERN void *__kmpc_data_sharing_environment_begin(
DSPRINT(DSFLAG, "Active threads: %08x \n", (unsigned)ActiveT);

// Only the warp active master needs to grow the stack.
if (IsWarpMasterActiveThread()) {
if (__kmpc_impl_is_first_active_thread()) {
// Save the current active threads.
ActiveT = CurActiveThreads;

Expand Down Expand Up @@ -229,7 +221,7 @@ EXTERN void __kmpc_data_sharing_environment_end(
unsigned WID = GetWarpId();

if (IsEntryPoint) {
if (IsWarpMasterActiveThread()) {
if (__kmpc_impl_is_first_active_thread()) {
DSPRINT0(DSFLAG, "Doing clean up\n");

// The master thread cleans the saved slot, because this is an environment
Expand All @@ -255,7 +247,7 @@ EXTERN void __kmpc_data_sharing_environment_end(
// warp diverged and returns in different places). This only works if we
// assume that threads will converge right after the call site that started
// the environment.
if (IsWarpMasterActiveThread()) {
if (__kmpc_impl_is_first_active_thread()) {
__kmpc_impl_lanemask_t &ActiveT = DataSharingState.ActiveThreads[WID];

DSPRINT0(DSFLAG, "Before restoring the stack\n");
Expand Down
4 changes: 2 additions & 2 deletions openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
Expand Up @@ -53,16 +53,16 @@ if(LIBOMPTARGET_DEP_CUDA_FOUND)
# Source files assigned to cuda_src_files: files from
# ${devicertl_common_directory}/src plus the target-local src/target_impl.cu.
# NOTE(review): data_sharing.cu is listed under both src/ and the common
# directory, and src/target_impl.cu appears twice — this looks like removed and
# added diff lines shown without +/- markers; confirm against the real file.
set(cuda_src_files
${devicertl_common_directory}/src/cancel.cu
${devicertl_common_directory}/src/critical.cu
src/data_sharing.cu
${devicertl_common_directory}/src/data_sharing.cu
${devicertl_common_directory}/src/libcall.cu
src/target_impl.cu
${devicertl_common_directory}/src/loop.cu
${devicertl_common_directory}/src/omptarget.cu
${devicertl_common_directory}/src/parallel.cu
${devicertl_common_directory}/src/reduction.cu
${devicertl_common_directory}/src/support.cu
${devicertl_common_directory}/src/sync.cu
${devicertl_common_directory}/src/task.cu
src/target_impl.cu
)

# omp_data.cu is recorded in its own variable rather than in cuda_src_files.
set(omp_data_objects ${devicertl_common_directory}/src/omp_data.cu)
Expand Down
9 changes: 9 additions & 0 deletions openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
Expand Up @@ -94,6 +94,15 @@ INLINE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_gt() {
return res;
}

// Returns true when the calling thread is the first active thread in its warp.
//
// The active-lane mask is widened to 64 bits and shifted left by
// (WARPSIZE - lane index). The low 32 bits of the result are then checked
// for zero. Assuming WARPSIZE is 32 (TODO confirm), those low bits hold
// exactly the mask bits of the lanes below this one, so a zero value means
// no lower-numbered lane is active.
INLINE bool __kmpc_impl_is_first_active_thread() {
  const unsigned long long ActiveLanes = __kmpc_impl_activemask();
  const unsigned long long ShiftAmount =
      WARPSIZE - (GetThreadIdInBlock() % WARPSIZE);
  // Keep only the 32 lower bits of the shifted mask before testing it.
  const unsigned LowBits = static_cast<unsigned>(ActiveLanes << ShiftAmount);
  return LowBits == 0;
}

INLINE uint32_t __kmpc_impl_smid() {
uint32_t id;
asm("mov.u32 %0, %%smid;" : "=r"(id));
Expand Down

0 comments on commit 8adae60

Please sign in to comment.