Skip to content
Permalink
Browse files

[OPENMP] Resolve lost LoopTripCnt for subsequent loops in same thread.

Remove the LoopTripCnt entry from the threaded device stack after consuming it.
Added a libomptarget DP message to aid future debugging and to
validate the added test case, which only runs in a Debug build.

Differential Revision: https://reviews.llvm.org/D64808


git-svn-id: https://llvm.org/svn/llvm-project/openmp/trunk@366349 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information...
ronlieb committed Jul 17, 2019
1 parent 07f4283 commit c1b1ef1c009f285690b40a96f0853ec2b1573fbb
Showing with 41 additions and 2 deletions.
  1. +5 −2 libomptarget/src/omptarget.cpp
  2. +36 −0 libomptarget/test/offloading/looptripcnt.c
@@ -732,8 +732,11 @@ int target(int64_t device_id, void *host_ptr, int32_t arg_num,
uint64_t ltc = 0;
TblMapMtx.lock();
auto I = Device.LoopTripCnt.find(__kmpc_global_thread_num(NULL));
if (I != Device.LoopTripCnt.end())
std::swap(ltc, I->second);
if (I != Device.LoopTripCnt.end()) {
ltc = I->second;
Device.LoopTripCnt.erase(I);
DP("loop trip count is %lu.\n", ltc);
}
TblMapMtx.unlock();

// Launch device execution.
@@ -0,0 +1,36 @@
// RUN: %libomptarget-compile-aarch64-unknown-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-aarch64-unknown-linux-gnu 2>&1 | %fcheck-aarch64-unknown-linux-gnu -allow-empty -check-prefix=DEBUG
// RUN: %libomptarget-compile-powerpc64-ibm-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-powerpc64-ibm-linux-gnu 2>&1 | %fcheck-powerpc64-ibm-linux-gnu -allow-empty -check-prefix=DEBUG
// RUN: %libomptarget-compile-powerpc64le-ibm-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-powerpc64le-ibm-linux-gnu 2>&1 | %fcheck-powerpc64le-ibm-linux-gnu -allow-empty -check-prefix=DEBUG
// RUN: %libomptarget-compile-x86_64-pc-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-x86_64-pc-linux-gnu 2>&1 | %fcheck-x86_64-pc-linux-gnu -allow-empty -check-prefix=DEBUG
// REQUIRES: libomptarget-debug

/*
Test that the loop trip count is popped from the runtime stack after it is used.
*/
#include <stdio.h>
#include <omp.h>
/*
 * Runs two offloaded loops of N iterations back to back from the same host
 * thread. The first carries a thread_limit(4) clause, the second does not.
 * After each loop a FileCheck directive expects the runtime's debug output
 * to report a loop trip count of 128, i.e. the value of N.
 *
 * Always returns 0; pass/fail is decided by the check directives applied to
 * the program's combined output, not by the exit code.
 */
int main()
{
// Iteration count of both loops; this is the trip count the checks expect.
int N = 128;
// Capacity of the result arrays; only the first N elements are written.
int NN = 1024;
int num_teams[NN];
int num_threads[NN];

// First target region: same loop, with thread_limit(4).
printf("#pragma omp target teams distribute parallel for thread_limit(4)\n");
#pragma omp target teams distribute parallel for thread_limit(4)
for (int j = 0; j< N; j++) {
// Each iteration records the team size and team count it observed.
num_threads[j] = omp_get_num_threads();
num_teams[j] = omp_get_num_teams();
}
// Only the values recorded by iteration 0 are printed.
printf("num_threads %d num_teams %d\n", num_threads[0], num_teams[0]);
// DEBUG: loop trip count is 128
// Second target region on the same thread, without a thread_limit clause.
// The check after it expects the trip count to be reported as 128 again.
printf("#pragma omp target teams distribute parallel for\n");
#pragma omp target teams distribute parallel for
for (int j = 0; j< N; j++) {
num_threads[j] = omp_get_num_threads();
num_teams[j] = omp_get_num_teams();
}
printf("num_threads %d num_teams %d\n", num_threads[0], num_teams[0]);
// DEBUG: loop trip count is 128
return 0;
}

0 comments on commit c1b1ef1

Please sign in to comment.
You can’t perform that action at this time.