-
Notifications
You must be signed in to change notification settings - Fork 10.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[OpenMP] Improve default block count selection for low block counts
If a combined loop has insufficient parallelism (= low trip count), we might end up with too few teams/blocks. To counter that, we can reduce the number of threads per team we use. This patch implements a heuristic and exposes a new environment variable to control the minimum number of threads to be employed in this case. Issue reported by: Felipe Cabarcas Jaramillo <cabarcas@udel.edu> (@fel-cab). Reviewed By: tianshilei1992 Differential Revision: https://reviews.llvm.org/D152014
- Loading branch information
Showing
4 changed files
with
114 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
// clang-format off | ||
// RUN: %libomptarget-compile-generic | ||
// RUN: env LIBOMPTARGET_INFO=16 \ | ||
// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefix=DEFAULT | ||
// RUN: env LIBOMPTARGET_INFO=16 LIBOMPTARGET_MIN_THREADS_FOR_LOW_TRIP_COUNT=8 \ | ||
// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefix=EIGHT | ||
|
||
// UNSUPPORTED: x86_64-pc-linux-gnu | ||
// UNSUPPORTED: x86_64-pc-linux-gnu-LTO | ||
|
||
// Baseline trip count. The offloaded loops in main() iterate N, N - 1, and | ||
// N + 1 times to probe the launch configuration around this value. | ||
#define N 128 | ||
|
||
// Empty function marked optnone; it is the only statement in each offloaded | ||
// loop body in main(). Presumably it keeps the loops from being optimized | ||
// away -- NOTE(review): confirm the intent. | ||
__attribute__((optnone)) void optnone() {} | ||
|
||
// Runs four offloaded loops and lets FileCheck verify the "Launching kernel" | ||
// line reported for each one. The RUN lines at the top of the file execute the | ||
// binary twice: once without LIBOMPTARGET_MIN_THREADS_FOR_LOW_TRIP_COUNT (the | ||
// DEFAULT prefix) and once with it set to 8 (the EIGHT prefix). | ||
int main() { | ||
// Trip count N (128). Expected: 4 blocks x 32 threads without the variable, | ||
// 16 blocks x 8 threads with it set to 8; both products equal 128. | ||
// DEFAULT: Launching kernel {{.+_main_.+}} with 4 blocks and 32 threads in SPMD mode | ||
// EIGHT: Launching kernel {{.+_main_.+}} with 16 blocks and 8 threads in SPMD mode | ||
#pragma omp target teams distribute parallel for simd | ||
for (int i = 0; i < N; ++i) { | ||
optnone(); | ||
} | ||
// Trip count N - 1 (127). The expected launch shape is the same as for N. | ||
// DEFAULT: Launching kernel {{.+_main_.+}} with 4 blocks and 32 threads in SPMD mode | ||
// EIGHT: Launching kernel {{.+_main_.+}} with 16 blocks and 8 threads in SPMD mode | ||
#pragma omp target teams distribute parallel for simd | ||
for (int i = 0; i < N - 1; ++i) { | ||
optnone(); | ||
} | ||
// Trip count N + 1 (129). One additional block is expected in both runs | ||
// (5 x 32 and 17 x 8). | ||
// DEFAULT: Launching kernel {{.+_main_.+}} with 5 blocks and 32 threads in SPMD mode | ||
// EIGHT: Launching kernel {{.+_main_.+}} with 17 blocks and 8 threads in SPMD mode | ||
#pragma omp target teams distribute parallel for simd | ||
for (int i = 0; i < N + 1; ++i) { | ||
optnone(); | ||
} | ||
// Trip count N with an explicit thread_limit(4). Both runs expect the same | ||
// shape, 32 blocks x 4 threads, so the environment variable makes no | ||
// difference to the expected output here. | ||
// DEFAULT: Launching kernel {{.+_main_.+}} with 32 blocks and 4 threads in SPMD mode | ||
// EIGHT: Launching kernel {{.+_main_.+}} with 32 blocks and 4 threads in SPMD mode | ||
#pragma omp target teams distribute parallel for simd thread_limit(4) | ||
for (int i = 0; i < N; ++i) { | ||
optnone(); | ||
} | ||
} | ||
|