-
Notifications
You must be signed in to change notification settings - Fork 10.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Flang][OpenMP][MLIR] Initial array section mapping MLIR -> LLVM-IR l…
…owering utilising omp.bounds (#68689) This patch seeks to add initial lowering of OpenMP array sections within target region map clauses from MLIR to LLVM IR. This patch seeks to support fixed-size contiguous arrays initially (I don't think OpenMP supports anything other than contiguous sections from my reading, but I could be wrong), before looking toward assumed-size and assumed-shape arrays. The patch also currently does not include stride; it is left for future work. That said, assumed size works in some fashion (dummy arguments) with some minor alterations to the OMPEarlyOutliner, so it is possible that changes made in the IsolatedFromAbove series may allow this to work with no further required patches. It utilises the generated omp.bounds to calculate the size of the mapped OpenMP array (both for sectioned and un-sectioned arrays) as well as the offset to be passed to the kernel argument structure. Alongside these changes, some refactoring of how map data is handled is attempted, using a new MapData structure to keep track of information utilised in the lowering of mapped values. The initial addition of a more complex createDeviceArgumentAccessor that utilises capture kinds similarly to (and loosely based on) Clang to generate different kernel argument accesses is also added. A similar function for altering how the kernel argument is passed to the kernel argument structure on the host is also utilised (createAlteredByCaptureMap), which allows modification of the pointer/basePointer based on their capture (and bounds information). Note that ByRef is the default for explicit mappings and ByCopy will be the default for implicit captures, so the former is currently tested in this patch and the latter is not for the moment.
- Loading branch information
Showing
10 changed files
with
710 additions
and
165 deletions.
There are no files selected for viewing
535 changes: 389 additions & 146 deletions
535
mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
Large diffs are not rendered by default.
Oops, something went wrong.
56 changes: 56 additions & 0 deletions
56
mlir/test/Target/LLVMIR/omptarget-array-sectioning-host.mlir
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s | ||
|
||
// This test checks the offload sizes provided to the OpenMP kernel argument | ||
// structure are correct when lowering to LLVM-IR from MLIR with 3-D bounds | ||
// provided for a 3-D array. One with full default size, and the other with | ||
// a user specified OpenMP array sectioning. We expect the default sized | ||
// array bounds to lower to the full size of the array and the sectioned | ||
// array to be the size of 3*3*1*element-byte-size (36 bytes in this case). | ||
|
||
// Host-side module (omp.is_target_device = false). | ||
module attributes {omp.is_target_device = false} { | ||
// Maps two 3x3x3 i32 globals into one omp.target region: @_QFEinarray with a | ||
// sectioned last bound and @_QFEoutarray with the same bound in every dimension. | ||
llvm.func @_3d_target_array_section() { | ||
%0 = llvm.mlir.addressof @_QFEinarray : !llvm.ptr | ||
%1 = llvm.mlir.addressof @_QFEoutarray : !llvm.ptr | ||
%2 = llvm.mlir.constant(1 : index) : i64 | ||
%3 = llvm.mlir.constant(0 : index) : i64 | ||
%4 = llvm.mlir.constant(2 : index) : i64 | ||
// %5: lower bound 0, upper bound 2, stride 1, start_idx 1. | ||
%5 = omp.bounds lower_bound(%3 : i64) upper_bound(%4 : i64) stride(%2 : i64) start_idx(%2 : i64) | ||
// %6: lower bound 1, upper bound 1 (a single index), stride 1, start_idx 1. | ||
%6 = omp.bounds lower_bound(%2 : i64) upper_bound(%2 : i64) stride(%2 : i64) start_idx(%2 : i64) | ||
// Sectioned map: the last bounds operand is the single-index %6. | ||
%7 = omp.map_info var_ptr(%0 : !llvm.ptr, !llvm.array<3 x array<3 x array<3 x i32>>>) map_clauses(tofrom) capture(ByRef) bounds(%5, %5, %6) -> !llvm.ptr {name = "inarray(1:3,1:3,2:2)"} | ||
// Un-sectioned map: all three bounds operands are %5. | ||
%8 = omp.map_info var_ptr(%1 : !llvm.ptr, !llvm.array<3 x array<3 x array<3 x i32>>>) map_clauses(tofrom) capture(ByRef) bounds(%5, %5, %5) -> !llvm.ptr {name = "outarray(1:3,1:3,1:3)"} | ||
// Region body: copy one i32 from inarray to outarray at GEP indices [0, 1, 0, 0]. | ||
omp.target map_entries(%7, %8 : !llvm.ptr, !llvm.ptr) { | ||
%9 = llvm.mlir.constant(0 : i64) : i64 | ||
%10 = llvm.mlir.constant(1 : i64) : i64 | ||
%11 = llvm.getelementptr %0[0, %10, %9, %9] : (!llvm.ptr, i64, i64, i64) -> !llvm.ptr, !llvm.array<3 x array<3 x array<3 x i32>>> | ||
%12 = llvm.load %11 : !llvm.ptr -> i32 | ||
%13 = llvm.getelementptr %1[0, %10, %9, %9] : (!llvm.ptr, i64, i64, i64) -> !llvm.ptr, !llvm.array<3 x array<3 x array<3 x i32>>> | ||
llvm.store %12, %13 : i32, !llvm.ptr | ||
omp.terminator | ||
} | ||
llvm.return | ||
} | ||
// Zero-initialised 3x3x3 i32 global used as the sectioned map's variable. | ||
llvm.mlir.global internal @_QFEinarray() {addr_space = 0 : i32} : !llvm.array<3 x array<3 x array<3 x i32>>> { | ||
%0 = llvm.mlir.zero : !llvm.array<3 x array<3 x array<3 x i32>>> | ||
llvm.return %0 : !llvm.array<3 x array<3 x array<3 x i32>>> | ||
} | ||
// Zero-initialised 3x3x3 i32 global used as the un-sectioned map's variable. | ||
llvm.mlir.global internal @_QFEoutarray() {addr_space = 0 : i32} : !llvm.array<3 x array<3 x array<3 x i32>>> { | ||
%0 = llvm.mlir.zero : !llvm.array<3 x array<3 x array<3 x i32>>> | ||
llvm.return %0 : !llvm.array<3 x array<3 x array<3 x i32>>> | ||
} | ||
} | ||
|
||
// CHECK: @.offload_sizes = private unnamed_addr constant [2 x i64] [i64 36, i64 108] | ||
// CHECK: @.offload_maptypes = private unnamed_addr constant [2 x i64] [i64 35, i64 35] | ||
// CHECK: @.offload_mapnames = private constant [2 x ptr] [ptr @0, ptr @1] | ||
|
||
// CHECK: define void @_3d_target_array_section() | ||
|
||
// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_baseptrs, i32 0, i32 0 | ||
// CHECK: store ptr @_QFEinarray, ptr %[[OFFLOADBASEPTRS]], align 8 | ||
// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_ptrs, i32 0, i32 0 | ||
// CHECK: store ptr getelementptr inbounds ([3 x [3 x [3 x i32]]], ptr @_QFEinarray, i64 0, i64 1, i64 0, i64 0), ptr %[[OFFLOADPTRS]], align 8 | ||
|
||
// CHECK: %[[OFFLOADBASEPTRS2:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_baseptrs, i32 0, i32 1 | ||
// CHECK: store ptr @_QFEoutarray, ptr %[[OFFLOADBASEPTRS2]], align 8 | ||
// CHECK: %[[OFFLOADPTRS2:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_ptrs, i32 0, i32 1 | ||
// CHECK: store ptr @_QFEoutarray, ptr %[[OFFLOADPTRS2]], align 8 |
41 changes: 41 additions & 0 deletions
41
mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s | ||
|
||
// Device-side module (omp.is_target_device = true). | ||
module attributes {omp.is_target_device = true} { | ||
// Maps two i32 globals into one omp.target region using different capture kinds. | ||
llvm.func @_QQmain() attributes {fir.bindc_name = "main"} { | ||
%0 = llvm.mlir.addressof @_QFEi : !llvm.ptr | ||
%1 = llvm.mlir.addressof @_QFEsp : !llvm.ptr | ||
// "sp": tofrom, capture(ByRef). | ||
%2 = omp.map_info var_ptr(%1 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "sp"} | ||
// "i": to, capture(ByCopy). | ||
%3 = omp.map_info var_ptr(%0 : !llvm.ptr, i32) map_clauses(to) capture(ByCopy) -> !llvm.ptr {name = "i"} | ||
// Region body: load the i32 at @_QFEi and store it to @_QFEsp. | ||
omp.target map_entries(%2, %3 : !llvm.ptr, !llvm.ptr) { | ||
%4 = llvm.load %0 : !llvm.ptr -> i32 | ||
llvm.store %4, %1 : i32, !llvm.ptr | ||
omp.terminator | ||
} | ||
llvm.return | ||
} | ||
// i32 global initialised to 1 (the ByCopy-captured value). | ||
llvm.mlir.global internal @_QFEi() {addr_space = 0 : i32} : i32 { | ||
%0 = llvm.mlir.constant(1 : i32) : i32 | ||
llvm.return %0 : i32 | ||
} | ||
// i32 global initialised to 0 (the ByRef-captured value). | ||
llvm.mlir.global internal @_QFEsp() {addr_space = 0 : i32} : i32 { | ||
%0 = llvm.mlir.constant(0 : i32) : i32 | ||
llvm.return %0 : i32 | ||
} | ||
} | ||
|
||
// CHECK: define {{.*}} void @__omp_offloading_{{.*}}_{{.*}}__QQmain_l{{.*}}(ptr %[[ARG_BYREF:.*]], ptr %[[ARG_BYCOPY:.*]]) { | ||
|
||
// CHECK: entry: | ||
// CHECK: %[[ALLOCA_BYREF:.*]] = alloca ptr, align 8 | ||
// CHECK: store ptr %[[ARG_BYREF]], ptr %[[ALLOCA_BYREF]], align 8 | ||
// CHECK: %[[ALLOCA_BYCOPY:.*]] = alloca ptr, align 8 | ||
// CHECK: store ptr %[[ARG_BYCOPY]], ptr %[[ALLOCA_BYCOPY]], align 8 | ||
|
||
// CHECK: user_code.entry: ; preds = %entry | ||
// CHECK: %[[LOAD_BYREF:.*]] = load ptr, ptr %[[ALLOCA_BYREF]], align 8 | ||
// CHECK: br label %omp.target | ||
|
||
// CHECK: omp.target: ; preds = %user_code.entry | ||
// CHECK: %[[VAL_LOAD_BYCOPY:.*]] = load i32, ptr %[[ALLOCA_BYCOPY]], align 4 | ||
// CHECK: store i32 %[[VAL_LOAD_BYCOPY]], ptr %[[LOAD_BYREF]], align 4 | ||
// CHECK: br label %omp.region.cont |
42 changes: 42 additions & 0 deletions
42
mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-host.mlir
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s | ||
|
||
// Host-side module (omp.is_target_device = false). | ||
module attributes {omp.is_target_device = false} { | ||
// Maps two i32 globals into one omp.target region using different capture kinds. | ||
llvm.func @_QQmain() attributes {fir.bindc_name = "main"} { | ||
%0 = llvm.mlir.addressof @_QFEi : !llvm.ptr | ||
%1 = llvm.mlir.addressof @_QFEsp : !llvm.ptr | ||
// "sp": tofrom, capture(ByRef). | ||
%2 = omp.map_info var_ptr(%1 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "sp"} | ||
// "i": to, capture(ByCopy). | ||
%3 = omp.map_info var_ptr(%0 : !llvm.ptr, i32) map_clauses(to) capture(ByCopy) -> !llvm.ptr {name = "i"} | ||
// Region body: load the i32 at @_QFEi and store it to @_QFEsp. | ||
omp.target map_entries(%2, %3 : !llvm.ptr, !llvm.ptr) { | ||
%4 = llvm.load %0 : !llvm.ptr -> i32 | ||
llvm.store %4, %1 : i32, !llvm.ptr | ||
omp.terminator | ||
} | ||
llvm.return | ||
} | ||
// i32 global initialised to 1 (the ByCopy-captured value). | ||
llvm.mlir.global internal @_QFEi() {addr_space = 0 : i32} : i32 { | ||
%0 = llvm.mlir.constant(1 : i32) : i32 | ||
llvm.return %0 : i32 | ||
} | ||
// i32 global initialised to 0 (the ByRef-captured value). | ||
llvm.mlir.global internal @_QFEsp() {addr_space = 0 : i32} : i32 { | ||
%0 = llvm.mlir.constant(0 : i32) : i32 | ||
llvm.return %0 : i32 | ||
} | ||
} | ||
|
||
// CHECK: define void @_QQmain() { | ||
// CHECK: %[[BYCOPY_ALLOCA:.*]] = alloca ptr, align 8 | ||
|
||
// CHECK: entry: ; preds = %0 | ||
// CHECK: %[[LOAD_VAL:.*]] = load i32, ptr @_QFEi, align 4 | ||
// CHECK: store i32 %[[LOAD_VAL]], ptr %[[BYCOPY_ALLOCA]], align 4 | ||
// CHECK: %[[BYCOPY_LOAD:.*]] = load ptr, ptr %[[BYCOPY_ALLOCA]], align 8 | ||
|
||
// CHECK: %[[BASEPTR_BYREF:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_baseptrs, i32 0, i32 0 | ||
// CHECK: store ptr @_QFEsp, ptr %[[BASEPTR_BYREF]], align 8 | ||
// CHECK: %[[OFFLOADPTR_BYREF:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_ptrs, i32 0, i32 0 | ||
// CHECK: store ptr @_QFEsp, ptr %[[OFFLOADPTR_BYREF]], align 8 | ||
|
||
// CHECK: %[[BASEPTR_BYCOPY:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_baseptrs, i32 0, i32 1 | ||
// CHECK: store ptr %[[BYCOPY_LOAD]], ptr %[[BASEPTR_BYCOPY]], align 8 | ||
// CHECK: %[[OFFLOADPTR_BYCOPY:.*]] = getelementptr inbounds [2 x ptr], ptr %.offload_ptrs, i32 0, i32 1 | ||
// CHECK: store ptr %[[BYCOPY_LOAD]], ptr %[[OFFLOADPTR_BYCOPY]], align 8 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
27 changes: 27 additions & 0 deletions
27
openmp/libomptarget/test/offloading/fortran/basic-target-region-1D-array-section.f90
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
! Basic offloading test of arrays with provided lower | ||
! and upper bounds as specified by OpenMP's sectioning | ||
! REQUIRES: flang, amdgcn-amd-amdhsa | ||
! UNSUPPORTED: nvptx64-nvidia-cuda | ||
! UNSUPPORTED: nvptx64-nvidia-cuda-LTO | ||
! UNSUPPORTED: aarch64-unknown-linux-gnu | ||
! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO | ||
! UNSUPPORTED: x86_64-pc-linux-gnu | ||
! UNSUPPORTED: x86_64-pc-linux-gnu-LTO | ||
|
||
! RUN: %libomptarget-compile-fortran-run-and-check-generic | ||
program main
  implicit none
  ! Source values, plus a zeroed destination so that any element the target
  ! region does not write remains recognisable in the output.
  integer :: read_arr(10) = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
  integer :: write_arr(10) = 0
  integer :: i = 2

  ! Only the section 2:5 of each array is mapped: read_arr is mapped "to",
  ! write_arr is mapped "from".
  !$omp target map(to:read_arr(2:5)) map(from:write_arr(2:5)) map(tofrom:i)
  do i = 2, 5
    write_arr(i) = read_arr(i)
  end do
  !$omp end target

  print *, write_arr
end program main
|
||
! CHECK: 0 2 3 4 5 0 0 0 0 0 |
39 changes: 39 additions & 0 deletions
39
openmp/libomptarget/test/offloading/fortran/basic-target-region-3D-array-section.f90
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
! Basic offloading test of 3-D arrays mapped with explicit | ||
! OpenMP array sections within a target region | ||
! REQUIRES: flang, amdgcn-amd-amdhsa | ||
! UNSUPPORTED: nvptx64-nvidia-cuda | ||
! UNSUPPORTED: nvptx64-nvidia-cuda-LTO | ||
! UNSUPPORTED: aarch64-unknown-linux-gnu | ||
! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO | ||
! UNSUPPORTED: x86_64-pc-linux-gnu | ||
! UNSUPPORTED: x86_64-pc-linux-gnu-LTO | ||
|
||
! RUN: %libomptarget-compile-fortran-run-and-check-generic | ||
program main
  implicit none
  integer :: inArray(3,3,3)
  integer :: outArray(3,3,3)
  integer :: j, k

  ! Host-side setup: every input element is 42, every output element is 0.
  inArray = 42
  outArray = 0

  ! inArray is mapped with a single-plane section (third index 2:2);
  ! outArray is mapped with sections covering its full extent.
  !$omp target map(tofrom:inArray(1:3, 1:3, 2:2), outArray(1:3, 1:3, 1:3), j, k)
  do j = 1, 3
    do k = 1, 3
      ! Copy the mapped plane (third index 2) from inArray into outArray.
      outArray(k, j, 2) = inArray(k, j, 2)
    end do
  end do
  !$omp end target

  ! Whole-array output in array element order.
  print *, outArray
end program main
|
||
! CHECK: 0 0 0 0 0 0 0 0 0 42 42 42 42 42 42 42 42 42 0 0 0 0 0 0 0 0 0 |
45 changes: 45 additions & 0 deletions
45
openmp/libomptarget/test/offloading/fortran/basic-target-region-3D-array.f90
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
! Basic offloading test of a regular array explicitly | ||
! passed within a target region | ||
! REQUIRES: flang, amdgcn-amd-amdhsa | ||
! UNSUPPORTED: nvptx64-nvidia-cuda | ||
! UNSUPPORTED: nvptx64-nvidia-cuda-LTO | ||
! UNSUPPORTED: aarch64-unknown-linux-gnu | ||
! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO | ||
! UNSUPPORTED: x86_64-pc-linux-gnu | ||
! UNSUPPORTED: x86_64-pc-linux-gnu-LTO | ||
|
||
! RUN: %libomptarget-compile-fortran-run-and-check-generic | ||
program main
  implicit none
  integer :: x(2,2,2)
  integer :: i = 1, j = 1, k = 1
  integer :: counter = 1

  ! Host-side setup: clear the whole array before offloading.
  x = 0

  ! Fill x inside the target region with 1..8, assigned in (i, j, k) loop
  ! order with k varying fastest.
  !$omp target map(tofrom:x, i, j, k, counter)
  do i = 1, 2
    do j = 1, 2
      do k = 1, 2
        x(i, j, k) = counter
        counter = counter + 1
      end do
    end do
  end do
  !$omp end target

  ! Emit all eight values in a single list-directed record, in the same
  ! (i, j, k) order they were assigned. Printing one element per statement
  ! would place each value on its own line, which a single-line
  ! "1 2 3 4 5 6 7 8" check cannot match.
  print *, (((x(i, j, k), k = 1, 2), j = 1, 2), i = 1, 2)
end program main
|
||
! CHECK: 1 2 3 4 5 6 7 8 |
27 changes: 27 additions & 0 deletions
27
openmp/libomptarget/test/offloading/fortran/basic-target-region-array.f90
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
! Basic offloading test of a regular array explicitly | ||
! passed within a target region | ||
! REQUIRES: flang, amdgcn-amd-amdhsa | ||
! UNSUPPORTED: nvptx64-nvidia-cuda | ||
! UNSUPPORTED: nvptx64-nvidia-cuda-LTO | ||
! UNSUPPORTED: aarch64-unknown-linux-gnu | ||
! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO | ||
! UNSUPPORTED: x86_64-pc-linux-gnu | ||
! UNSUPPORTED: x86_64-pc-linux-gnu-LTO | ||
|
||
! RUN: %libomptarget-compile-fortran-run-and-check-generic | ||
program main
  implicit none
  integer :: x(10) = 0
  integer :: i = 1
  ! Upper limit of the fill loop. It must not exceed size(x): the previous
  ! value of 11 made the loop store to x(11), one element past the end.
  integer :: j = 10

  ! Fill x(1:j) with its own indices inside the target region.
  !$omp target map(tofrom:x, i, j)
  do while (i <= j)
    x(i) = i
    i = i + 1
  end do
  !$omp end target

  print *, x
end program main
|
||
! CHECK: 1 2 3 4 5 6 7 8 9 10 |