diff --git a/flang/include/flang/Optimizer/Passes/Pipelines.h b/flang/include/flang/Optimizer/Passes/Pipelines.h index fd8c43cc88a19..4d4d30e69cdd7 100644 --- a/flang/include/flang/Optimizer/Passes/Pipelines.h +++ b/flang/include/flang/Optimizer/Passes/Pipelines.h @@ -22,6 +22,7 @@ #include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h" #include "mlir/Dialect/GPU/IR/GPUDialect.h" #include "mlir/Dialect/LLVMIR/LLVMAttrs.h" +#include "mlir/Dialect/LLVMIR/Transforms/OpenMPOffloadPrivatizationPrepare.h" #include "mlir/Pass/PassManager.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "mlir/Transforms/Passes.h" diff --git a/flang/lib/Optimizer/Passes/Pipelines.cpp b/flang/lib/Optimizer/Passes/Pipelines.cpp index 98f947a1f635d..6c9e0648fede8 100644 --- a/flang/lib/Optimizer/Passes/Pipelines.cpp +++ b/flang/lib/Optimizer/Passes/Pipelines.cpp @@ -411,6 +411,13 @@ void createMLIRToLLVMPassPipeline(mlir::PassManager &pm, // Add codegen pass pipeline. fir::createDefaultFIRCodeGenPassPipeline(pm, config, inputFilename); + + // Run a pass to prepare for translation of delayed privatization in the + // context of deferred target tasks. 
+ addNestedPassConditionally( + pm, disableFirToLlvmIr, [&]() { + return mlir::LLVM::createPrepareForOMPOffloadPrivatizationPass(); + }); } } // namespace fir diff --git a/flang/test/Driver/tco-emit-final-mlir.fir b/flang/test/Driver/tco-emit-final-mlir.fir index 75f8f153127af..177810cf41378 100644 --- a/flang/test/Driver/tco-emit-final-mlir.fir +++ b/flang/test/Driver/tco-emit-final-mlir.fir @@ -13,7 +13,7 @@ // CHECK: llvm.return // CHECK-NOT: func.func -func.func @_QPfoo() { +func.func @_QPfoo() -> !fir.ref { %1 = fir.alloca i32 - return + return %1 : !fir.ref } diff --git a/flang/test/Driver/tco-test-gen.fir b/flang/test/Driver/tco-test-gen.fir index 38d4e50ecf3aa..15483f7ee3534 100644 --- a/flang/test/Driver/tco-test-gen.fir +++ b/flang/test/Driver/tco-test-gen.fir @@ -42,11 +42,10 @@ func.func @_QPtest(%arg0: !fir.ref {fir.bindc_name = "num"}, %arg1: !fir.re // CHECK-SAME: %[[ARG2:.*]]: !llvm.ptr {fir.bindc_name = "ub", llvm.nocapture}, // CHECK-SAME: %[[ARG3:.*]]: !llvm.ptr {fir.bindc_name = "step", llvm.nocapture}) { +// CMPLX: %[[VAL_3:.*]] = llvm.mlir.constant(0 : index) : i64 +// CMPLX: %[[VAL_2:.*]] = llvm.mlir.constant(1 : index) : i64 // CMPLX: %[[VAL_0:.*]] = llvm.mlir.constant(1 : i64) : i64 // CMPLX: %[[VAL_1:.*]] = llvm.alloca %[[VAL_0]] x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr -// CMPLX: %[[VAL_2:.*]] = llvm.mlir.constant(1 : index) : i64 -// CMPLX: %[[VAL_3:.*]] = llvm.mlir.constant(0 : index) : i64 -// CMPLX: %[[VAL_4:.*]] = llvm.mlir.constant(1 : i64) : i64 // SIMPLE: %[[VAL_3:.*]] = llvm.mlir.constant(0 : index) : i64 // SIMPLE: %[[VAL_2:.*]] = llvm.mlir.constant(1 : index) : i64 diff --git a/flang/test/Fir/alloc-32.fir b/flang/test/Fir/alloc-32.fir index a3cbf200c24fc..f57f6ce6fcf5e 100644 --- a/flang/test/Fir/alloc-32.fir +++ b/flang/test/Fir/alloc-32.fir @@ -19,7 +19,7 @@ func.func @allocmem_scalar_nonchar() -> !fir.heap { // CHECK-LABEL: define ptr @allocmem_scalar_dynchar( // CHECK-SAME: i32 %[[len:.*]]) // CHECK: %[[mul1:.*]] = 
sext i32 %[[len]] to i64 -// CHECK: %[[mul2:.*]] = mul i64 1, %[[mul1]] +// CHECK: %[[mul2:.*]] = mul i64 %[[mul1]], 1 // CHECK: %[[cmp:.*]] = icmp sgt i64 %[[mul2]], 0 // CHECK: %[[sz:.*]] = select i1 %[[cmp]], i64 %[[mul2]], i64 1 // CHECK: %[[trunc:.*]] = trunc i64 %[[sz]] to i32 diff --git a/flang/test/Fir/alloc.fir b/flang/test/Fir/alloc.fir index 8da8b828c18b9..0d3ce323d0d7c 100644 --- a/flang/test/Fir/alloc.fir +++ b/flang/test/Fir/alloc.fir @@ -86,7 +86,7 @@ func.func @alloca_scalar_dynchar_kind(%l : i32) -> !fir.ref> { // CHECK-LABEL: define ptr @allocmem_scalar_dynchar( // CHECK-SAME: i32 %[[len:.*]]) // CHECK: %[[mul1:.*]] = sext i32 %[[len]] to i64 -// CHECK: %[[mul2:.*]] = mul i64 1, %[[mul1]] +// CHECK: %[[mul2:.*]] = mul i64 %[[mul1]], 1 // CHECK: %[[cmp:.*]] = icmp sgt i64 %[[mul2]], 0 // CHECK: %[[size:.*]] = select i1 %[[cmp]], i64 %[[mul2]], i64 1 // CHECK: call ptr @malloc(i64 %[[size]]) @@ -98,7 +98,7 @@ func.func @allocmem_scalar_dynchar(%l : i32) -> !fir.heap> { // CHECK-LABEL: define ptr @allocmem_scalar_dynchar_kind( // CHECK-SAME: i32 %[[len:.*]]) // CHECK: %[[mul1:.*]] = sext i32 %[[len]] to i64 -// CHECK: %[[mul2:.*]] = mul i64 2, %[[mul1]] +// CHECK: %[[mul2:.*]] = mul i64 %[[mul1]], 2 // CHECK: %[[cmp:.*]] = icmp sgt i64 %[[mul2]], 0 // CHECK: %[[size:.*]] = select i1 %[[cmp]], i64 %[[mul2]], i64 1 // CHECK: call ptr @malloc(i64 %[[size]]) @@ -185,7 +185,7 @@ func.func @alloca_dynarray_of_nonchar2(%e: index) -> !fir.ref !fir.heap !fir.ref !fir.heap !fir.heap !fir // CHECK: %[[VAL_0:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, i64 1 // CHECK: %[[VAL_3:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, i64 1 // CHECK: %[[VAL_2:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, i64 1 - +func.func private @foo(%0: !fir.ref>, %1: !fir.ref>>, %2: !fir.ref>, %3: !fir.ref>>) func.func @alloca_unlimited_polymorphic_box() { %0 = fir.alloca !fir.class %1 = fir.alloca !fir.class> %2 
= fir.alloca !fir.box %3 = fir.alloca !fir.box> + fir.call @foo(%0, %1, %2, %3) : (!fir.ref>, !fir.ref>>, !fir.ref>, !fir.ref>>) -> () return } // Note: allocmem of fir.box are not possible (fir::HeapType::verify does not diff --git a/flang/test/Fir/arrexp.fir b/flang/test/Fir/arrexp.fir index e8ec8ac79e0c2..2eb717228d998 100644 --- a/flang/test/Fir/arrexp.fir +++ b/flang/test/Fir/arrexp.fir @@ -143,9 +143,9 @@ func.func @f6(%arg0: !fir.box>, %arg1: f32) { %c9 = arith.constant 9 : index %c10 = arith.constant 10 : index - // CHECK: %[[EXT_GEP:.*]] = getelementptr {{.*}} %[[A]], i32 0, i32 7, i64 0, i32 1 + // CHECK: %[[EXT_GEP:.*]] = getelementptr {{.*}} %[[A]], i32 0, i32 7, i32 0, i32 1 // CHECK: %[[EXTENT:.*]] = load i64, ptr %[[EXT_GEP]] - // CHECK: %[[SIZE:.*]] = mul i64 4, %[[EXTENT]] + // CHECK: %[[SIZE:.*]] = mul i64 %[[EXTENT]], 4 // CHECK: %[[CMP:.*]] = icmp sgt i64 %[[SIZE]], 0 // CHECK: %[[SZ:.*]] = select i1 %[[CMP]], i64 %[[SIZE]], i64 1 // CHECK: %[[MALLOC:.*]] = call ptr @malloc(i64 %[[SZ]]) diff --git a/flang/test/Fir/basic-program.fir b/flang/test/Fir/basic-program.fir index c9fe53bf093a1..6bad03dded24d 100644 --- a/flang/test/Fir/basic-program.fir +++ b/flang/test/Fir/basic-program.fir @@ -158,4 +158,6 @@ func.func @_QQmain() { // PASSES-NEXT: LowerNontemporalPass // PASSES-NEXT: FIRToLLVMLowering // PASSES-NEXT: ReconcileUnrealizedCasts +// PASSES-NEXT: 'llvm.func' Pipeline +// PASSES-NEXT: PrepareForOMPOffloadPrivatizationPass // PASSES-NEXT: LLVMIRLoweringPass diff --git a/flang/test/Fir/box.fir b/flang/test/Fir/box.fir index c0cf3d8375983..760fbd4792122 100644 --- a/flang/test/Fir/box.fir +++ b/flang/test/Fir/box.fir @@ -57,7 +57,7 @@ func.func @fa(%a : !fir.ref>) { // CHECK-SAME: ptr {{[^%]*}}%[[res:.*]], ptr {{[^%]*}}%[[arg0:.*]], i64 %[[arg1:.*]]) func.func @b1(%arg0 : !fir.ref>, %arg1 : index) -> !fir.box> { // CHECK: %[[alloca:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 } - // CHECK: %[[size:.*]] = mul i64 1, %[[arg1]] + // CHECK: 
%[[size:.*]] = mul i64 %[[arg1]], 1 // CHECK: insertvalue {{.*}} undef, i64 %[[size]], 1 // CHECK: insertvalue {{.*}} i32 20240719, 2 // CHECK: insertvalue {{.*}} ptr %[[arg0]], 0 @@ -89,7 +89,7 @@ func.func @b2(%arg0 : !fir.ref>>, %arg1 : index) -> func.func @b3(%arg0 : !fir.ref>>, %arg1 : index, %arg2 : index) -> !fir.box>> { %1 = fir.shape %arg2 : (index) -> !fir.shape<1> // CHECK: %[[alloca:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } - // CHECK: %[[size:.*]] = mul i64 1, %[[arg1]] + // CHECK: %[[size:.*]] = mul i64 %[[arg1]], 1 // CHECK: insertvalue {{.*}} i64 %[[size]], 1 // CHECK: insertvalue {{.*}} i32 20240719, 2 // CHECK: insertvalue {{.*}} i64 %[[arg2]], 7, 0, 1 @@ -108,7 +108,7 @@ func.func @b4(%arg0 : !fir.ref>>, %arg1 : index) -> %c_7 = arith.constant 7 : index %1 = fir.shape %c_7 : (index) -> !fir.shape<1> // CHECK: %[[alloca:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } - // CHECK: %[[size:.*]] = mul i64 1, %[[arg1]] + // CHECK: %[[size:.*]] = mul i64 %[[arg1]], 1 // CHECK: insertvalue {{.*}} i64 %[[size]], 1 // CHECK: insertvalue {{.*}} i32 20240719, 2 // CHECK: insertvalue {{.*}} i64 7, 7, 0, 1 diff --git a/flang/test/Fir/boxproc.fir b/flang/test/Fir/boxproc.fir index 97d9b38ed6f40..d4c36a4f5b213 100644 --- a/flang/test/Fir/boxproc.fir +++ b/flang/test/Fir/boxproc.fir @@ -82,12 +82,8 @@ func.func @_QPtest_proc_dummy_other(%arg0: !fir.boxproc<() -> ()>) { // CHECK: store [1 x i8] c" ", ptr %[[VAL_18]], align 1 // CHECK: call void @llvm.init.trampoline(ptr %[[VAL_20]], ptr @_QFtest_proc_dummy_charPgen_message, ptr %[[VAL_2]]) // CHECK: %[[VAL_23:.*]] = call ptr @llvm.adjust.trampoline(ptr %[[VAL_20]]) -// CHECK: %[[VAL_25:.*]] = insertvalue { ptr, i64 } undef, ptr %[[VAL_23]], 0 -// CHECK: %[[VAL_26:.*]] = insertvalue { ptr, i64 } %[[VAL_25]], i64 10, 1 // CHECK: %[[VAL_27:.*]] = call ptr @llvm.stacksave.p0() -// CHECK: %[[VAL_28:.*]] = extractvalue { ptr, i64 } %[[VAL_26]], 0 -// CHECK: %[[VAL_29:.*]] = 
extractvalue { ptr, i64 } %[[VAL_26]], 1 -// CHECK: %[[VAL_30:.*]] = call { ptr, i64 } @_QPget_message(ptr %[[VAL_0]], i64 40, ptr %[[VAL_28]], i64 %[[VAL_29]]) +// CHECK: %[[VAL_30:.*]] = call { ptr, i64 } @_QPget_message(ptr %[[VAL_0]], i64 40, ptr %[[VAL_23]], i64 10) // CHECK: %[[VAL_32:.*]] = call i1 @_FortranAioOutputAscii(ptr %{{.*}}, ptr %[[VAL_0]], i64 40) // CHECK: call void @llvm.stackrestore.p0(ptr %[[VAL_27]]) @@ -115,14 +111,10 @@ func.func @_QPtest_proc_dummy_other(%arg0: !fir.boxproc<() -> ()>) { // CHECK-LABEL: define { ptr, i64 } @_QPget_message(ptr // CHECK-SAME: %[[VAL_0:.*]], i64 %[[VAL_1:.*]], ptr %[[VAL_2:.*]], i64 // CHECK-SAME: %[[VAL_3:.*]]) -// CHECK: %[[VAL_4:.*]] = insertvalue { ptr, i64 } undef, ptr %[[VAL_2]], 0 -// CHECK: %[[VAL_5:.*]] = insertvalue { ptr, i64 } %[[VAL_4]], i64 %[[VAL_3]], 1 -// CHECK: %[[VAL_7:.*]] = extractvalue { ptr, i64 } %[[VAL_5]], 0 -// CHECK: %[[VAL_8:.*]] = extractvalue { ptr, i64 } %[[VAL_5]], 1 // CHECK: %[[VAL_9:.*]] = call ptr @llvm.stacksave.p0() -// CHECK: %[[VAL_10:.*]] = alloca i8, i64 %[[VAL_8]], align 1 -// CHECK: %[[VAL_12:.*]] = call { ptr, i64 } %[[VAL_7]](ptr %[[VAL_10]], i64 %[[VAL_8]]) -// CHECK: %[[VAL_13:.*]] = add i64 %[[VAL_8]], 12 +// CHECK: %[[VAL_10:.*]] = alloca i8, i64 %[[VAL_3]], align 1 +// CHECK: %[[VAL_12:.*]] = call { ptr, i64 } %[[VAL_2]](ptr %[[VAL_10]], i64 %[[VAL_3]]) +// CHECK: %[[VAL_13:.*]] = add i64 %[[VAL_3]], 12 // CHECK: %[[VAL_14:.*]] = alloca i8, i64 %[[VAL_13]], align 1 // CHECK: call void @llvm.memmove.p0.p0.i64(ptr %[[VAL_14]], ptr {{.*}}, i64 12, i1 false) // CHECK: %[[VAL_18:.*]] = phi i64 diff --git a/flang/test/Fir/embox.fir b/flang/test/Fir/embox.fir index 0f304cff2c79e..11f7457b6873c 100644 --- a/flang/test/Fir/embox.fir +++ b/flang/test/Fir/embox.fir @@ -11,7 +11,7 @@ func.func @_QPtest_callee(%arg0: !fir.box>) { func.func @_QPtest_slice() { // CHECK: %[[a1:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8 // CHECK: %[[a2:.*]] = 
alloca [20 x i32], i64 1, align 4 -// CHECK: %[[a3:.*]] = getelementptr [20 x i32], ptr %[[a2]], i64 0, i64 0 +// CHECK: %[[a3:.*]] = getelementptr [20 x i32], ptr %[[a2]], i32 0, i64 0 // CHECK: %[[a4:.*]] = insertvalue { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } // CHECK: { ptr undef, i64 4, i32 20240719, i8 1, i8 9, i8 0, i8 0, [1 x [3 x i64]] // CHECK: [i64 1, i64 5, i64 8]] }, ptr %[[a3]], 0 @@ -38,7 +38,7 @@ func.func @_QPtest_dt_callee(%arg0: !fir.box>) { func.func @_QPtest_dt_slice() { // CHECK: %[[a1:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8 // CHECK: %[[a3:.*]] = alloca [20 x %_QFtest_dt_sliceTt], i64 1, align 8 -// CHECK: %[[a4:.*]] = getelementptr [20 x %_QFtest_dt_sliceTt], ptr %[[a3]], i64 0, i64 0, i32 0 +// CHECK: %[[a4:.*]] = getelementptr [20 x %_QFtest_dt_sliceTt], ptr %[[a3]], i32 0, i64 0, i32 0 // CHECK: %[[a5:.*]] = insertvalue { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } // CHECK-SAME: { ptr undef, i64 4, i32 20240719, i8 1, i8 9, i8 0, i8 0, [1 x [3 x i64]] // CHECK-SAME: [i64 1, i64 5, i64 16 @@ -73,7 +73,7 @@ func.func @emboxSubstring(%arg0: !fir.ref>>) { %0 = fir.shape %c2, %c3 : (index, index) -> !fir.shape<2> %1 = fir.slice %c1, %c2, %c1, %c1, %c3, %c1 substr %c1_i64, %c2_i64 : (index, index, index, index, index, index, i64, i64) -> !fir.slice<2> %2 = fir.embox %arg0(%0) [%1] : (!fir.ref>>, !fir.shape<2>, !fir.slice<2>) -> !fir.box>> - // CHECK: %[[addr:.*]] = getelementptr [3 x [2 x [4 x i8]]], ptr %[[arg0]], i64 0, i64 0, i64 0, i64 1 + // CHECK: %[[addr:.*]] = getelementptr [3 x [2 x [4 x i8]]], ptr %[[arg0]], i32 0, i64 0, i64 0, i32 1 // CHECK: insertvalue {[[descriptorType:.*]]} { ptr undef, i64 2, i32 20240719, i8 2, i8 40, i8 0, i8 0 // CHECK-SAME: [2 x [3 x i64]] [{{\[}}3 x i64] [i64 1, i64 2, i64 4], [3 x i64] [i64 1, i64 3, i64 8]] } // CHECK-SAME: ptr %[[addr]], 0 diff --git a/flang/test/Fir/omp-reduction-embox-codegen.fir b/flang/test/Fir/omp-reduction-embox-codegen.fir index 
1645e1a407ad4..e517b1352ff5c 100644 --- a/flang/test/Fir/omp-reduction-embox-codegen.fir +++ b/flang/test/Fir/omp-reduction-embox-codegen.fir @@ -23,14 +23,14 @@ omp.declare_reduction @test_reduction : !fir.ref> init { omp.yield(%0 : !fir.ref>) } -func.func @_QQmain() attributes {fir.bindc_name = "reduce"} { +func.func @_QQmain() -> !fir.ref> attributes {fir.bindc_name = "reduce"} { %4 = fir.alloca !fir.box omp.parallel reduction(byref @test_reduction %4 -> %arg0 : !fir.ref>) { omp.terminator } - return + return %4: !fir.ref> } // basically we are testing that there isn't a crash -// CHECK-LABEL: define void @_QQmain +// CHECK-LABEL: define ptr @_QQmain // CHECK-NEXT: alloca { ptr, i64, i32, i8, i8, i8, i8 }, i64 1, align 8 diff --git a/flang/test/Fir/omp_target_allocmem_freemem.fir b/flang/test/Fir/omp_target_allocmem_freemem.fir index 03eb94acb1ac7..aa7b2dce07153 100644 --- a/flang/test/Fir/omp_target_allocmem_freemem.fir +++ b/flang/test/Fir/omp_target_allocmem_freemem.fir @@ -62,7 +62,7 @@ func.func @omp_target_allocmem_scalar_char_kind() -> () { // CHECK-LABEL: define void @omp_target_allocmem_scalar_dynchar( // CHECK-SAME: i32 [[TMP0:%.*]]) { // CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP0]] to i64 -// CHECK-NEXT: [[TMP3:%.*]] = mul i64 1, [[TMP2]] +// CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 1 // CHECK-NEXT: [[TMP4:%.*]] = mul i64 1, [[TMP3]] // CHECK-NEXT: [[TMP5:%.*]] = call ptr @omp_target_alloc(i64 [[TMP4]], i32 0) // CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64 @@ -80,7 +80,7 @@ func.func @omp_target_allocmem_scalar_dynchar(%l : i32) -> () { // CHECK-LABEL: define void @omp_target_allocmem_scalar_dynchar_kind( // CHECK-SAME: i32 [[TMP0:%.*]]) { // CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP0]] to i64 -// CHECK-NEXT: [[TMP3:%.*]] = mul i64 2, [[TMP2]] +// CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 // CHECK-NEXT: [[TMP4:%.*]] = mul i64 1, [[TMP3]] // CHECK-NEXT: [[TMP5:%.*]] = call ptr @omp_target_alloc(i64 [[TMP4]], i32 0) // CHECK-NEXT: 
[[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64 @@ -141,7 +141,7 @@ func.func @omp_target_allocmem_array_of_dynchar(%l: i32) -> () { // CHECK-LABEL: define void @omp_target_allocmem_dynarray_of_nonchar( // CHECK-SAME: i64 [[TMP0:%.*]]) { -// CHECK-NEXT: [[TMP2:%.*]] = mul i64 12, [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP0]], 12 // CHECK-NEXT: [[TMP3:%.*]] = mul i64 1, [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = call ptr @omp_target_alloc(i64 [[TMP3]], i32 0) // CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[TMP4]] to i64 @@ -157,7 +157,7 @@ func.func @omp_target_allocmem_dynarray_of_nonchar(%e: index) -> () { // CHECK-LABEL: define void @omp_target_allocmem_dynarray_of_nonchar2( // CHECK-SAME: i64 [[TMP0:%.*]]) { -// CHECK-NEXT: [[TMP2:%.*]] = mul i64 4, [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP0]], 4 // CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], [[TMP0]] // CHECK-NEXT: [[TMP4:%.*]] = mul i64 1, [[TMP3]] // CHECK-NEXT: [[TMP5:%.*]] = call ptr @omp_target_alloc(i64 [[TMP4]], i32 0) @@ -174,7 +174,7 @@ func.func @omp_target_allocmem_dynarray_of_nonchar2(%e: index) -> () { // CHECK-LABEL: define void @omp_target_allocmem_dynarray_of_char( // CHECK-SAME: i64 [[TMP0:%.*]]) { -// CHECK-NEXT: [[TMP2:%.*]] = mul i64 60, [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP0]], 60 // CHECK-NEXT: [[TMP3:%.*]] = mul i64 1, [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = call ptr @omp_target_alloc(i64 [[TMP3]], i32 0) // CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[TMP4]] to i64 @@ -191,7 +191,7 @@ func.func @omp_target_allocmem_dynarray_of_char(%e : index) -> () { // CHECK-LABEL: define void @omp_target_allocmem_dynarray_of_char2( // CHECK-SAME: i64 [[TMP0:%.*]]) { -// CHECK-NEXT: [[TMP2:%.*]] = mul i64 20, [[TMP0]] +// CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP0]], 20 // CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], [[TMP0]] // CHECK-NEXT: [[TMP4:%.*]] = mul i64 1, [[TMP3]] // CHECK-NEXT: [[TMP5:%.*]] = call ptr @omp_target_alloc(i64 [[TMP4]], i32 0) @@ -227,7 +227,7 @@ 
func.func @omp_target_allocmem_dynarray_of_dynchar(%l: i32, %e : index) -> () { // CHECK-LABEL: define void @omp_target_allocmem_dynarray_of_dynchar2( // CHECK-SAME: i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) { // CHECK-NEXT: [[TMP3:%.*]] = sext i32 [[TMP0]] to i64 -// CHECK-NEXT: [[TMP4:%.*]] = mul i64 2, [[TMP3]] +// CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 // CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], [[TMP1]] // CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], [[TMP1]] // CHECK-NEXT: [[TMP7:%.*]] = mul i64 1, [[TMP6]] diff --git a/flang/test/Fir/optional.fir b/flang/test/Fir/optional.fir index bded8b5332a30..66ff69f083467 100644 --- a/flang/test/Fir/optional.fir +++ b/flang/test/Fir/optional.fir @@ -37,8 +37,7 @@ func.func @bar2() -> i1 { // CHECK-LABEL: @foo3 func.func @foo3(%arg0: !fir.boxchar<1>) -> i1 { - // CHECK: %[[extract:.*]] = extractvalue { ptr, i64 } %{{.*}}, 0 - // CHECK: %[[ptr:.*]] = ptrtoint ptr %[[extract]] to i64 + // CHECK: %[[ptr:.*]] = ptrtoint ptr %0 to i64 // CHECK: icmp ne i64 %[[ptr]], 0 %0 = fir.is_present %arg0 : (!fir.boxchar<1>) -> i1 return %0 : i1 diff --git a/flang/test/Fir/pdt.fir b/flang/test/Fir/pdt.fir index a200cd7e7cc03..411927aae6bdf 100644 --- a/flang/test/Fir/pdt.fir +++ b/flang/test/Fir/pdt.fir @@ -96,13 +96,13 @@ func.func @_QTt1P.f2.offset(%0 : i32, %1 : i32) -> i32 { func.func private @bar(!fir.ref>) -// CHECK-LABEL: define void @_QPfoo(i32 %0, i32 %1) -func.func @_QPfoo(%arg0 : i32, %arg1 : i32) { +// CHECK-LABEL: define ptr @_QPfoo(i32 %0, i32 %1) +func.func @_QPfoo(%arg0 : i32, %arg1 : i32) -> !fir.ref> { // CHECK: %[[size:.*]] = call i64 @_QTt1P.mem.size(i32 %0, i32 %1) // CHECK: %[[alloc:.*]] = alloca i8, i64 %[[size]] %0 = fir.alloca !fir.type<_QTt1(p1:i32,p2:i32){f1:!fir.char<1,?>,f2:!fir.char<1,?>}>(%arg0, %arg1 : i32, i32) //%2 = fir.coordinate_of %0, f2 : (!fir.ref>) -> !fir.ref> %2 = fir.zero_bits !fir.ref> fir.call @bar(%2) : (!fir.ref>) -> () - return + return %0 : !fir.ref> } diff --git 
a/flang/test/Fir/rebox.fir b/flang/test/Fir/rebox.fir index 0c9f6d9bb94ad..d858adfb7c45d 100644 --- a/flang/test/Fir/rebox.fir +++ b/flang/test/Fir/rebox.fir @@ -36,7 +36,7 @@ func.func @test_rebox_1(%arg0: !fir.box>) { // CHECK: %[[VOIDBASE0:.*]] = getelementptr i8, ptr %[[INBASE]], i64 %[[OFFSET_0]] // CHECK: %[[OFFSET_1:.*]] = mul i64 2, %[[INSTRIDE_1]] // CHECK: %[[VOIDBASE1:.*]] = getelementptr i8, ptr %[[VOIDBASE0]], i64 %[[OFFSET_1]] - // CHECK: %[[OUTSTRIDE0:.*]] = mul i64 3, %[[INSTRIDE_1]] + // CHECK: %[[OUTSTRIDE0:.*]] = mul i64 %[[INSTRIDE_1]], 3 // CHECK: %[[OUTBOX1:.*]] = insertvalue { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } %{{.*}}, i64 %[[OUTSTRIDE0]], 7, 0, 2 // CHECK: %[[OUTBOX2:.*]] = insertvalue { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } %[[OUTBOX1]], ptr %[[VOIDBASE1]], 0 // CHECK: store { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } %[[OUTBOX2]], ptr %[[OUTBOX_ALLOC]], align 8 @@ -63,7 +63,7 @@ func.func @test_rebox_2(%arg0: !fir.box>>) { // CHECK: %[[OUTBOX:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [2 x [3 x i64]] } // CHECK: %[[LEN_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [2 x [3 x i64]] }, ptr %[[INBOX]], i32 0, i32 1 // CHECK: %[[LEN:.*]] = load i64, ptr %[[LEN_GEP]] - // CHECK: %[[SIZE:.*]] = mul i64 1, %[[LEN]] + // CHECK: %[[SIZE:.*]] = mul i64 %[[LEN]], 1 // CHECK: insertvalue { ptr, i64, i32, i8, i8, i8, i8, [2 x [3 x i64]] } undef, i64 %[[SIZE]], 1 %1 = fir.rebox %arg0 [%0] : (!fir.box>>, !fir.slice<2>) -> !fir.box>> @@ -94,8 +94,8 @@ func.func @test_rebox_3(%arg0: !fir.box>) { // CHECK: %[[INSTRIDE:.*]] = load i64, ptr %[[INSTRIDE_GEP]] // CHECK: %[[INBASE_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[INBOX]], i32 0, i32 0 // CHECK: %[[INBASE:.*]] = load ptr, ptr %[[INBASE_GEP]] - // CHECK: %[[OUTSTRIDE1:.*]] = mul i64 3, %[[INSTRIDE]] - // CHECK: %[[OUTSTRIDE2:.*]] = mul i64 4, %[[OUTSTRIDE1]] + // CHECK: %[[OUTSTRIDE1:.*]] = mul i64 %[[INSTRIDE]], 3 + 
// CHECK: %[[OUTSTRIDE2:.*]] = mul i64 %[[OUTSTRIDE1]], 4 // CHECK: %[[OUTBOX0:.*]] = insertvalue { ptr, i64, i32, i8, i8, i8, i8, [3 x [3 x i64]] } %{{.*}}, i64 %[[INSTRIDE]], 7, 0, 2 // CHECK: %[[OUTBOX1:.*]] = insertvalue { ptr, i64, i32, i8, i8, i8, i8, [3 x [3 x i64]] } %[[OUTBOX0]], i64 3, 7, 1, 0 // CHECK: %[[OUTBOX2:.*]] = insertvalue { ptr, i64, i32, i8, i8, i8, i8, [3 x [3 x i64]] } %[[OUTBOX1]], i64 4, 7, 1, 1 @@ -153,13 +153,13 @@ func.func @test_cmplx_1(%arg0: !fir.box>>) { %0:3 = fir.box_dims %arg0, %c0 : (!fir.box>>, index) -> (index, index, index) %1 = fir.slice %c1, %0#1, %c1 path %c1_i32 : (index, index, index, i32) -> !fir.slice<1> %2 = fir.rebox %arg0 [%1] : (!fir.box>>, !fir.slice<1>) -> !fir.box> - // CHECK: %[[INSTRIDE_0_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[INBOX]], i32 0, i32 7, i64 0, i32 1 + // CHECK: %[[INSTRIDE_0_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[INBOX]], i32 0, i32 7, i32 0, i32 1 // CHECK: %[[INSTRIDE_0:.*]] = load i64, ptr %[[INSTRIDE_0_GEP]] // CHECK: %[[INSTRIDE_1_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[INBOX]], i32 0, i32 7, i32 0, i32 2 // CHECK: %[[INSTRIDE_1:.*]] = load i64, ptr %[[INSTRIDE_1_GEP]] // CHECK: %[[FRONT_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[INBOX]], i32 0, i32 0 // CHECK: %[[FRONT_PTR:.*]] = load ptr, ptr %[[FRONT_GEP]] - // CHECK: %[[FIELD_OFFSET_GEP:.*]] = getelementptr { float, float }, ptr %[[FRONT_PTR]], i64 0, i32 0 + // CHECK: %[[FIELD_OFFSET_GEP:.*]] = getelementptr { float, float }, ptr %[[FRONT_PTR]], i32 0, i32 0 // CHECK: %[[FRONT_OFFSET:.*]] = mul i64 0, %[[INSTRIDE_1]] // CHECK: %[[OFFSET_GEP:.*]] = getelementptr i8, ptr %[[FIELD_OFFSET_GEP]], i64 %[[FRONT_OFFSET]] // CHECK: %[[SUB_1:.*]] = sub i64 %[[INSTRIDE_0]], 1 @@ -167,7 +167,7 @@ func.func @test_cmplx_1(%arg0: !fir.box>>) { // CHECK: %[[DIV_1:.*]] = sdiv i64 
%[[ADD_1]], 1 // CHECK: %[[CHECK_NONZERO:.*]] = icmp sgt i64 %[[DIV_1]], 0 // CHECK: %[[CHECKED_BOUND:.*]] = select i1 %[[CHECK_NONZERO]], i64 %[[DIV_1]], i64 0 - // CHECK: %[[STRIDE:.*]] = mul i64 1, %[[INSTRIDE_1]] + // CHECK: %[[STRIDE:.*]] = mul i64 %[[INSTRIDE_1]], 1 // CHECK: %[[VAL_BUILD_1:.*]] = insertvalue { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } %{{.*}}, i64 %[[CHECKED_BOUND]], 7, 0, 1 // CHECK: %[[VAL_BUILD_2:.*]] = insertvalue { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } %[[VAL_BUILD_1]], i64 %[[STRIDE]], 7, 0, 2 // CHECK: %[[VAL_BUILD_3:.*]] = insertvalue { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } %[[VAL_BUILD_2]], ptr %[[OFFSET_GEP]], 0 @@ -198,10 +198,10 @@ func.func @test_cmplx_2(%arg0: !fir.box>>) { // CHECK: %[[INSTRIDE_0:.*]] = load i64, ptr %[[INSTRIDE_0_GEP]] // CHECK: %[[FRONT_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[INBOX]], i32 0, i32 0 // CHECK: %[[FRONT_PTR:.*]] = load ptr, ptr %[[FRONT_GEP]] - // CHECK: %[[FIELD_OFFSET_GEP:.*]] = getelementptr { float, float }, ptr %[[FRONT_PTR]], i64 0, i32 1 + // CHECK: %[[FIELD_OFFSET_GEP:.*]] = getelementptr { float, float }, ptr %[[FRONT_PTR]], i32 0, i32 1 // CHECK: %[[FRONT_OFFSET:.*]] = mul i64 6, %[[INSTRIDE_0]] // CHECK: %[[OFFSET_GEP:.*]] = getelementptr i8, ptr %[[FIELD_OFFSET_GEP]], i64 %[[FRONT_OFFSET]] - // CHECK: %[[STRIDE:.*]] = mul i64 5, %[[INSTRIDE_0]] + // CHECK: %[[STRIDE:.*]] = mul i64 %[[INSTRIDE_0]], 5 // CHECK: %[[VAL_BUILD_1:.*]] = insertvalue { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } %{{.*}}, i64 %[[STRIDE]], 7, 0, 2 // CHECK: %[[VAL_BUILD_2:.*]] = insertvalue { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } %[[VAL_BUILD_1]], ptr %[[OFFSET_GEP]], 0 // CHECK: store { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } %[[VAL_BUILD_2]], ptr %[[OUTBOX_ALLOC]] diff --git a/flang/test/Fir/select.fir b/flang/test/Fir/select.fir index 5e88048446407..6d843e824d33f 100644 --- a/flang/test/Fir/select.fir +++ 
b/flang/test/Fir/select.fir @@ -64,6 +64,6 @@ func.func @h(%a : i32) -> i32 { return %1 : i32 ^bb6: %x = arith.addi %b4, %b3 : i32 - // CHECK: ret i32 + // CHECK-DAG: ret i32 return %x : i32 } diff --git a/flang/test/Fir/target.fir b/flang/test/Fir/target.fir index b04e23a018e7e..1e721a09c835e 100644 --- a/flang/test/Fir/target.fir +++ b/flang/test/Fir/target.fir @@ -97,10 +97,6 @@ func.func @call8() { // X64-LABEL: define i64 @char1lensum(ptr {{[^%]*}}%0, ptr {{[^%]*}}%1, i64 %2, i64 %3) // PPC-LABEL: define i64 @char1lensum(ptr {{[^%]*}}%0, ptr {{[^%]*}}%1, i64 %2, i64 %3) func.func @char1lensum(%arg0 : !fir.boxchar<1>, %arg1 : !fir.boxchar<1>) -> i64 { - // X64-DAG: %[[p0:.*]] = insertvalue { ptr, i64 } undef, ptr %1, 0 - // X64-DAG: = insertvalue { ptr, i64 } %[[p0]], i64 %3, 1 - // X64-DAG: %[[p1:.*]] = insertvalue { ptr, i64 } undef, ptr %0, 0 - // X64-DAG: = insertvalue { ptr, i64 } %[[p1]], i64 %2, 1 %1:2 = fir.unboxchar %arg0 : (!fir.boxchar<1>) -> (!fir.ref>, i64) %2:2 = fir.unboxchar %arg1 : (!fir.boxchar<1>) -> (!fir.ref>, i64) // I32: %[[add:.*]] = add i64 % diff --git a/flang/test/Fir/tbaa-codegen2.fir b/flang/test/Fir/tbaa-codegen2.fir index 4907aa03ec5a5..072c8bbe4e80c 100644 --- a/flang/test/Fir/tbaa-codegen2.fir +++ b/flang/test/Fir/tbaa-codegen2.fir @@ -62,9 +62,9 @@ module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.targ // CHECK-LABEL: define void @_QPfunc( // CHECK-SAME: ptr {{[^%]*}}%[[ARG0:.*]]){{.*}}{ // [...] 
-// CHECK: %[[VAL5:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[ARG0]], i32 0, i32 7, i32 0, i32 0 +// CHECK: %[[VAL5:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[ARG0]], i32 0, i32 6 // box access: -// CHECK: %[[VAL6:.*]] = load i64, ptr %[[VAL5]], align 4, !tbaa ![[BOX_ACCESS_TAG:.*]] +// CHECK: %[[VAL6:.*]] = load i8, ptr %[[VAL5]], align 1, !tbaa ![[BOX_ACCESS_TAG:.*]] // CHECK: %[[VAL7:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %0, i32 0, i32 7, i32 0, i32 1 // box access: // CHECK: %[[VAL8:.*]] = load i64, ptr %[[VAL7]], align 4, !tbaa ![[BOX_ACCESS_TAG]] @@ -76,15 +76,9 @@ module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.targ // CHECK: %[[VAL12:.*]] = load ptr, ptr %[[VAL11]], align 8, !tbaa ![[BOX_ACCESS_TAG]] // CHECK: %[[VAL15:.*]] = insertvalue { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } %{{.*}}, ptr %[[VAL12]], 0 // CHECK: store { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } %[[VAL15]], ptr %{{.*}}, align 8, !tbaa ![[BOX_ACCESS_TAG]] -// CHECK: %[[VAL16:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %{{.*}}, i32 0, i32 7, i64 0, i32 0 -// box access: -// CHECK: %[[VAL17:.*]] = load i64, ptr %[[VAL16]], align 4, !tbaa ![[BOX_ACCESS_TAG]] -// CHECK: %[[VAL18:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %{{.*}}, i32 0, i32 7, i64 0, i32 1 +// CHECK: %[[VAL18:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %{{.*}}, i32 0, i32 7, i32 0, i32 1 // box access: // CHECK: %[[VAL19:.*]] = load i64, ptr %[[VAL18]], align 4, !tbaa ![[BOX_ACCESS_TAG]] -// CHECK: %[[VAL20:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %{{.*}}, i32 0, i32 7, i64 0, i32 2 -// box access: -// CHECK: %[[VAL21:.*]] = load i64, ptr %[[VAL20]], align 4, !tbaa ![[BOX_ACCESS_TAG]] // [...] 
// box access: // CHECK: store { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } %{{.*}}, ptr %{{.*}}, align 8, !tbaa ![[BOX_ACCESS_TAG]] diff --git a/flang/test/Integration/OpenMP/map-types-and-sizes.f90 b/flang/test/Integration/OpenMP/map-types-and-sizes.f90 index 665be5a8db4d4..5ce36ac87ca8c 100644 --- a/flang/test/Integration/OpenMP/map-types-and-sizes.f90 +++ b/flang/test/Integration/OpenMP/map-types-and-sizes.f90 @@ -545,7 +545,7 @@ end subroutine mapType_common_block_members !CHECK: %[[ALLOCATABLE_DESC_ALLOCA:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8 !CHECK: %[[ALLOCA:.*]] = alloca %_QFmaptype_derived_type_allocaTone_layer, i64 1, align 8 !CHECK: %[[MEMBER_ACCESS:.*]] = getelementptr %_QFmaptype_derived_type_allocaTone_layer, ptr %[[ALLOCA]], i32 0, i32 4 -!CHECK: %[[DESC_BOUND_ACCESS:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[ALLOCATABLE_DESC_ALLOCA]], i32 0, i32 7, i64 0, i32 1 +!CHECK: %[[DESC_BOUND_ACCESS:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[ALLOCATABLE_DESC_ALLOCA]], i32 0, i32 7, i32 0, i32 1 !CHECK: %[[DESC_BOUND_ACCESS_LOAD:.*]] = load i64, ptr %[[DESC_BOUND_ACCESS]], align 8 !CHECK: %[[OFFSET_UB:.*]] = sub i64 %[[DESC_BOUND_ACCESS_LOAD]], 1 !CHECK: %[[MEMBER_DESCRIPTOR_BASE_ADDR:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[MEMBER_ACCESS]], i32 0, i32 0 @@ -596,7 +596,7 @@ end subroutine mapType_common_block_members !CHECK: %{{.*}} = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %{{.*}}, i32 0, i32 0 !CHECK: %{{.*}} = load ptr, ptr %{{.*}}, align 8 !CHECK: %{{.*}} = getelementptr %_QFmaptype_alloca_derived_typeTone_layer, ptr %{{.*}}, i32 0, i32 4 -!CHECK: %[[ACCESS_DESC_MEMBER_UB:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[DTYPE_ARRAY_MEMBER_DESC_ALLOCA]], i32 0, i32 7, i64 0, i32 1 +!CHECK: %[[ACCESS_DESC_MEMBER_UB:.*]] = getelementptr { ptr, 
i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[DTYPE_ARRAY_MEMBER_DESC_ALLOCA]], i32 0, i32 7, i32 0, i32 1 !CHECK: %[[LOAD_DESC_MEMBER_UB:.*]] = load i64, ptr %[[ACCESS_DESC_MEMBER_UB]], align 8 !CHECK: %[[OFFSET_MEMBER_UB:.*]] = sub i64 %[[LOAD_DESC_MEMBER_UB]], 1 !CHECK: %[[DTYPE_BASE_ADDR_ACCESS:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[DTYPE_DESC_ALLOCA_2]], i32 0, i32 0 @@ -665,7 +665,7 @@ end subroutine mapType_common_block_members !CHECK: %[[ALLOCATABLE_MEMBER_ALLOCA:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8 !CHECK: %[[DTYPE_DESC_ALLOCA_2:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, align 8 !CHECK: %[[DTYPE_DESC_ALLOCA_3:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, i64 1, align 8 -!CHECK: %[[ALLOCATABLE_MEMBER_ALLOCA_UB:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[ALLOCATABLE_MEMBER_ALLOCA]], i32 0, i32 7, i64 0, i32 1 +!CHECK: %[[ALLOCATABLE_MEMBER_ALLOCA_UB:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[ALLOCATABLE_MEMBER_ALLOCA]], i32 0, i32 7, i32 0, i32 1 !CHECK: %[[ALLOCATABLE_MEMBER_ALLOCA_UB_LOAD:.*]] = load i64, ptr %[[ALLOCATABLE_MEMBER_ALLOCA_UB]], align 8 !CHECK: %[[ALLOCATABLE_MEMBER_SIZE_CALC_1:.*]] = sub i64 %[[ALLOCATABLE_MEMBER_ALLOCA_UB_LOAD]], 1 !CHECK: %[[DTYPE_DESC_BASE_ADDR_ACCESS:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[DTYPE_DESC_ALLOCA_2]], i32 0, i32 0 @@ -734,7 +734,7 @@ end subroutine mapType_common_block_members !CHECK: %[[ALLOCA:.*]] = alloca %_QFmaptype_nested_derived_type_allocaTtop_layer, i64 1, align 8 !CHECK: %[[NESTED_DTYPE_MEMBER_ACCESS:.*]] = getelementptr %_QFmaptype_nested_derived_type_allocaTtop_layer, ptr %[[ALLOCA]], i32 0, i32 6 !CHECK: %[[NESTED_MEMBER_ACCESS:.*]] = getelementptr %_QFmaptype_nested_derived_type_allocaTmiddle_layer, ptr %[[NESTED_DTYPE_MEMBER_ACCESS]], i32 0, i32 2 -!CHECK: 
%[[ALLOCATABLE_MEMBER_BASE_ADDR:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[ALLOCATABLE_MEMBER_ALLOCA]], i32 0, i32 7, i64 0, i32 1 +!CHECK: %[[ALLOCATABLE_MEMBER_BASE_ADDR:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[ALLOCATABLE_MEMBER_ALLOCA]], i32 0, i32 7, i32 0, i32 1 !CHECK: %[[ALLOCATABLE_MEMBER_ADDR_LOAD:.*]] = load i64, ptr %[[ALLOCATABLE_MEMBER_BASE_ADDR]], align 8 !CHECK: %[[ALLOCATABLE_MEMBER_SIZE_CALC_1:.*]] = sub i64 %[[ALLOCATABLE_MEMBER_ADDR_LOAD]], 1 !CHECK: %[[NESTED_MEMBER_BASE_ADDR_ACCESS:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %{{.*}}, i32 0, i32 0 @@ -778,9 +778,9 @@ end subroutine mapType_common_block_members !CHECK: %[[ALLOCA:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, align 8 !CHECK: %[[BASE_PTR_1:.*]] = alloca %_QFmaptype_nested_derived_type_member_idxTdtype, i64 1, align 8 !CHECK: %[[OFF_PTR_1:.*]] = getelementptr %_QFmaptype_nested_derived_type_member_idxTdtype, ptr %[[BASE_PTR_1]], i32 0, i32 1 -!CHECK: %[[BOUNDS_ACC:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, ptr %[[ALLOCA]], i32 0, i32 7, i64 0, i32 1 +!CHECK: %[[BOUNDS_ACC:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, ptr %[[ALLOCA]], i32 0, i32 7, i32 0, i32 1 !CHECK: %[[BOUNDS_LD:.*]] = load i64, ptr %[[BOUNDS_ACC]], align 8 -!CHECK: %[[BOUNDS_ACC_2:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[ALLOCA_1]], i32 0, i32 7, i64 0, i32 1 +!CHECK: %[[BOUNDS_ACC_2:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[ALLOCA_1]], i32 0, i32 7, i32 0, i32 1 !CHECK: %[[BOUNDS_LD_2:.*]] = load i64, ptr %[[BOUNDS_ACC_2]], align 8 !CHECK: %[[BOUNDS_CALC:.*]] = sub i64 %[[BOUNDS_LD_2]], 1 !CHECK: %[[OFF_PTR_CALC_0:.*]] = sub i64 %[[BOUNDS_LD]], 1 @@ -789,7 +789,7 @@ end subroutine 
mapType_common_block_members !CHECK: %[[LOAD_DESC_PTR:.*]] = load ptr, ptr %[[GEP_DESC_PTR]], align 8 !CHECK: %[[SZ_CALC_1:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, ptr %[[ALLOCA_0]], i32 0, i32 7, i32 0, i32 2 !CHECK: %[[SZ_CALC_2:.*]] = load i64, ptr %[[SZ_CALC_1]], align 8 -!CHECK: %[[SZ_CALC_3:.*]] = mul nsw i64 1, %[[SZ_CALC_2]] +!CHECK: %[[SZ_CALC_3:.*]] = mul nsw i64 %[[SZ_CALC_2]], 1 !CHECK: %[[SZ_CALC_4:.*]] = add nsw i64 %[[SZ_CALC_3]], 0 !CHECK: %[[SZ_CALC_5:.*]] = getelementptr i8, ptr %[[LOAD_DESC_PTR]], i64 %[[SZ_CALC_4]] !CHECK: %[[SZ_CALC_6:.*]] = getelementptr %_QFmaptype_nested_derived_type_member_idxTvertexes, ptr %[[SZ_CALC_5]], i32 0, i32 2 diff --git a/flang/test/Lower/allocatable-polymorphic.f90 b/flang/test/Lower/allocatable-polymorphic.f90 index e6a8c5e025123..5a28e97054359 100644 --- a/flang/test/Lower/allocatable-polymorphic.f90 +++ b/flang/test/Lower/allocatable-polymorphic.f90 @@ -606,8 +606,6 @@ program test_alloc ! LLVM-COUNT-2: call void %{{[0-9]*}}() ! LLVM: call void @llvm.memcpy.p0.p0.i32 -! LLVM: %[[GEP_TDESC_C1:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %{{.*}}, i32 0, i32 7 -! LLVM: %[[TDESC_C1:.*]] = load ptr, ptr %[[GEP_TDESC_C1]] ! LLVM: %[[ELEM_SIZE_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %{{.*}}, i32 0, i32 1 ! LLVM: %[[ELEM_SIZE:.*]] = load i64, ptr %[[ELEM_SIZE_GEP]] ! LLVM: %[[TYPE_CODE_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %{{.*}}, i32 0, i32 4 @@ -620,8 +618,6 @@ program test_alloc ! LLVM: call void %{{.*}}(ptr %{{.*}}) ! LLVM: call void @llvm.memcpy.p0.p0.i32 -! LLVM: %[[GEP_TDESC_C2:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %{{.*}}, i32 0, i32 7 -! LLVM: %[[TDESC_C2:.*]] = load ptr, ptr %[[GEP_TDESC_C2]] ! 
LLVM: %[[ELEM_SIZE_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %{{.*}}, i32 0, i32 1 ! LLVM: %[[ELEM_SIZE:.*]] = load i64, ptr %[[ELEM_SIZE_GEP]] ! LLVM: %[[TYPE_CODE_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %{{.*}}, i32 0, i32 4 diff --git a/flang/test/Lower/forall/character-1.f90 b/flang/test/Lower/forall/character-1.f90 index d1e12a8dbdfec..7a1f4b125a79f 100644 --- a/flang/test/Lower/forall/character-1.f90 +++ b/flang/test/Lower/forall/character-1.f90 @@ -23,11 +23,11 @@ end program test ! CHECK-LABEL: define internal void @_QFPsub( ! CHECK-SAME: ptr {{[^%]*}}%[[arg:.*]]) -! CHECK: %[[extent:.*]] = getelementptr { {{.*}}, [1 x [3 x i64]] }, ptr %[[arg]], i32 0, i32 7, i64 0, i32 1 +! CHECK: %[[extent:.*]] = getelementptr { {{.*}}, [1 x [3 x i64]] }, ptr %[[arg]], i32 0, i32 7, i32 0, i32 1 ! CHECK: %[[extval:.*]] = load i64, ptr %[[extent]] ! CHECK: %[[elesize:.*]] = getelementptr { {{.*}}, [1 x [3 x i64]] }, ptr %[[arg]], i32 0, i32 1 ! CHECK: %[[esval:.*]] = load i64, ptr %[[elesize]] -! CHECK: %[[mul:.*]] = mul i64 1, %[[esval]] +! CHECK: %[[mul:.*]] = mul i64 %[[esval]], 1 ! CHECK: %[[mul2:.*]] = mul i64 %[[mul]], %[[extval]] ! CHECK: %[[cmp:.*]] = icmp sgt i64 %[[mul2]], 0 ! CHECK: %[[size:.*]] = select i1 %[[cmp]], i64 %[[mul2]], i64 1 diff --git a/mlir/include/mlir/Dialect/LLVMIR/Transforms/OpenMPOffloadPrivatizationPrepare.h b/mlir/include/mlir/Dialect/LLVMIR/Transforms/OpenMPOffloadPrivatizationPrepare.h new file mode 100644 index 0000000000000..af6dfb0057688 --- /dev/null +++ b/mlir/include/mlir/Dialect/LLVMIR/Transforms/OpenMPOffloadPrivatizationPrepare.h @@ -0,0 +1,23 @@ +//===- OpenMPOffloadPrivatizationPrepare.h - Prepare for OpenMP Offload +// Privatization -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_LLVMIR_TRANSFORMS_PREPAREFOROMPOFFLOADPRIVATIZATIONPASS_H +#define MLIR_DIALECT_LLVMIR_TRANSFORMS_PREPAREFOROMPOFFLOADPRIVATIZATIONPASS_H + +#include <memory> + +namespace mlir { +class Pass; +namespace LLVM { +#define GEN_PASS_DECL_PREPAREFOROMPOFFLOADPRIVATIZATIONPASS +#include "mlir/Dialect/LLVMIR/Transforms/Passes.h.inc" +} // namespace LLVM +} // namespace mlir + +#endif // MLIR_DIALECT_LLVMIR_TRANSFORMS_PREPAREFOROMPOFFLOADPRIVATIZATIONPASS_H diff --git a/mlir/include/mlir/Dialect/LLVMIR/Transforms/Passes.td b/mlir/include/mlir/Dialect/LLVMIR/Transforms/Passes.td index 961909d5c8d27..1ba67caba05be 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/LLVMIR/Transforms/Passes.td @@ -73,4 +73,16 @@ def DIScopeForLLVMFuncOpPass : Pass<"ensure-debug-info-scope-on-llvm-func", "::m ]; } +def PrepareForOMPOffloadPrivatizationPass : Pass<"omp-offload-privatization-prepare", "::mlir::LLVM::LLVMFuncOp"> { + let summary = "Prepare OpenMP maps for privatization for deferred target tasks"; + let description = [{ + When generating LLVMIR for privatized variables in an OpenMP offloading directive (eg. omp::TargetOp) + that creates a deferred target task (when the nowait clause is used), we need to copy the privatized + variable out of the stack of the generating task and into the heap so that the deferred target task + can still access it. However, if such a privatized variable is also mapped, typically the case for + allocatables, then the corresponding `omp::MapInfoOp` needs to be fixed up to map the new heap-allocated + variable and not the original variable.
+ }]; + let dependentDialects = ["LLVM::LLVMDialect", "mlir::omp::OpenMPDialect"]; +} #endif // MLIR_DIALECT_LLVMIR_TRANSFORMS_PASSES diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index 2548a8ab4aac6..efa43107da068 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -1479,8 +1479,8 @@ def TargetOp : OpenMP_Op<"target", traits = [ `map` operands. For `private` operands that require a map, the value of the corresponding element in the attribute is the index of the `map` operand (relative to other `map` operands not the whole operands of the operation). For - `private` opernads that do not require a map, this value is -1 (which is omitted - from the assembly foramt printing). + `private` operands that do not require a map, this value is -1 (which is omitted + from the assembly format printing). }] # clausesDescription; let arguments = !con(clausesArgs, diff --git a/mlir/lib/Dialect/LLVMIR/Transforms/CMakeLists.txt b/mlir/lib/Dialect/LLVMIR/Transforms/CMakeLists.txt index d4ff0955c5d0e..729f5191cd557 100644 --- a/mlir/lib/Dialect/LLVMIR/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/LLVMIR/Transforms/CMakeLists.txt @@ -7,6 +7,7 @@ add_mlir_dialect_library(MLIRLLVMIRTransforms LegalizeForExport.cpp OptimizeForNVVM.cpp RequestCWrappers.cpp + OpenMPOffloadPrivatizationPrepare.cpp DEPENDS MLIRLLVMPassIncGen @@ -18,4 +19,5 @@ add_mlir_dialect_library(MLIRLLVMIRTransforms MLIRPass MLIRTransforms MLIRNVVMDialect + MLIROpenMPDialect ) diff --git a/mlir/lib/Dialect/LLVMIR/Transforms/OpenMPOffloadPrivatizationPrepare.cpp b/mlir/lib/Dialect/LLVMIR/Transforms/OpenMPOffloadPrivatizationPrepare.cpp new file mode 100644 index 0000000000000..a2e522d5f536d --- /dev/null +++ b/mlir/lib/Dialect/LLVMIR/Transforms/OpenMPOffloadPrivatizationPrepare.cpp @@ -0,0 +1,423 @@ +//===- OpenMPOffloadPrivatizationPrepare.cpp - Prepare for OpenMP Offload +// Privatization 
---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/LLVMIR/Transforms/OpenMPOffloadPrivatizationPrepare.h" +#include "mlir/Analysis/SliceAnalysis.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/LLVMIR/FunctionCallUtils.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Dialect/OpenMP/OpenMPDialect.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/Dominance.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Transforms/DialectConversion.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include <cstdint> +#include <set> + +//===----------------------------------------------------------------------===// +// A pass that prepares OpenMP code for translation of delayed privatization +// in the context of deferred target tasks. Deferred target tasks are created +// when the nowait clause is used on the target directive.
+//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "omp-prepare-for-offload-privatization" +#define PDBGS() (llvm::dbgs() << "[" << DEBUG_TYPE << "]: ") + +namespace mlir { +namespace LLVM { + +#define GEN_PASS_DEF_PREPAREFOROMPOFFLOADPRIVATIZATIONPASS +#include "mlir/Dialect/LLVMIR/Transforms/Passes.h.inc" + +} // namespace LLVM +} // namespace mlir + +using namespace mlir; +namespace { + +//===----------------------------------------------------------------------===// +// OMPTargetPrepareDelayedPrivatizationPattern +//===----------------------------------------------------------------------===// + +class OMPTargetPrepareDelayedPrivatizationPattern + : public OpRewritePattern<omp::TargetOp> { +public: + using OpRewritePattern<omp::TargetOp>::OpRewritePattern; + + // Match omp::TargetOp that have the following characteristics. + // 1. have private vars which refer to local (stack) memory + // 2. the target op has the nowait clause + // In this case, we allocate memory for the privatized variable on the heap + // and copy the original variable into this new heap allocation. We fix up + // any omp::MapInfoOp instances that may be mapping the private variable.
+ mlir::LogicalResult + matchAndRewrite(omp::TargetOp targetOp, + PatternRewriter &rewriter) const override { + if (!hasPrivateVars(targetOp) || !isTargetTaskDeferred(targetOp)) + return rewriter.notifyMatchFailure( + targetOp, + "targetOp does not have privateVars or does not need a target task"); + + ModuleOp mod = targetOp->getParentOfType<ModuleOp>(); + LLVM::LLVMFuncOp llvmFunc = targetOp->getParentOfType<LLVM::LLVMFuncOp>(); + OperandRange privateVars = targetOp.getPrivateVars(); + mlir::SmallVector<mlir::Value> newPrivVars; + + newPrivVars.reserve(privateVars.size()); + std::optional<ArrayAttr> privateSyms = targetOp.getPrivateSyms(); + for (auto [privVarIdx, privVarSymPair] : + llvm::enumerate(llvm::zip_equal(privateVars, *privateSyms))) { + auto privVar = std::get<0>(privVarSymPair); + auto privSym = std::get<1>(privVarSymPair); + + omp::PrivateClauseOp privatizer = findPrivatizer(targetOp, privSym); + if (!privatizer.needsMap()) { + newPrivVars.push_back(privVar); + continue; + } + bool isFirstPrivate = privatizer.getDataSharingType() == + omp::DataSharingClauseType::FirstPrivate; + + mlir::Value mappedValue = + targetOp.getMappedValueForPrivateVar(privVarIdx); + Operation *mapInfoOperation = mappedValue.getDefiningOp(); + auto mapInfoOp = mlir::cast<omp::MapInfoOp>(mapInfoOperation); + + if (mapInfoOp.getMapCaptureType() == omp::VariableCaptureKind::ByCopy) { + newPrivVars.push_back(privVar); + continue; + } + + // Allocate heap memory that corresponds to the type of memory + // pointed to by varPtr + // TODO: For boxchars this likely won't be a pointer. + mlir::Value varPtr = privVar; + mlir::Value heapMem = allocateHeapMem(targetOp, privVar, mod, rewriter); + if (!heapMem) + return failure(); + + newPrivVars.push_back(heapMem); + + // Find the earliest insertion point for the copy. This will be before + // the first in the list of omp::MapInfoOp instances that use varPtr. + // After the copy these omp::MapInfoOp instances will refer to heapMem + // instead.
+ Operation *varPtrDefiningOp = varPtr.getDefiningOp(); + std::set users; + users.insert(varPtrDefiningOp->user_begin(), + varPtrDefiningOp->user_end()); + + auto usesVarPtr = [&users](Operation *op) -> bool { + return users.count(op); + }; + SmallVector chainOfOps; + chainOfOps.push_back(mapInfoOperation); + if (!mapInfoOp.getMembers().empty()) { + for (auto member : mapInfoOp.getMembers()) { + if (usesVarPtr(member.getDefiningOp())) + chainOfOps.push_back(member.getDefiningOp()); + + omp::MapInfoOp memberMap = + mlir::cast(member.getDefiningOp()); + if (memberMap.getVarPtrPtr() && + usesVarPtr(memberMap.getVarPtrPtr().getDefiningOp())) + chainOfOps.push_back(memberMap.getVarPtrPtr().getDefiningOp()); + } + } + DominanceInfo dom; + llvm::sort(chainOfOps, [&](Operation *l, Operation *r) { + return dom.dominates(l, r); + }); + + rewriter.setInsertionPoint(chainOfOps.front()); + // Copy the value of the local variable into the heap-allocated location. + mlir::Location loc = chainOfOps.front()->getLoc(); + mlir::Type varType = getElemType(varPtr); + auto loadVal = rewriter.create(loc, varType, varPtr); + LLVM_ATTRIBUTE_UNUSED auto storeInst = + rewriter.create(loc, loadVal.getResult(), heapMem); + + using ReplacementEntry = std::pair; + llvm::SmallVector replRecord; + auto cloneAndMarkForDeletion = [&](Operation *origOp) -> Operation * { + Operation *clonedOp = rewriter.clone(*origOp); + rewriter.replaceAllOpUsesWith(origOp, clonedOp); + replRecord.push_back(std::make_pair(origOp, clonedOp)); + return clonedOp; + }; + + rewriter.setInsertionPoint(targetOp); + rewriter.setInsertionPoint(cloneAndMarkForDeletion(mapInfoOperation)); + + // Fix any members that may use varPtr to now use heapMem + if (!mapInfoOp.getMembers().empty()) { + for (auto member : mapInfoOp.getMembers()) { + Operation *memberOperation = member.getDefiningOp(); + if (!usesVarPtr(memberOperation)) + continue; + rewriter.setInsertionPoint(cloneAndMarkForDeletion(memberOperation)); + + auto 
memberMapInfoOp = mlir::cast(memberOperation); + if (memberMapInfoOp.getVarPtrPtr()) { + Operation *varPtrPtrdefOp = + memberMapInfoOp.getVarPtrPtr().getDefiningOp(); + + // In the case of firstprivate, we have to do the following + // 1. Allocate heap memory for the underlying data. + // 2. Copy the original underlying data to the new memory allocated + // on the heap. + // 3. Put this new (heap) address in the originating + // struct/descriptor + + // Consider the following sequence of omp.map.info and omp.target + // operations. + // %0 = llvm.getelementptr %19[0, 0] + // %1 = omp.map.info var_ptr(%19 : !llvm.ptr, i32) ... + // var_ptr_ptr(%0 : !llvm.ptr) bounds(..) + // %2 = omp.map.info var_ptr(%19 : !llvm.ptr, !desc_type)>) ... + // members(%1 : [0] : !llvm.ptr) -> !llvm.ptr + // omp.target nowait map_entries(%2 -> %arg5, %1 -> %arg8 : ..) + // private(@privatizer %19 -> %arg9 [map_idx=1] : + // !llvm.ptr) { + // We need to allocate memory on the heap for the underlying pointer + // which is stored at the var_ptr_ptr operand of %1. Then we need to + // copy this pointer to the new heap allocated memory location. + // Then, we need to store the address of the new heap location in + // the originating struct/descriptor. So, we generate the following + // (pseudo) MLIR code (Using the same names of mlir::Value instances + // in the example as in the code below) + // + // %dataMalloc = malloc(totalSize) + // %loadDataPtr = load %0 : !llvm.ptr -> !llvm.ptr + // memcpy(%dataMalloc, %loadDataPtr, totalSize) + // %newVarPtrPtrOp = llvm.getelementptr %heapMem[0, 0] + // llvm.store %dataMalloc, %newVarPtrPtrOp + // %1.cloned = omp.map.info var_ptr(%heapMem : !llvm.ptr, i32) ... + // var_ptr_ptr(%newVarPtrPtrOp : !llvm.ptr) + // %2.cloned = omp.map.info var_ptr(%heapMem : !llvm.ptr, + // !desc_type)>) ... + // members(%1.cloned : [0] : !llvm.ptr) + // -> !llvm.ptr + // omp.target nowait map_entries(%2.cloned -> %arg5, + // %1.cloned -> %arg8 : ..) 
+ // private(@privatizer %heapMem -> .. [map_idx=1] : ..) { + + if (isFirstPrivate) { + assert(!memberMapInfoOp.getBounds().empty() && + "empty bounds on member map of firstprivate variable"); + mlir::Location loc = memberMapInfoOp.getLoc(); + mlir::Value totalSize = + getSizeInBytes(memberMapInfoOp, mod, rewriter); + auto dataMalloc = allocateHeapMem(loc, totalSize, mod, rewriter); + auto loadDataPtr = rewriter.create( + loc, memberMapInfoOp.getVarPtrPtr().getType(), + memberMapInfoOp.getVarPtrPtr()); + LLVM_ATTRIBUTE_UNUSED auto memcpy = + rewriter.create( + loc, dataMalloc.getResult(), loadDataPtr.getResult(), + totalSize, /*isVolatile=*/false); + Operation *newVarPtrPtrOp = rewriter.clone(*varPtrPtrdefOp); + rewriter.replaceAllUsesExcept(memberMapInfoOp.getVarPtrPtr(), + newVarPtrPtrOp->getOpResult(0), + loadDataPtr); + rewriter.modifyOpInPlace(newVarPtrPtrOp, [&]() { + newVarPtrPtrOp->replaceUsesOfWith(varPtr, heapMem); + }); + LLVM_ATTRIBUTE_UNUSED auto storePtr = + rewriter.create(loc, dataMalloc.getResult(), + newVarPtrPtrOp->getResult(0)); + } else + rewriter.setInsertionPoint( + cloneAndMarkForDeletion(varPtrPtrdefOp)); + } + } + } + + for (auto repl : replRecord) { + Operation *origOp = repl.first; + Operation *clonedOp = repl.second; + rewriter.modifyOpInPlace( + clonedOp, [&]() { clonedOp->replaceUsesOfWith(varPtr, heapMem); }); + rewriter.eraseOp(origOp); + } + } + assert(newPrivVars.size() == privateVars.size() && + "The number of private variables must match before and after " + "transformation"); + + rewriter.setInsertionPoint(targetOp); + Operation *newOp = rewriter.clone(*targetOp.getOperation()); + omp::TargetOp newTargetOp = mlir::cast(newOp); + rewriter.modifyOpInPlace(newTargetOp, [&]() { + newTargetOp.getPrivateVarsMutable().assign(newPrivVars); + }); + rewriter.replaceOp(targetOp, newTargetOp); + return mlir::success(); + } + +private: + bool hasPrivateVars(omp::TargetOp targetOp) const { + return !targetOp.getPrivateVars().empty(); + } + + 
bool isTargetTaskDeferred(omp::TargetOp targetOp) const { + return targetOp.getNowait(); + } + + template + omp::PrivateClauseOp findPrivatizer(OpTy op, mlir::Attribute privSym) const { + SymbolRefAttr privatizerName = llvm::cast(privSym); + omp::PrivateClauseOp privatizer = + SymbolTable::lookupNearestSymbolFrom( + op, privatizerName); + return privatizer; + } + + template + mlir::Type getElemType(OpType op) const { + return op.getElemType(); + } + + mlir::Type getElemType(mlir::Value varPtr) const { + Operation *definingOp = unwrapAddrSpaceCast(varPtr.getDefiningOp()); + assert((mlir::isa(definingOp)) && + "getElemType in PrepareForOMPOffloadPrivatizationPass can deal only " + "with Alloca or GEP for now"); + if (auto allocaOp = mlir::dyn_cast(definingOp)) + return getElemType(allocaOp); + // TODO: get rid of this because GEPOp.getElemType() is not the right thing + // to use. + if (auto gepOp = mlir::dyn_cast(definingOp)) + return getElemType(gepOp); + return mlir::Type{}; + } + + mlir::Operation *unwrapAddrSpaceCast(Operation *op) const { + if (!mlir::isa(op)) + return op; + mlir::LLVM::AddrSpaceCastOp addrSpaceCastOp = + mlir::cast(op); + return unwrapAddrSpaceCast(addrSpaceCastOp.getArg().getDefiningOp()); + } + + // Get the (compile-time constant) size of varType as per the + // given DataLayout dl. 
+ std::int64_t getSizeInBytes(const mlir::DataLayout &dl, + mlir::Type varType) const { + llvm::TypeSize size = dl.getTypeSize(varType); + unsigned short alignment = dl.getTypeABIAlignment(varType); + return llvm::alignTo(size, alignment); + } + + // Generate code to get the size of data being mapped from the bounds + // of mapInfoOp + mlir::Value getSizeInBytes(omp::MapInfoOp mapInfoOp, ModuleOp mod, + PatternRewriter &rewriter) const { + mlir::Location loc = mapInfoOp.getLoc(); + mlir::Type llvmInt64Ty = rewriter.getI64Type(); + mlir::Value constOne = + rewriter.create(loc, llvmInt64Ty, 1); + mlir::Value elementCount = constOne; + // TODO: Consider using boundsOp.getExtent() if available. + for (auto bounds : mapInfoOp.getBounds()) { + auto boundsOp = mlir::cast(bounds.getDefiningOp()); + elementCount = rewriter.create( + loc, llvmInt64Ty, elementCount, + rewriter.create( + loc, llvmInt64Ty, + (rewriter.create(loc, llvmInt64Ty, + boundsOp.getUpperBound(), + boundsOp.getLowerBound())), + constOne)); + } + const mlir::DataLayout &dl = mlir::DataLayout(mod); + std::int64_t elemSize = getSizeInBytes(dl, mapInfoOp.getVarType()); + mlir::Value elemSizeV = + rewriter.create(loc, llvmInt64Ty, elemSize); + return rewriter.create(loc, llvmInt64Ty, elementCount, + elemSizeV); + } + + LLVM::LLVMFuncOp getMalloc(ModuleOp mod, PatternRewriter &rewriter) const { + llvm::FailureOr mallocCall = + LLVM::lookupOrCreateMallocFn(rewriter, mod, rewriter.getI64Type()); + assert(llvm::succeeded(mallocCall) && + "Could not find malloc in the module"); + return mallocCall.value(); + } + + template + mlir::Value allocateHeapMem(OpTy targetOp, mlir::Value privVar, ModuleOp mod, + PatternRewriter &rewriter) const { + mlir::Value varPtr = privVar; + Operation *definingOp = varPtr.getDefiningOp(); + OpBuilder::InsertionGuard guard(rewriter); + rewriter.setInsertionPoint(definingOp); + LLVM::LLVMFuncOp mallocFn = getMalloc(mod, rewriter); + + mlir::Location loc = definingOp->getLoc(); + 
mlir::Type varType = getElemType(varPtr); + assert(mod.getDataLayoutSpec() && + "MLIR module with no datalayout spec not handled yet"); + const mlir::DataLayout &dl = mlir::DataLayout(mod); + std::int64_t distance = getSizeInBytes(dl, varType); + mlir::Value sizeBytes = rewriter.create( + loc, mallocFn.getFunctionType().getParamType(0), distance); + + auto mallocCallOp = + rewriter.create(loc, mallocFn, ValueRange{sizeBytes}); + return mallocCallOp.getResult(); + } + + LLVM::CallOp allocateHeapMem(mlir::Location loc, mlir::Value size, + ModuleOp mod, PatternRewriter &rewriter) const { + LLVM::LLVMFuncOp mallocFn = getMalloc(mod, rewriter); + return rewriter.create(loc, mallocFn, ValueRange{size}); + } +}; + +//===----------------------------------------------------------------------===// +// PrepareForOMPOffloadPrivatizationPass +//===----------------------------------------------------------------------===// + +struct PrepareForOMPOffloadPrivatizationPass + : public LLVM::impl::PrepareForOMPOffloadPrivatizationPassBase< + PrepareForOMPOffloadPrivatizationPass> { + + void runOnOperation() override { + LLVM::LLVMFuncOp func = getOperation(); + MLIRContext &context = getContext(); + ModuleOp mod = func->getParentOfType(); + + // FunctionFilteringPass removes bounds arguments from omp.map.info + // operations. We require bounds else our pass asserts. But, that's only for + // maps in functions that are on the host. So, skip functions being compiled + // for the target. 
+ auto offloadModuleInterface = + mlir::dyn_cast(mod.getOperation()); + if (offloadModuleInterface && offloadModuleInterface.getIsTargetDevice()) { + return; + } + + RewritePatternSet patterns(&context); + patterns.add(&context); + + if (mlir::failed( + applyPatternsGreedily(func, std::move(patterns), + GreedyRewriteConfig().setStrictness( + GreedyRewriteStrictness::ExistingOps)))) { + emitError(func.getLoc(), + "error in preparing targetOps for delayed privatization."); + signalPassFailure(); + } + } +}; +} // namespace diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 6694de8383534..f3cbd62b53342 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -356,14 +356,8 @@ static LogicalResult checkImplementationStatus(Operation &op) { result = todo("priority"); }; auto checkPrivate = [&todo](auto op, LogicalResult &result) { - if constexpr (std::is_same_v, omp::TargetOp>) { - // Privatization is supported only for included target tasks. 
- if (!op.getPrivateVars().empty() && op.getNowait()) - result = todo("privatization for deferred target tasks"); - } else { - if (!op.getPrivateVars().empty() || op.getPrivateSyms()) - result = todo("privatization"); - } + if (!op.getPrivateVars().empty() || op.getPrivateSyms()) + result = todo("privatization"); }; auto checkReduction = [&todo](auto op, LogicalResult &result) { if (isa(op)) @@ -450,7 +444,6 @@ static LogicalResult checkImplementationStatus(Operation &op) { checkDevice(op, result); checkInReduction(op, result); checkIsDevicePtr(op, result); - checkPrivate(op, result); }) .Default([](Operation &) { // Assume all clauses for an operation can be translated unless they are diff --git a/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp b/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp index de714d8b740af..60c5406bdd197 100644 --- a/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp +++ b/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp @@ -624,6 +624,7 @@ LogicalResult mlir::MlirOptMain(llvm::raw_ostream &outputStream, // We use the thread-pool this context is creating, and avoid // creating any thread when disabled. 
MLIRContext threadPoolCtx; + if (threadPoolCtx.isMultithreadingEnabled()) threadPool = &threadPoolCtx.getThreadPool(); diff --git a/mlir/test/Dialect/LLVMIR/omp-offload-privatization-prepare.mlir b/mlir/test/Dialect/LLVMIR/omp-offload-privatization-prepare.mlir new file mode 100644 index 0000000000000..6b8121b262f47 --- /dev/null +++ b/mlir/test/Dialect/LLVMIR/omp-offload-privatization-prepare.mlir @@ -0,0 +1,167 @@ +// RUN: mlir-opt --mlir-disable-threading -omp-offload-privatization-prepare --split-input-file %s | FileCheck %s + +module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vector<4xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, f80 = dense<128> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, "dlti.endianness" = "little", "dlti.mangling_mode" = "e", "dlti.legal_int_widths" = array, "dlti.stack_alignment" = 128 : i64>} { + llvm.func @free(!llvm.ptr) + omp.private {type = private} @privatizer : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> init { + ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr): + %0 = llvm.mlir.constant(48 : i32) : i32 + "llvm.intr.memcpy"(%arg1, %arg0, %0) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> () + omp.yield(%arg1 : !llvm.ptr) + } + + omp.private {type = firstprivate} @firstprivatizer : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> copy { + ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr): + %0 = llvm.mlir.constant(48 : i32) : i32 + "llvm.intr.memcpy"(%arg1, %arg0, %0) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> () + omp.yield(%arg1 : !llvm.ptr) + } + + llvm.func internal @private_test(%arg0: !llvm.ptr 
{fir.bindc_name = "ptr0"}, %arg1: !llvm.ptr {fir.bindc_name = "ptr1"}) { + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.mlir.constant(0 : index) : i64 + %5 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr + %19 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {bindc_name = "local"} : (i32) -> !llvm.ptr + %21 = llvm.alloca %0 x i32 {bindc_name = "i"} : (i32) -> !llvm.ptr + %33 = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> + llvm.store %33, %19 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, !llvm.ptr + llvm.store %0, %21 : i32, !llvm.ptr + %124 = omp.map.info var_ptr(%21 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "i"} + %150 = llvm.getelementptr %19[0, 7, %1, 0] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> + %151 = llvm.load %150 : !llvm.ptr -> i64 + %152 = llvm.getelementptr %19[0, 7, %1, 1] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> + %153 = llvm.load %152 : !llvm.ptr -> i64 + %154 = llvm.getelementptr %19[0, 7, %1, 2] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> + %155 = llvm.load %154 : !llvm.ptr -> i64 + %156 = llvm.sub %153, %1 : i64 + %157 = omp.map.bounds lower_bound(%1 : i64) upper_bound(%156 : i64) extent(%153 : i64) stride(%155 : i64) start_idx(%151 : i64) {stride_in_bytes = true} + %158 = llvm.getelementptr %19[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> + %159 = omp.map.info var_ptr(%19 : !llvm.ptr, i32) map_clauses(descriptor_base_addr, to) capture(ByRef) var_ptr_ptr(%158 : !llvm.ptr) bounds(%157) -> !llvm.ptr {name = ""} + %160 = 
omp.map.info var_ptr(%19 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(always, descriptor, to) capture(ByRef) members(%159 : [0] : !llvm.ptr) -> !llvm.ptr + omp.target nowait map_entries(%124 -> %arg2, %160 -> %arg5, %159 -> %arg8 : !llvm.ptr, !llvm.ptr, !llvm.ptr) private(@privatizer %19 -> %arg9 [map_idx=1] : !llvm.ptr) { + omp.terminator + } + %166 = llvm.mlir.constant(48 : i32) : i32 + %167 = llvm.getelementptr %19[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> + %168 = llvm.load %167 : !llvm.ptr -> !llvm.ptr + llvm.call @free(%168) : (!llvm.ptr) -> () + llvm.return + } + + llvm.func internal @firstprivate_test(%arg0: !llvm.ptr {fir.bindc_name = "ptr0"}, %arg1: !llvm.ptr {fir.bindc_name = "ptr1"}) { + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.mlir.constant(0 : index) : i64 + %5 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr + %19 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {bindc_name = "local"} : (i32) -> !llvm.ptr + %21 = llvm.alloca %0 x i32 {bindc_name = "i"} : (i32) -> !llvm.ptr + %33 = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> + llvm.store %33, %19 : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, !llvm.ptr + llvm.store %0, %21 : i32, !llvm.ptr + %124 = omp.map.info var_ptr(%21 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "i"} + %150 = llvm.getelementptr %19[0, 7, %1, 0] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> + %151 = llvm.load %150 : !llvm.ptr -> i64 + %152 = llvm.getelementptr %19[0, 7, %1, 1] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> + 
%153 = llvm.load %152 : !llvm.ptr -> i64 + %154 = llvm.getelementptr %19[0, 7, %1, 2] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> + %155 = llvm.load %154 : !llvm.ptr -> i64 + %156 = llvm.sub %153, %1 : i64 + %157 = omp.map.bounds lower_bound(%1 : i64) upper_bound(%156 : i64) extent(%153 : i64) stride(%155 : i64) start_idx(%151 : i64) {stride_in_bytes = true} + %158 = llvm.getelementptr %19[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> + %159 = omp.map.info var_ptr(%19 : !llvm.ptr, i32) map_clauses(descriptor_base_addr, to) capture(ByRef) var_ptr_ptr(%158 : !llvm.ptr) bounds(%157) -> !llvm.ptr {name = ""} + %160 = omp.map.info var_ptr(%19 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(always, descriptor, to) capture(ByRef) members(%159 : [0] : !llvm.ptr) -> !llvm.ptr + omp.target nowait map_entries(%124 -> %arg2, %160 -> %arg5, %159 -> %arg8 : !llvm.ptr, !llvm.ptr, !llvm.ptr) private(@firstprivatizer %19 -> %arg9 [map_idx=1] : !llvm.ptr) { + omp.terminator + } + %166 = llvm.mlir.constant(48 : i32) : i32 + %167 = llvm.getelementptr %19[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> + %168 = llvm.load %167 : !llvm.ptr -> !llvm.ptr + llvm.call @free(%168) : (!llvm.ptr) -> () + llvm.return + } +} + +// CHECK-LABEL: llvm.func @malloc(i64) -> !llvm.ptr +// CHECK: llvm.func @free(!llvm.ptr) + +// CHECK-LABEL: llvm.func internal @private_test( +// CHECK: %[[VAL_0:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> +// CHECK: %[[VAL_1:.*]] = llvm.mlir.constant(1 : i32) : i32 +// CHECK: %[[VAL_2:.*]] = llvm.mlir.constant(0 : index) : i64 +// CHECK: %[[VAL_3:.*]] = llvm.mlir.constant(48 : i64) : i64 +// CHECK: %[[HEAP:.*]] = llvm.call @malloc(%[[VAL_3]]) : (i64) -> !llvm.ptr +// CHECK: %[[STACK:.*]] = 
llvm.alloca %[[VAL_1]] x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {bindc_name = "local"} : (i32) -> !llvm.ptr +// CHECK: %[[VAL_6:.*]] = llvm.alloca %[[VAL_1]] x i32 {bindc_name = "i"} : (i32) -> !llvm.ptr +// CHECK: llvm.store %[[VAL_0]], %[[STACK]] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, !llvm.ptr +// CHECK: llvm.store %[[VAL_1]], %[[VAL_6]] : i32, !llvm.ptr +// CHECK: %[[VAL_7:.*]] = omp.map.info var_ptr(%[[VAL_6]] : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "i"} +// CHECK: %[[VAL_8:.*]] = llvm.getelementptr %[[STACK]][0, 7, 0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> +// CHECK: %[[VAL_9:.*]] = llvm.load %[[VAL_8]] : !llvm.ptr -> i64 +// CHECK: %[[VAL_10:.*]] = llvm.getelementptr %[[STACK]][0, 7, 0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> +// CHECK: %[[VAL_11:.*]] = llvm.load %[[VAL_10]] : !llvm.ptr -> i64 +// CHECK: %[[VAL_12:.*]] = llvm.getelementptr %[[STACK]][0, 7, 0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> +// CHECK: %[[VAL_13:.*]] = llvm.load %[[VAL_12]] : !llvm.ptr -> i64 +// CHECK: %[[VAL_14:.*]] = llvm.sub %[[VAL_11]], %[[VAL_2]] : i64 +// CHECK: %[[VAL_15:.*]] = omp.map.bounds lower_bound(%[[VAL_2]] : i64) upper_bound(%[[VAL_14]] : i64) extent(%[[VAL_11]] : i64) stride(%[[VAL_13]] : i64) start_idx(%[[VAL_9]] : i64) {stride_in_bytes = true} +// CHECK: %[[VAL_16:.*]] = llvm.load %[[STACK]] : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> +// CHECK: llvm.store %[[VAL_16]], %[[HEAP]] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, !llvm.ptr +// CHECK: %[[VAL_17:.*]] = llvm.getelementptr %[[HEAP]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, 
array<1 x array<3 x i64>>)> +// CHECK: %[[VAL_18:.*]] = omp.map.info var_ptr(%[[HEAP]] : !llvm.ptr, i32) map_clauses(to) capture(ByRef) var_ptr_ptr(%[[VAL_17]] : !llvm.ptr) bounds(%[[VAL_15]]) -> !llvm.ptr {name = ""} +// CHECK: %[[VAL_19:.*]] = omp.map.info var_ptr(%[[HEAP]] : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(always, to) capture(ByRef) members(%[[VAL_18]] : [0] : !llvm.ptr) -> !llvm.ptr +// CHECK: omp.target nowait map_entries(%[[VAL_7]] -> %[[VAL_20:.*]], %[[VAL_19]] -> %[[VAL_21:.*]], %[[VAL_18]] -> %[[VAL_22:.*]] : !llvm.ptr, !llvm.ptr, !llvm.ptr) private(@privatizer %[[HEAP]] -> %[[VAL_23:.*]] [map_idx=1] : !llvm.ptr) { +// CHECK: omp.terminator +// CHECK: } +// CHECK: %[[VAL_24:.*]] = llvm.getelementptr %[[STACK]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> +// CHECK: %[[VAL_25:.*]] = llvm.load %[[VAL_24]] : !llvm.ptr -> !llvm.ptr +// CHECK: llvm.call @free(%[[VAL_25]]) : (!llvm.ptr) -> () +// CHECK: llvm.return +// CHECK: } + +// CHECK-LABEL: llvm.func internal @firstprivate_test( +// CHECK: %[[VAL_0:.*]] = llvm.mlir.constant(4 : i64) : i64 +// CHECK: %[[VAL_1:.*]] = llvm.mlir.constant(1 : i64) : i64 +// CHECK: %[[VAL_2:.*]] = llvm.mlir.undef : +// CHECK-SAME: !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> +// CHECK: %[[VAL_3:.*]] = llvm.mlir.constant(1 : i32) : i32 +// CHECK: %[[VAL_4:.*]] = llvm.mlir.constant(0 : index) : i64 +// CHECK: %[[VAL_5:.*]] = llvm.mlir.constant(48 : i64) : i64 +// CHECK: %[[HEAP:.*]] = llvm.call @malloc(%[[VAL_5]]) : (i64) -> !llvm.ptr +// CHECK: %[[STACK:.*]] = llvm.alloca %[[VAL_3]] x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> +// CHECK: %[[VAL_8:.*]] = llvm.alloca %[[VAL_3]] x i32 {bindc_name = "i"} : (i32) -> !llvm.ptr +// CHECK: llvm.store %[[VAL_2]], %[[STACK]] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, 
!llvm.ptr +// CHECK: llvm.store %[[VAL_3]], %[[VAL_8]] : i32, !llvm.ptr +// CHECK: %[[VAL_9:.*]] = omp.map.info var_ptr(%[[VAL_8]] : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) +// CHECK-SAME: capture(ByCopy) -> !llvm.ptr {name = "i"} +// CHECK: %[[VAL_10:.*]] = llvm.getelementptr %[[STACK]][0, 7, 0, 0] : (!llvm.ptr) -> !llvm.ptr, +// CHECK-SAME: !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> +// CHECK: %[[VAL_11:.*]] = llvm.load %[[VAL_10]] : !llvm.ptr -> i64 +// CHECK: %[[VAL_12:.*]] = llvm.getelementptr %[[STACK]][0, 7, 0, 1] : (!llvm.ptr) -> +// CHECK-SAME: !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> +// CHECK: %[[VAL_13:.*]] = llvm.load %[[VAL_12]] : !llvm.ptr -> i64 +// CHECK: %[[VAL_14:.*]] = llvm.getelementptr %[[STACK]][0, 7, 0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> +// CHECK: %[[VAL_15:.*]] = llvm.load %[[VAL_14]] : !llvm.ptr -> i64 +// CHECK: %[[VAL_16:.*]] = llvm.sub %[[VAL_13]], %[[VAL_4]] : i64 +// CHECK: %[[VAL_17:.*]] = omp.map.bounds lower_bound(%[[VAL_4]] : i64) upper_bound(%[[VAL_16]] : i64) extent(%[[VAL_13]] : i64) stride(%[[VAL_15]] : i64) start_idx(%[[VAL_11]] : i64) {stride_in_bytes = true} +// CHECK: %[[VAL_18:.*]] = llvm.load %[[STACK]] : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> +// CHECK: llvm.store %[[VAL_18]], %[[HEAP]] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, !llvm.ptr +// CHECK: %[[VAL_19:.*]] = llvm.getelementptr %[[STACK]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> +// CHECK: %[[VAL_20:.*]] = llvm.sub %[[VAL_16]], %[[VAL_4]] : i64 +// CHECK: %[[VAL_21:.*]] = llvm.add %[[VAL_20]], %[[VAL_1]] : i64 +// CHECK: %[[VAL_22:.*]] = llvm.mul %[[VAL_1]], %[[VAL_21]] : i64 +// CHECK: %[[VAL_23:.*]] = llvm.mul %[[VAL_22]], %[[VAL_0]] : i64 +// CHECK: 
%[[NEW_DATA_PTR:.*]] = llvm.call @malloc(%[[VAL_23]]) : (i64) -> !llvm.ptr +// CHECK: %[[OLD_DATA_PTR:.*]] = llvm.load %[[VAL_19]] : !llvm.ptr -> !llvm.ptr +// CHECK: "llvm.intr.memcpy"(%[[NEW_DATA_PTR]], %[[OLD_DATA_PTR]], %[[VAL_23]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i64) -> () +// CHECK: %[[VAL_26:.*]] = llvm.getelementptr %[[HEAP]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> +// CHECK: llvm.store %[[NEW_DATA_PTR]], %[[VAL_26]] : !llvm.ptr, !llvm.ptr +// CHECK: %[[VAL_27:.*]] = omp.map.info var_ptr(%[[HEAP]] : !llvm.ptr, i32) map_clauses(to) capture(ByRef) +// CHECK-SAME: var_ptr_ptr(%[[VAL_26]] : !llvm.ptr) bounds(%[[VAL_17]]) -> !llvm.ptr {name = ""} +// CHECK: %[[VAL_28:.*]] = omp.map.info var_ptr(%[[HEAP]] : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) +// CHECK-SAME: map_clauses(always, to) capture(ByRef) members(%[[VAL_27]] : [0] : !llvm.ptr) -> !llvm.ptr +// CHECK: omp.target nowait map_entries(%[[VAL_9]] -> %[[VAL_29:.*]], %[[VAL_28]] -> %[[VAL_30:.*]], %[[VAL_27]] -> %[[VAL_31:.*]] : !llvm.ptr, !llvm.ptr, !llvm.ptr) +// CHECK-SAME: private(@firstprivatizer %[[HEAP]] -> %[[VAL_32:.*]] [map_idx=1] : !llvm.ptr) { +// CHECK: omp.terminator +// CHECK: } +// CHECK: %[[VAL_33:.*]] = llvm.getelementptr %[[STACK]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> +// CHECK: %[[VAL_34:.*]] = llvm.load %[[VAL_33]] : !llvm.ptr -> !llvm.ptr +// CHECK: llvm.call @free(%[[VAL_34]]) : (!llvm.ptr) -> () +// CHECK: llvm.return +// CHECK: } diff --git a/mlir/test/Target/LLVMIR/openmp-todo.mlir b/mlir/test/Target/LLVMIR/openmp-todo.mlir index 2fa4470bb8300..af6d254cfd3c3 100644 --- a/mlir/test/Target/LLVMIR/openmp-todo.mlir +++ b/mlir/test/Target/LLVMIR/openmp-todo.mlir @@ -249,24 +249,6 @@ llvm.func @target_is_device_ptr(%x : !llvm.ptr) { // ----- -omp.private {type = firstprivate} @x.privatizer : 
i32 copy { -^bb0(%mold: !llvm.ptr, %private: !llvm.ptr): - %0 = llvm.load %mold : !llvm.ptr -> i32 - llvm.store %0, %private : i32, !llvm.ptr - omp.yield(%private: !llvm.ptr) -} -llvm.func @target_firstprivate(%x : !llvm.ptr) { - %0 = omp.map.info var_ptr(%x : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr - // expected-error@below {{not yet implemented: Unhandled clause privatization for deferred target tasks in omp.target operation}} - // expected-error@below {{LLVM Translation failed for operation: omp.target}} - omp.target nowait map_entries(%0 -> %blockarg0 : !llvm.ptr) private(@x.privatizer %x -> %arg0 [map_idx=0] : !llvm.ptr) { - omp.terminator - } - llvm.return -} - -// ----- - llvm.func @target_enter_data_depend(%x: !llvm.ptr) { // expected-error@below {{not yet implemented: Unhandled clause depend in omp.target_enter_data operation}} // expected-error@below {{LLVM Translation failed for operation: omp.target_enter_data}}