diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp index 5f5e968eaaa641..91250fbaf42d6d 100644 --- a/flang/lib/Lower/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP.cpp @@ -109,6 +109,7 @@ class DataSharingProcessor { bool hasLastPrivateOp; mlir::OpBuilder::InsertPoint lastPrivIP; mlir::OpBuilder::InsertPoint insPt; + mlir::Value loopIV; // Symbols in private, firstprivate, and/or lastprivate clauses. llvm::SetVector privatizedSymbols; llvm::SetVector defaultSymbols; @@ -157,6 +158,11 @@ class DataSharingProcessor { // dealocation code as well. void processStep1(); void processStep2(mlir::Operation *op, bool isLoop); + + void setLoopIV(mlir::Value iv) { + assert(!loopIV && "Loop iteration variable already set"); + loopIV = iv; + } }; void DataSharingProcessor::processStep1() { @@ -270,7 +276,6 @@ void DataSharingProcessor ::insertBarrier() { } void DataSharingProcessor::insertLastPrivateCompare(mlir::Operation *op) { - mlir::arith::CmpIOp cmpOp; bool cmpCreated = false; mlir::OpBuilder::InsertPoint localInsPt = firOpBuilder.saveInsertionPoint(); for (const Fortran::parser::OmpClause &clause : opClauseList.v) { @@ -349,18 +354,17 @@ void DataSharingProcessor::insertLastPrivateCompare(mlir::Operation *op) { } } } else if (mlir::isa(op)) { - mlir::Operation *lastOper = op->getRegion(0).back().getTerminator(); - firOpBuilder.setInsertionPoint(lastOper); - // Update the original variable just before exiting the worksharing // loop. Conversion as follows: // // omp.wsloop { // omp.wsloop { ... // ... store - // store ===> %cmp = llvm.icmp "eq" %iv %ub - // omp.yield fir.if %cmp { - // } ^%lpv_update_blk: + // store ===> %v = arith.addi %iv, %step + // omp.yield %cmp = %step < 0 ? %v < %ub : %v > %ub + // } fir.if %cmp { + // fir.store %v to %loopIV + // ^%lpv_update_blk: // } // omp.yield // } @@ -368,15 +372,37 @@ void DataSharingProcessor::insertLastPrivateCompare(mlir::Operation *op) { // Only generate the compare once in presence of multiple LastPrivate // clauses. - if (!cmpCreated) { - cmpOp = firOpBuilder.create( - op->getLoc(), mlir::arith::CmpIPredicate::eq, - op->getRegion(0).front().getArguments()[0], - mlir::dyn_cast(op).getUpperBound()[0]); - } - auto ifOp = - firOpBuilder.create(op->getLoc(), cmpOp, /*else*/ false); + if (cmpCreated) + continue; + cmpCreated = true; + + mlir::Location loc = op->getLoc(); + mlir::Operation *lastOper = op->getRegion(0).back().getTerminator(); + firOpBuilder.setInsertionPoint(lastOper); + + mlir::Value iv = op->getRegion(0).front().getArguments()[0]; + mlir::Value ub = + mlir::dyn_cast(op).getUpperBound()[0]; + mlir::Value step = mlir::dyn_cast(op).getStep()[0]; + + // v = iv + step + // cmp = step < 0 ? v < ub : v > ub + mlir::Value v = firOpBuilder.create(loc, iv, step); + mlir::Value zero = + firOpBuilder.createIntegerConstant(loc, step.getType(), 0); + mlir::Value negativeStep = firOpBuilder.create( + loc, mlir::arith::CmpIPredicate::slt, step, zero); + mlir::Value vLT = firOpBuilder.create( + loc, mlir::arith::CmpIPredicate::slt, v, ub); + mlir::Value vGT = firOpBuilder.create( + loc, mlir::arith::CmpIPredicate::sgt, v, ub); + mlir::Value cmpOp = firOpBuilder.create( + loc, negativeStep, vLT, vGT); + + auto ifOp = firOpBuilder.create(loc, cmpOp, /*else*/ false); firOpBuilder.setInsertionPointToStart(&ifOp.getThenRegion().front()); + assert(loopIV && "loopIV was not set"); + firOpBuilder.create(op->getLoc(), v, loopIV); lastPrivIP = firOpBuilder.saveInsertionPoint(); } else { TODO(converter.getCurrentLocation(), @@ -2128,6 +2154,8 @@ static void createBodyOfOp( proc.processStep1(); proc.processStep2(op, is_loop); } else { + if (is_loop && args.size() > 0) + dsp->setLoopIV(converter.getSymbolAddress(*args[0])); dsp->processStep2(op, is_loop); } diff --git a/flang/test/Lower/OpenMP/FIR/lastprivate-commonblock.f90 b/flang/test/Lower/OpenMP/FIR/lastprivate-commonblock.f90 index 06f3e1ca82234e..7d2118305fb4c4 100644 --- a/flang/test/Lower/OpenMP/FIR/lastprivate-commonblock.f90 +++ b/flang/test/Lower/OpenMP/FIR/lastprivate-commonblock.f90 @@ -19,12 +19,18 @@ !CHECK: %[[val_c1_i32_0:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop for (%[[arg:.*]]) : i32 = (%[[val_c1_i32]]) to (%[[val_c100_i32]]) inclusive step (%[[val_c1_i32_0]]) { !CHECK: fir.store %[[arg]] to %[[val_0]] : !fir.ref -!CHECK: %[[val_11:.*]] = arith.cmpi eq, %[[arg]], %[[val_c100_i32]] : i32 -!CHECK: fir.if %[[val_11]] { -!CHECK: %[[val_12:.*]] = fir.load %[[val_9]] : !fir.ref -!CHECK: fir.store %[[val_12]] to %[[val_5]] : !fir.ref -!CHECK: %[[val_13:.*]] = fir.load %[[val_10]] : !fir.ref -!CHECK: fir.store %[[val_13]] to %[[val_8]] : !fir.ref +!CHECK: %[[val_11:.*]] = arith.addi %[[arg]], %[[val_c1_i32_0]] : i32 +!CHECK: %[[val_c0_i32:.*]] = arith.constant 0 : i32 +!CHECK: %[[val_12:.*]] = arith.cmpi slt, %[[val_c1_i32_0]], %[[val_c0_i32]] : i32 +!CHECK: %[[val_13:.*]] = arith.cmpi slt, %[[val_11]], %[[val_c100_i32]] : i32 +!CHECK: %[[val_14:.*]] = arith.cmpi sgt, %[[val_11]], %[[val_c100_i32]] : i32 +!CHECK: %[[val_15:.*]] = arith.select %[[val_12]], %[[val_13]], %[[val_14]] : i1 +!CHECK: fir.if %[[val_15]] { +!CHECK: fir.store %[[val_11]] to %[[val_0]] : !fir.ref +!CHECK: %[[val_16:.*]] = fir.load %[[val_9]] : !fir.ref +!CHECK: fir.store %[[val_16]] to %[[val_5]] : !fir.ref +!CHECK: %[[val_17:.*]] = fir.load %[[val_10]] : !fir.ref +!CHECK: fir.store %[[val_17]] to %[[val_8]] : !fir.ref !CHECK: } !CHECK: omp.yield !CHECK: } diff --git a/flang/test/Lower/OpenMP/FIR/parallel-lastprivate-clause-scalar.f90 b/flang/test/Lower/OpenMP/FIR/parallel-lastprivate-clause-scalar.f90 index aa95c0712fbb88..67e24088c53386 100644 --- a/flang/test/Lower/OpenMP/FIR/parallel-lastprivate-clause-scalar.f90 +++ b/flang/test/Lower/OpenMP/FIR/parallel-lastprivate-clause-scalar.f90 @@ -1,4 +1,4 @@ -! This test checks lowering of `FIRSTPRIVATE` clause for scalar types. +! This test checks lowering of `LASTPRIVATE` clause for scalar types. ! RUN: bbc -fopenmp -emit-fir %s -o - | FileCheck %s ! RUN: flang-new -fc1 -fopenmp -emit-fir %s -o - | FileCheck %s @@ -24,8 +24,14 @@ !CHECK-NEXT: %[[CALL_END_IO:.*]] = fir.call @_FortranAioEndIoStatement(%[[CALL_BEGIN_IO]]) ! Testing last iteration check -!CHECK-NEXT: %[[IV_CMP:.*]] = arith.cmpi eq, %[[INDX_WS]] +!CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 +!CHECK: %[[C0:.*]] = arith.constant 0 : i32 +!CHECK: %[[T1:.*]] = arith.cmpi slt, %{{.*}}, %[[C0]] : i32 +!CHECK: %[[T2:.*]] = arith.cmpi slt, %[[V]], %{{.*}} : i32 +!CHECK: %[[T3:.*]] = arith.cmpi sgt, %[[V]], %{{.*}} : i32 +!CHECK: %[[IV_CMP:.*]] = arith.select %[[T1]], %[[T2]], %[[T3]] : i1 !CHECK: fir.if %[[IV_CMP]] { +!CHECK: fir.store %[[V]] to %{{.*}} : !fir.ref ! Testing lastprivate val update !CHECK-DAG: %[[CVT:.*]] = fir.convert %[[ARG1_REF]] : (!fir.ref>) -> !fir.ref @@ -52,8 +58,14 @@ subroutine lastprivate_character(arg1) !CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { ! Testing last iteration check -!CHECK-DAG: %[[IV_CMP:.*]] = arith.cmpi eq, %[[INDX_WS]] -!CHECK-DAG: fir.if %[[IV_CMP]] { +!CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 +!CHECK: %[[C0:.*]] = arith.constant 0 : i32 +!CHECK: %[[T1:.*]] = arith.cmpi slt, %{{.*}}, %[[C0]] : i32 +!CHECK: %[[T2:.*]] = arith.cmpi slt, %[[V]], %{{.*}} : i32 +!CHECK: %[[T3:.*]] = arith.cmpi sgt, %[[V]], %{{.*}} : i32 +!CHECK: %[[IV_CMP:.*]] = arith.select %[[T1]], %[[T2]], %[[T3]] : i1 +!CHECK: fir.if %[[IV_CMP]] { +!CHECK: fir.store %[[V]] to %{{.*}} : !fir.ref ! Testing lastprivate val update !CHECK-NEXT: %[[CLONE_LD:.*]] = fir.load %[[CLONE]] : !fir.ref @@ -81,8 +93,14 @@ subroutine lastprivate_int(arg1) !CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { ! Testing last iteration check -!CHECK: %[[IV_CMP1:.*]] = arith.cmpi eq, %[[INDX_WS]] -!CHECK-NEXT: fir.if %[[IV_CMP1]] { +!CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 +!CHECK: %[[C0:.*]] = arith.constant 0 : i32 +!CHECK: %[[T1:.*]] = arith.cmpi slt, %{{.*}}, %[[C0]] : i32 +!CHECK: %[[T2:.*]] = arith.cmpi slt, %[[V]], %{{.*}} : i32 +!CHECK: %[[T3:.*]] = arith.cmpi sgt, %[[V]], %{{.*}} : i32 +!CHECK: %[[IV_CMP:.*]] = arith.select %[[T1]], %[[T2]], %[[T3]] : i1 +!CHECK: fir.if %[[IV_CMP]] { +!CHECK: fir.store %[[V]] to %{{.*}} : !fir.ref ! Testing lastprivate val update !CHECK-DAG: %[[CLONE_LD1:.*]] = fir.load %[[CLONE1]] : !fir.ref !CHECK-DAG: fir.store %[[CLONE_LD1]] to %[[ARG1]] : !fir.ref @@ -112,8 +130,14 @@ subroutine mult_lastprivate_int(arg1, arg2) !CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { !Testing last iteration check -!CHECK: %[[IV_CMP1:.*]] = arith.cmpi eq, %[[INDX_WS]] -!CHECK-NEXT: fir.if %[[IV_CMP1]] { +!CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 +!CHECK: %[[C0:.*]] = arith.constant 0 : i32 +!CHECK: %[[T1:.*]] = arith.cmpi slt, %{{.*}}, %[[C0]] : i32 +!CHECK: %[[T2:.*]] = arith.cmpi slt, %[[V]], %{{.*}} : i32 +!CHECK: %[[T3:.*]] = arith.cmpi sgt, %[[V]], %{{.*}} : i32 +!CHECK: %[[IV_CMP:.*]] = arith.select %[[T1]], %[[T2]], %[[T3]] : i1 +!CHECK: fir.if %[[IV_CMP]] { +!CHECK: fir.store %[[V]] to %{{.*}} : !fir.ref !Testing lastprivate val update !CHECK-DAG: %[[CLONE_LD2:.*]] = fir.load %[[CLONE2]] : !fir.ref !CHECK-DAG: fir.store %[[CLONE_LD2]] to %[[ARG2]] : !fir.ref @@ -148,8 +172,14 @@ subroutine mult_lastprivate_int2(arg1, arg2) !CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { ! Testing last iteration check -!CHECK: %[[IV_CMP1:.*]] = arith.cmpi eq, %[[INDX_WS]] -!CHECK-NEXT: fir.if %[[IV_CMP1]] { +!CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 +!CHECK: %[[C0:.*]] = arith.constant 0 : i32 +!CHECK: %[[T1:.*]] = arith.cmpi slt, %{{.*}}, %[[C0]] : i32 +!CHECK: %[[T2:.*]] = arith.cmpi slt, %[[V]], %{{.*}} : i32 +!CHECK: %[[T3:.*]] = arith.cmpi sgt, %[[V]], %{{.*}} : i32 +!CHECK: %[[IV_CMP:.*]] = arith.select %[[T1]], %[[T2]], %[[T3]] : i1 +!CHECK: fir.if %[[IV_CMP]] { +!CHECK: fir.store %[[V]] to %{{.*}} : !fir.ref ! Testing lastprivate val update !CHECK-NEXT: %[[CLONE_LD:.*]] = fir.load %[[CLONE2]] : !fir.ref !CHECK-NEXT: fir.store %[[CLONE_LD]] to %[[ARG2]] : !fir.ref @@ -179,8 +209,14 @@ subroutine firstpriv_lastpriv_int(arg1, arg2) !CHECK-NEXT: omp.barrier !CHECK: omp.wsloop for (%[[INDX_WS:.*]]) : {{.*}} { ! Testing last iteration check -!CHECK: %[[IV_CMP1:.*]] = arith.cmpi eq, %[[INDX_WS]] -!CHECK-NEXT: fir.if %[[IV_CMP1]] { +!CHECK: %[[V:.*]] = arith.addi %[[INDX_WS]], %{{.*}} : i32 +!CHECK: %[[C0:.*]] = arith.constant 0 : i32 +!CHECK: %[[T1:.*]] = arith.cmpi slt, %{{.*}}, %[[C0]] : i32 +!CHECK: %[[T2:.*]] = arith.cmpi slt, %[[V]], %{{.*}} : i32 +!CHECK: %[[T3:.*]] = arith.cmpi sgt, %[[V]], %{{.*}} : i32 +!CHECK: %[[IV_CMP:.*]] = arith.select %[[T1]], %[[T2]], %[[T3]] : i1 +!CHECK: fir.if %[[IV_CMP]] { +!CHECK: fir.store %[[V]] to %{{.*}} : !fir.ref ! Testing lastprivate val update !CHECK-NEXT: %[[CLONE_LD:.*]] = fir.load %[[CLONE1]] : !fir.ref !CHECK-NEXT: fir.store %[[CLONE_LD]] to %[[ARG1]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/lastprivate-commonblock.f90 b/flang/test/Lower/OpenMP/lastprivate-commonblock.f90 index 06fa0a12107763..a11bdee156637b 100644 --- a/flang/test/Lower/OpenMP/lastprivate-commonblock.f90 +++ b/flang/test/Lower/OpenMP/lastprivate-commonblock.f90 @@ -16,8 +16,14 @@ !CHECK: %[[PRIVATE_Y_REF:.*]] = fir.alloca f32 {bindc_name = "y", pinned, uniq_name = "_QFlastprivate_commonEy"} !CHECK: %[[PRIVATE_Y_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_Y_REF]] {uniq_name = "_QFlastprivate_commonEy"} : (!fir.ref) -> (!fir.ref, !fir.ref) !CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%{{.*}}) to (%{{.*}}) inclusive step (%{{.*}}) { -!CHECK: %[[LAST_ITER:.*]] = arith.cmpi eq, %[[I]], %{{.*}} : i32 +!CHECK: %[[V:.*]] = arith.addi %[[I]], %{{.*}} : i32 +!CHECK: %[[C0:.*]] = arith.constant 0 : i32 +!CHECK: %[[NEG_STEP:.*]] = arith.cmpi slt, %{{.*}}, %[[C0]] : i32 +!CHECK: %[[V_LT:.*]] = arith.cmpi slt, %[[V]], %{{.*}} : i32 +!CHECK: %[[V_GT:.*]] = arith.cmpi sgt, %[[V]], %{{.*}} : i32 +!CHECK: %[[LAST_ITER:.*]] = arith.select %[[NEG_STEP]], %[[V_LT]], %[[V_GT]] : i1 !CHECK: fir.if %[[LAST_ITER]] { +!CHECK: fir.store %[[V]] to %{{.*}} : !fir.ref !CHECK: %[[PRIVATE_X_VAL:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref !CHECK: hlfir.assign %[[PRIVATE_X_VAL]] to %[[X_DECL]]#0 temporary_lhs : f32, !fir.ref !CHECK: %[[PRIVATE_Y_VAL:.*]] = fir.load %[[PRIVATE_Y_DECL]]#0 : !fir.ref diff --git a/flang/test/Lower/OpenMP/lastprivate-iv.f90 b/flang/test/Lower/OpenMP/lastprivate-iv.f90 new file mode 100644 index 00000000000000..70fe500129d128 --- /dev/null +++ b/flang/test/Lower/OpenMP/lastprivate-iv.f90 @@ -0,0 +1,66 @@ +! Test LASTPRIVATE with iteration variable. +! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s + +!CHECK-LABEL: func @_QPlastprivate_iv_inc +!CHECK: %[[I_MEM:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +!CHECK: %[[I:.*]]:2 = hlfir.declare %[[I_MEM]] {uniq_name = "_QFlastprivate_iv_incEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %[[I2_MEM:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFlastprivate_iv_incEi"} +!CHECK: %[[I2:.*]]:2 = hlfir.declare %[[I2_MEM]] {uniq_name = "_QFlastprivate_iv_incEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %[[LB:.*]] = arith.constant 4 : i32 +!CHECK: %[[UB:.*]] = arith.constant 10 : i32 +!CHECK: %[[STEP:.*]] = arith.constant 3 : i32 +!CHECK: omp.wsloop for (%[[IV:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { +!CHECK: fir.store %[[IV]] to %[[I]]#1 : !fir.ref +!CHECK: %[[V:.*]] = arith.addi %[[IV]], %[[STEP]] : i32 +!CHECK: %[[C0:.*]] = arith.constant 0 : i32 +!CHECK: %[[STEP_NEG:.*]] = arith.cmpi slt, %[[STEP]], %[[C0]] : i32 +!CHECK: %[[V_LT:.*]] = arith.cmpi slt, %[[V]], %[[UB]] : i32 +!CHECK: %[[V_GT:.*]] = arith.cmpi sgt, %[[V]], %[[UB]] : i32 +!CHECK: %[[CMP:.*]] = arith.select %[[STEP_NEG]], %[[V_LT]], %[[V_GT]] : i1 +!CHECK: fir.if %[[CMP]] { +!CHECK: fir.store %[[V]] to %[[I]]#1 : !fir.ref +!CHECK: %[[I_VAL:.*]] = fir.load %[[I]]#0 : !fir.ref +!CHECK: hlfir.assign %[[I_VAL]] to %[[I2]]#0 temporary_lhs : i32, !fir.ref +!CHECK: } +!CHECK: omp.yield +!CHECK: } +subroutine lastprivate_iv_inc() + integer :: i + + !$omp do lastprivate(i) + do i = 4, 10, 3 + end do + !$omp end do +end subroutine + +!CHECK-LABEL: func @_QPlastprivate_iv_dec +!CHECK: %[[I_MEM:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +!CHECK: %[[I:.*]]:2 = hlfir.declare %[[I_MEM]] {uniq_name = "_QFlastprivate_iv_decEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %[[I2_MEM:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFlastprivate_iv_decEi"} +!CHECK: %[[I2:.*]]:2 = hlfir.declare %[[I2_MEM]] {uniq_name = "_QFlastprivate_iv_decEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +!CHECK: %[[LB:.*]] = arith.constant 10 : i32 +!CHECK: %[[UB:.*]] = arith.constant 1 : i32 +!CHECK: %[[STEP:.*]] = arith.constant -3 : i32 +!CHECK: omp.wsloop for (%[[IV:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { +!CHECK: fir.store %[[IV]] to %[[I]]#1 : !fir.ref +!CHECK: %[[V:.*]] = arith.addi %[[IV]], %[[STEP]] : i32 +!CHECK: %[[C0:.*]] = arith.constant 0 : i32 +!CHECK: %[[STEP_NEG:.*]] = arith.cmpi slt, %[[STEP]], %[[C0]] : i32 +!CHECK: %[[V_LT:.*]] = arith.cmpi slt, %[[V]], %[[UB]] : i32 +!CHECK: %[[V_GT:.*]] = arith.cmpi sgt, %[[V]], %[[UB]] : i32 +!CHECK: %[[CMP:.*]] = arith.select %[[STEP_NEG]], %[[V_LT]], %[[V_GT]] : i1 +!CHECK: fir.if %[[CMP]] { +!CHECK: fir.store %[[V]] to %[[I]]#1 : !fir.ref +!CHECK: %[[I_VAL:.*]] = fir.load %[[I]]#0 : !fir.ref +!CHECK: hlfir.assign %[[I_VAL]] to %[[I2]]#0 temporary_lhs : i32, !fir.ref +!CHECK: } +!CHECK: omp.yield +!CHECK: } +subroutine lastprivate_iv_dec() + integer :: i + + !$omp do lastprivate(i) + do i = 10, 1, -3 + end do + !$omp end do +end subroutine