[flang][openacc] Support array reduction for max in lowering

Add lowering support for array reduction with the max operator. Simplify the generation of the init value. Depends on D153661. Reviewed By: jeanPerier. Differential Revision: https://reviews.llvm.org/D153663
llvm · Jun 26, 2023 · 80ea480 · 80ea480
1 parent 4afa2ab
commit 80ea480
Show file tree

Hide file tree

Showing 2 changed files with 117 additions and 64 deletions.
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
@@ -550,6 +550,18 @@ static R getReductionInitValue(mlir::acc::ReductionOperator op, mlir::Type ty) {
       return llvm::APFloat::getLargest(floatTy.getFloatSemantics(),
                                        /*negative=*/false);
     }
+  } else if (op == mlir::acc::ReductionOperator::AccMax) {
+    // max init value -> smallest
+    if constexpr (std::is_same_v<R, llvm::APInt>) {
+      assert(ty.isIntOrIndex() && "expect integer or index type");
+      return llvm::APInt::getSignedMinValue(ty.getIntOrFloatBitWidth());
+    }
+    if constexpr (std::is_same_v<R, llvm::APFloat>) {
+      auto floatTy = mlir::dyn_cast_or_null<mlir::FloatType>(ty);
+      assert(floatTy && "expect float type");
+      return llvm::APFloat::getSmallest(floatTy.getFloatSemantics(),
+                                        /*negative=*/true);
+    }
   } else {
     // +, ior, ieor init value -> 0
     // * init value -> 1
@@ -580,78 +592,40 @@ static mlir::Value genReductionInitValue(fir::FirOpBuilder &builder,
       op != mlir::acc::ReductionOperator::AccMax)
     TODO(loc, "reduction operator");
 
-  // min -> largest
-  if (op == mlir::acc::ReductionOperator::AccMin) {
-    if (ty.isIntOrIndex())
-      return builder.create<mlir::arith::ConstantOp>(
-          loc, ty,
-          builder.getIntegerAttr(ty,
-                                 getReductionInitValue<llvm::APInt>(op, ty)));
+  if (ty.isIntOrIndex())
+    return builder.create<mlir::arith::ConstantOp>(
+        loc, ty,
+        builder.getIntegerAttr(ty, getReductionInitValue<llvm::APInt>(op, ty)));
+  if (op == mlir::acc::ReductionOperator::AccMin ||
+      op == mlir::acc::ReductionOperator::AccMax) {
     if (auto floatTy = mlir::dyn_cast_or_null<mlir::FloatType>(ty))
       return builder.create<mlir::arith::ConstantOp>(
           loc, ty,
           builder.getFloatAttr(ty,
                                getReductionInitValue<llvm::APFloat>(op, ty)));
-    if (auto refTy = mlir::dyn_cast<fir::ReferenceType>(ty)) {
-      if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(refTy.getEleTy())) {
-        mlir::Type vecTy =
-            mlir::VectorType::get(seqTy.getShape(), seqTy.getEleTy());
-        auto shTy = vecTy.cast<mlir::ShapedType>();
-        if (seqTy.getEleTy().isIntOrIndex())
-          return builder.create<mlir::arith::ConstantOp>(
-              loc, vecTy,
-              mlir::DenseElementsAttr::get(
-                  shTy,
-                  getReductionInitValue<llvm::APInt>(op, seqTy.getEleTy())));
-        if (mlir::isa<mlir::FloatType>(seqTy.getEleTy()))
-          return builder.create<mlir::arith::ConstantOp>(
-              loc, vecTy,
-              mlir::DenseElementsAttr::get(
-                  shTy,
-                  getReductionInitValue<llvm::APFloat>(op, seqTy.getEleTy())));
-      }
-    }
-    // max -> least
-  } else if (op == mlir::acc::ReductionOperator::AccMax) {
-    if (ty.isIntOrIndex())
-      return builder.create<mlir::arith::ConstantOp>(
-          loc, ty,
-          builder.getIntegerAttr(
-              ty, llvm::APInt::getSignedMinValue(ty.getIntOrFloatBitWidth())
-                      .getSExtValue()));
-    if (auto floatTy = mlir::dyn_cast_or_null<mlir::FloatType>(ty))
-      return builder.create<mlir::arith::ConstantOp>(
-          loc, ty,
-          builder.getFloatAttr(
-              ty, llvm::APFloat::getSmallest(floatTy.getFloatSemantics(),
-                                             /*negative=*/true)));
   } else {
-    if (ty.isIntOrIndex())
-      return builder.create<mlir::arith::ConstantOp>(
-          loc, ty,
-          builder.getIntegerAttr(ty, getReductionInitValue<int64_t>(op, ty)));
-    if (mlir::isa<mlir::FloatType>(ty))
+    if (auto floatTy = mlir::dyn_cast_or_null<mlir::FloatType>(ty))
       return builder.create<mlir::arith::ConstantOp>(
           loc, ty,
           builder.getFloatAttr(ty, getReductionInitValue<int64_t>(op, ty)));
-    if (auto refTy = mlir::dyn_cast<fir::ReferenceType>(ty)) {
-      if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(refTy.getEleTy())) {
-        mlir::Type vecTy =
-            mlir::VectorType::get(seqTy.getShape(), seqTy.getEleTy());
-        auto shTy = vecTy.cast<mlir::ShapedType>();
-        if (seqTy.getEleTy().isIntOrIndex())
-          return builder.create<mlir::arith::ConstantOp>(
-              loc, vecTy,
-              mlir::DenseElementsAttr::get(
-                  shTy,
-                  getReductionInitValue<llvm::APInt>(op, seqTy.getEleTy())));
-        if (mlir::isa<mlir::FloatType>(seqTy.getEleTy()))
-          return builder.create<mlir::arith::ConstantOp>(
-              loc, vecTy,
-              mlir::DenseElementsAttr::get(
-                  shTy,
-                  getReductionInitValue<llvm::APFloat>(op, seqTy.getEleTy())));
-      }
+  }
+  if (auto refTy = mlir::dyn_cast<fir::ReferenceType>(ty)) {
+    if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(refTy.getEleTy())) {
+      mlir::Type vecTy =
+          mlir::VectorType::get(seqTy.getShape(), seqTy.getEleTy());
+      auto shTy = vecTy.cast<mlir::ShapedType>();
+      if (seqTy.getEleTy().isIntOrIndex())
+        return builder.create<mlir::arith::ConstantOp>(
+            loc, vecTy,
+            mlir::DenseElementsAttr::get(
+                shTy,
+                getReductionInitValue<llvm::APInt>(op, seqTy.getEleTy())));
+      if (mlir::isa<mlir::FloatType>(seqTy.getEleTy()))
+        return builder.create<mlir::arith::ConstantOp>(
+            loc, vecTy,
+            mlir::DenseElementsAttr::get(
+                shTy,
+                getReductionInitValue<llvm::APFloat>(op, seqTy.getEleTy())));
     }
   }
 

diff --git a/flang/test/Lower/OpenACC/acc-reduction.f90 b/flang/test/Lower/OpenACC/acc-reduction.f90
@@ -2,6 +2,27 @@
 
 ! RUN: bbc -fopenacc -emit-fir %s -o - | FileCheck %s
 
+! CHECK-LABEL: acc.reduction.recipe @reduction_max_ref_100xf32 : !fir.ref<!fir.array<100xf32>> reduction_operator <max> init {
+! CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.array<100xf32>>):
+! CHECK:   %[[CST:.*]] = arith.constant dense<-1.401300e-45> : vector<100xf32>
+! CHECK:   acc.yield %[[CST]] : vector<100xf32>
+! CHECK: } combiner {
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.array<100xf32>>, %[[ARG1:.*]]: !fir.ref<!fir.array<100xf32>>):
+! CHECK:   %[[LB0:.*]] = arith.constant 0 : index
+! CHECK:   %[[UB0:.*]] = arith.constant 99 : index
+! CHECK:   %[[STEP0:.*]] = arith.constant 1 : index
+! CHECK:   fir.do_loop %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] {
+! CHECK:     %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV0]] : (!fir.ref<!fir.array<100xf32>>, index) -> !fir.ref<f32>
+! CHECK:     %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV0]] : (!fir.ref<!fir.array<100xf32>>, index) -> !fir.ref<f32>
+! CHECK:     %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<f32>
+! CHECK:     %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<f32>
+! CHECK:     %[[CMP:.*]] = arith.cmpf ogt, %[[LOAD1]], %[[LOAD2]] : f32
+! CHECK:     %[[SELECT:.*]] = arith.select %[[CMP]], %[[LOAD1]], %[[LOAD2]] : f32
+! CHECK:     fir.store %[[SELECT]] to %[[COORD1]] : !fir.ref<f32>
+! CHECK:   }
+! CHECK:   acc.yield %[[ARG0]] : !fir.ref<!fir.array<100xf32>>
+! CHECK: }
+
 ! CHECK-LABEL: acc.reduction.recipe @reduction_max_f32 : f32 reduction_operator <max> init {
 ! CHECK: ^bb0(%{{.*}}: f32):
 ! CHECK:   %[[INIT:.*]] = arith.constant -1.401300e-45 : f32
@@ -13,6 +34,32 @@
 ! CHECK:   acc.yield %[[SELECT]] : f32
 ! CHECK: }
 
+! CHECK-LABEL: acc.reduction.recipe @reduction_max_ref_100x10xi32 : !fir.ref<!fir.array<100x10xi32>> reduction_operator <max> init {
+! CHECK: ^bb0(%arg0: !fir.ref<!fir.array<100x10xi32>>):
+! CHECK:   %[[CST:.*]] = arith.constant dense<-2147483648> : vector<100x10xi32>
+! CHECK:   acc.yield %[[CST]] : vector<100x10xi32>
+! CHECK: } combiner {
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.array<100x10xi32>>, %[[ARG1:.*]]: !fir.ref<!fir.array<100x10xi32>>):
+! CHECK:   %[[LB0:.*]] = arith.constant 0 : index
+! CHECK:   %[[UB0:.*]] = arith.constant 9 : index
+! CHECK:   %[[STEP0:.*]] = arith.constant 1 : index
+! CHECK:   fir.do_loop %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] {
+! CHECK:     %[[LB1:.*]] = arith.constant 0 : index
+! CHECK:     %[[UB1:.*]] = arith.constant 99 : index
+! CHECK:     %[[STEP1:.*]] = arith.constant 1 : index
+! CHECK:     fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
+! CHECK:       %[[COORD1:.*]] = fir.coordinate_of %[[ARG0:.*]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK:       %[[COORD2:.*]] = fir.coordinate_of %[[ARG1:.*]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK:       %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<i32>
+! CHECK:       %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<i32>
+! CHECK:       %[[CMP:.*]] = arith.cmpi sgt, %[[LOAD1]], %[[LOAD2]] : i32
+! CHECK:       %[[SELECT:.*]] = arith.select %[[CMP]], %[[LOAD1]], %[[LOAD2]] : i32
+! CHECK:       fir.store %[[SELECT]] to %[[COORD1]] : !fir.ref<i32>
+! CHECK:     }
+! CHECK:   }
+! CHECK:   acc.yield %[[ARG0]] : !fir.ref<!fir.array<100x10xi32>>
+! CHECK: }
+
 ! CHECK-LABEL: acc.reduction.recipe @reduction_max_i32 : i32 reduction_operator <max> init {
 ! CHECK: ^bb0(%arg0: i32):
 ! CHECK:   %[[INIT:.*]] = arith.constant -2147483648 : i32
@@ -436,7 +483,6 @@ subroutine acc_reduction_min_int_array_1d(a, b)
 ! CHECK: %[[RED_ARG1:.*]] = acc.reduction varPtr(%[[ARG1]] : !fir.ref<!fir.array<100xi32>>) bounds(%2) -> !fir.ref<!fir.array<100xi32>> {name = "b"} 
 ! CHECK: acc.loop reduction(@reduction_min_ref_100xi32 -> %[[RED_ARG1]] : !fir.ref<!fir.array<100xi32>>)
 
-
 subroutine acc_reduction_min_float(a, b)
   real :: a(100), b
   integer :: i
@@ -485,6 +531,23 @@ subroutine acc_reduction_max_int(a, b)
 ! CHECL:       %[[RED_B:.*]] = acc.reduction varPtr(%[[B]] : !fir.ref<i32>) -> !fir.ref<i32> {name = "b"} 
 ! CHECK:       acc.loop reduction(@reduction_max_i32 -> %[[RED_B]] : !fir.ref<i32>)
 
+subroutine acc_reduction_max_int_array2d(a, b)
+  integer :: a(100, 10), b(100, 10)
+  integer :: i, j
+
+  !$acc loop reduction(max:b) collapse(2)
+  do i = 1, 100
+    do j = 1, 10
+      b(i, j) = max(b(i, j), a(i, j))
+    end do
+  end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPacc_reduction_max_int_array2d(
+! CHECK-SAME: %{{.*}}: !fir.ref<!fir.array<100x10xi32>> {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.ref<!fir.array<100x10xi32>> {fir.bindc_name = "b"})
+! CHECK: %[[RED_ARG1:.*]] = acc.reduction varPtr(%[[ARG1]] : !fir.ref<!fir.array<100x10xi32>>) bounds(%{{.*}}, %{{.*}}) -> !fir.ref<!fir.array<100x10xi32>> {name = "b"} 
+! CHECK: acc.loop reduction(@reduction_max_ref_100x10xi32 -> %[[RED_ARG1]] : !fir.ref<!fir.array<100x10xi32>>)
+
 subroutine acc_reduction_max_float(a, b)
   real :: a(100), b
   integer :: i
@@ -499,3 +562,19 @@ subroutine acc_reduction_max_float(a, b)
 ! CHECK-SAME:  %{{.*}}: !fir.ref<!fir.array<100xf32>> {fir.bindc_name = "a"}, %[[B:.*]]: !fir.ref<f32> {fir.bindc_name = "b"})
 ! CHECK:       %[[RED_B:.*]] = acc.reduction varPtr(%[[B]] : !fir.ref<f32>) -> !fir.ref<f32> {name = "b"} 
 ! CHECK:       acc.loop reduction(@reduction_max_f32 -> %[[RED_B]] : !fir.ref<f32>)
+
+subroutine acc_reduction_max_float_array1d(a, b)
+  real :: a(100), b(100)
+  integer :: i
+
+  !$acc loop reduction(max:b)
+  do i = 1, 100
+    b(i) = max(b(i), a(i))
+  end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPacc_reduction_max_float_array1d(
+! CHECK-SAME:  %{{.*}}: !fir.ref<!fir.array<100xf32>> {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.ref<!fir.array<100xf32>> {fir.bindc_name = "b"})
+! CHECK:       %[[RED_ARG1:.*]] = acc.reduction varPtr(%[[ARG1]] : !fir.ref<!fir.array<100xf32>>) bounds(%{{.*}}) -> !fir.ref<!fir.array<100xf32>> {name = "b"} 
+! CHECK:       acc.loop reduction(@reduction_max_ref_100xf32 -> %[[RED_ARG1]] : !fir.ref<!fir.array<100xf32>>) {
+