[FLANG] Change loop versioning to use shift instead of divide

Despite me being convinced that the use of divide didn't produce any divide instructions, it does in fact add more instructions than using a plain shift operation. This patch simply changes the divide to a shift right, with an assert to check that the "divisor" is a power of two. Reviewed By: kiranchandramohan, tblah Differential Revision: https://reviews.llvm.org/D151880
llvm · Jun 1, 2023 · b812932 · b812932
1 parent 8dc1395
commit b812932
Show file tree

Hide file tree

Showing 2 changed files with 11 additions and 7 deletions.
diff --git a/flang/lib/Optimizer/Transforms/LoopVersioning.cpp b/flang/lib/Optimizer/Transforms/LoopVersioning.cpp
@@ -262,15 +262,19 @@ void LoopVersioningPass::runOnOperation() {
                                             loc, curIndex, totalIndex)
                                       : curIndex;
           }
-          mlir::Value elemSize =
-              builder.createIntegerConstant(loc, idxTy, arg.size);
           // This is the lowest dimension - which doesn't need scaling
           mlir::Value finalIndex =
               builder.createConvert(loc, idxTy, coop->getOperand(1));
           if (totalIndex) {
+            assert(llvm::isPowerOf2_32(arg.size) &&
+                   "Expected power of two here");
+            unsigned bits = llvm::Log2_32(arg.size);
+            mlir::Value elemShift =
+                builder.createIntegerConstant(loc, idxTy, bits);
             totalIndex = builder.create<mlir::arith::AddIOp>(
                 loc,
-                builder.create<mlir::arith::DivSIOp>(loc, totalIndex, elemSize),
+                builder.create<mlir::arith::ShRSIOp>(loc, totalIndex,
+                                                     elemShift),
                 finalIndex);
           } else {
             totalIndex = finalIndex;

diff --git a/flang/test/Transforms/loop-versioning.fir b/flang/test/Transforms/loop-versioning.fir
@@ -366,9 +366,9 @@ func.func @sum1dfixed(%arg0: !fir.ref<!fir.array<?xf64>> {fir.bindc_name = "a"},
 // Check the 2D -> 1D coordinate conversion, should have a multiply and a final add.
 // Some other operations are checked to synch the different parts.
 // CHECK: %[[OUTER_IDX:.*]] = arith.muli %[[DIMS1]]#2, {{.*}}
-// CHECK: %[[ITEMSIZE:.*]] = arith.constant 8 : index
 // CHECK: %[[INNER_IDX:.*]] = fir.convert {{.*}}
-// CHECK: %[[OUTER_DIV:.*]] = arith.divsi %[[OUTER_IDX]], %[[ITEMSIZE]]
+// CHECK: %[[ITEMSHIFT:.*]] = arith.constant 3 : index
+// CHECK: %[[OUTER_DIV:.*]] = arith.shrsi %[[OUTER_IDX]], %[[ITEMSHIFT]]
 // CHECK: %[[C2D:.*]] = arith.addi %[[OUTER_DIV]], %[[INNER_IDX]]
 // CHECK: %[[COORD:.*]] = fir.coordinate_of %[[BOXADDR]], %[[C2D]] : (!fir.ref<!fir.array<?xf64>>, index) -> !fir.ref<f64>
 // CHECK: %{{.*}} = fir.load %[[COORD]] : !fir.ref<f64>
@@ -498,9 +498,9 @@ func.func @sum1dfixed(%arg0: !fir.ref<!fir.array<?xf64>> {fir.bindc_name = "a"},
 // CHECK: %[[OUTER_IDX:.*]] = arith.muli %[[DIMS2]]#2, {{.*}}
 // CHECK: %[[MIDDLE_IDX:.*]] = arith.muli %[[DIMS1]]#2, {{.*}}
 // CHECK: %[[MIDDLE_SUM:.*]] = arith.addi %[[MIDDLE_IDX]], %[[OUTER_IDX]]
-// CHECK: %[[ITEMSIZE:.*]] = arith.constant 8 : index
 // CHECK: %[[INNER_IDX:.*]] = fir.convert {{.*}}
-// CHECK: %[[MIDDLE_DIV:.*]] = arith.divsi %[[MIDDLE_SUM]], %[[ITEMSIZE]]
+// CHECK: %[[ITEMSHIFT:.*]] = arith.constant 3 : index
+// CHECK: %[[MIDDLE_DIV:.*]] = arith.shrsi %[[MIDDLE_SUM]], %[[ITEMSHIFT]]
 // CHECK: %[[C3D:.*]] = arith.addi %[[MIDDLE_DIV]], %[[INNER_IDX]]
 // CHECK: %[[COORD:.*]] = fir.coordinate_of %[[BOXADDR]], %[[C3D]] : (!fir.ref<!fir.array<?xf64>>, index) -> !fir.ref<f64>
 // CHECK: %{{.*}} = fir.load %[[COORD]] : !fir.ref<f64>