[IndVars] Eliminate redundant type cast between integer and float

Recompute the range: match for fptosi of sitofp, and then query the range of the input to the sitofp according the comment on D129140. Fixes #55505. Reviewed By: nikic Differential Revision: https://reviews.llvm.org/D129191
llvm · Jul 8, 2022 · 716e1b8 · 716e1b8
1 parent 7b9a3b9
commit 716e1b8
Show file tree

Hide file tree

Showing 3 changed files with 150 additions and 2 deletions.
diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -79,6 +79,7 @@ namespace {
 
     bool eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand);
     bool replaceIVUserWithLoopInvariant(Instruction *UseInst);
+    bool replaceFloatIVWithIntegerIV(Instruction *UseInst);
 
     bool eliminateOverflowIntrinsic(WithOverflowInst *WO);
     bool eliminateSaturatingIntrinsic(SaturatingInst *SI);
@@ -673,6 +674,35 @@ bool SimplifyIndvar::replaceIVUserWithLoopInvariant(Instruction *I) {
   return true;
 }
 
+/// Eliminate redundant type cast between integer and float.
+bool SimplifyIndvar::replaceFloatIVWithIntegerIV(Instruction *UseInst) {
+  if (UseInst->getOpcode() != CastInst::SIToFP)
+    return false;
+
+  Value *IVOperand = UseInst->getOperand(0);
+  // Get the symbolic expression for this instruction.
+  ConstantRange IVRange = SE->getSignedRange(SE->getSCEV(IVOperand));
+  unsigned DestNumSigBits = UseInst->getType()->getFPMantissaWidth();
+  if (IVRange.getActiveBits() <= DestNumSigBits) {
+    for (User *U : UseInst->users()) {
+      // Match for fptosi of sitofp and with same type.
+      auto *CI = dyn_cast<FPToSIInst>(U);
+      if (!CI || IVOperand->getType() != CI->getType())
+        continue;
+
+      CI->replaceAllUsesWith(IVOperand);
+      DeadInsts.push_back(CI);
+      LLVM_DEBUG(dbgs() << "INDVARS: Replace IV user: " << *CI
+                        << " with: " << *IVOperand << '\n');
+
+      ++NumFoldedUser;
+      Changed = true;
+    }
+  }
+
+  return Changed;
+}
+
 /// Eliminate any operation that SCEV can prove is an identity function.
 bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst,
                                            Instruction *IVOperand) {
@@ -896,6 +926,13 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
       }
     }
 
+    // Try to use integer induction for FPToSI of float induction directly.
+    if (replaceFloatIVWithIntegerIV(UseInst)) {
+      // Re-queue the potentially new direct uses of IVOperand.
+      pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers);
+      continue;
+    }
+
     CastInst *Cast = dyn_cast<CastInst>(UseInst);
     if (V && Cast) {
       V->visitCast(Cast);

diff --git a/llvm/test/Transforms/IndVarSimplify/floating-point-iv.ll b/llvm/test/Transforms/IndVarSimplify/floating-point-iv.ll
@@ -382,8 +382,7 @@ define void @pr55505_remove_redundant_fptosi_for_float_iv(i32 %index, ptr %dst)
 ; CHECK-NEXT:    [[FLOAT_IV_INT:%.*]] = phi i32 [ 1000, [[ENTRY:%.*]] ], [ [[FLOAT_IV_NEXT_INT:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    [[INDVAR_CONV:%.*]] = sitofp i32 [[FLOAT_IV_INT]] to float
 ; CHECK-NEXT:    call void @use.float(float [[INDVAR_CONV]])
-; CHECK-NEXT:    [[CONV_I32:%.*]] = fptosi float [[INDVAR_CONV]] to i32
-; CHECK-NEXT:    call void @use.i32(i32 [[CONV_I32]])
+; CHECK-NEXT:    call void @use.i32(i32 [[FLOAT_IV_INT]])
 ; CHECK-NEXT:    [[CONV_I16:%.*]] = fptosi float [[INDVAR_CONV]] to i16
 ; CHECK-NEXT:    [[CONV_I64:%.*]] = fptosi float [[INDVAR_CONV]] to i64
 ; CHECK-NEXT:    call void @use.i16(i16 [[CONV_I16]])

diff --git a/llvm/test/Transforms/IndVarSimplify/floating-point-small-iv.ll b/llvm/test/Transforms/IndVarSimplify/floating-point-small-iv.ll
@@ -0,0 +1,112 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -indvars -S | FileCheck %s
+
+@array = dso_local global [16777219 x i32] zeroinitializer, align 4
+
+define void @small_const_bound(i32 %index) {
+; CHECK-LABEL: @small_const_bound(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[IV_INT:%.*]] = phi i32 [ 100, [[ENTRY:%.*]] ], [ [[DEC_INT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[IV_INT]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [16777219 x i32], [16777219 x i32]* @array, i64 0, i64 [[IDXPROM]]
+; CHECK-NEXT:    store i32 [[IV_INT]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[DEC_INT]] = add nsw i32 [[IV_INT]], -1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[DEC_INT]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[CLEANUP:%.*]]
+; CHECK:       cleanup:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %iv.int = phi i32 [ 100, %entry ], [ %dec.int, %for.body ]
+  %indvar.conv = sitofp i32 %iv.int to float
+  %conv = fptosi float %indvar.conv to i32
+  %idxprom = sext i32 %conv to i64
+  %arrayidx = getelementptr inbounds [16777219 x i32], [16777219 x i32]* @array, i64 0, i64 %idxprom
+  store i32 %conv, i32* %arrayidx, align 4
+  %dec.int = add nsw i32 %iv.int, -1
+  %cmp = icmp ugt i32 %dec.int, 0
+  br i1 %cmp, label %for.body, label %cleanup
+
+cleanup:                                          ; preds = %for.body
+  ret void
+}
+
+; Negative test: The transform is *not* valid because there are too many significant bits
+define void @overflow_masked_const_bound(i32 %index) {
+; CHECK-LABEL: @overflow_masked_const_bound(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[IV_INT:%.*]] = phi i32 [ 16777218, [[ENTRY:%.*]] ], [ [[DEC_INT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[INDVAR_CONV:%.*]] = sitofp i32 [[IV_INT]] to float
+; CHECK-NEXT:    [[CONV:%.*]] = fptosi float [[INDVAR_CONV]] to i32
+; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[CONV]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [16777219 x i32], [16777219 x i32]* @array, i64 0, i64 [[IDXPROM]]
+; CHECK-NEXT:    store i32 [[CONV]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[DEC_INT]] = add nsw i32 [[IV_INT]], -1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[DEC_INT]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[CLEANUP:%.*]]
+; CHECK:       cleanup:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %iv.int = phi i32 [ 16777218, %entry ], [ %dec.int, %for.body ] ; intermediate 16777218 (= 1 << 24 + 2)
+  %indvar.conv = sitofp i32 %iv.int to float
+  %conv = fptosi float %indvar.conv to i32
+  %idxprom = sext i32 %conv to i64
+  %arrayidx = getelementptr inbounds [16777219 x i32], [16777219 x i32]* @array, i64 0, i64 %idxprom
+  store i32 %conv, i32* %arrayidx, align 4
+  %dec.int = add nsw i32 %iv.int, -1
+  %cmp = icmp ugt i32 %dec.int, 0
+  br i1 %cmp, label %for.body, label %cleanup
+
+cleanup:                                          ; preds = %for.body
+  ret void
+}
+
+; Negative test: Type mismatch between the integer IV and the fptosi result
+define void @mismatch_type_const(i32 %index) {
+;
+; CHECK-LABEL: @mismatch_type_const(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[IV_INT:%.*]] = phi i32 [ 100, [[ENTRY:%.*]] ], [ [[DEC_INT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[INDVAR_CONV:%.*]] = sitofp i32 [[IV_INT]] to float
+; CHECK-NEXT:    [[CONV:%.*]] = fptosi float [[INDVAR_CONV]] to i16
+; CHECK-NEXT:    [[IDXPROM32:%.*]] = sext i16 [[CONV]] to i32
+; CHECK-NEXT:    [[IDXPROM64:%.*]] = sext i16 [[CONV]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [16777219 x i32], [16777219 x i32]* @array, i64 0, i64 [[IDXPROM64]]
+; CHECK-NEXT:    store i32 [[IDXPROM32]], i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[DEC_INT]] = add nsw i32 [[IV_INT]], -1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[DEC_INT]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[CLEANUP:%.*]]
+; CHECK:       cleanup:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %iv.int = phi i32 [ 100, %entry ], [ %dec.int, %for.body ]
+  %indvar.conv = sitofp i32 %iv.int to float
+  %conv = fptosi float %indvar.conv to i16
+  %idxprom32 = sext i16 %conv to i32
+  %idxprom64 = sext i16 %conv to i64
+  %arrayidx = getelementptr inbounds [16777219 x i32], [16777219 x i32]* @array, i64 0, i64 %idxprom64
+  store i32 %idxprom32, i32* %arrayidx, align 4
+  %dec.int = add nsw i32 %iv.int, -1
+  %cmp = icmp ugt i32 %dec.int, 0
+  br i1 %cmp, label %for.body, label %cleanup
+
+cleanup:                                          ; preds = %for.body
+  ret void
+}