[SCEVExpander] Expand explicit PtrToInt casts just like we would impl…

…icit ones I.e., use GetOptimalInsertionPointForCastOf() helper to get the insertion point, and try to reuse casts first.
llvm · Apr 19, 2021 · ecc9d7e · ecc9d7e
1 parent 442c408
commit ecc9d7e
Show file tree

Hide file tree

Showing 3 changed files with 28 additions and 24 deletions.
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -1691,7 +1691,8 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
 Value *SCEVExpander::visitPtrToIntExpr(const SCEVPtrToIntExpr *S) {
   Value *V =
       expandCodeForImpl(S->getOperand(), S->getOperand()->getType(), false);
-  return Builder.CreatePtrToInt(V, S->getType());
+  return ReuseOrCreateCast(V, S->getType(), CastInst::PtrToInt,
+                           GetOptimalInsertionPointForCastOf(V));
 }
 
 Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) {

diff --git a/llvm/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll b/llvm/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll
@@ -40,6 +40,8 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 ; LV-LABEL: f1
 ; LV-LABEL: for.body.lver.check
 
+; LV-NEXT: [[A0:%[^ ]*]] = ptrtoint i16* %a to i64
+
 ; LV:      [[BETrunc:%[^ ]*]] = trunc i64 [[BE:%[^ ]*]] to i32
 ; LV-NEXT: [[OFMul:%[^ ]*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[BETrunc]])
 ; LV-NEXT: [[OFMulResult:%[^ ]*]] = extractvalue { i32, i1 } [[OFMul]], 0
@@ -55,7 +57,6 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 
 ; LV-NEXT: [[Or0:%[^ ]*]] = or i1 false, [[PredCheck0]]
 
-; LV-NEXT: [[A0:%[^ ]*]] = ptrtoint i16* %a to i64
 
 ; LV-NEXT: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE]])
 ; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0

diff --git a/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll b/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll
@@ -9,6 +9,7 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 define void @f(i32* noalias %a, i32* noalias %b, i32* noalias %c, i32* noalias %d, i32* noalias %e, i64 %N) {
 ; CHECK-LABEL: @f(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A2:%.*]] = ptrtoint i32* [[A:%.*]] to i64
 ; CHECK-NEXT:    br label [[FOR_BODY_LVER_CHECK:%.*]]
 ; CHECK:       for.body.lver.check:
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N:%.*]], -1
@@ -25,18 +26,17 @@ define void @f(i32* noalias %a, i32* noalias %b, i32* noalias %c, i32* noalias %
 ; CHECK-NEXT:    [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = or i1 false, [[TMP9]]
-; CHECK-NEXT:    [[TMP11:%.*]] = ptrtoint i32* [[A:%.*]] to i64
-; CHECK-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
-; CHECK-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
-; CHECK-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
-; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[TMP11]], [[MUL_RESULT3]]
-; CHECK-NEXT:    [[TMP13:%.*]] = sub i64 [[TMP11]], [[MUL_RESULT3]]
-; CHECK-NEXT:    [[TMP14:%.*]] = icmp ugt i64 [[TMP13]], [[TMP11]]
-; CHECK-NEXT:    [[TMP15:%.*]] = icmp ult i64 [[TMP12]], [[TMP11]]
-; CHECK-NEXT:    [[TMP16:%.*]] = select i1 false, i1 [[TMP14]], i1 [[TMP15]]
-; CHECK-NEXT:    [[TMP17:%.*]] = or i1 [[TMP16]], [[MUL_OVERFLOW4]]
-; CHECK-NEXT:    [[TMP18:%.*]] = or i1 [[TMP10]], [[TMP17]]
-; CHECK-NEXT:    br i1 [[TMP18]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
+; CHECK-NEXT:    [[MUL3:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
+; CHECK-NEXT:    [[MUL_RESULT4:%.*]] = extractvalue { i64, i1 } [[MUL3]], 0
+; CHECK-NEXT:    [[MUL_OVERFLOW5:%.*]] = extractvalue { i64, i1 } [[MUL3]], 1
+; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[A2]], [[MUL_RESULT4]]
+; CHECK-NEXT:    [[TMP12:%.*]] = sub i64 [[A2]], [[MUL_RESULT4]]
+; CHECK-NEXT:    [[TMP13:%.*]] = icmp ugt i64 [[TMP12]], [[A2]]
+; CHECK-NEXT:    [[TMP14:%.*]] = icmp ult i64 [[TMP11]], [[A2]]
+; CHECK-NEXT:    [[TMP15:%.*]] = select i1 false, i1 [[TMP13]], i1 [[TMP14]]
+; CHECK-NEXT:    [[TMP16:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW5]]
+; CHECK-NEXT:    [[TMP17:%.*]] = or i1 [[TMP10]], [[TMP16]]
+; CHECK-NEXT:    br i1 [[TMP17]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
 ; CHECK:       for.body.ph.lver.orig:
 ; CHECK-NEXT:    br label [[FOR_BODY_LVER_ORIG:%.*]]
 ; CHECK:       for.body.lver.orig:
@@ -97,10 +97,10 @@ define void @f(i32* noalias %a, i32* noalias %b, i32* noalias %c, i32* noalias %
 ; CHECK-NEXT:    [[ARRAYIDXC:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[MUL_EXT]]
 ; CHECK-NEXT:    store i32 [[MULC]], i32* [[ARRAYIDXC]], align 4
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[ADD]], [[N]]
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT5:%.*]], label [[FOR_BODY]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT6:%.*]], label [[FOR_BODY]]
 ; CHECK:       for.end.loopexit:
 ; CHECK-NEXT:    br label [[FOR_END:%.*]]
-; CHECK:       for.end.loopexit5:
+; CHECK:       for.end.loopexit6:
 ; CHECK-NEXT:    br label [[FOR_END]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    ret void
@@ -154,6 +154,7 @@ declare void @use64(i64)
 define void @f_with_offset(i32* noalias %b, i32* noalias %c, i32* noalias %d, i32* noalias %e, i64 %N) {
 ; CHECK-LABEL: @f_with_offset(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[GLOBAL_A:%.*]] = ptrtoint [8192 x i32]* @global_a to i64
 ; CHECK-NEXT:    [[A_BASE:%.*]] = getelementptr [8192 x i32], [8192 x i32]* @global_a, i32 0, i32 0
 ; CHECK-NEXT:    [[A_INTPTR:%.*]] = ptrtoint i32* [[A_BASE]] to i64
 ; CHECK-NEXT:    call void @use64(i64 [[A_INTPTR]])
@@ -174,17 +175,18 @@ define void @f_with_offset(i32* noalias %b, i32* noalias %c, i32* noalias %d, i3
 ; CHECK-NEXT:    [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = or i1 false, [[TMP9]]
+; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[GLOBAL_A]], 168
 ; CHECK-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
 ; CHECK-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
 ; CHECK-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
-; CHECK-NEXT:    [[TMP11:%.*]] = add i64 add (i64 ptrtoint ([8192 x i32]* @global_a to i64), i64 168), [[MUL_RESULT3]]
-; CHECK-NEXT:    [[TMP12:%.*]] = sub i64 add (i64 ptrtoint ([8192 x i32]* @global_a to i64), i64 168), [[MUL_RESULT3]]
-; CHECK-NEXT:    [[TMP13:%.*]] = icmp ugt i64 [[TMP12]], add (i64 ptrtoint ([8192 x i32]* @global_a to i64), i64 168)
-; CHECK-NEXT:    [[TMP14:%.*]] = icmp ult i64 [[TMP11]], add (i64 ptrtoint ([8192 x i32]* @global_a to i64), i64 168)
-; CHECK-NEXT:    [[TMP15:%.*]] = select i1 false, i1 [[TMP13]], i1 [[TMP14]]
-; CHECK-NEXT:    [[TMP16:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW4]]
-; CHECK-NEXT:    [[TMP17:%.*]] = or i1 [[TMP10]], [[TMP16]]
-; CHECK-NEXT:    br i1 [[TMP17]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
+; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[TMP11]], [[MUL_RESULT3]]
+; CHECK-NEXT:    [[TMP13:%.*]] = sub i64 [[TMP11]], [[MUL_RESULT3]]
+; CHECK-NEXT:    [[TMP14:%.*]] = icmp ugt i64 [[TMP13]], [[TMP11]]
+; CHECK-NEXT:    [[TMP15:%.*]] = icmp ult i64 [[TMP12]], [[TMP11]]
+; CHECK-NEXT:    [[TMP16:%.*]] = select i1 false, i1 [[TMP14]], i1 [[TMP15]]
+; CHECK-NEXT:    [[TMP17:%.*]] = or i1 [[TMP16]], [[MUL_OVERFLOW4]]
+; CHECK-NEXT:    [[TMP18:%.*]] = or i1 [[TMP10]], [[TMP17]]
+; CHECK-NEXT:    br i1 [[TMP18]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
 ; CHECK:       for.body.ph.lver.orig:
 ; CHECK-NEXT:    br label [[FOR_BODY_LVER_ORIG:%.*]]
 ; CHECK:       for.body.lver.orig: