[SCCP] Tune cast instruction handling for overdefined operand

An extended value is known to lie within a range smaller than the full one. Prevent SCCP from marking such a value as overdefined. Fixes PR52253. Differential Revision: https://reviews.llvm.org/D112721
llvm · Nov 8, 2021 · ce4fa93 · ce4fa93
1 parent 28b3cac
commit ce4fa93
Show file tree

Hide file tree

Showing 5 changed files with 30 additions and 53 deletions.
diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
@@ -808,16 +808,24 @@ void SCCPInstVisitor::visitCastInst(CastInst &I) {
     return;
 
   ValueLatticeElement OpSt = getValueState(I.getOperand(0));
+  if (OpSt.isUnknownOrUndef())
+    return;
+
   if (Constant *OpC = getConstant(OpSt)) {
     // Fold the constant as we build.
     Constant *C = ConstantFoldCastOperand(I.getOpcode(), OpC, I.getType(), DL);
     if (isa<UndefValue>(C))
       return;
     // Propagate constant value
     markConstant(&I, C);
-  } else if (OpSt.isConstantRange() && I.getDestTy()->isIntegerTy()) {
+  } else if (I.getDestTy()->isIntegerTy()) {
     auto &LV = getValueState(&I);
-    ConstantRange OpRange = OpSt.getConstantRange();
+    ConstantRange OpRange =
+        OpSt.isConstantRange()
+            ? OpSt.getConstantRange()
+            : ConstantRange::getFull(
+                  I.getOperand(0)->getType()->getScalarSizeInBits());
+
     Type *DestTy = I.getDestTy();
     // Vectors where all elements have the same known constant range are treated
     // as a single constant range in the lattice. When bitcasting such vectors,
@@ -832,7 +840,7 @@ void SCCPInstVisitor::visitCastInst(CastInst &I) {
     ConstantRange Res =
         OpRange.castOp(I.getOpcode(), DL.getTypeSizeInBits(DestTy));
     mergeInValue(LV, &I, ValueLatticeElement::getRange(Res));
-  } else if (!OpSt.isUnknownOrUndef())
+  } else
     markOverdefined(&I);
 }
 

diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pr52253.ll b/llvm/test/Transforms/PhaseOrdering/X86/pr52253.ll
@@ -1,17 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -O3 -S < %s | FileCheck %s
 ; RUN: opt -instcombine -sccp -bdce -S < %s | FileCheck %s
-; RUN: opt -aggressive-instcombine -instcombine -sccp -bdce -S < %s | FileCheck %s --check-prefix=AIC_FIRST
+; RUN: opt -aggressive-instcombine -instcombine -sccp -bdce -S < %s | FileCheck %s
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
 define i1 @foo(i32 %t4, i32 %t10) {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:    ret i1 false
-;
-; AIC_FIRST-LABEL: @foo(
-; AIC_FIRST-NEXT:    ret i1 false
 ;
   %t09 = shl i32 %t10, 24
   %t010 = ashr exact i32 %t09, 24
@@ -27,28 +24,7 @@ define i1 @foo(i32 %t4, i32 %t10) {
 
 define i1 @bar(i32 %t4, i32 %t10) {
 ; CHECK-LABEL: @bar(
-; CHECK-NEXT:    [[T09:%.*]] = shl i32 [[T10:%.*]], 24
-; CHECK-NEXT:    [[T010:%.*]] = ashr exact i32 [[T09]], 24
-; CHECK-NEXT:    [[T011:%.*]] = add nsw i32 [[T010]], 979
-; CHECK-NEXT:    [[T11:%.*]] = trunc i32 [[T4:%.*]] to i8
-; CHECK-NEXT:    [[T12:%.*]] = icmp eq i8 [[T11]], 0
-; CHECK-NEXT:    [[T14:%.*]] = zext i1 [[T12]] to i8
-; CHECK-NEXT:    [[T15:%.*]] = shl i8 [[T11]], [[T14]]
-; CHECK-NEXT:    [[T17:%.*]] = zext i8 [[T15]] to i32
-; CHECK-NEXT:    [[T18:%.*]] = icmp eq i32 [[T011]], [[T17]]
-; CHECK-NEXT:    ret i1 [[T18]]
-;
-; AIC_FIRST-LABEL: @bar(
-; AIC_FIRST-NEXT:    [[T09:%.*]] = shl i32 [[T10:%.*]], 24
-; AIC_FIRST-NEXT:    [[T010:%.*]] = ashr exact i32 [[T09]], 24
-; AIC_FIRST-NEXT:    [[T011:%.*]] = add nsw i32 [[T010]], 979
-; AIC_FIRST-NEXT:    [[T11:%.*]] = trunc i32 [[T4:%.*]] to i8
-; AIC_FIRST-NEXT:    [[T12:%.*]] = icmp eq i8 [[T11]], 0
-; AIC_FIRST-NEXT:    [[T14:%.*]] = zext i1 [[T12]] to i8
-; AIC_FIRST-NEXT:    [[T15:%.*]] = shl i8 [[T11]], [[T14]]
-; AIC_FIRST-NEXT:    [[T17:%.*]] = zext i8 [[T15]] to i32
-; AIC_FIRST-NEXT:    [[T18:%.*]] = icmp eq i32 [[T011]], [[T17]]
-; AIC_FIRST-NEXT:    ret i1 [[T18]]
+; CHECK-NEXT:    ret i1 false
 ;
   %t09 = shl i32 %t10, 24
   %t010 = ashr exact i32 %t09, 24
@@ -65,18 +41,6 @@ define i1 @bar(i32 %t4, i32 %t10) {
 define i1 @foobar(i32 %t4, i32 %t10) {
 ; CHECK-LABEL: @foobar(
 ; CHECK-NEXT:    ret i1 false
-;
-; AIC_FIRST-LABEL: @foobar(
-; AIC_FIRST-NEXT:    [[T09:%.*]] = shl i32 [[T10:%.*]], 24
-; AIC_FIRST-NEXT:    [[T010:%.*]] = ashr exact i32 [[T09]], 24
-; AIC_FIRST-NEXT:    [[T011:%.*]] = add nsw i32 [[T010]], 979
-; AIC_FIRST-NEXT:    [[T11:%.*]] = trunc i32 [[T4:%.*]] to i8
-; AIC_FIRST-NEXT:    [[T12:%.*]] = icmp eq i8 [[T11]], 0
-; AIC_FIRST-NEXT:    [[T14:%.*]] = zext i1 [[T12]] to i8
-; AIC_FIRST-NEXT:    [[T15:%.*]] = shl i8 [[T11]], [[T14]]
-; AIC_FIRST-NEXT:    [[T17:%.*]] = zext i8 [[T15]] to i32
-; AIC_FIRST-NEXT:    [[T18:%.*]] = icmp eq i32 [[T011]], [[T17]]
-; AIC_FIRST-NEXT:    ret i1 [[T18]]
 ;
   %t09 = shl i32 %t10, 24
   %t010 = ashr exact i32 %t09, 24

diff --git a/llvm/test/Transforms/SCCP/ip-ranges-casts.ll b/llvm/test/Transforms/SCCP/ip-ranges-casts.ll
@@ -318,8 +318,8 @@ entry:
 
 define internal i64 @f.sext_to_zext(i32 %t) {
 ; CHECK-LABEL: @f.sext_to_zext(
-; CHECK-NEXT:    [[A:%.*]] = sext i32 [[T:%.*]] to i64
-; CHECK-NEXT:    ret i64 [[A]]
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[T:%.*]] to i64
+; CHECK-NEXT:    ret i64 [[TMP1]]
 ;
   %a = sext i32 %t to i64
   ret i64 %a

diff --git a/llvm/test/Transforms/SCCP/overdefined-ext.ll b/llvm/test/Transforms/SCCP/overdefined-ext.ll
@@ -4,8 +4,7 @@
 define i32 @zext_lshr(i1 %t0) {
 ; CHECK-LABEL: @zext_lshr(
 ; CHECK-NEXT:    [[T1:%.*]] = zext i1 [[T0:%.*]] to i32
-; CHECK-NEXT:    [[T2:%.*]] = lshr i32 [[T1]], 1
-; CHECK-NEXT:    ret i32 [[T2]]
+; CHECK-NEXT:    ret i32 0
 ;
   %t1 = zext i1 %t0 to i32
   %t2 = lshr i32 %t1, 1
@@ -15,14 +14,15 @@ define i32 @zext_lshr(i1 %t0) {
 define i1 @zext_icmp(i1 %t0) {
 ; CHECK-LABEL: @zext_icmp(
 ; CHECK-NEXT:    [[T1:%.*]] = zext i1 [[T0:%.*]] to i32
-; CHECK-NEXT:    [[T2:%.*]] = icmp eq i32 [[T1]], 2
-; CHECK-NEXT:    ret i1 [[T2]]
+; CHECK-NEXT:    ret i1 false
 ;
   %t1 = zext i1 %t0 to i32
   %t2 = icmp eq i32 %t1, 2
   ret i1 %t2
 }
 
+; negative test. SCCP operates poorly with vector ranges
+
 define <2 x i1> @zext_vector(<2 x i1> %t0) {
 ; CHECK-LABEL: @zext_vector(
 ; CHECK-NEXT:    [[T1:%.*]] = zext <2 x i1> [[T0:%.*]] to <2 x i32>
@@ -34,6 +34,8 @@ define <2 x i1> @zext_vector(<2 x i1> %t0) {
   ret <2 x i1> %t2
 }
 
+; negative test. SCCP operates poorly with vector ranges
+
 define <2 x i1> @zext_vector2(<2 x i1> %t0) {
 ; CHECK-LABEL: @zext_vector2(
 ; CHECK-NEXT:    [[T1:%.*]] = zext <2 x i1> [[T0:%.*]] to <2 x i32>
@@ -47,6 +49,8 @@ define <2 x i1> @zext_vector2(<2 x i1> %t0) {
   ret <2 x i1> %t3
 }
 
+; negative test: %t2 can be replaced by %t1, but SCCP operates by ranges only
+
 define i32 @sext_ashr(i1 %t0) {
 ; CHECK-LABEL: @sext_ashr(
 ; CHECK-NEXT:    [[T1:%.*]] = sext i1 [[T0:%.*]] to i32
@@ -61,14 +65,15 @@ define i32 @sext_ashr(i1 %t0) {
 define i1 @sext_icmp(i1 %t0) {
 ; CHECK-LABEL: @sext_icmp(
 ; CHECK-NEXT:    [[T1:%.*]] = sext i1 [[T0:%.*]] to i32
-; CHECK-NEXT:    [[T2:%.*]] = icmp eq i32 [[T1]], 2
-; CHECK-NEXT:    ret i1 [[T2]]
+; CHECK-NEXT:    ret i1 false
 ;
   %t1 = sext i1 %t0 to i32
   %t2 = icmp eq i32 %t1, 2
   ret i1 %t2
 }
 
+; negative test. SCCP operates poorly with vector ranges
+
 define <2 x i1> @sext_vector(<2 x i1> %t0) {
 ; CHECK-LABEL: @sext_vector(
 ; CHECK-NEXT:    [[T1:%.*]] = sext <2 x i1> [[T0:%.*]] to <2 x i32>
@@ -80,6 +85,8 @@ define <2 x i1> @sext_vector(<2 x i1> %t0) {
   ret <2 x i1> %t2
 }
 
+; negative test. SCCP operates poorly with vector ranges
+
 define <2 x i1> @sext_vector2(<2 x i1> %t0) {
 ; CHECK-LABEL: @sext_vector2(
 ; CHECK-NEXT:    [[T1:%.*]] = sext <2 x i1> [[T0:%.*]] to <2 x i32>

diff --git a/llvm/test/Transforms/SCCP/pr52253.ll b/llvm/test/Transforms/SCCP/pr52253.ll
@@ -35,8 +35,7 @@ define i1 @bar(i32 %t4, i32 %t10) {
 ; CHECK-NEXT:    [[T14:%.*]] = zext i1 [[T12]] to i8
 ; CHECK-NEXT:    [[T15:%.*]] = shl i8 [[T11]], [[T14]]
 ; CHECK-NEXT:    [[T17:%.*]] = zext i8 [[T15]] to i32
-; CHECK-NEXT:    [[T18:%.*]] = icmp eq i32 [[T011]], [[T17]]
-; CHECK-NEXT:    ret i1 [[T18]]
+; CHECK-NEXT:    ret i1 false
 ;
   %t09 = shl i32 %t10, 24
   %t010 = ashr exact i32 %t09, 24
@@ -62,8 +61,7 @@ define i1 @foobar(i32 %t4, i32 %t10) {
 ; CHECK-NEXT:    [[T15:%.*]] = shl nuw nsw i32 [[T13]], [[T14]]
 ; CHECK-NEXT:    [[T16:%.*]] = trunc i32 [[T15]] to i8
 ; CHECK-NEXT:    [[T17:%.*]] = zext i8 [[T16]] to i32
-; CHECK-NEXT:    [[T18:%.*]] = icmp eq i32 [[T011]], [[T17]]
-; CHECK-NEXT:    ret i1 [[T18]]
+; CHECK-NEXT:    ret i1 false
 ;
   %t09 = shl i32 %t10, 24
   %t010 = ashr exact i32 %t09, 24