Skip to content

Commit

Permalink
[SCCP] Tune cast instruction handling for overdefined operand
Browse files Browse the repository at this point in the history
An extended value is known to lie within a range smaller than the full one.
Prevent SCCP from marking such a value as overdefined.

Fixes PR52253

Differential Revision: https://reviews.llvm.org/D112721
  • Loading branch information
anton-afanasyev committed Nov 8, 2021
1 parent 28b3cac commit ce4fa93
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 53 deletions.
14 changes: 11 additions & 3 deletions llvm/lib/Transforms/Utils/SCCPSolver.cpp
Expand Up @@ -808,16 +808,24 @@ void SCCPInstVisitor::visitCastInst(CastInst &I) {
return;

ValueLatticeElement OpSt = getValueState(I.getOperand(0));
if (OpSt.isUnknownOrUndef())
return;

if (Constant *OpC = getConstant(OpSt)) {
// Fold the constant as we build.
Constant *C = ConstantFoldCastOperand(I.getOpcode(), OpC, I.getType(), DL);
if (isa<UndefValue>(C))
return;
// Propagate constant value
markConstant(&I, C);
} else if (OpSt.isConstantRange() && I.getDestTy()->isIntegerTy()) {
} else if (I.getDestTy()->isIntegerTy()) {
auto &LV = getValueState(&I);
ConstantRange OpRange = OpSt.getConstantRange();
ConstantRange OpRange =
OpSt.isConstantRange()
? OpSt.getConstantRange()
: ConstantRange::getFull(
I.getOperand(0)->getType()->getScalarSizeInBits());

Type *DestTy = I.getDestTy();
// Vectors where all elements have the same known constant range are treated
// as a single constant range in the lattice. When bitcasting such vectors,
Expand All @@ -832,7 +840,7 @@ void SCCPInstVisitor::visitCastInst(CastInst &I) {
ConstantRange Res =
OpRange.castOp(I.getOpcode(), DL.getTypeSizeInBits(DestTy));
mergeInValue(LV, &I, ValueLatticeElement::getRange(Res));
} else if (!OpSt.isUnknownOrUndef())
} else
markOverdefined(&I);
}

Expand Down
40 changes: 2 additions & 38 deletions llvm/test/Transforms/PhaseOrdering/X86/pr52253.ll
@@ -1,17 +1,14 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -O3 -S < %s | FileCheck %s
; RUN: opt -instcombine -sccp -bdce -S < %s | FileCheck %s
; RUN: opt -aggressive-instcombine -instcombine -sccp -bdce -S < %s | FileCheck %s --check-prefix=AIC_FIRST
; RUN: opt -aggressive-instcombine -instcombine -sccp -bdce -S < %s | FileCheck %s

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

define i1 @foo(i32 %t4, i32 %t10) {
; CHECK-LABEL: @foo(
; CHECK-NEXT: ret i1 false
;
; AIC_FIRST-LABEL: @foo(
; AIC_FIRST-NEXT: ret i1 false
;
%t09 = shl i32 %t10, 24
%t010 = ashr exact i32 %t09, 24
Expand All @@ -27,28 +24,7 @@ define i1 @foo(i32 %t4, i32 %t10) {

define i1 @bar(i32 %t4, i32 %t10) {
; CHECK-LABEL: @bar(
; CHECK-NEXT: [[T09:%.*]] = shl i32 [[T10:%.*]], 24
; CHECK-NEXT: [[T010:%.*]] = ashr exact i32 [[T09]], 24
; CHECK-NEXT: [[T011:%.*]] = add nsw i32 [[T010]], 979
; CHECK-NEXT: [[T11:%.*]] = trunc i32 [[T4:%.*]] to i8
; CHECK-NEXT: [[T12:%.*]] = icmp eq i8 [[T11]], 0
; CHECK-NEXT: [[T14:%.*]] = zext i1 [[T12]] to i8
; CHECK-NEXT: [[T15:%.*]] = shl i8 [[T11]], [[T14]]
; CHECK-NEXT: [[T17:%.*]] = zext i8 [[T15]] to i32
; CHECK-NEXT: [[T18:%.*]] = icmp eq i32 [[T011]], [[T17]]
; CHECK-NEXT: ret i1 [[T18]]
;
; AIC_FIRST-LABEL: @bar(
; AIC_FIRST-NEXT: [[T09:%.*]] = shl i32 [[T10:%.*]], 24
; AIC_FIRST-NEXT: [[T010:%.*]] = ashr exact i32 [[T09]], 24
; AIC_FIRST-NEXT: [[T011:%.*]] = add nsw i32 [[T010]], 979
; AIC_FIRST-NEXT: [[T11:%.*]] = trunc i32 [[T4:%.*]] to i8
; AIC_FIRST-NEXT: [[T12:%.*]] = icmp eq i8 [[T11]], 0
; AIC_FIRST-NEXT: [[T14:%.*]] = zext i1 [[T12]] to i8
; AIC_FIRST-NEXT: [[T15:%.*]] = shl i8 [[T11]], [[T14]]
; AIC_FIRST-NEXT: [[T17:%.*]] = zext i8 [[T15]] to i32
; AIC_FIRST-NEXT: [[T18:%.*]] = icmp eq i32 [[T011]], [[T17]]
; AIC_FIRST-NEXT: ret i1 [[T18]]
; CHECK-NEXT: ret i1 false
;
%t09 = shl i32 %t10, 24
%t010 = ashr exact i32 %t09, 24
Expand All @@ -65,18 +41,6 @@ define i1 @bar(i32 %t4, i32 %t10) {
define i1 @foobar(i32 %t4, i32 %t10) {
; CHECK-LABEL: @foobar(
; CHECK-NEXT: ret i1 false
;
; AIC_FIRST-LABEL: @foobar(
; AIC_FIRST-NEXT: [[T09:%.*]] = shl i32 [[T10:%.*]], 24
; AIC_FIRST-NEXT: [[T010:%.*]] = ashr exact i32 [[T09]], 24
; AIC_FIRST-NEXT: [[T011:%.*]] = add nsw i32 [[T010]], 979
; AIC_FIRST-NEXT: [[T11:%.*]] = trunc i32 [[T4:%.*]] to i8
; AIC_FIRST-NEXT: [[T12:%.*]] = icmp eq i8 [[T11]], 0
; AIC_FIRST-NEXT: [[T14:%.*]] = zext i1 [[T12]] to i8
; AIC_FIRST-NEXT: [[T15:%.*]] = shl i8 [[T11]], [[T14]]
; AIC_FIRST-NEXT: [[T17:%.*]] = zext i8 [[T15]] to i32
; AIC_FIRST-NEXT: [[T18:%.*]] = icmp eq i32 [[T011]], [[T17]]
; AIC_FIRST-NEXT: ret i1 [[T18]]
;
%t09 = shl i32 %t10, 24
%t010 = ashr exact i32 %t09, 24
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Transforms/SCCP/ip-ranges-casts.ll
Expand Up @@ -318,8 +318,8 @@ entry:

define internal i64 @f.sext_to_zext(i32 %t) {
; CHECK-LABEL: @f.sext_to_zext(
; CHECK-NEXT: [[A:%.*]] = sext i32 [[T:%.*]] to i64
; CHECK-NEXT: ret i64 [[A]]
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[T:%.*]] to i64
; CHECK-NEXT: ret i64 [[TMP1]]
;
%a = sext i32 %t to i64
ret i64 %a
Expand Down
19 changes: 13 additions & 6 deletions llvm/test/Transforms/SCCP/overdefined-ext.ll
Expand Up @@ -4,8 +4,7 @@
define i32 @zext_lshr(i1 %t0) {
; CHECK-LABEL: @zext_lshr(
; CHECK-NEXT: [[T1:%.*]] = zext i1 [[T0:%.*]] to i32
; CHECK-NEXT: [[T2:%.*]] = lshr i32 [[T1]], 1
; CHECK-NEXT: ret i32 [[T2]]
; CHECK-NEXT: ret i32 0
;
%t1 = zext i1 %t0 to i32
%t2 = lshr i32 %t1, 1
Expand All @@ -15,14 +14,15 @@ define i32 @zext_lshr(i1 %t0) {
define i1 @zext_icmp(i1 %t0) {
; CHECK-LABEL: @zext_icmp(
; CHECK-NEXT: [[T1:%.*]] = zext i1 [[T0:%.*]] to i32
; CHECK-NEXT: [[T2:%.*]] = icmp eq i32 [[T1]], 2
; CHECK-NEXT: ret i1 [[T2]]
; CHECK-NEXT: ret i1 false
;
%t1 = zext i1 %t0 to i32
%t2 = icmp eq i32 %t1, 2
ret i1 %t2
}

; Negative test: SCCP handles vector ranges poorly.

define <2 x i1> @zext_vector(<2 x i1> %t0) {
; CHECK-LABEL: @zext_vector(
; CHECK-NEXT: [[T1:%.*]] = zext <2 x i1> [[T0:%.*]] to <2 x i32>
Expand All @@ -34,6 +34,8 @@ define <2 x i1> @zext_vector(<2 x i1> %t0) {
ret <2 x i1> %t2
}

; Negative test: SCCP handles vector ranges poorly.

define <2 x i1> @zext_vector2(<2 x i1> %t0) {
; CHECK-LABEL: @zext_vector2(
; CHECK-NEXT: [[T1:%.*]] = zext <2 x i1> [[T0:%.*]] to <2 x i32>
Expand All @@ -47,6 +49,8 @@ define <2 x i1> @zext_vector2(<2 x i1> %t0) {
ret <2 x i1> %t3
}

; negative test: %t2 can be replaced by %t1, but SCCP operates by ranges only

define i32 @sext_ashr(i1 %t0) {
; CHECK-LABEL: @sext_ashr(
; CHECK-NEXT: [[T1:%.*]] = sext i1 [[T0:%.*]] to i32
Expand All @@ -61,14 +65,15 @@ define i32 @sext_ashr(i1 %t0) {
define i1 @sext_icmp(i1 %t0) {
; CHECK-LABEL: @sext_icmp(
; CHECK-NEXT: [[T1:%.*]] = sext i1 [[T0:%.*]] to i32
; CHECK-NEXT: [[T2:%.*]] = icmp eq i32 [[T1]], 2
; CHECK-NEXT: ret i1 [[T2]]
; CHECK-NEXT: ret i1 false
;
%t1 = sext i1 %t0 to i32
%t2 = icmp eq i32 %t1, 2
ret i1 %t2
}

; Negative test: SCCP handles vector ranges poorly.

define <2 x i1> @sext_vector(<2 x i1> %t0) {
; CHECK-LABEL: @sext_vector(
; CHECK-NEXT: [[T1:%.*]] = sext <2 x i1> [[T0:%.*]] to <2 x i32>
Expand All @@ -80,6 +85,8 @@ define <2 x i1> @sext_vector(<2 x i1> %t0) {
ret <2 x i1> %t2
}

; Negative test: SCCP handles vector ranges poorly.

define <2 x i1> @sext_vector2(<2 x i1> %t0) {
; CHECK-LABEL: @sext_vector2(
; CHECK-NEXT: [[T1:%.*]] = sext <2 x i1> [[T0:%.*]] to <2 x i32>
Expand Down
6 changes: 2 additions & 4 deletions llvm/test/Transforms/SCCP/pr52253.ll
Expand Up @@ -35,8 +35,7 @@ define i1 @bar(i32 %t4, i32 %t10) {
; CHECK-NEXT: [[T14:%.*]] = zext i1 [[T12]] to i8
; CHECK-NEXT: [[T15:%.*]] = shl i8 [[T11]], [[T14]]
; CHECK-NEXT: [[T17:%.*]] = zext i8 [[T15]] to i32
; CHECK-NEXT: [[T18:%.*]] = icmp eq i32 [[T011]], [[T17]]
; CHECK-NEXT: ret i1 [[T18]]
; CHECK-NEXT: ret i1 false
;
%t09 = shl i32 %t10, 24
%t010 = ashr exact i32 %t09, 24
Expand All @@ -62,8 +61,7 @@ define i1 @foobar(i32 %t4, i32 %t10) {
; CHECK-NEXT: [[T15:%.*]] = shl nuw nsw i32 [[T13]], [[T14]]
; CHECK-NEXT: [[T16:%.*]] = trunc i32 [[T15]] to i8
; CHECK-NEXT: [[T17:%.*]] = zext i8 [[T16]] to i32
; CHECK-NEXT: [[T18:%.*]] = icmp eq i32 [[T011]], [[T17]]
; CHECK-NEXT: ret i1 [[T18]]
; CHECK-NEXT: ret i1 false
;
%t09 = shl i32 %t10, 24
%t010 = ashr exact i32 %t09, 24
Expand Down

0 comments on commit ce4fa93

Please sign in to comment.