Skip to content

Commit

Permalink
[InstCombine] Fold Ext(i1) Pred shr(A, BW - 1) => i1 Pred A s< 0 (#68244
Browse files Browse the repository at this point in the history
)

Resolves #67916 .
This patch folds `Ext(icmp (A, xxx)) Pred shr(A, BW - 1)` into `i1 Pred
A s< 0`.
[Alive2](https://alive2.llvm.org/ce/z/k53Xwa).
  • Loading branch information
XChy committed Oct 13, 2023
1 parent d2aa523 commit b22917e
Show file tree
Hide file tree
Showing 3 changed files with 249 additions and 72 deletions.
56 changes: 27 additions & 29 deletions llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5390,35 +5390,6 @@ Instruction *InstCombinerImpl::foldICmpEquality(ICmpInst &I) {
return new ICmpInst(Pred, A, Builder.CreateTrunc(B, A->getType()));
}

// Test if 2 values have different or same signbits:
// (X u>> BitWidth - 1) == zext (Y s> -1) --> (X ^ Y) < 0
// (X u>> BitWidth - 1) != zext (Y s> -1) --> (X ^ Y) > -1
// (X s>> BitWidth - 1) == sext (Y s> -1) --> (X ^ Y) < 0
// (X s>> BitWidth - 1) != sext (Y s> -1) --> (X ^ Y) > -1
Instruction *ExtI;
if (match(Op1, m_CombineAnd(m_Instruction(ExtI), m_ZExtOrSExt(m_Value(A)))) &&
(Op0->hasOneUse() || Op1->hasOneUse())) {
unsigned OpWidth = Op0->getType()->getScalarSizeInBits();
Instruction *ShiftI;
Value *X, *Y;
ICmpInst::Predicate Pred2;
if (match(Op0, m_CombineAnd(m_Instruction(ShiftI),
m_Shr(m_Value(X),
m_SpecificIntAllowUndef(OpWidth - 1)))) &&
match(A, m_ICmp(Pred2, m_Value(Y), m_AllOnes())) &&
Pred2 == ICmpInst::ICMP_SGT && X->getType() == Y->getType()) {
unsigned ExtOpc = ExtI->getOpcode();
unsigned ShiftOpc = ShiftI->getOpcode();
if ((ExtOpc == Instruction::ZExt && ShiftOpc == Instruction::LShr) ||
(ExtOpc == Instruction::SExt && ShiftOpc == Instruction::AShr)) {
Value *Xor = Builder.CreateXor(X, Y, "xor.signbits");
Value *R = (Pred == ICmpInst::ICMP_EQ) ? Builder.CreateIsNeg(Xor)
: Builder.CreateIsNotNeg(Xor);
return replaceInstUsesWith(I, R);
}
}
}

// (A >> C) == (B >> C) --> (A^B) u< (1 << C)
// For lshr and ashr pairs.
const APInt *AP1, *AP2;
Expand Down Expand Up @@ -7194,6 +7165,33 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
if (Instruction *R = processUMulZExtIdiom(I, Op1, Op0, *this))
return R;
}

Value *X, *Y;
// Signbit test folds
// Fold (X u>> BitWidth - 1 Pred ZExt(i1)) --> X s< 0 Pred i1
// Fold (X s>> BitWidth - 1 Pred SExt(i1)) --> X s< 0 Pred i1
Instruction *ExtI;
if ((I.isUnsigned() || I.isEquality()) &&
match(Op1,
m_CombineAnd(m_Instruction(ExtI), m_ZExtOrSExt(m_Value(Y)))) &&
Y->getType()->getScalarSizeInBits() == 1 &&
(Op0->hasOneUse() || Op1->hasOneUse())) {
unsigned OpWidth = Op0->getType()->getScalarSizeInBits();
Instruction *ShiftI;
if (match(Op0, m_CombineAnd(m_Instruction(ShiftI),
m_Shr(m_Value(X), m_SpecificIntAllowUndef(
OpWidth - 1))))) {
unsigned ExtOpc = ExtI->getOpcode();
unsigned ShiftOpc = ShiftI->getOpcode();
if ((ExtOpc == Instruction::ZExt && ShiftOpc == Instruction::LShr) ||
(ExtOpc == Instruction::SExt && ShiftOpc == Instruction::AShr)) {
Value *SLTZero =
Builder.CreateICmpSLT(X, Constant::getNullValue(X->getType()));
Value *Cmp = Builder.CreateICmp(Pred, SLTZero, Y, I.getName());
return replaceInstUsesWith(I, Cmp);
}
}
}
}

if (Instruction *Res = foldICmpEquality(I))
Expand Down
161 changes: 118 additions & 43 deletions llvm/test/Transforms/InstCombine/icmp-shr.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1302,9 +1302,9 @@ define i1 @lshr_neg_sgt_zero(i8 %x) {

define i1 @exactly_one_set_signbit(i8 %x, i8 %y) {
; CHECK-LABEL: @exactly_one_set_signbit(
; CHECK-NEXT: [[XOR_SIGNBITS:%.*]] = xor i8 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[R:%.*]] = icmp slt i8 [[XOR_SIGNBITS]], 0
; CHECK-NEXT: ret i1 [[R]]
; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i8 [[TMP1]], 0
; CHECK-NEXT: ret i1 [[TMP2]]
;
%xsign = lshr i8 %x, 7
%ypos = icmp sgt i8 %y, -1
Expand All @@ -1317,9 +1317,9 @@ define i1 @exactly_one_set_signbit_use1(i8 %x, i8 %y) {
; CHECK-LABEL: @exactly_one_set_signbit_use1(
; CHECK-NEXT: [[XSIGN:%.*]] = lshr i8 [[X:%.*]], 7
; CHECK-NEXT: call void @use(i8 [[XSIGN]])
; CHECK-NEXT: [[XOR_SIGNBITS:%.*]] = xor i8 [[X]], [[Y:%.*]]
; CHECK-NEXT: [[R:%.*]] = icmp slt i8 [[XOR_SIGNBITS]], 0
; CHECK-NEXT: ret i1 [[R]]
; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X]], [[Y:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i8 [[TMP1]], 0
; CHECK-NEXT: ret i1 [[TMP2]]
;
%xsign = lshr i8 %x, 7
call void @use(i8 %xsign)
Expand All @@ -1331,9 +1331,9 @@ define i1 @exactly_one_set_signbit_use1(i8 %x, i8 %y) {

define <2 x i1> @same_signbit(<2 x i8> %x, <2 x i8> %y) {
; CHECK-LABEL: @same_signbit(
; CHECK-NEXT: [[XOR_SIGNBITS:%.*]] = xor <2 x i8> [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[R:%.*]] = icmp sgt <2 x i8> [[XOR_SIGNBITS]], <i8 -1, i8 -1>
; CHECK-NEXT: ret <2 x i1> [[R]]
; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[R1:%.*]] = icmp sgt <2 x i8> [[TMP1]], <i8 -1, i8 -1>
; CHECK-NEXT: ret <2 x i1> [[R1]]
;
%xsign = lshr <2 x i8> %x, <i8 7, i8 7>
%ypos = icmp sgt <2 x i8> %y, <i8 -1, i8 -1>
Expand All @@ -1347,9 +1347,9 @@ define i1 @same_signbit_use2(i8 %x, i8 %y) {
; CHECK-NEXT: [[YPOS:%.*]] = icmp sgt i8 [[Y:%.*]], -1
; CHECK-NEXT: [[YPOSZ:%.*]] = zext i1 [[YPOS]] to i8
; CHECK-NEXT: call void @use(i8 [[YPOSZ]])
; CHECK-NEXT: [[XOR_SIGNBITS:%.*]] = xor i8 [[X:%.*]], [[Y]]
; CHECK-NEXT: [[R:%.*]] = icmp sgt i8 [[XOR_SIGNBITS]], -1
; CHECK-NEXT: ret i1 [[R]]
; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X:%.*]], [[Y]]
; CHECK-NEXT: [[R1:%.*]] = icmp sgt i8 [[TMP1]], -1
; CHECK-NEXT: ret i1 [[R1]]
;
%xsign = lshr i8 %x, 7
%ypos = icmp sgt i8 %y, -1
Expand Down Expand Up @@ -1382,9 +1382,10 @@ define i1 @same_signbit_use3(i8 %x, i8 %y) {

define <2 x i1> @same_signbit_poison_elts(<2 x i8> %x, <2 x i8> %y) {
; CHECK-LABEL: @same_signbit_poison_elts(
; CHECK-NEXT: [[XOR_SIGNBITS:%.*]] = xor <2 x i8> [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[R:%.*]] = icmp sgt <2 x i8> [[XOR_SIGNBITS]], <i8 -1, i8 -1>
; CHECK-NEXT: ret <2 x i1> [[R]]
; CHECK-NEXT: [[YPOS:%.*]] = icmp sgt <2 x i8> [[Y:%.*]], <i8 -1, i8 poison>
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i8> [[X:%.*]], zeroinitializer
; CHECK-NEXT: [[R1:%.*]] = xor <2 x i1> [[TMP1]], [[YPOS]]
; CHECK-NEXT: ret <2 x i1> [[R1]]
;
%xsign = lshr <2 x i8> %x, <i8 7, i8 poison>
%ypos = icmp sgt <2 x i8> %y, <i8 -1, i8 poison>
Expand All @@ -1397,11 +1398,10 @@ define <2 x i1> @same_signbit_poison_elts(<2 x i8> %x, <2 x i8> %y) {

define i1 @same_signbit_wrong_type(i8 %x, i32 %y) {
; CHECK-LABEL: @same_signbit_wrong_type(
; CHECK-NEXT: [[XSIGN:%.*]] = lshr i8 [[X:%.*]], 7
; CHECK-NEXT: [[YPOS:%.*]] = icmp sgt i32 [[Y:%.*]], -1
; CHECK-NEXT: [[YPOSZ:%.*]] = zext i1 [[YPOS]] to i8
; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[XSIGN]], [[YPOSZ]]
; CHECK-NEXT: ret i1 [[R]]
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i8 [[X:%.*]], 0
; CHECK-NEXT: [[R1:%.*]] = xor i1 [[TMP1]], [[YPOS]]
; CHECK-NEXT: ret i1 [[R1]]
;
%xsign = lshr i8 %x, 7
%ypos = icmp sgt i32 %y, -1
Expand Down Expand Up @@ -1450,11 +1450,9 @@ define i1 @exactly_one_set_signbit_wrong_shr(i8 %x, i8 %y) {

define i1 @exactly_one_set_signbit_wrong_pred(i8 %x, i8 %y) {
; CHECK-LABEL: @exactly_one_set_signbit_wrong_pred(
; CHECK-NEXT: [[XSIGN:%.*]] = lshr i8 [[X:%.*]], 7
; CHECK-NEXT: [[YPOS:%.*]] = icmp sgt i8 [[Y:%.*]], -1
; CHECK-NEXT: [[YPOSZ:%.*]] = zext i1 [[YPOS]] to i8
; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[XSIGN]], [[YPOSZ]]
; CHECK-NEXT: ret i1 [[R]]
; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: [[R1:%.*]] = icmp slt i8 [[TMP1]], 0
; CHECK-NEXT: ret i1 [[R1]]
;
%xsign = lshr i8 %x, 7
%ypos = icmp sgt i8 %y, -1
Expand All @@ -1465,9 +1463,9 @@ define i1 @exactly_one_set_signbit_wrong_pred(i8 %x, i8 %y) {

define i1 @exactly_one_set_signbit_signed(i8 %x, i8 %y) {
; CHECK-LABEL: @exactly_one_set_signbit_signed(
; CHECK-NEXT: [[XOR_SIGNBITS:%.*]] = xor i8 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[R:%.*]] = icmp slt i8 [[XOR_SIGNBITS]], 0
; CHECK-NEXT: ret i1 [[R]]
; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i8 [[TMP1]], 0
; CHECK-NEXT: ret i1 [[TMP2]]
;
%xsign = ashr i8 %x, 7
%ypos = icmp sgt i8 %y, -1
Expand All @@ -1480,9 +1478,9 @@ define i1 @exactly_one_set_signbit_use1_signed(i8 %x, i8 %y) {
; CHECK-LABEL: @exactly_one_set_signbit_use1_signed(
; CHECK-NEXT: [[XSIGN:%.*]] = ashr i8 [[X:%.*]], 7
; CHECK-NEXT: call void @use(i8 [[XSIGN]])
; CHECK-NEXT: [[XOR_SIGNBITS:%.*]] = xor i8 [[X]], [[Y:%.*]]
; CHECK-NEXT: [[R:%.*]] = icmp slt i8 [[XOR_SIGNBITS]], 0
; CHECK-NEXT: ret i1 [[R]]
; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X]], [[Y:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i8 [[TMP1]], 0
; CHECK-NEXT: ret i1 [[TMP2]]
;
%xsign = ashr i8 %x, 7
call void @use(i8 %xsign)
Expand All @@ -1494,9 +1492,9 @@ define i1 @exactly_one_set_signbit_use1_signed(i8 %x, i8 %y) {

define <2 x i1> @same_signbit_signed(<2 x i8> %x, <2 x i8> %y) {
; CHECK-LABEL: @same_signbit_signed(
; CHECK-NEXT: [[XOR_SIGNBITS:%.*]] = xor <2 x i8> [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[R:%.*]] = icmp sgt <2 x i8> [[XOR_SIGNBITS]], <i8 -1, i8 -1>
; CHECK-NEXT: ret <2 x i1> [[R]]
; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[R1:%.*]] = icmp sgt <2 x i8> [[TMP1]], <i8 -1, i8 -1>
; CHECK-NEXT: ret <2 x i1> [[R1]]
;
%xsign = ashr <2 x i8> %x, <i8 7, i8 7>
%ypos = icmp sgt <2 x i8> %y, <i8 -1, i8 -1>
Expand All @@ -1510,9 +1508,9 @@ define i1 @same_signbit_use2_signed(i8 %x, i8 %y) {
; CHECK-NEXT: [[YPOS:%.*]] = icmp sgt i8 [[Y:%.*]], -1
; CHECK-NEXT: [[YPOSZ:%.*]] = sext i1 [[YPOS]] to i8
; CHECK-NEXT: call void @use(i8 [[YPOSZ]])
; CHECK-NEXT: [[XOR_SIGNBITS:%.*]] = xor i8 [[X:%.*]], [[Y]]
; CHECK-NEXT: [[R:%.*]] = icmp sgt i8 [[XOR_SIGNBITS]], -1
; CHECK-NEXT: ret i1 [[R]]
; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[X:%.*]], [[Y]]
; CHECK-NEXT: [[R1:%.*]] = icmp sgt i8 [[TMP1]], -1
; CHECK-NEXT: ret i1 [[R1]]
;
%xsign = ashr i8 %x, 7
%ypos = icmp sgt i8 %y, -1
Expand Down Expand Up @@ -1545,9 +1543,10 @@ define i1 @same_signbit_use3_signed(i8 %x, i8 %y) {

define <2 x i1> @same_signbit_poison_elts_signed(<2 x i8> %x, <2 x i8> %y) {
; CHECK-LABEL: @same_signbit_poison_elts_signed(
; CHECK-NEXT: [[XOR_SIGNBITS:%.*]] = xor <2 x i8> [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[R:%.*]] = icmp sgt <2 x i8> [[XOR_SIGNBITS]], <i8 -1, i8 -1>
; CHECK-NEXT: ret <2 x i1> [[R]]
; CHECK-NEXT: [[YPOS:%.*]] = icmp sgt <2 x i8> [[Y:%.*]], <i8 -1, i8 poison>
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i8> [[X:%.*]], zeroinitializer
; CHECK-NEXT: [[R1:%.*]] = xor <2 x i1> [[TMP1]], [[YPOS]]
; CHECK-NEXT: ret <2 x i1> [[R1]]
;
%xsign = ashr <2 x i8> %x, <i8 7, i8 poison>
%ypos = icmp sgt <2 x i8> %y, <i8 -1, i8 poison>
Expand All @@ -1560,11 +1559,10 @@ define <2 x i1> @same_signbit_poison_elts_signed(<2 x i8> %x, <2 x i8> %y) {

define i1 @same_signbit_wrong_type_signed(i8 %x, i32 %y) {
; CHECK-LABEL: @same_signbit_wrong_type_signed(
; CHECK-NEXT: [[XSIGN:%.*]] = ashr i8 [[X:%.*]], 7
; CHECK-NEXT: [[YPOS:%.*]] = icmp sgt i32 [[Y:%.*]], -1
; CHECK-NEXT: [[YPOSZ:%.*]] = sext i1 [[YPOS]] to i8
; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[XSIGN]], [[YPOSZ]]
; CHECK-NEXT: ret i1 [[R]]
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i8 [[X:%.*]], 0
; CHECK-NEXT: [[R1:%.*]] = xor i1 [[TMP1]], [[YPOS]]
; CHECK-NEXT: ret i1 [[R1]]
;
%xsign = ashr i8 %x, 7
%ypos = icmp sgt i32 %y, -1
Expand All @@ -1589,3 +1587,80 @@ define i1 @exactly_one_set_signbit_wrong_shamt_signed(i8 %x, i8 %y) {
%r = icmp eq i8 %xsign, %yposz
ret i1 %r
}

; Positive test: zext(i1 %b) u< (%a u>> 31), i.e. an unsigned compare of a
; zero-extended bool against the logically shifted-out sign bit of %a.
; The expected output tests the sign bit directly: (%a s< 0) & !%b.
define i1 @slt_zero_ult_i1(i32 %a, i1 %b) {
; CHECK-LABEL: @slt_zero_ult_i1(
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[A:%.*]], 0
; CHECK-NEXT: [[TMP2:%.*]] = xor i1 [[B:%.*]], true
; CHECK-NEXT: [[CMP21:%.*]] = and i1 [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret i1 [[CMP21]]
;
%conv = zext i1 %b to i32
%cmp1 = lshr i32 %a, 31
%cmp2 = icmp ult i32 %conv, %cmp1
ret i1 %cmp2
}

; Negative test: the shift amount is 30 rather than bitwidth - 1 (31), so the
; shifted value is not just the sign bit. The expected output keeps both the
; zext and the lshr; the compare is only commuted (ult -> ugt with swapped
; operands).
define i1 @slt_zero_ult_i1_fail1(i32 %a, i1 %b) {
; CHECK-LABEL: @slt_zero_ult_i1_fail1(
; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[B:%.*]] to i32
; CHECK-NEXT: [[CMP1:%.*]] = lshr i32 [[A:%.*]], 30
; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i32 [[CMP1]], [[CONV]]
; CHECK-NEXT: ret i1 [[CMP2]]
;
%conv = zext i1 %b to i32
%cmp1 = lshr i32 %a, 30
%cmp2 = icmp ult i32 %conv, %cmp1
ret i1 %cmp2
}

; Negative test: extension and shift kinds are mismatched (zext paired with
; ashr). The expected output keeps both the zext and the ashr; the compare is
; only commuted (ult -> ugt with swapped operands).
define i1 @slt_zero_ult_i1_fail2(i32 %a, i1 %b) {
; CHECK-LABEL: @slt_zero_ult_i1_fail2(
; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[B:%.*]] to i32
; CHECK-NEXT: [[CMP1:%.*]] = ashr i32 [[A:%.*]], 31
; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i32 [[CMP1]], [[CONV]]
; CHECK-NEXT: ret i1 [[CMP2]]
;
%conv = zext i1 %b to i32
%cmp1 = ashr i32 %a, 31
%cmp2 = icmp ult i32 %conv, %cmp1
ret i1 %cmp2
}

; Signed-predicate variant: zext(i1 %b) s< (%a u>> 31).
; NOTE(review): despite the `_fail` suffix, the expected output below is the
; same folded form as @slt_zero_ult_i1, (%a s< 0) & !%b. Presumably the signed
; compare is first canonicalized to an unsigned one because both operands are
; either 0 or 1 -- confirm against the pass before relying on this.
define i1 @slt_zero_slt_i1_fail(i32 %a, i1 %b) {
; CHECK-LABEL: @slt_zero_slt_i1_fail(
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[A:%.*]], 0
; CHECK-NEXT: [[TMP2:%.*]] = xor i1 [[B:%.*]], true
; CHECK-NEXT: [[CMP21:%.*]] = and i1 [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret i1 [[CMP21]]
;
%conv = zext i1 %b to i32
%cmp1 = lshr i32 %a, 31
%cmp2 = icmp slt i32 %conv, %cmp1
ret i1 %cmp2
}

; Positive test for the signed pairing: sext(i1 %b) == (%a s>> 31).
; The expected output drops the extension and the shift and works on the sign
; bit directly: (%a s> -1) ^ %b.
define i1 @slt_zero_eq_i1_signed(i32 %a, i1 %b) {
; CHECK-LABEL: @slt_zero_eq_i1_signed(
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[A:%.*]], -1
; CHECK-NEXT: [[CMP21:%.*]] = xor i1 [[TMP1]], [[B:%.*]]
; CHECK-NEXT: ret i1 [[CMP21]]
;
%conv = sext i1 %b to i32
%cmp1 = ashr i32 %a, 31
%cmp2 = icmp eq i32 %conv, %cmp1
ret i1 %cmp2
}

; Negative test: extension and shift kinds are mismatched (sext paired with
; lshr). The expected output keeps both the sext and the lshr; only the
; operand order of the equality compare changes.
define i1 @slt_zero_eq_i1_fail_signed(i32 %a, i1 %b) {
; CHECK-LABEL: @slt_zero_eq_i1_fail_signed(
; CHECK-NEXT: [[CONV:%.*]] = sext i1 [[B:%.*]] to i32
; CHECK-NEXT: [[CMP1:%.*]] = lshr i32 [[A:%.*]], 31
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[CMP1]], [[CONV]]
; CHECK-NEXT: ret i1 [[CMP2]]
;
%conv = sext i1 %b to i32
%cmp1 = lshr i32 %a, 31
%cmp2 = icmp eq i32 %conv, %cmp1
ret i1 %cmp2
}

11 comments on commit b22917e

@alexfh
Copy link
Contributor

@alexfh alexfh commented on b22917e Oct 28, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This commit breaks some of our code. An example on compiler explorer that shows a change in the behavior: https://gcc.godbolt.org/z/axb17MYMG

@alexfh
Copy link
Contributor

@alexfh alexfh commented on b22917e Oct 28, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The difference in the produced IR caused by this patch is:

@@ -15,38 +15,33 @@
   %7 = sub i64 %5, %6
   %8 = ashr exact i64 %7, 3
   %9 = icmp eq ptr %3, %4
-  br i1 %9, label %29, label %10
+  br i1 %9, label %24, label %10

 10:                                               ; preds = %1
   %11 = tail call i64 @llvm.umax.i64(i64 %8, i64 1)
   %12 = load i64, ptr %4, align 8, !tbaa !9
   %13 = icmp sgt i64 %12, -1
-  br i1 %13, label %14, label %29
+  br i1 %13, label %14, label %24

-14:                                               ; preds = %10, %19
-  %15 = phi i64 [ %23, %19 ], [ %12, %10 ]
-  %16 = phi i64 [ %17, %19 ], [ 0, %10 ]
-  %17 = add nuw i64 %16, 1
-  %18 = icmp eq i64 %17, %11
-  br i1 %18, label %27, label %19, !llvm.loop !11
-
-19:                                               ; preds = %14
-  %20 = getelementptr inbounds i64, ptr %4, i64 %17
-  %21 = load i64, ptr %20, align 8, !tbaa !9
-  %22 = ashr i64 %21, 63
-  %23 = add i64 %21, %15
-  %24 = icmp ult i64 %23, %15
-  %25 = sext i1 %24 to i64
-  %26 = icmp eq i64 %22, %25
-  br i1 %26, label %14, label %27, !llvm.loop !11
-
-27:                                               ; preds = %14, %19
-  %28 = icmp ugt i64 %8, %17
-  br label %29
-
-29:                                               ; preds = %27, %10, %1
-  %30 = phi i1 [ false, %1 ], [ true, %10 ], [ %28, %27 ]
-  ret i1 %30
+14:                                               ; preds = %10, %18
+  %15 = phi i64 [ %16, %18 ], [ 0, %10 ]
+  %16 = add nuw i64 %15, 1
+  %17 = icmp eq i64 %16, %11
+  br i1 %17, label %22, label %18, !llvm.loop !11
+
+18:                                               ; preds = %14
+  %19 = getelementptr inbounds i64, ptr %4, i64 %16
+  %20 = load i64, ptr %19, align 8, !tbaa !9
+  %21 = icmp sgt i64 %20, -1
+  br i1 %21, label %14, label %22, !llvm.loop !11
+
+22:                                               ; preds = %14, %18
+  %23 = icmp ugt i64 %8, %16
+  br label %24
+
+24:                                               ; preds = %22, %10, %1
+  %25 = phi i1 [ false, %1 ], [ true, %10 ], [ %23, %22 ]
+  ret i1 %25
 }

 ; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
@@ -63,30 +58,14 @@
   %4 = load volatile i64, ptr %3, align 8, !tbaa !9
   %5 = load volatile i64, ptr %3, align 8, !tbaa !9
   %6 = load volatile i64, ptr %3, align 8, !tbaa !9
-  %7 = icmp sgt i64 %4, -1
-  br i1 %7, label %8, label %21
-
-8:                                                ; preds = %2
-  %9 = ashr i64 %5, 63
-  %10 = add i64 %5, %4
-  %11 = icmp ult i64 %10, %4
-  %12 = sext i1 %11 to i64
-  %13 = icmp eq i64 %9, %12
-  br i1 %13, label %14, label %21, !llvm.loop !11
-
-14:                                               ; preds = %8
-  %15 = ashr i64 %6, 63
-  %16 = xor i64 %10, -1
-  %17 = icmp ugt i64 %6, %16
-  %18 = sext i1 %17 to i64
-  %19 = icmp ne i64 %15, %18
-  %20 = zext i1 %19 to i32
-  br label %21, !llvm.loop !11
-
-21:                                               ; preds = %14, %8, %2
-  %22 = phi i32 [ 1, %2 ], [ 1, %8 ], [ %20, %14 ]
+  %7 = icmp slt i64 %4, 0
+  %8 = icmp slt i64 %5, 0
+  %9 = select i1 %7, i1 true, i1 %8
+  %10 = icmp slt i64 %6, 0
+  %11 = select i1 %9, i1 true, i1 %10
+  %12 = zext i1 %11 to i32
   call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %3)
-  ret i32 %22
+  ret i32 %12
 }

 declare dso_local i32 @__gxx_personality_v0(...)
@@ -104,7 +83,7 @@

 !0 = !{i32 1, !"wchar_size", i32 4}
 !1 = !{i32 7, !"uwtable", i32 2}
-!2 = !{!"clang version (d2aa523f2a2efcffbc0485b3958c0cab772051b3)"}
+!2 = !{!"clang version (b22917e6e2a0aec05474f58e64b7e87d1ea0a054)"}
 !3 = !{!4, !5, i64 8}
 !4 = !{!"_ZTSNSt12_Vector_baseIlSaIlEE17_Vector_impl_dataE", !5, i64 0, !5, i64 8, !5, i64 16}
 !5 = !{!"any pointer", !6, i64 0}

@alexfh
Copy link
Contributor

@alexfh alexfh commented on b22917e Oct 30, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A self-contained test case without external headers: https://gcc.godbolt.org/z/51cscMc7e

The corresponding IR: https://gcc.godbolt.org/z/f9zWTh4Pa

source_filename = "in2.cc"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

%struct.uint128 = type { i64, i64 }

$_ZN7uint128cvnEv = comdat any

$_ZN7uint128C2En = comdat any

$_Z6ToU128ImEvT_P7uint128 = comdat any

$_Z6ToU128IlEvT_P7uint128 = comdat any

; Function Attrs: mustprogress uwtable
define dso_local { i64, i64 } @_Zpl7uint128S_(i64 %0, i64 %1, i64 %2, i64 %3) #0 {
  %5 = alloca %struct.uint128, align 8
  %6 = alloca %struct.uint128, align 8
  %7 = alloca %struct.uint128, align 8
  %8 = alloca i128, align 16
  %9 = alloca i128, align 16
  %10 = alloca i128, align 16
  %11 = getelementptr inbounds { i64, i64 }, ptr %6, i32 0, i32 0
  store i64 %0, ptr %11, align 8
  %12 = getelementptr inbounds { i64, i64 }, ptr %6, i32 0, i32 1
  store i64 %1, ptr %12, align 8
  %13 = getelementptr inbounds { i64, i64 }, ptr %7, i32 0, i32 0
  store i64 %2, ptr %13, align 8
  %14 = getelementptr inbounds { i64, i64 }, ptr %7, i32 0, i32 1
  store i64 %3, ptr %14, align 8
  %15 = call noundef { i64, i64 } @_ZN7uint128cvnEv(ptr noundef nonnull align 8 dereferenceable(16) %6)
  %16 = getelementptr inbounds { i64, i64 }, ptr %8, i32 0, i32 0
  %17 = extractvalue { i64, i64 } %15, 0
  store i64 %17, ptr %16, align 16
  %18 = getelementptr inbounds { i64, i64 }, ptr %8, i32 0, i32 1
  %19 = extractvalue { i64, i64 } %15, 1
  store i64 %19, ptr %18, align 8
  %20 = load i128, ptr %8, align 16, !tbaa !3
  %21 = call noundef { i64, i64 } @_ZN7uint128cvnEv(ptr noundef nonnull align 8 dereferenceable(16) %7)
  %22 = getelementptr inbounds { i64, i64 }, ptr %9, i32 0, i32 0
  %23 = extractvalue { i64, i64 } %21, 0
  store i64 %23, ptr %22, align 16
  %24 = getelementptr inbounds { i64, i64 }, ptr %9, i32 0, i32 1
  %25 = extractvalue { i64, i64 } %21, 1
  store i64 %25, ptr %24, align 8
  %26 = load i128, ptr %9, align 16, !tbaa !3
  %27 = add nsw i128 %20, %26
  store i128 %27, ptr %10, align 16, !tbaa !3
  %28 = getelementptr inbounds { i64, i64 }, ptr %10, i32 0, i32 0
  %29 = load i64, ptr %28, align 16
  %30 = getelementptr inbounds { i64, i64 }, ptr %10, i32 0, i32 1
  %31 = load i64, ptr %30, align 8
  call void @_ZN7uint128C2En(ptr noundef nonnull align 8 dereferenceable(16) %5, i64 noundef %29, i64 noundef %31)
  %32 = load { i64, i64 }, ptr %5, align 8
  ret { i64, i64 } %32
}
; Function Attrs: mustprogress nounwind uwtable
define linkonce_odr dso_local noundef { i64, i64 } @_ZN7uint128cvnEv(ptr noundef nonnull align 8 dereferenceable(16) %0) #1 comdat align 2 {
  %2 = alloca i128, align 16
  %3 = alloca ptr, align 8
  store ptr %0, ptr %3, align 8, !tbaa !7
  %4 = load ptr, ptr %3, align 8
  %5 = getelementptr inbounds %struct.uint128, ptr %4, i32 0, i32 1
  %6 = load i64, ptr %5, align 8, !tbaa !9
  %7 = zext i64 %6 to i128
  %8 = shl i128 %7, 64
  %9 = getelementptr inbounds %struct.uint128, ptr %4, i32 0, i32 0
  %10 = load i64, ptr %9, align 8, !tbaa !12
  %11 = zext i64 %10 to i128
  %12 = add nsw i128 %8, %11
  store i128 %12, ptr %2, align 16
  %13 = load { i64, i64 }, ptr %2, align 16
  ret { i64, i64 } %13
}

; Function Attrs: nounwind uwtable
define linkonce_odr dso_local void @_ZN7uint128C2En(ptr noundef nonnull align 8 dereferenceable(16) %0, i64 noundef %1, i64 noundef %2) unnamed_addr #2 comdat align 2 {
  %4 = alloca i128, align 16
  %5 = alloca ptr, align 8
  %6 = alloca i128, align 16
  %7 = getelementptr inbounds { i64, i64 }, ptr %4, i32 0, i32 0
  store i64 %1, ptr %7, align 16
  %8 = getelementptr inbounds { i64, i64 }, ptr %4, i32 0, i32 1
  store i64 %2, ptr %8, align 8
  %9 = load i128, ptr %4, align 16, !tbaa !3
  store ptr %0, ptr %5, align 8, !tbaa !7
  store i128 %9, ptr %6, align 16, !tbaa !3
  %10 = load ptr, ptr %5, align 8
  %11 = getelementptr inbounds %struct.uint128, ptr %10, i32 0, i32 0
  %12 = load i128, ptr %6, align 16, !tbaa !3
  %13 = trunc i128 %12 to i64
  store i64 %13, ptr %11, align 8, !tbaa !12
  %14 = getelementptr inbounds %struct.uint128, ptr %10, i32 0, i32 1
  %15 = load i128, ptr %6, align 16, !tbaa !3
  %16 = ashr i128 %15, 64
  %17 = trunc i128 %16 to i64
  store i64 %17, ptr %14, align 8, !tbaa !9
  ret void
}

; Function Attrs: mustprogress uwtable
define dso_local noundef zeroext i1 @_Z6Assign7uint128Pm(i64 %0, i64 %1, ptr noundef %2) #0 {
  %4 = alloca i1, align 1
  %5 = alloca %struct.uint128, align 8
  %6 = alloca ptr, align 8
  %7 = alloca i64, align 8
  %8 = alloca i128, align 16
  %9 = alloca i32, align 4
  %10 = getelementptr inbounds { i64, i64 }, ptr %5, i32 0, i32 0
  store i64 %0, ptr %10, align 8
  %11 = getelementptr inbounds { i64, i64 }, ptr %5, i32 0, i32 1
  store i64 %1, ptr %11, align 8
  store ptr %2, ptr %6, align 8, !tbaa !7
  call void @llvm.lifetime.start.p0(i64 8, ptr %7) #6
  %12 = call noundef { i64, i64 } @_ZN7uint128cvnEv(ptr noundef nonnull align 8 dereferenceable(16) %5)
  %13 = getelementptr inbounds { i64, i64 }, ptr %8, i32 0, i32 0
  %14 = extractvalue { i64, i64 } %12, 0
    store i64 %14, ptr %13, align 16
  %15 = getelementptr inbounds { i64, i64 }, ptr %8, i32 0, i32 1
  %16 = extractvalue { i64, i64 } %12, 1
  store i64 %16, ptr %15, align 8
  %17 = load i128, ptr %8, align 16, !tbaa !3
  %18 = trunc i128 %17 to i64
  store i64 %18, ptr %7, align 8, !tbaa !13
  %19 = getelementptr inbounds %struct.uint128, ptr %5, i32 0, i32 1
  %20 = load i64, ptr %19, align 8, !tbaa !9
  %21 = icmp eq i64 %20, 0
  br i1 %21, label %22, label %25

22:                                               ; preds = %3
  %23 = load i64, ptr %7, align 8, !tbaa !13
  %24 = load ptr, ptr %6, align 8, !tbaa !7
  store i64 %23, ptr %24, align 8, !tbaa !13
  store i1 true, ptr %4, align 1
  store i32 1, ptr %9, align 4
  br label %26

25:                                               ; preds = %3
  store i1 false, ptr %4, align 1
  store i32 1, ptr %9, align 4
  br label %26

26:                                               ; preds = %25, %22
  call void @llvm.lifetime.end.p0(i64 8, ptr %7) #6
  %27 = load i1, ptr %4, align 1
  ret i1 %27
}

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #3

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #3

; Function Attrs: mustprogress uwtable
define dso_local noundef zeroext i1 @_Z3AddmlPm(i64 noundef %0, i64 noundef %1, ptr noundef %2) #0 {
  %4 = alloca i64, align 8
  %5 = alloca i64, align 8
  %6 = alloca ptr, align 8
  %7 = alloca %struct.uint128, align 8
  %8 = alloca %struct.uint128, align 8
  %9 = alloca %struct.uint128, align 8
  %10 = alloca %struct.uint128, align 8
  %11 = alloca %struct.uint128, align 8
  %12 = alloca %struct.uint128, align 8
  store i64 %0, ptr %4, align 8, !tbaa !13
  store i64 %1, ptr %5, align 8, !tbaa !13
  store ptr %2, ptr %6, align 8, !tbaa !7
  call void @llvm.lifetime.start.p0(i64 16, ptr %7) #6
  call void @llvm.lifetime.start.p0(i64 16, ptr %8) #6
  %13 = load i64, ptr %4, align 8, !tbaa !13
  call void @_Z6ToU128ImEvT_P7uint128(i64 noundef %13, ptr noundef %7)
  %14 = load i64, ptr %5, align 8, !tbaa !13
  call void @_Z6ToU128IlEvT_P7uint128(i64 noundef %14, ptr noundef %8)
  call void @llvm.lifetime.start.p0(i64 16, ptr %9) #6
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %10, ptr align 8 %7, i64 16, i1 false), !tbaa.struct !14
    call void @llvm.memcpy.p0.p0.i64(ptr align 8 %11, ptr align 8 %8, i64 16, i1 false), !tbaa.struct !14
  %15 = getelementptr inbounds { i64, i64 }, ptr %10, i32 0, i32 0
  %16 = load i64, ptr %15, align 8
  %17 = getelementptr inbounds { i64, i64 }, ptr %10, i32 0, i32 1
  %18 = load i64, ptr %17, align 8
  %19 = getelementptr inbounds { i64, i64 }, ptr %11, i32 0, i32 0
  %20 = load i64, ptr %19, align 8
  %21 = getelementptr inbounds { i64, i64 }, ptr %11, i32 0, i32 1
  %22 = load i64, ptr %21, align 8
  %23 = call { i64, i64 } @_Zpl7uint128S_(i64 %16, i64 %18, i64 %20, i64 %22)
  %24 = getelementptr inbounds { i64, i64 }, ptr %9, i32 0, i32 0
  %25 = extractvalue { i64, i64 } %23, 0
  store i64 %25, ptr %24, align 8
  %26 = getelementptr inbounds { i64, i64 }, ptr %9, i32 0, i32 1
  %27 = extractvalue { i64, i64 } %23, 1
  store i64 %27, ptr %26, align 8
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %12, ptr align 8 %9, i64 16, i1 false), !tbaa.struct !14
  %28 = load ptr, ptr %6, align 8, !tbaa !7
  %29 = getelementptr inbounds { i64, i64 }, ptr %12, i32 0, i32 0
  %30 = load i64, ptr %29, align 8
  %31 = getelementptr inbounds { i64, i64 }, ptr %12, i32 0, i32 1
  %32 = load i64, ptr %31, align 8
  %33 = call noundef zeroext i1 @_Z6Assign7uint128Pm(i64 %30, i64 %32, ptr noundef %28)
  call void @llvm.lifetime.end.p0(i64 16, ptr %9) #6
  call void @llvm.lifetime.end.p0(i64 16, ptr %8) #6
  call void @llvm.lifetime.end.p0(i64 16, ptr %7) #6
  ret i1 %33
}

; Function Attrs: mustprogress uwtable
define linkonce_odr dso_local void @_Z6ToU128ImEvT_P7uint128(i64 noundef %0, ptr noundef %1) #0 comdat {
  %3 = alloca i64, align 8
  %4 = alloca ptr, align 8
  %5 = alloca %struct.uint128, align 8
  %6 = alloca i128, align 16
  store i64 %0, ptr %3, align 8, !tbaa !13
  store ptr %1, ptr %4, align 8, !tbaa !7
  call void @llvm.lifetime.start.p0(i64 16, ptr %5) #6
  %7 = load i64, ptr %3, align 8, !tbaa !13
  %8 = zext i64 %7 to i128
  store i128 %8, ptr %6, align 16, !tbaa !3
  %9 = getelementptr inbounds { i64, i64 }, ptr %6, i32 0, i32 0
  %10 = load i64, ptr %9, align 16
  %11 = getelementptr inbounds { i64, i64 }, ptr %6, i32 0, i32 1
  %12 = load i64, ptr %11, align 8
  call void @_ZN7uint128C2En(ptr noundef nonnull align 8 dereferenceable(16) %5, i64 noundef %10, i64 noundef %12)
  %13 = load ptr, ptr %4, align 8, !tbaa !7
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %13, ptr align 8 %5, i64 16, i1 false), !tbaa.struct !14
  call void @llvm.lifetime.end.p0(i64 16, ptr %5) #6
  ret void
}

; Function Attrs: mustprogress uwtable
define linkonce_odr dso_local void @_Z6ToU128IlEvT_P7uint128(i64 noundef %0, ptr noundef %1) #0 comdat {
  %3 = alloca i64, align 8
  %4 = alloca ptr, align 8
  %5 = alloca %struct.uint128, align 8
  %6 = alloca i128, align 16
  store i64 %0, ptr %3, align 8, !tbaa !13
  store ptr %1, ptr %4, align 8, !tbaa !7
  call void @llvm.lifetime.start.p0(i64 16, ptr %5) #6
    %7 = load i64, ptr %3, align 8, !tbaa !13
  %8 = sext i64 %7 to i128
  store i128 %8, ptr %6, align 16, !tbaa !3
  %9 = getelementptr inbounds { i64, i64 }, ptr %6, i32 0, i32 0
  %10 = load i64, ptr %9, align 16
  %11 = getelementptr inbounds { i64, i64 }, ptr %6, i32 0, i32 1
  %12 = load i64, ptr %11, align 8
  call void @_ZN7uint128C2En(ptr noundef nonnull align 8 dereferenceable(16) %5, i64 noundef %10, i64 noundef %12)
  %13 = load ptr, ptr %4, align 8, !tbaa !7
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %13, ptr align 8 %5, i64 16, i1 false), !tbaa.struct !14
  call void @llvm.lifetime.end.p0(i64 16, ptr %5) #6
  ret void
}

; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #4

; Function Attrs: mustprogress uwtable
define dso_local noundef zeroext i1 @_Z3CCCli(i64 noundef %0, i32 noundef %1) #0 {
  %3 = alloca i1, align 1
  %4 = alloca i64, align 8
  %5 = alloca i32, align 4
  %6 = alloca i64, align 8
  %7 = alloca i32, align 4
  %8 = alloca i32, align 4
  store i64 %0, ptr %4, align 8, !tbaa !13
  store i32 %1, ptr %5, align 4, !tbaa !15
  call void @llvm.lifetime.start.p0(i64 8, ptr %6) #6
  store i64 0, ptr %6, align 8, !tbaa !13
  call void @llvm.lifetime.start.p0(i64 4, ptr %7) #6
  store i32 0, ptr %7, align 4, !tbaa !15
  br label %9

9:                                                ; preds = %20, %2
  %10 = load i32, ptr %7, align 4, !tbaa !15
  %11 = load i32, ptr %5, align 4, !tbaa !15
  %12 = icmp slt i32 %10, %11
  br i1 %12, label %14, label %13

13:                                               ; preds = %9
  store i32 2, ptr %8, align 4
  br label %23

14:                                               ; preds = %9
  %15 = load i64, ptr %6, align 8, !tbaa !13
  %16 = load i64, ptr %4, align 8, !tbaa !13
  %17 = call noundef zeroext i1 @_Z3AddmlPm(i64 noundef %15, i64 noundef %16, ptr noundef %6)
  br i1 %17, label %19, label %18

18:                                               ; preds = %14
  store i1 true, ptr %3, align 1
  store i32 1, ptr %8, align 4
  br label %23

19:                                               ; preds = %14
  br label %20

20:                                               ; preds = %19
  %21 = load i32, ptr %7, align 4, !tbaa !15
    %22 = add nsw i32 %21, 1
  store i32 %22, ptr %7, align 4, !tbaa !15
  br label %9, !llvm.loop !17

23:                                               ; preds = %18, %13
  call void @llvm.lifetime.end.p0(i64 4, ptr %7) #6
  %24 = load i32, ptr %8, align 4
  switch i32 %24, label %26 [
    i32 2, label %25
  ]

25:                                               ; preds = %23
  store i1 false, ptr %3, align 1
  store i32 1, ptr %8, align 4
  br label %26

26:                                               ; preds = %25, %23
  call void @llvm.lifetime.end.p0(i64 8, ptr %6) #6
  %27 = load i1, ptr %3, align 1
  ret i1 %27
}

; Function Attrs: mustprogress norecurse uwtable
define dso_local noundef i32 @main(i32 noundef %0, ptr noundef %1) #5 {
  %3 = alloca i32, align 4
  %4 = alloca i32, align 4
  %5 = alloca ptr, align 8
  %6 = alloca i64, align 8
  store i32 0, ptr %3, align 4
  store i32 %0, ptr %4, align 4, !tbaa !15
  store ptr %1, ptr %5, align 8, !tbaa !7
  call void @llvm.lifetime.start.p0(i64 8, ptr %6) #6
  store volatile i64 9223372036854775807, ptr %6, align 8, !tbaa !13
  %7 = load volatile i64, ptr %6, align 8, !tbaa !13
  %8 = call noundef zeroext i1 @_Z3CCCli(i64 noundef %7, i32 noundef 3)
  %9 = zext i1 %8 to i32
  call void @llvm.lifetime.end.p0(i64 8, ptr %6) #6
  ret i32 %9
}

attributes #0 = { mustprogress uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #1 = { mustprogress nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #2 = { nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #3 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
attributes #4 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
attributes #5 = { mustprogress norecurse uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #6 = { nounwind }

!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"uwtable", i32 2}
!2 = !{!"clang version (d2aa523f2a2efcffbc0485b3958c0cab772051b3)"}
!3 = !{!4, !4, i64 0}
!4 = !{!"__int128", !5, i64 0}
!5 = !{!"omnipotent char", !6, i64 0}
!6 = !{!"Simple C++ TBAA"}
!7 = !{!8, !8, i64 0}
!8 = !{!"any pointer", !5, i64 0}
!9 = !{!10, !11, i64 8}
!10 = !{!"_ZTS7uint128", !11, i64 0, !11, i64 8}
!11 = !{!"long", !5, i64 0}
!12 = !{!10, !11, i64 0}
!13 = !{!11, !11, i64 0}
!14 = !{i64 0, i64 8, !13, i64 8, i64 8, !13}
!15 = !{!16, !16, i64 0}
!16 = !{!"int", !5, i64 0}
!17 = distinct !{!17, !18}
!18 = !{!"llvm.loop.mustprogress"}

@alexfh
Copy link
Contributor

@alexfh alexfh commented on b22917e Oct 30, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@XChy Please take a look!

@XChy
Copy link
Member Author

@XChy XChy commented on b22917e Oct 30, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

> @XChy Please take a look!

Sure, I'm investigating it now.

@XChy
Copy link
Member Author

@XChy XChy commented on b22917e Oct 30, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A self-contained test case without external headers: https://gcc.godbolt.org/z/51cscMc7e

The corresponding IR: https://gcc.godbolt.org/z/f9zWTh4Pa

Here is the proof of correctness of the transform (src is clang 17.0.0, tgt is trunk). It covers all the IR-level changes from your link, except those in main.

https://alive2.llvm.org/ce/z/B66Wha
https://alive2.llvm.org/ce/z/PMmyRj

As for the changes in main, Alive2 currently cannot handle lifetime intrinsics. But I don't think the transform there is correct, since it eliminates the overflow check.

What confuses me is that opt from trunk behaves correctly here: https://gcc.godbolt.org/z/nojq9qT6K
Maybe this commit exposes a bug in another optimization.

You could post an issue about it, and feel free to revert this commit.

@nikic
Copy link
Contributor

@nikic nikic commented on b22917e Oct 30, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the problematic transform is this one: https://alive2.llvm.org/ce/z/XUTETD. Alive2 says it's correct (even if I add -src-unroll and -tgt-unroll), but I don't get why it would be correct to remove the overflow check there...

@XChy
Copy link
Member Author

@XChy XChy commented on b22917e Oct 30, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

> I think the problematic transform is this one: https://alive2.llvm.org/ce/z/XUTETD alive2 says it's correct (even if I add -src-unroll and -tgt-unroll), but I don't get why it would be correct to remove the overflow check there...

Is that also a problem in Alive2? I think we should file an issue in Alive2's repo as well.

@nikic
Copy link
Contributor

@nikic nikic commented on b22917e Oct 30, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've opened an alive2 issue here: AliveToolkit/alive2#951

I think the JumpThreading problem is that computeValueKnownInPredecessorsImpl() will perform phi translation for one of the icmp operands, while keeping the other one, which means that they now refer to values from two different loop iterations.

@nikic
Copy link
Contributor

@nikic nikic commented on b22917e Oct 30, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've filed #70651 for the JumpThreading issue.

@alexfh
Copy link
Contributor

@alexfh alexfh commented on b22917e Oct 31, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@XChy @nikic thanks for the prompt reaction!

> You could post an issue about it and feel free to revert this commit.

That's a bit complicated at this point, because many later commits change the same files. I hope the fix @nikic proposed (#70664) will mitigate the issue, so that there's no need to revert.

Please sign in to comment.