[IndVarSimplify] WRONG code with i128 and -replexitval=always #83268

JonPsson1 · 2024-02-28T14:38:55Z

This program should print -86:

int printf(const char *, ...);

unsigned short a = 0, b = 0;
unsigned __int128 c = 0, d = 0;

unsigned char e(unsigned char f, unsigned char g) { return f + g; }

long h() {
  for (; c != 2; c = e(c, 3)) {
    --d;
    if (d)
      continue;
    return a;
  }
  return b;
}

int main() {
  h();
  printf("%d\n", (int)d);
}

With the option -replexitval=always, the IndVarSimplifyPass does something quite weird that I don't understand. It seems it is trying to compute the final value that is stored in the preheader, but that computation is '0', so it must be wrong.

clang -O3 -march=z15 wrong0.i -o ./a.out <> clang -O3 -march=z15 wrong0.i -o ./a.out -mllvm -replexitval=always

IR Dump After IndVarSimplifyPass on for.body

define dso_local i64 @h() local_unnamed_addr #1 {               define dso_local i64 @h() local_unnamed_addr #1 {
entry:                                                          entry:
  %.pr = load i128, ptr @c, align 8, !tbaa !4                     %.pr = load i128, ptr @c, align 8, !tbaa !4
  %d.promoted = load i128, ptr @d, align 1, !tbaa !4              %d.promoted = load i128, ptr @d, align 1, !tbaa !4
  %cmp.not5 = icmp eq i128 %.pr, 2                                %cmp.not5 = icmp eq i128 %.pr, 2
  %extract.t9 = trunc i128 %.pr to i8                             %extract.t9 = trunc i128 %.pr to i8
  br i1 %cmp.not5, label %return, label %for.body.prehea          br i1 %cmp.not5, label %return, label %for.body.prehea

for.body.preheader:                               ; pred        for.body.preheader:                               ; pred
                                                           >      %0 = add i128 %d.promoted, -1
                                                           >      %1 = mul i8 %extract.t9, 85
                                                           >      %2 = add i8 %1, 85
                                                           >      %3 = zext i8 %2 to i128
                                                           >      %umin = call i128 @llvm.umin.i128(i128 %0, i128 %3)
                                                           >      %4 = sub i128 %0, %umin
  br label %for.body                                              br label %for.body

for.body:                                         ; pred        for.body:                                         ; pred
  %.off0 = phi i8 [ %add.i, %for.inc ], [ %extract.t9, %          %.off0 = phi i8 [ %add.i, %for.inc ], [ %extract.t9, %
  %dec46 = phi i128 [ %dec, %for.inc ], [ %d.promoted, %          %dec46 = phi i128 [ %dec, %for.inc ], [ %d.promoted, %
  %dec = add i128 %dec46, -1                                      %dec = add i128 %dec46, -1
  %tobool.not = icmp eq i128 %dec, 0                              %tobool.not = icmp eq i128 %dec, 0
  br i1 %tobool.not, label %for.body.return_crit_edge, l          br i1 %tobool.not, label %for.body.return_crit_edge, l

for.inc:                                          ; pred        for.inc:                                          ; pred
  %add.i = add i8 %.off0, 3                                       %add.i = add i8 %.off0, 3
  %conv2 = zext i8 %add.i to i128                                 %conv2 = zext i8 %add.i to i128
  store i128 %conv2, ptr @c, align 8, !tbaa !4                    store i128 %conv2, ptr @c, align 8, !tbaa !4
  %cmp.not = icmp eq i8 %add.i, 2                                 %cmp.not = icmp eq i8 %add.i, 2
  br i1 %cmp.not, label %for.cond.return_crit_edge, labe          br i1 %cmp.not, label %for.cond.return_crit_edge, labe

for.cond.return_crit_edge:                        ; pred        for.cond.return_crit_edge:                        ; pred
  %dec.lcssa10 = phi i128 [ %dec, %for.inc ]               |      store i128 %4, ptr @d, align 8, !tbaa !4
  store i128 %dec.lcssa10, ptr @d, align 8, !tbaa !4       <
  br label %return                                                br label %return

for.body.return_crit_edge:                        ; pred        for.body.return_crit_edge:                        ; pred
  %dec.lcssa = phi i128 [ %dec, %for.body ]                |      store i128 %4, ptr @d, align 8, !tbaa !4
  store i128 %dec.lcssa, ptr @d, align 8, !tbaa !4         <
  br label %return                                                br label %return

return:                                           ; pred        return:                                           ; pred
  %retval.0.in.in = phi ptr [ @a, %for.body.return_crit_          %retval.0.in.in = phi ptr [ @a, %for.body.return_crit_
  %retval.0.in = load i16, ptr %retval.0.in.in, align 2,          %retval.0.in = load i16, ptr %retval.0.in.in, align 2,
  %retval.0 = zext i16 %retval.0.in to i64                        %retval.0 = zext i16 %retval.0.in to i64
  ret i64 %retval.0                                               ret i64 %retval.0
}                                                               }
;

@nikic @xortator @uweigand

The text was updated successfully, but these errors were encountered:

nikic · 2024-03-13T15:50:02Z

Are you sure IndVarSimplify is at fault here? Everything looks correct to me here:

; %extract.t9 = i8 0
; %d.promoted = i128 0
%0 = add i128 %d.promoted, -1 ; %0 = i128 -1
%1 = mul i8 %extract.t9, 85 ; %1 = i8 0
%2 = add i8 %1, 85 ; %2 = i8 85
%3 = zext i8 %2 to i128 ; %3 = i128 85
%umin = call i128 @llvm.umin.i128(i128 %0, i128 %3) ; %umin = i128 85
%4 = sub i128 %0, %umin ; %4 = i128 -86

Maybe this just exposes an s390x backend problem? I can't reproduce this on x86.

JonPsson1 · 2024-03-17T14:50:47Z

Are you sure IndVarSimplify is at fault here? Everything looks correct to me here:

; %extract.t9 = i8 0
; %d.promoted = i128 0
%0 = add i128 %d.promoted, -1 ; %0 = i128 -1
%1 = mul i8 %extract.t9, 85 ; %1 = i8 0
%2 = add i8 %1, 85 ; %2 = i8 85
%3 = zext i8 %2 to i128 ; %3 = i128 85
%umin = call i128 @llvm.umin.i128(i128 %0, i128 %3) ; %umin = i128 85
%4 = sub i128 %0, %umin ; %4 = i128 -86

Maybe this just exposes an s390x backend problem? I can't reproduce this on x86.

Ah! I think you are actually right (thanks for helping out) - this could be an i128 related problem in the backend. If I used "llc -zEC12" for the backend part and then assemled/linked, it prints the correct result. If I instead do "llc -z13" it prints 0 (wrong).

I think it is going wrong in the preheader:

for.body.preheader.i:                             ; preds = %entry
  %extract.t6.i = trunc i128 %.pr.i to i8
  %d.promoted.i = load i128, ptr @d, align 8
  %0 = add i128 %d.promoted.i, -1
  %1 = mul i8 %extract.t6.i, 85
  %2 = add i8 %1, 85
  %3 = zext i8 %2 to i128
  %4 = tail call i128 @llvm.usub.sat.i128(i128 %0, i128 %3)
  br label %for.body.i

=>  # *** IR Dump After SystemZ DAG->DAG Pattern Instruction Selection (systemz-isel) ***:

bb.2.for.body.preheader.i:
; predecessors: %bb.0
  successors: %bb.3(0x80000000); %bb.3(100.00%)

  %15:gr64bit = VLGVF %0:vr128bit, $noreg, 3
  %2:gr32bit = COPY %15.subreg_l32:gr64bit
  %16:addr64bit = LARL @d
  %3:vr128bit = VL killed %16:addr64bit, 0, $noreg :: (dereferenceable load (s128) from @d, align 8)
  %17:vr128bit = VGBM 65535
  %18:vr128bit = VAQ %3:vr128bit, killed %17:vr128bit
  %19:gr32bit = MHI %2:gr32bit(tied-def 0), 85
  %20:gr32bit = AHIMuxK killed %19:gr32bit, 85, implicit-def dead $cc
  %21:vr128bit = VGBM 0
  %22:vr128bit = VLVGB %21:vr128bit(tied-def 0), killed %20:gr32bit, $noreg, 15
  %23:vr128bit = VSQ %18:vr128bit, %22:vr128bit
  %24:vr128bit = VSCBIQ %18:vr128bit, %22:vr128bit
  %25:gr64bit = VLGVF killed %24:vr128bit, $noreg, 3
  %26:grx32bit = COPY %25.subreg_l32:gr64bit
  CHIMux killed %26:grx32bit, 0, implicit-def $cc
  %27:vr128bit = VGBM 0
  %4:vr128bit = Select128 killed %27:vr128bit, killed %23:vr128bit, 14, 6, implicit $cc

=> 

.LBB2_2:                                # %for.body.preheader.i
        vlgvf   %r0, %v0, 3       # 0
        larl    %r1, d
        vl      %v0, 0(%r1), 3    # 0
        lr      %r1, %r0          # 0
        mhi     %r1, 85           # 0
        vgbm    %v1, 65535        # -1
        vgbm    %v5, 0            # 0
        vaq     %v4, %v0, %v1     # -1
        ahi     %r1, 85           # 85
        vlvgb   %v5, %r1, 15      # 85
        vscbiq  %v3, %v4, %v5     # 1
        vlgvf   %r1, %v3, 3       # 1
        vgbm    %v2, 0
        vgbm    %v3, 0
        cijlh   %r1, 0, .LBB2_4   # %r1 = 1 => %v3 = 0 at .LBB2_4   WRONG
# %bb.3:                                # %for.body.preheader.i
        vsq     %v3, %v4, %v5
.LBB2_4:                                # %for.body.preheader.i
        larl    %r1, c

llc -mtriple=s390x-linux-gnu -mcpu=z13 -O3 wrong0.ll -o wrong0.s
wrong0.tar.gz

CC @uweigand

uweigand · 2024-03-18T11:23:52Z

Ah, I see. Common code is replacing a USUBSAT with a USUBO and is inspecting the overflow result. We're currently generating that overflow result via the VSCBIQ instruction for i128, but that actually has inverted semantics: it sets the result to 1 if there was no carry out from the subtraction, and it sets the result to 0 if there was a carry out. Common code expects it exactly the other way round.

Current code still works when feeding the result of a USUBO into a USUBO_CARRY, because both use the same inverted sense. But if LLVM common code inspects the overflow result for its own purposes, this will break. I guess we'll have to emit explicit inversion operations for USUBO and USUBO_CARRY (which should cancel out when used in combination). I'll have a look.

uweigand · 2024-03-18T19:42:50Z

Hi @JonPsson1, can you check whether this patch fixes the problem for you? Thanks!
diff-usubo-fix.txt

JonPsson1 · 2024-03-18T20:50:28Z

Hi @JonPsson1, can you check whether this patch fixes the problem for you? Thanks! diff-usubo-fix.txt

@uweigand Yes, this fixes my test case so that it now produces the expected result.

uweigand · 2024-03-19T10:21:25Z

Committed as d9c31ee.

uweigand · 2024-03-19T10:51:52Z

Committed as d9c31ee.

Sorry, had to revert after a mis-applied merge. Will re-test and reapply.

We use the VSCBIQ/VSBIQ/VSBCBIQ family of instructions to implement USUBO/USUBO_CARRY for the i128 data type. However, these instructions use an inverted sense of the borrow indication flag (a value of 1 indicates *no* borrow, while a value of 0 indicated borrow). This does not match the semantics of the boolean "overflow" flag of the USUBO/USUBO_CARRY ISD nodes. Fix this by generating code to explicitly invert the flag. These cancel out of the result of USUBO feeds into an USUBO_CARRY. To avoid unnecessary zero-extend operations, also improve the DAGCombine handling of ZERO_EXTEND to optimize (zext (xor (trunc))) sequences where appropriate. Fixes: #83268

JonPsson1 · 2024-03-20T13:11:11Z

@uweigand @tstellar backport bugfix to llvmorg-18.1.2..?

uweigand · 2024-03-22T18:06:12Z

Agreed, this should be backported.

We use the VSCBIQ/VSBIQ/VSBCBIQ family of instructions to implement USUBO/USUBO_CARRY for the i128 data type. However, these instructions use an inverted sense of the borrow indication flag (a value of 1 indicates *no* borrow, while a value of 0 indicated borrow). This does not match the semantics of the boolean "overflow" flag of the USUBO/USUBO_CARRY ISD nodes. Fix this by generating code to explicitly invert the flag. These cancel out of the result of USUBO feeds into an USUBO_CARRY. To avoid unnecessary zero-extend operations, also improve the DAGCombine handling of ZERO_EXTEND to optimize (zext (xor (trunc))) sequences where appropriate. Fixes: llvm#83268

tstellar · 2024-03-24T04:35:55Z

What was the commit hash of the re-applied version?

uweigand · 2024-03-24T19:03:14Z

/cherry-pick 335f365

llvmbot · 2024-03-24T19:09:50Z

Failed to cherry-pick: 335f365

https://github.com/llvm/llvm-project/actions/runs/8411476158

Please manually backport the fix and push it to your github fork. Once this is done, please create a pull request

uweigand · 2024-03-25T08:19:37Z

/cherry-pick d9c31ee

We use the VSCBIQ/VSBIQ/VSBCBIQ family of instructions to implement USUBO/USUBO_CARRY for the i128 data type. However, these instructions use an inverted sense of the borrow indication flag (a value of 1 indicates *no* borrow, while a value of 0 indicated borrow). This does not match the semantics of the boolean "overflow" flag of the USUBO/USUBO_CARRY ISD nodes. Fix this by generating code to explicitly invert the flag. These cancel out of the result of USUBO feeds into an USUBO_CARRY. To avoid unnecessary zero-extend operations, also improve the DAGCombine handling of ZERO_EXTEND to optimize (zext (xor (trunc))) sequences where appropriate. Fixes: llvm#83268 (cherry picked from commit d9c31ee)

llvmbot · 2024-03-25T08:24:41Z

/pull-request #86475

We use the VSCBIQ/VSBIQ/VSBCBIQ family of instructions to implement USUBO/USUBO_CARRY for the i128 data type. However, these instructions use an inverted sense of the borrow indication flag (a value of 1 indicates *no* borrow, while a value of 0 indicated borrow). This does not match the semantics of the boolean "overflow" flag of the USUBO/USUBO_CARRY ISD nodes. Fix this by generating code to explicitly invert the flag. These cancel out of the result of USUBO feeds into an USUBO_CARRY. To avoid unnecessary zero-extend operations, also improve the DAGCombine handling of ZERO_EXTEND to optimize (zext (xor (trunc))) sequences where appropriate. Fixes: llvm#83268

uweigand · 2024-03-25T11:50:51Z

Manual backport PR at #86491

We use the VSCBIQ/VSBIQ/VSBCBIQ family of instructions to implement USUBO/USUBO_CARRY for the i128 data type. However, these instructions use an inverted sense of the borrow indication flag (a value of 1 indicates *no* borrow, while a value of 0 indicated borrow). This does not match the semantics of the boolean "overflow" flag of the USUBO/USUBO_CARRY ISD nodes. Fix this by generating code to explicitly invert the flag. These cancel out of the result of USUBO feeds into an USUBO_CARRY. To avoid unnecessary zero-extend operations, also improve the DAGCombine handling of ZERO_EXTEND to optimize (zext (xor (trunc))) sequences where appropriate. Fixes: #83268

JonPsson1 added miscompilation loopoptim labels Feb 28, 2024

JonPsson1 mentioned this issue Mar 17, 2024

[IndVarSimplify] WRONG code #78051

Closed

uweigand closed this as completed in d9c31ee Mar 19, 2024

EugeneZelenko added backend:SystemZ and removed loopoptim labels Mar 19, 2024

tstellar added this to the LLVM 18.X Release milestone Mar 20, 2024

llvmbot added the release:cherry-pick-failed label Mar 24, 2024

EugeneZelenko added the release:backport label Mar 24, 2024

llvmbot removed the release:cherry-pick-failed label Mar 25, 2024

uweigand mentioned this issue Mar 25, 2024

release/18.x: [SystemZ] Fix overflow flag for i128 USUBO #86491

Merged

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[IndVarSimplify] WRONG code with i128 and -replexitval=always #83268

[IndVarSimplify] WRONG code with i128 and -replexitval=always #83268

JonPsson1 commented Feb 28, 2024

nikic commented Mar 13, 2024 •

edited

JonPsson1 commented Mar 17, 2024

uweigand commented Mar 18, 2024

uweigand commented Mar 18, 2024

JonPsson1 commented Mar 18, 2024 •

edited

uweigand commented Mar 19, 2024

uweigand commented Mar 19, 2024

JonPsson1 commented Mar 20, 2024

uweigand commented Mar 22, 2024

tstellar commented Mar 24, 2024

uweigand commented Mar 24, 2024

llvmbot commented Mar 24, 2024

uweigand commented Mar 25, 2024

llvmbot commented Mar 25, 2024

uweigand commented Mar 25, 2024

[IndVarSimplify] WRONG code with i128 and -replexitval=always #83268

[IndVarSimplify] WRONG code with i128 and -replexitval=always #83268

Comments

JonPsson1 commented Feb 28, 2024

nikic commented Mar 13, 2024 • edited

JonPsson1 commented Mar 17, 2024

uweigand commented Mar 18, 2024

uweigand commented Mar 18, 2024

JonPsson1 commented Mar 18, 2024 • edited

uweigand commented Mar 19, 2024

uweigand commented Mar 19, 2024

JonPsson1 commented Mar 20, 2024

uweigand commented Mar 22, 2024

tstellar commented Mar 24, 2024

uweigand commented Mar 24, 2024

llvmbot commented Mar 24, 2024

uweigand commented Mar 25, 2024

llvmbot commented Mar 25, 2024

uweigand commented Mar 25, 2024

nikic commented Mar 13, 2024 •

edited

JonPsson1 commented Mar 18, 2024 •

edited