From ae37d533e8c9bdc053ef58906305485b2b32503d Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 20 Oct 2025 10:56:26 +0100 Subject: [PATCH] [DAG] visitTRUNCATE - more aggressively fold trunc(add(x,x)) -> add(trunc(x),trunc(x)) We're very careful not to truncate binary arithmetic ops if it will affect legality, or cause additional truncation instructions, hence we limit this to cases where at least one operand is constant. But if both ops are the same (i.e. add/mul) then we wouldn't increase the number of truncations, so can be slightly more aggressive at folding the truncation. Alter SystemZ tests to avoid add(x,x) pattern --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 3 +- llvm/test/CodeGen/AArch64/avoid-pre-trunc.ll | 100 ++++++++---------- llvm/test/CodeGen/AArch64/zext-shuffle.ll | 6 +- llvm/test/CodeGen/SystemZ/int-conv-14.ll | 45 ++++---- llvm/test/CodeGen/SystemZ/int-conv-15.ll | 45 ++++---- 5 files changed, 98 insertions(+), 101 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 6bf9008c3d677..310d35d9b1d1e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -16433,7 +16433,8 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { case ISD::OR: case ISD::XOR: if (!LegalOperations && N0.hasOneUse() && - (isConstantOrConstantVector(N0.getOperand(0), true) || + (N0.getOperand(0) == N0.getOperand(1) || + isConstantOrConstantVector(N0.getOperand(0), true) || isConstantOrConstantVector(N0.getOperand(1), true))) { // TODO: We already restricted this to pre-legalization, but for vectors // we are extra cautious to not create an unsupported operation. 
diff --git a/llvm/test/CodeGen/AArch64/avoid-pre-trunc.ll b/llvm/test/CodeGen/AArch64/avoid-pre-trunc.ll index c4de177176e33..d7a2a83cf3660 100644 --- a/llvm/test/CodeGen/AArch64/avoid-pre-trunc.ll +++ b/llvm/test/CodeGen/AArch64/avoid-pre-trunc.ll @@ -5,32 +5,30 @@ define <16 x i8> @lower_trunc_16xi8(i16 %a, i16 %b, i16 %c, i16 %d, i16 %e, i16 ; CHECK-LABEL: lower_trunc_16xi8: ; CHECK: // %bb.0: ; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: ldr h1, [sp] +; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: mov v0.b[1], w1 +; CHECK-NEXT: mov v0.b[2], w2 +; CHECK-NEXT: mov v0.b[3], w3 +; CHECK-NEXT: mov v0.b[4], w4 +; CHECK-NEXT: mov v0.b[5], w5 +; CHECK-NEXT: mov v0.b[6], w6 +; CHECK-NEXT: mov v0.b[7], w7 +; CHECK-NEXT: ld1 { v0.b }[8], [x8] ; CHECK-NEXT: add x8, sp, #8 -; CHECK-NEXT: ld1 { v1.h }[1], [x8] +; CHECK-NEXT: ld1 { v0.b }[9], [x8] ; CHECK-NEXT: add x8, sp, #16 -; CHECK-NEXT: mov v0.h[1], w1 -; CHECK-NEXT: ld1 { v1.h }[2], [x8] +; CHECK-NEXT: ld1 { v0.b }[10], [x8] ; CHECK-NEXT: add x8, sp, #24 -; CHECK-NEXT: mov v0.h[2], w2 -; CHECK-NEXT: ld1 { v1.h }[3], [x8] +; CHECK-NEXT: ld1 { v0.b }[11], [x8] ; CHECK-NEXT: add x8, sp, #32 -; CHECK-NEXT: mov v0.h[3], w3 -; CHECK-NEXT: ld1 { v1.h }[4], [x8] +; CHECK-NEXT: ld1 { v0.b }[12], [x8] ; CHECK-NEXT: add x8, sp, #40 -; CHECK-NEXT: ld1 { v1.h }[5], [x8] +; CHECK-NEXT: ld1 { v0.b }[13], [x8] ; CHECK-NEXT: add x8, sp, #48 -; CHECK-NEXT: mov v0.h[4], w4 -; CHECK-NEXT: ld1 { v1.h }[6], [x8] +; CHECK-NEXT: ld1 { v0.b }[14], [x8] ; CHECK-NEXT: add x8, sp, #56 -; CHECK-NEXT: mov v0.h[5], w5 -; CHECK-NEXT: ld1 { v1.h }[7], [x8] -; CHECK-NEXT: mov v0.h[6], w6 -; CHECK-NEXT: add v2.8h, v1.8h, v1.8h -; CHECK-NEXT: mov v0.h[7], w7 -; CHECK-NEXT: add v3.8h, v0.8h, v0.8h -; CHECK-NEXT: uzp1 v0.16b, v0.16b, v1.16b -; CHECK-NEXT: uzp1 v1.16b, v3.16b, v2.16b +; CHECK-NEXT: ld1 { v0.b }[15], [x8] +; CHECK-NEXT: add v1.16b, v0.16b, v0.16b ; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %a1 = insertelement <16 x i16> poison, i16 %a, i16 
0 @@ -59,18 +57,15 @@ define <16 x i8> @lower_trunc_16xi8(i16 %a, i16 %b, i16 %c, i16 %d, i16 %e, i16 define <8 x i16> @lower_trunc_8xi16(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) { ; CHECK-LABEL: lower_trunc_8xi16: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov s0, w4 -; CHECK-NEXT: fmov s1, w0 -; CHECK-NEXT: mov v0.s[1], w5 -; CHECK-NEXT: mov v1.s[1], w1 -; CHECK-NEXT: mov v0.s[2], w6 -; CHECK-NEXT: mov v1.s[2], w2 -; CHECK-NEXT: mov v0.s[3], w7 -; CHECK-NEXT: mov v1.s[3], w3 -; CHECK-NEXT: add v2.4s, v0.4s, v0.4s -; CHECK-NEXT: add v3.4s, v1.4s, v1.4s -; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h -; CHECK-NEXT: uzp1 v1.8h, v3.8h, v2.8h +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: mov v0.h[1], w1 +; CHECK-NEXT: mov v0.h[2], w2 +; CHECK-NEXT: mov v0.h[3], w3 +; CHECK-NEXT: mov v0.h[4], w4 +; CHECK-NEXT: mov v0.h[5], w5 +; CHECK-NEXT: mov v0.h[6], w6 +; CHECK-NEXT: mov v0.h[7], w7 +; CHECK-NEXT: add v1.8h, v0.8h, v0.8h ; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %a1 = insertelement <8 x i32> poison, i32 %a, i32 0 @@ -91,14 +86,11 @@ define <8 x i16> @lower_trunc_8xi16(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 define <4 x i32> @lower_trunc_4xi32(i64 %a, i64 %b, i64 %c, i64 %d) { ; CHECK-LABEL: lower_trunc_4xi32: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov d0, x2 -; CHECK-NEXT: fmov d1, x0 -; CHECK-NEXT: mov v0.d[1], x3 -; CHECK-NEXT: mov v1.d[1], x1 -; CHECK-NEXT: add v2.2d, v0.2d, v0.2d -; CHECK-NEXT: add v3.2d, v1.2d, v1.2d -; CHECK-NEXT: uzp1 v0.4s, v1.4s, v0.4s -; CHECK-NEXT: uzp1 v1.4s, v3.4s, v2.4s +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: mov v0.s[1], w1 +; CHECK-NEXT: mov v0.s[2], w2 +; CHECK-NEXT: mov v0.s[3], w3 +; CHECK-NEXT: add v1.4s, v0.4s, v0.4s ; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %a1 = insertelement <4 x i64> poison, i64 %a, i64 0 @@ -115,24 +107,20 @@ define <4 x i32> @lower_trunc_4xi32(i64 %a, i64 %b, i64 %c, i64 %d) { define <8 x i32> @lower_trunc_8xi32(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g, 
i64 %h) { ; CHECK-LABEL: lower_trunc_8xi32: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov d0, x2 -; CHECK-NEXT: fmov d1, x0 -; CHECK-NEXT: fmov d2, x6 -; CHECK-NEXT: fmov d3, x4 -; CHECK-NEXT: mov v0.d[1], x3 -; CHECK-NEXT: mov v1.d[1], x1 -; CHECK-NEXT: mov v2.d[1], x7 -; CHECK-NEXT: mov v3.d[1], x5 -; CHECK-NEXT: add v4.2d, v0.2d, v0.2d -; CHECK-NEXT: add v5.2d, v1.2d, v1.2d -; CHECK-NEXT: add v6.2d, v2.2d, v2.2d -; CHECK-NEXT: add v7.2d, v3.2d, v3.2d +; CHECK-NEXT: fmov d0, x6 +; CHECK-NEXT: fmov d1, x4 +; CHECK-NEXT: fmov d2, x2 +; CHECK-NEXT: fmov d3, x0 +; CHECK-NEXT: mov v0.d[1], x7 +; CHECK-NEXT: mov v1.d[1], x5 +; CHECK-NEXT: mov v2.d[1], x3 +; CHECK-NEXT: mov v3.d[1], x1 +; CHECK-NEXT: uzp1 v1.4s, v1.4s, v0.4s ; CHECK-NEXT: uzp1 v2.4s, v3.4s, v2.4s -; CHECK-NEXT: uzp1 v0.4s, v1.4s, v0.4s -; CHECK-NEXT: uzp1 v3.4s, v5.4s, v4.4s -; CHECK-NEXT: uzp1 v1.4s, v7.4s, v6.4s -; CHECK-NEXT: eor v0.16b, v0.16b, v3.16b -; CHECK-NEXT: eor v1.16b, v2.16b, v1.16b +; CHECK-NEXT: add v3.4s, v1.4s, v1.4s +; CHECK-NEXT: add v0.4s, v2.4s, v2.4s +; CHECK-NEXT: eor v1.16b, v1.16b, v3.16b +; CHECK-NEXT: eor v0.16b, v2.16b, v0.16b ; CHECK-NEXT: ret %a1 = insertelement <8 x i64> poison, i64 %a, i64 0 %b1 = insertelement <8 x i64> %a1, i64 %b, i64 1 diff --git a/llvm/test/CodeGen/AArch64/zext-shuffle.ll b/llvm/test/CodeGen/AArch64/zext-shuffle.ll index 20d2071d7fe54..a0d4e18acb6c8 100644 --- a/llvm/test/CodeGen/AArch64/zext-shuffle.ll +++ b/llvm/test/CodeGen/AArch64/zext-shuffle.ll @@ -674,10 +674,8 @@ define <4 x i32> @isUndefDeInterleave_t1_bad(<8 x i16> %a) { define i16 @undeftop(<8 x i16> %0) { ; CHECK-LABEL: undeftop: ; CHECK: // %bb.0: -; CHECK-NEXT: dup v0.8h, v0.h[4] -; CHECK-NEXT: uaddl v0.4s, v0.4h, v0.4h -; CHECK-NEXT: xtn v0.4h, v0.4s -; CHECK-NEXT: umov w0, v0.h[0] +; CHECK-NEXT: add v0.8h, v0.8h, v0.8h +; CHECK-NEXT: umov w0, v0.h[4] ; CHECK-NEXT: ret %2 = shufflevector <8 x i16> %0, <8 x i16> zeroinitializer, <8 x i32> %3 = zext <8 x i16> %2 to <8 x i64> diff --git 
a/llvm/test/CodeGen/SystemZ/int-conv-14.ll b/llvm/test/CodeGen/SystemZ/int-conv-14.ll index 98dc88f289620..baab5ac7f4b5c 100644 --- a/llvm/test/CodeGen/SystemZ/int-conv-14.ll +++ b/llvm/test/CodeGen/SystemZ/int-conv-14.ll @@ -55,14 +55,15 @@ define i128 @f4(ptr %ptr) { } ; Truncation to i64. -define i64 @f5(i128 %a) { +define i64 @f5(i128 %a, i128 %b) { ; CHECK-LABEL: f5: ; CHECK: # %bb.0: -; CHECK-NEXT: vl %v0, 0(%r2), 3 -; CHECK-NEXT: vaq %v0, %v0, %v0 +; CHECK-NEXT: vl %v0, 0(%r3), 3 +; CHECK-NEXT: vl %v1, 0(%r2), 3 +; CHECK-NEXT: vaq %v0, %v1, %v0 ; CHECK-NEXT: vlgvg %r2, %v0, 1 ; CHECK-NEXT: br %r14 - %op = add i128 %a, %a + %op = add i128 %a, %b %res = trunc i128 %op to i64 ret i64 %res } @@ -134,15 +135,16 @@ define i128 @f10(ptr %ptr) { } ; Truncation to i32. -define i32 @f11(i128 %a) { +define i32 @f11(i128 %a, i128 %b) { ; CHECK-LABEL: f11: ; CHECK: # %bb.0: -; CHECK-NEXT: vl %v0, 0(%r2), 3 -; CHECK-NEXT: vaq %v0, %v0, %v0 +; CHECK-NEXT: vl %v0, 0(%r3), 3 +; CHECK-NEXT: vl %v1, 0(%r2), 3 +; CHECK-NEXT: vaq %v0, %v1, %v0 ; CHECK-NEXT: vlgvf %r2, %v0, 3 ; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d ; CHECK-NEXT: br %r14 - %op = add i128 %a, %a + %op = add i128 %a, %b %res = trunc i128 %op to i32 ret i32 %res } @@ -215,15 +217,16 @@ define i128 @f16(ptr %ptr) { } ; Truncation to i16. -define i16 @f17(i128 %a) { +define i16 @f17(i128 %a, i128 %b) { ; CHECK-LABEL: f17: ; CHECK: # %bb.0: -; CHECK-NEXT: vl %v0, 0(%r2), 3 -; CHECK-NEXT: vaq %v0, %v0, %v0 +; CHECK-NEXT: vl %v0, 0(%r3), 3 +; CHECK-NEXT: vl %v1, 0(%r2), 3 +; CHECK-NEXT: vaq %v0, %v1, %v0 ; CHECK-NEXT: vlgvf %r2, %v0, 3 ; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d ; CHECK-NEXT: br %r14 - %op = add i128 %a, %a + %op = add i128 %a, %b %res = trunc i128 %op to i16 ret i16 %res } @@ -296,15 +299,16 @@ define i128 @f22(ptr %ptr) { } ; Truncation to i8. 
-define i8 @f23(i128 %a) { +define i8 @f23(i128 %a, i128 %b) { ; CHECK-LABEL: f23: ; CHECK: # %bb.0: -; CHECK-NEXT: vl %v0, 0(%r2), 3 -; CHECK-NEXT: vaq %v0, %v0, %v0 +; CHECK-NEXT: vl %v0, 0(%r3), 3 +; CHECK-NEXT: vl %v1, 0(%r2), 3 +; CHECK-NEXT: vaq %v0, %v1, %v0 ; CHECK-NEXT: vlgvf %r2, %v0, 3 ; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d ; CHECK-NEXT: br %r14 - %op = add i128 %a, %a + %op = add i128 %a, %b %res = trunc i128 %op to i8 ret i8 %res } @@ -385,15 +389,16 @@ define i128 @f28(ptr %ptr) { } ; Truncation to i1. -define i1 @f29(i128 %a) { +define i1 @f29(i128 %a, i128 %b) { ; CHECK-LABEL: f29: ; CHECK: # %bb.0: -; CHECK-NEXT: vl %v0, 0(%r2), 3 -; CHECK-NEXT: vaq %v0, %v0, %v0 +; CHECK-NEXT: vl %v0, 0(%r3), 3 +; CHECK-NEXT: vl %v1, 0(%r2), 3 +; CHECK-NEXT: vaq %v0, %v1, %v0 ; CHECK-NEXT: vlgvf %r2, %v0, 3 ; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d ; CHECK-NEXT: br %r14 - %op = add i128 %a, %a + %op = add i128 %a, %b %res = trunc i128 %op to i1 ret i1 %res } diff --git a/llvm/test/CodeGen/SystemZ/int-conv-15.ll b/llvm/test/CodeGen/SystemZ/int-conv-15.ll index 0d8ee75b10b85..f2c9ee5fa1f57 100644 --- a/llvm/test/CodeGen/SystemZ/int-conv-15.ll +++ b/llvm/test/CodeGen/SystemZ/int-conv-15.ll @@ -55,14 +55,15 @@ define i128 @f4(ptr %ptr) { } ; Truncation to i64. -define i64 @f5(i128 %a) { +define i64 @f5(i128 %a, i128 %b) { ; CHECK-LABEL: f5: ; CHECK: # %bb.0: -; CHECK-NEXT: vl %v0, 0(%r2), 3 -; CHECK-NEXT: vaq %v0, %v0, %v0 +; CHECK-NEXT: vl %v0, 0(%r3), 3 +; CHECK-NEXT: vl %v1, 0(%r2), 3 +; CHECK-NEXT: vaq %v0, %v1, %v0 ; CHECK-NEXT: vlgvg %r2, %v0, 1 ; CHECK-NEXT: br %r14 - %op = add i128 %a, %a + %op = add i128 %a, %b %res = trunc i128 %op to i64 ret i64 %res } @@ -134,15 +135,16 @@ define i128 @f10(ptr %ptr) { } ; Truncation to i32. 
-define i32 @f11(i128 %a) { +define i32 @f11(i128 %a, i128 %b) { ; CHECK-LABEL: f11: ; CHECK: # %bb.0: -; CHECK-NEXT: vl %v0, 0(%r2), 3 -; CHECK-NEXT: vaq %v0, %v0, %v0 +; CHECK-NEXT: vl %v0, 0(%r3), 3 +; CHECK-NEXT: vl %v1, 0(%r2), 3 +; CHECK-NEXT: vaq %v0, %v1, %v0 ; CHECK-NEXT: vlgvf %r2, %v0, 3 ; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d ; CHECK-NEXT: br %r14 - %op = add i128 %a, %a + %op = add i128 %a, %b %res = trunc i128 %op to i32 ret i32 %res } @@ -215,15 +217,16 @@ define i128 @f16(ptr %ptr) { } ; Truncation to i16. -define i16 @f17(i128 %a) { +define i16 @f17(i128 %a, i128 %b) { ; CHECK-LABEL: f17: ; CHECK: # %bb.0: -; CHECK-NEXT: vl %v0, 0(%r2), 3 -; CHECK-NEXT: vaq %v0, %v0, %v0 +; CHECK-NEXT: vl %v0, 0(%r3), 3 +; CHECK-NEXT: vl %v1, 0(%r2), 3 +; CHECK-NEXT: vaq %v0, %v1, %v0 ; CHECK-NEXT: vlgvf %r2, %v0, 3 ; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d ; CHECK-NEXT: br %r14 - %op = add i128 %a, %a + %op = add i128 %a, %b %res = trunc i128 %op to i16 ret i16 %res } @@ -296,15 +299,16 @@ define i128 @f22(ptr %ptr) { } ; Truncation to i8. -define i8 @f23(i128 %a) { +define i8 @f23(i128 %a, i128 %b) { ; CHECK-LABEL: f23: ; CHECK: # %bb.0: -; CHECK-NEXT: vl %v0, 0(%r2), 3 -; CHECK-NEXT: vaq %v0, %v0, %v0 +; CHECK-NEXT: vl %v0, 0(%r3), 3 +; CHECK-NEXT: vl %v1, 0(%r2), 3 +; CHECK-NEXT: vaq %v0, %v1, %v0 ; CHECK-NEXT: vlgvf %r2, %v0, 3 ; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d ; CHECK-NEXT: br %r14 - %op = add i128 %a, %a + %op = add i128 %a, %b %res = trunc i128 %op to i8 ret i8 %res } @@ -383,15 +387,16 @@ define i128 @f28(ptr %ptr) { } ; Truncation to i1. 
-define i1 @f29(i128 %a) { +define i1 @f29(i128 %a, i128 %b) { ; CHECK-LABEL: f29: ; CHECK: # %bb.0: -; CHECK-NEXT: vl %v0, 0(%r2), 3 -; CHECK-NEXT: vaq %v0, %v0, %v0 +; CHECK-NEXT: vl %v0, 0(%r3), 3 +; CHECK-NEXT: vl %v1, 0(%r2), 3 +; CHECK-NEXT: vaq %v0, %v1, %v0 ; CHECK-NEXT: vlgvf %r2, %v0, 3 ; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d ; CHECK-NEXT: br %r14 - %op = add i128 %a, %a + %op = add i128 %a, %b %res = trunc i128 %op to i1 ret i1 %res }