-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[DAGCombiner] Set shift flags during visit. #91239
base: main
Are you sure you want to change the base?
Conversation
goldsteinn
commented
May 6, 2024
- [CodeGen] Regen some old tests; NFC
- [DAGCombiner] Set shift flags during visit.
This is basically a direct port of what we have in InstCombine. The goal is to reduce the need to essentially re-implement these checks whenever one of the flags needs to be tested. Leaving as draft as the diffs are pretty bad.
@llvm/pr-subscribers-backend-webassembly @llvm/pr-subscribers-backend-x86 Author: None (goldsteinn) Changes
Patch is 6.14 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/91239.diff 393 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 05ab6e2e48206f..a182fbdde4f17b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9738,6 +9738,64 @@ SDValue DAGCombiner::visitRotate(SDNode *N) {
return SDValue();
}
+static SDValue setShiftFlags(SelectionDAG &DAG, const SDLoc &DL, SDNode *N) {
+ unsigned Opc = N->getOpcode();
+ assert((Opc == ISD::SHL || Opc == ISD::SRA || Opc == ISD::SRL) &&
+ "Unknown shift opcode");
+ SDNodeFlags Flags = N->getFlags();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ // Check if we already have the flags.
+ if (Opc == ISD::SHL) {
+ if (Flags.hasNoSignedWrap() && Flags.hasNoUnsignedWrap())
+ return SDValue();
+
+ } else {
+ if (Flags.hasExact())
+ return SDValue();
+
+ // shr (shl X, Y), Y
+ if (sd_match(N0, m_Shl(m_Value(), m_Specific(N1)))) {
+ Flags.setExact(true);
+ return DAG.getNode(Opc, DL, VT, N0, N1, Flags);
+ }
+ }
+
+ // Compute what we know about shift count.
+ KnownBits KnownCnt = DAG.computeKnownBits(N1);
+ // Compute what we know about shift amt.
+ KnownBits KnownAmt = DAG.computeKnownBits(N0);
+ APInt MaxCnt = KnownCnt.getMaxValue();
+ bool Changed = false;
+ if (Opc == ISD::SHL) {
+ // If we have at least as many leading zeros as the maximum shift count, we have nuw.
+ if (!Flags.hasNoUnsignedWrap() &&
+ MaxCnt.ule(KnownAmt.countMinLeadingZeros())) {
+ Flags.setNoUnsignedWrap(true);
+ Changed = true;
+ }
+ // If we have more sign bits than the maximum shift count, we have nsw.
+ if (!Flags.hasNoSignedWrap()) {
+ if (MaxCnt.ult(KnownAmt.countMinSignBits()) ||
+ MaxCnt.ult(DAG.ComputeNumSignBits(N0))) {
+ Flags.setNoSignedWrap(true);
+ Changed = true;
+ }
+ }
+ } else {
+ // If we have at least as many trailing zeros as maximum count then we have
+ // exact.
+ Changed = MaxCnt.ule(KnownAmt.countMinTrailingZeros());
+ Flags.setExact(Changed);
+ }
+
+ if (Changed)
+ return DAG.getNode(Opc, DL, VT, N0, N1, Flags);
+ return SDValue();
+}
+
SDValue DAGCombiner::visitSHL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -9745,6 +9803,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
return V;
SDLoc DL(N);
+ if (SDValue V = setShiftFlags(DAG, DL, N))
+ return V;
+
EVT VT = N0.getValueType();
EVT ShiftVT = N1.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
@@ -9895,7 +9956,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
LHSC.getZExtValue() <= RHSC.getZExtValue();
};
-
+
// fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
// fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 >= C2
if (N0->getFlags().hasExact()) {
@@ -10188,6 +10249,9 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
return V;
SDLoc DL(N);
+ if (SDValue V = setShiftFlags(DAG, DL, N))
+ return V;
+
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
@@ -10389,6 +10453,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return V;
SDLoc DL(N);
+ if (SDValue V = setShiftFlags(DAG, DL, N))
+ return V;
EVT VT = N0.getValueType();
EVT ShiftVT = N1.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
@@ -10638,6 +10704,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return SDValue();
}
+
+
SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
diff --git a/llvm/test/CodeGen/AArch64/DAGCombine_vscale.ll b/llvm/test/CodeGen/AArch64/DAGCombine_vscale.ll
index 71f4da2b465c13..26f41f4d98c5cc 100644
--- a/llvm/test/CodeGen/AArch64/DAGCombine_vscale.ll
+++ b/llvm/test/CodeGen/AArch64/DAGCombine_vscale.ll
@@ -7,8 +7,8 @@
define <vscale x 4 x i32> @sext_inreg(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sext_inreg:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: sxth z0.s, p0/m, z0.s
+; CHECK-NEXT: lsl z0.s, z0.s, #16
+; CHECK-NEXT: asr z0.s, z0.s, #16
; CHECK-NEXT: ret
%in = insertelement <vscale x 4 x i32> undef, i32 16, i32 0
%splat = shufflevector <vscale x 4 x i32> %in, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/AArch64/aarch64-address-type-promotion.ll b/llvm/test/CodeGen/AArch64/aarch64-address-type-promotion.ll
index d8280dadc550ea..da29a480959394 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-address-type-promotion.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-address-type-promotion.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s -o - | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64"
@@ -5,13 +6,18 @@ target triple = "arm64-apple-macosx10.9"
; Check that sexts get promoted above adds.
define void @foo(ptr nocapture %a, i32 %i) {
+; CHECK-LABEL: foo:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxtw x8, w1
+; CHECK-NEXT: add x9, x8, #1
+; CHECK-NEXT: add x8, x8, #2
+; CHECK-NEXT: ldr w9, [x0, x9, lsl #2]
+; CHECK-NEXT: ldr w8, [x0, x8, lsl #2]
+; CHECK-NEXT: add w8, w8, w9
+; CHECK-NEXT: str w8, [x0, w1, sxtw #2]
+; CHECK-NEXT: ret
entry:
-; CHECK-LABEL: _foo:
-; CHECK: add
-; CHECK-NEXT: ldp
-; CHECK-NEXT: add
-; CHECK-NEXT: str
-; CHECK-NEXT: ret
%add = add nsw i32 %i, 1
%idxprom = sext i32 %add to i64
%arrayidx = getelementptr inbounds i32, ptr %a, i64 %idxprom
diff --git a/llvm/test/CodeGen/AArch64/addsub.ll b/llvm/test/CodeGen/AArch64/addsub.ll
index 20215fe9146924..fed1747c23e1c9 100644
--- a/llvm/test/CodeGen/AArch64/addsub.ll
+++ b/llvm/test/CodeGen/AArch64/addsub.ll
@@ -376,7 +376,7 @@ define i1 @uadd_add(i8 %a, i8 %b, ptr %p) {
; CHECK-NEXT: mov w8, #255 // =0xff
; CHECK-NEXT: bic w8, w8, w0
; CHECK-NEXT: add w8, w8, w1, uxtb
-; CHECK-NEXT: lsr w0, w8, #8
+; CHECK-NEXT: ubfx w0, w8, #8, #1
; CHECK-NEXT: add w8, w8, #1
; CHECK-NEXT: strb w8, [x2]
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/arm64-narrow-st-merge.ll b/llvm/test/CodeGen/AArch64/arm64-narrow-st-merge.ll
index 81c3195584701c..033ac301d7abe1 100644
--- a/llvm/test/CodeGen/AArch64/arm64-narrow-st-merge.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-narrow-st-merge.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s -mtriple aarch64 -verify-machineinstrs | FileCheck %s
; RUN: llc < %s -mtriple aarch64 -mattr=+strict-align -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-STRICT
@@ -7,6 +8,23 @@
; CHECK-STRICT: strh wzr
; CHECK-STRICT: strh wzr
define void @Strh_zero(ptr nocapture %P, i32 %n) {
+; CHECK-LABEL: Strh_zero:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxtw x8, w1
+; CHECK-NEXT: strh wzr, [x0, w1, sxtw #1]
+; CHECK-NEXT: add x8, x8, #1
+; CHECK-NEXT: strh wzr, [x0, x8, lsl #1]
+; CHECK-NEXT: ret
+;
+; CHECK-STRICT-LABEL: Strh_zero:
+; CHECK-STRICT: // %bb.0: // %entry
+; CHECK-STRICT-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-STRICT-NEXT: sxtw x8, w1
+; CHECK-STRICT-NEXT: strh wzr, [x0, w1, sxtw #1]
+; CHECK-STRICT-NEXT: add x8, x8, #1
+; CHECK-STRICT-NEXT: strh wzr, [x0, x8, lsl #1]
+; CHECK-STRICT-NEXT: ret
entry:
%idxprom = sext i32 %n to i64
%arrayidx = getelementptr inbounds i16, ptr %P, i64 %idxprom
@@ -26,6 +44,31 @@ entry:
; CHECK-STRICT: strh wzr
; CHECK-STRICT: strh wzr
define void @Strh_zero_4(ptr nocapture %P, i32 %n) {
+; CHECK-LABEL: Strh_zero_4:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxtw x8, w1
+; CHECK-NEXT: strh wzr, [x0, w1, sxtw #1]
+; CHECK-NEXT: add x9, x8, #1
+; CHECK-NEXT: add x10, x8, #2
+; CHECK-NEXT: add x8, x8, #3
+; CHECK-NEXT: strh wzr, [x0, x9, lsl #1]
+; CHECK-NEXT: strh wzr, [x0, x10, lsl #1]
+; CHECK-NEXT: strh wzr, [x0, x8, lsl #1]
+; CHECK-NEXT: ret
+;
+; CHECK-STRICT-LABEL: Strh_zero_4:
+; CHECK-STRICT: // %bb.0: // %entry
+; CHECK-STRICT-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-STRICT-NEXT: sxtw x8, w1
+; CHECK-STRICT-NEXT: strh wzr, [x0, w1, sxtw #1]
+; CHECK-STRICT-NEXT: add x9, x8, #1
+; CHECK-STRICT-NEXT: add x10, x8, #2
+; CHECK-STRICT-NEXT: add x8, x8, #3
+; CHECK-STRICT-NEXT: strh wzr, [x0, x9, lsl #1]
+; CHECK-STRICT-NEXT: strh wzr, [x0, x10, lsl #1]
+; CHECK-STRICT-NEXT: strh wzr, [x0, x8, lsl #1]
+; CHECK-STRICT-NEXT: ret
entry:
%idxprom = sext i32 %n to i64
%arrayidx = getelementptr inbounds i16, ptr %P, i64 %idxprom
@@ -50,6 +93,23 @@ entry:
; CHECK-STRICT-LABEL: Strw_zero
; CHECK-STRICT: stp wzr, wzr
define void @Strw_zero(ptr nocapture %P, i32 %n) {
+; CHECK-LABEL: Strw_zero:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxtw x8, w1
+; CHECK-NEXT: str wzr, [x0, w1, sxtw #2]
+; CHECK-NEXT: add x8, x8, #1
+; CHECK-NEXT: str wzr, [x0, x8, lsl #2]
+; CHECK-NEXT: ret
+;
+; CHECK-STRICT-LABEL: Strw_zero:
+; CHECK-STRICT: // %bb.0: // %entry
+; CHECK-STRICT-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-STRICT-NEXT: sxtw x8, w1
+; CHECK-STRICT-NEXT: str wzr, [x0, w1, sxtw #2]
+; CHECK-STRICT-NEXT: add x8, x8, #1
+; CHECK-STRICT-NEXT: str wzr, [x0, x8, lsl #2]
+; CHECK-STRICT-NEXT: ret
entry:
%idxprom = sext i32 %n to i64
%arrayidx = getelementptr inbounds i32, ptr %P, i64 %idxprom
@@ -64,6 +124,23 @@ entry:
; CHECK-LABEL: Strw_zero_nonzero
; CHECK: stp wzr, w1
define void @Strw_zero_nonzero(ptr nocapture %P, i32 %n) {
+; CHECK-LABEL: Strw_zero_nonzero:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxtw x8, w1
+; CHECK-NEXT: str wzr, [x0, w1, sxtw #2]
+; CHECK-NEXT: add x8, x8, #1
+; CHECK-NEXT: str w1, [x0, x8, lsl #2]
+; CHECK-NEXT: ret
+;
+; CHECK-STRICT-LABEL: Strw_zero_nonzero:
+; CHECK-STRICT: // %bb.0: // %entry
+; CHECK-STRICT-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-STRICT-NEXT: sxtw x8, w1
+; CHECK-STRICT-NEXT: str wzr, [x0, w1, sxtw #2]
+; CHECK-STRICT-NEXT: add x8, x8, #1
+; CHECK-STRICT-NEXT: str w1, [x0, x8, lsl #2]
+; CHECK-STRICT-NEXT: ret
entry:
%idxprom = sext i32 %n to i64
%arrayidx = getelementptr inbounds i32, ptr %P, i64 %idxprom
@@ -81,6 +158,31 @@ entry:
; CHECK-STRICT: stp wzr, wzr
; CHECK-STRICT: stp wzr, wzr
define void @Strw_zero_4(ptr nocapture %P, i32 %n) {
+; CHECK-LABEL: Strw_zero_4:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxtw x8, w1
+; CHECK-NEXT: str wzr, [x0, w1, sxtw #2]
+; CHECK-NEXT: add x9, x8, #1
+; CHECK-NEXT: add x10, x8, #2
+; CHECK-NEXT: add x8, x8, #3
+; CHECK-NEXT: str wzr, [x0, x9, lsl #2]
+; CHECK-NEXT: str wzr, [x0, x10, lsl #2]
+; CHECK-NEXT: str wzr, [x0, x8, lsl #2]
+; CHECK-NEXT: ret
+;
+; CHECK-STRICT-LABEL: Strw_zero_4:
+; CHECK-STRICT: // %bb.0: // %entry
+; CHECK-STRICT-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-STRICT-NEXT: sxtw x8, w1
+; CHECK-STRICT-NEXT: str wzr, [x0, w1, sxtw #2]
+; CHECK-STRICT-NEXT: add x9, x8, #1
+; CHECK-STRICT-NEXT: add x10, x8, #2
+; CHECK-STRICT-NEXT: add x8, x8, #3
+; CHECK-STRICT-NEXT: str wzr, [x0, x9, lsl #2]
+; CHECK-STRICT-NEXT: str wzr, [x0, x10, lsl #2]
+; CHECK-STRICT-NEXT: str wzr, [x0, x8, lsl #2]
+; CHECK-STRICT-NEXT: ret
entry:
%idxprom = sext i32 %n to i64
%arrayidx = getelementptr inbounds i32, ptr %P, i64 %idxprom
@@ -106,6 +208,18 @@ entry:
; CHECK-STRICT: sturb wzr
; CHECK-STRICT: sturb wzr
define void @Sturb_zero(ptr nocapture %P, i32 %n) #0 {
+; CHECK-LABEL: Sturb_zero:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: add x8, x0, w1, sxtw
+; CHECK-NEXT: sturh wzr, [x8, #-2]
+; CHECK-NEXT: ret
+;
+; CHECK-STRICT-LABEL: Sturb_zero:
+; CHECK-STRICT: // %bb.0: // %entry
+; CHECK-STRICT-NEXT: add x8, x0, w1, sxtw
+; CHECK-STRICT-NEXT: sturb wzr, [x8, #-2]
+; CHECK-STRICT-NEXT: sturb wzr, [x8, #-1]
+; CHECK-STRICT-NEXT: ret
entry:
%sub = add nsw i32 %n, -2
%idxprom = sext i32 %sub to i64
@@ -124,6 +238,25 @@ entry:
; CHECK-STRICT: sturh wzr
; CHECK-STRICT: sturh wzr
define void @Sturh_zero(ptr nocapture %P, i32 %n) {
+; CHECK-LABEL: Sturh_zero:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxtw x8, w1
+; CHECK-NEXT: sub x9, x8, #2
+; CHECK-NEXT: sub x8, x8, #3
+; CHECK-NEXT: strh wzr, [x0, x9, lsl #1]
+; CHECK-NEXT: strh wzr, [x0, x8, lsl #1]
+; CHECK-NEXT: ret
+;
+; CHECK-STRICT-LABEL: Sturh_zero:
+; CHECK-STRICT: // %bb.0: // %entry
+; CHECK-STRICT-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-STRICT-NEXT: sxtw x8, w1
+; CHECK-STRICT-NEXT: sub x9, x8, #2
+; CHECK-STRICT-NEXT: sub x8, x8, #3
+; CHECK-STRICT-NEXT: strh wzr, [x0, x9, lsl #1]
+; CHECK-STRICT-NEXT: strh wzr, [x0, x8, lsl #1]
+; CHECK-STRICT-NEXT: ret
entry:
%sub = add nsw i32 %n, -2
%idxprom = sext i32 %sub to i64
@@ -144,6 +277,33 @@ entry:
; CHECK-STRICT: sturh wzr
; CHECK-STRICT: sturh wzr
define void @Sturh_zero_4(ptr nocapture %P, i32 %n) {
+; CHECK-LABEL: Sturh_zero_4:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxtw x8, w1
+; CHECK-NEXT: sub x9, x8, #3
+; CHECK-NEXT: sub x10, x8, #4
+; CHECK-NEXT: strh wzr, [x0, x9, lsl #1]
+; CHECK-NEXT: sub x9, x8, #2
+; CHECK-NEXT: sub x8, x8, #1
+; CHECK-NEXT: strh wzr, [x0, x10, lsl #1]
+; CHECK-NEXT: strh wzr, [x0, x9, lsl #1]
+; CHECK-NEXT: strh wzr, [x0, x8, lsl #1]
+; CHECK-NEXT: ret
+;
+; CHECK-STRICT-LABEL: Sturh_zero_4:
+; CHECK-STRICT: // %bb.0: // %entry
+; CHECK-STRICT-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-STRICT-NEXT: sxtw x8, w1
+; CHECK-STRICT-NEXT: sub x9, x8, #3
+; CHECK-STRICT-NEXT: sub x10, x8, #4
+; CHECK-STRICT-NEXT: strh wzr, [x0, x9, lsl #1]
+; CHECK-STRICT-NEXT: sub x9, x8, #2
+; CHECK-STRICT-NEXT: sub x8, x8, #1
+; CHECK-STRICT-NEXT: strh wzr, [x0, x10, lsl #1]
+; CHECK-STRICT-NEXT: strh wzr, [x0, x9, lsl #1]
+; CHECK-STRICT-NEXT: strh wzr, [x0, x8, lsl #1]
+; CHECK-STRICT-NEXT: ret
entry:
%sub = add nsw i32 %n, -3
%idxprom = sext i32 %sub to i64
@@ -169,6 +329,25 @@ entry:
; CHECK-STRICT-LABEL: Sturw_zero
; CHECK-STRICT: stp wzr, wzr
define void @Sturw_zero(ptr nocapture %P, i32 %n) {
+; CHECK-LABEL: Sturw_zero:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxtw x8, w1
+; CHECK-NEXT: sub x9, x8, #3
+; CHECK-NEXT: sub x8, x8, #4
+; CHECK-NEXT: str wzr, [x0, x9, lsl #2]
+; CHECK-NEXT: str wzr, [x0, x8, lsl #2]
+; CHECK-NEXT: ret
+;
+; CHECK-STRICT-LABEL: Sturw_zero:
+; CHECK-STRICT: // %bb.0: // %entry
+; CHECK-STRICT-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-STRICT-NEXT: sxtw x8, w1
+; CHECK-STRICT-NEXT: sub x9, x8, #3
+; CHECK-STRICT-NEXT: sub x8, x8, #4
+; CHECK-STRICT-NEXT: str wzr, [x0, x9, lsl #2]
+; CHECK-STRICT-NEXT: str wzr, [x0, x8, lsl #2]
+; CHECK-STRICT-NEXT: ret
entry:
%sub = add nsw i32 %n, -3
%idxprom = sext i32 %sub to i64
@@ -187,6 +366,33 @@ entry:
; CHECK-STRICT: stp wzr, wzr
; CHECK-STRICT: stp wzr, wzr
define void @Sturw_zero_4(ptr nocapture %P, i32 %n) {
+; CHECK-LABEL: Sturw_zero_4:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: sxtw x8, w1
+; CHECK-NEXT: sub x9, x8, #3
+; CHECK-NEXT: sub x10, x8, #4
+; CHECK-NEXT: str wzr, [x0, x9, lsl #2]
+; CHECK-NEXT: sub x9, x8, #2
+; CHECK-NEXT: sub x8, x8, #1
+; CHECK-NEXT: str wzr, [x0, x10, lsl #2]
+; CHECK-NEXT: str wzr, [x0, x9, lsl #2]
+; CHECK-NEXT: str wzr, [x0, x8, lsl #2]
+; CHECK-NEXT: ret
+;
+; CHECK-STRICT-LABEL: Sturw_zero_4:
+; CHECK-STRICT: // %bb.0: // %entry
+; CHECK-STRICT-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-STRICT-NEXT: sxtw x8, w1
+; CHECK-STRICT-NEXT: sub x9, x8, #3
+; CHECK-STRICT-NEXT: sub x10, x8, #4
+; CHECK-STRICT-NEXT: str wzr, [x0, x9, lsl #2]
+; CHECK-STRICT-NEXT: sub x9, x8, #2
+; CHECK-STRICT-NEXT: sub x8, x8, #1
+; CHECK-STRICT-NEXT: str wzr, [x0, x10, lsl #2]
+; CHECK-STRICT-NEXT: str wzr, [x0, x9, lsl #2]
+; CHECK-STRICT-NEXT: str wzr, [x0, x8, lsl #2]
+; CHECK-STRICT-NEXT: ret
entry:
%sub = add nsw i32 %n, -3
%idxprom = sext i32 %sub to i64
diff --git a/llvm/test/CodeGen/AArch64/arm64-rev.ll b/llvm/test/CodeGen/AArch64/arm64-rev.ll
index f548a0e01feee6..19c0c8940b92b3 100644
--- a/llvm/test/CodeGen/AArch64/arm64-rev.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-rev.ll
@@ -27,8 +27,8 @@ entry:
define i32 @test_rev_w_srl16(i16 %a) {
; CHECK-SD-LABEL: test_rev_w_srl16:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: rev w8, w0
-; CHECK-SD-NEXT: lsr w0, w8, #16
+; CHECK-SD-NEXT: and w8, w0, #0xffff
+; CHECK-SD-NEXT: rev16 w0, w8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_rev_w_srl16:
@@ -45,12 +45,18 @@ entry:
}
define i32 @test_rev_w_srl16_load(ptr %a) {
-; CHECK-LABEL: test_rev_w_srl16_load:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldrh w8, [x0]
-; CHECK-NEXT: rev w8, w8
-; CHECK-NEXT: lsr w0, w8, #16
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_rev_w_srl16_load:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ldrh w8, [x0]
+; CHECK-SD-NEXT: rev16 w0, w8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_rev_w_srl16_load:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldrh w8, [x0]
+; CHECK-GI-NEXT: rev w8, w8
+; CHECK-GI-NEXT: lsr w0, w8, #16
+; CHECK-GI-NEXT: ret
entry:
%0 = load i16, ptr %a
%1 = zext i16 %0 to i32
@@ -88,9 +94,8 @@ entry:
define i64 @test_rev_x_srl32(i32 %a) {
; CHECK-SD-LABEL: test_rev_x_srl32:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-SD-NEXT: rev x8, x0
-; CHECK-SD-NEXT: lsr x0, x8, #32
+; CHECK-SD-NEXT: mov w8, w0
+; CHECK-SD-NEXT: rev32 x0, x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_rev_x_srl32:
@@ -107,12 +112,18 @@ entry:
}
define i64 @test_rev_x_srl32_load(ptr %a) {
-; CHECK-LABEL: test_rev_x_srl32_load:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldr w8, [x0]
-; CHECK-NEXT: rev x8, x8
-; CHECK-NEXT: lsr x0, x8, #32
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_rev_x_srl32_load:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ldr w8, [x0]
+; CHECK-SD-NEXT: rev32 x0, x8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_rev_x_srl32_load:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr w8, [x0]
+; CHECK-GI-NEXT: rev x8, x8
+; CHECK-GI-NEXT: lsr x0, x8, #32
+; CHECK-GI-NEXT: ret
entry:
%0 = load i32, ptr %a
%1 = zext i32 %0 to i64
diff --git a/llvm/test/CodeGen/AArch64/arm64-trunc-store.ll b/llvm/test/CodeGen/AArch64/arm64-trunc-store.ll
index cd47fff46729f9..31a649ad64f448 100644
--- a/llvm/test/CodeGen/AArch64/arm64-trunc-store.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-trunc-store.ll
@@ -20,10 +20,10 @@ define void @fct32(i32 %arg, i64 %var) {
; CHECK-LABEL: fct32:
; CHECK: // %bb.0: // %bb
; CHECK-NEXT: adrp x8, :got:zptr32
+; CHECK-NEXT: sub w9, w0, #1
; CHECK-NEXT: ldr x8, [x8, :got_lo12:zptr32]
; CHECK-NEXT: ldr x8, [x8]
-; CHECK-NEXT: add x8, x8, w0, sxtw #2
-; CHECK-NEXT: stur w1, [x8, #-4]
+; CHECK-NEXT: str w1, [x8, w9, sxtw #2]
; CHECK-NEXT: ret
bb:
%.pre37 = load ptr, ptr @zptr32, align 8
@@ -39,10 +39,10 @@ define void @fct16(i32 %arg, i64 %var) {
; CHECK-LABEL: fct16:
; CHECK: // %bb.0: // %bb
; CHECK-NEXT: adrp x8, :got:zptr16
+; CHECK-NEXT...
[truncated]
|
You can test this locally with the following command:git-clang-format --diff 1241e7692a466ceb420be2780f1c3e8bbab7d469 2963c08192399286c138cb7e4d645e43e0476f02 -- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp View the diff from clang-format here.diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a182fbdde4..1a11d95278 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10704,8 +10704,6 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return SDValue();
}
-
-
SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
|
Please can you pull out the regenerations into their own PR? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
All the AMDGPU test changes look like regressions
@@ -487,7 +487,8 @@ define amdgpu_kernel void @v_test_add_v2i16_inline_lo_zero_hi(ptr addrspace(1) % | |||
; VI-NEXT: s_waitcnt vmcnt(0) | |||
; VI-NEXT: v_mov_b32_e32 v0, s0 | |||
; VI-NEXT: v_mov_b32_e32 v1, s1 | |||
; VI-NEXT: v_and_b32_e32 v3, 0xffff0000, v2 | |||
; VI-NEXT: v_lshrrev_b32_e32 v3, 16, v2 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Everything here looks like a regression
@@ -81,7 +81,8 @@ define amdgpu_ps float @v_test_cvt_f32_bf16_v(float %src) { | |||
; GCN-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 | |||
; GCN-NEXT: s_nop 1 | |||
; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc | |||
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 | |||
; GCN-NEXT: v_lshrrev_b32_e32 v0, 16, v0 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
More regression
; GCN-NEXT: s_and_b32 s2, s2, 0xffff | ||
; GCN-NEXT: s_lshl_b32 s3, s3, 16 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Regression
@@ -618,6 +618,7 @@ define <2 x i16> @v_bswap_v2i16(<2 x i16> %src) { | |||
; SI-NEXT: v_bfi_b32 v1, s4, v1, v3 | |||
; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 | |||
; SI-NEXT: v_alignbit_b32 v0, v1, v0, 16 | |||
; SI-NEXT: v_bfe_u32 v1, v1, 0, 16 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Regression
@@ -14,6 +14,10 @@ define amdgpu_ps float @main(float %arg0, float %arg1) #0 { | |||
; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 | |||
; SI-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc | |||
; SI-NEXT: v_cvt_pkrtz_f16_f32_e32 v0, s0, v0 | |||
; SI-NEXT: v_and_b32_e32 v1, 0xffff, v0 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Regression
Agreed, although not really sure why all these folds only apply w/o the flags... |
see: #91250 |
I'm a bit surprised this code works. Shouldn't the new node have tried to CSE with the original node? I thought that would intersect the flags causing the new flags to be dropped. |
It doesn't work. It always CSEs the node and drops the flags. Because |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As noted, this doesn't work.
; CHECK: # %bb.0: | ||
; CHECK-NEXT: sllg %r0, %r2, 32 | ||
; CHECK-NEXT: srag %r2, %r0, 32 | ||
; CHECK-NEXT: cgibhe %r0, 0, 0(%r14) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
All the actual changes in the SystemZ tests look to be clear regressions. It doesn't appear to recognize the "abs" patterns with sign extension and more.
I see, any idea for how to create a flag-setting-only combine? |
Call |