Use uxtb rN, rM, ror #8 to zext from shifts on armv6 #197352
Use uxtb rN, rM, ror #8 to zext from shifts on armv6 #197352 mike-goutokuji wants to merge 2 commits into
Conversation
|
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-llvm-selectiondag Author: LumioseSil (LumioseSil) Changes Full diff: https://github.com/llvm/llvm-project/pull/197352.diff 3 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 14bf2b704c4da..1a8d0067916c0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11349,6 +11349,18 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
Shift);
return DAG.getNode(ISD::SIGN_EXTEND, DL,
N->getValueType(0), Trunc);
+ } else if ((ShiftAmt > 0) &&
+ (!LegalOperations ||
+ TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, TruncVT))) {
+ // Only if the truncate+sext path above does not apply: same
+ // sign-extract value as sign_extend(truncate(srl ...)), but represented
+ // as sign_extend_inreg(srl ...) when truncate is not free. Guard
+ // matches visitSIGN_EXTEND's (sext(trunc)) -> sign_extend_inreg fold.
+ SDValue Amt = DAG.getShiftAmountConstant(ShiftAmt, VT, DL);
+ SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
+ N0.getOperand(0), Amt);
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, N->getValueType(0),
+ Shift, DAG.getValueType(TruncVT));
}
}
}
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index c74c84a2602b1..8145f2e19fc60 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -3941,6 +3941,15 @@ def : ARMV6Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, rot_imm:$rot), 0xFF)),
(UXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
def : ARMV6Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot), 0xFFFF)),
(UXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
+
+// Standalone (srl; and #0xff) is equivalent to uxtb with ROR — avoids a
+// separate logical shift when extracting bytes from a word (e.g. popcount
+// LUT indexing). The rotate is interchangeable with the logical shift here
+// only because the mask keeps a single byte; this assumes rot_imm admits just
+// byte-multiple amounts — TODO confirm against its definition.
+def : ARMV6Pat<(and (srl GPRnopc:$Rm, rot_imm:$rot), 0xFF),
+ (UXTB GPRnopc:$Rm, rot_imm:$rot)>;
+// Same for 16-bit zero-extract: (srl; and #0xffff) -> uxth with ROR. Only
+// shifts of 8 or 16 are accepted (imm8_or_16, as in the UXTAH pattern): with
+// ror #24 the halfword wraps around and picks up bits [7:0] of Rm, which
+// (srl #24; and #0xffff) always leaves as zero.
+def : ARMV6Pat<(and (srl GPRnopc:$Rm, imm8_or_16:$rot), 0xFFFF),
+ (UXTH GPRnopc:$Rm, rot_imm:$rot)>;
}
// This isn't safe in general, the add is two 16-bit units, not a 32-bit add.
@@ -6319,6 +6328,10 @@ def : ARMV6Pat<(add GPR:$Rn, (and GPR:$Rm, 0xFFFF)),
def : ARMV6Pat<(sext_inreg GPR:$Src, i8), (SXTB GPR:$Src, 0)>;
def : ARMV6Pat<(sext_inreg GPR:$Src, i16), (SXTH GPR:$Src, 0)>;
+// Sign-extending byte extract: sext_inreg from i8 of a logically shifted value
+// selects sxtb with the shift amount as its rotate operand.
+def : ARMV6Pat<(sext_inreg (srl GPRnopc:$Rm, rot_imm:$rot), i8),
+ (SXTB GPRnopc:$Rm, rot_imm:$rot)>;
+// Halfword variant. Restricted to shifts of 8 or 16 (imm8_or_16): with ror #24
+// sxth would sign-extend from bit 7 of Rm, whereas after srl #24 bit 15 is
+// always zero, so the two forms disagree.
+def : ARMV6Pat<(sext_inreg (srl GPRnopc:$Rm, imm8_or_16:$rot), i16),
+ (SXTH GPRnopc:$Rm, rot_imm:$rot)>;
def : ARMV6Pat<(add GPR:$Rn, (sext_inreg GPRnopc:$Rm, i8)),
(SXTAB GPR:$Rn, GPRnopc:$Rm, 0)>;
diff --git a/llvm/test/CodeGen/ARM/extract-ext-armv6.ll b/llvm/test/CodeGen/ARM/extract-ext-armv6.ll
new file mode 100644
index 0000000000000..d6ae1759ebe3a
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/extract-ext-armv6.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=armv6-none-eabi -O3 %s -o - | FileCheck %s
+
+; Bits [15:8] zero-extended: lshr 8 + and 255 should select uxtb with ror #8.
+define i32 @zext_u8_from_shift8(i32 %0) {
+; CHECK-LABEL: zext_u8_from_shift8:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: uxtb r0, r0, ror #8
+; CHECK-NEXT: bx lr
+ %2 = lshr i32 %0, 8
+ %3 = and i32 %2, 255
+ ret i32 %3
+}
+
+; Bits [23:16] zero-extended: lshr 16 + and 255 should select uxtb with ror #16.
+define i32 @zext_u8_from_shift16(i32 %0) {
+; CHECK-LABEL: zext_u8_from_shift16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: uxtb r0, r0, ror #16
+; CHECK-NEXT: bx lr
+ %2 = lshr i32 %0, 16
+ %3 = and i32 %2, 255
+ ret i32 %3
+}
+
+; Bits [15:8] sign-extended: shl 16 + ashr 24 should select sxtb with ror #8.
+define i32 @sext_i8_from_bits8(i32 %0) {
+; CHECK-LABEL: sext_i8_from_bits8:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: sxtb r0, r0, ror #8
+; CHECK-NEXT: bx lr
+ %2 = shl i32 %0, 16
+ %3 = ashr i32 %2, 24
+ ret i32 %3
+}
+
+; Bits [23:16] sign-extended: shl 8 + ashr 24 should select sxtb with ror #16.
+define i32 @sext_i8_from_bits16(i32 %0) {
+; CHECK-LABEL: sext_i8_from_bits16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: sxtb r0, r0, ror #16
+; CHECK-NEXT: bx lr
+ %2 = shl i32 %0, 8
+ %3 = ashr i32 %2, 24
+ ret i32 %3
+}
+
+; Bits [23:8] zero-extended: lshr 8 + and 65535 should select uxth with ror #8.
+define i32 @zext_u16_from_shift8(i32 %0) {
+; CHECK-LABEL: zext_u16_from_shift8:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: uxth r0, r0, ror #8
+; CHECK-NEXT: bx lr
+ %2 = lshr i32 %0, 8
+ %3 = and i32 %2, 65535
+ ret i32 %3
+}
+
+; Bits [23:8] sign-extended: shl 8 + ashr 16 should select sxth with ror #8.
+define i32 @sext_i16_from_shift8(i32 %0) {
+; CHECK-LABEL: sext_i16_from_shift8:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: sxth r0, r0, ror #8
+; CHECK-NEXT: bx lr
+ %2 = shl i32 %0, 8
+ %3 = ashr i32 %2, 16
+ ret i32 %3
+}
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
🐧 Linux x64 Test Results
Failed Tests (click on a test name to see its output) LLVM: LLVM.CodeGen/RISCV/short-forward-branch-opt.ll. If these failures are unrelated to your changes (for example tests are broken or flaky at HEAD), please open an issue at https://github.com/llvm/llvm-project/issues and add the |
🪟 Windows x64 Test Results
Failed Tests (click on a test name to see its output) LLVM: LLVM.CodeGen/RISCV/short-forward-branch-opt.ll. If these failures are unrelated to your changes (for example tests are broken or flaky at HEAD), please open an issue at https://github.com/llvm/llvm-project/issues and add the |
75523ee to
c9981b3
Compare
| @@ -0,0 +1,62 @@ | |||
| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 | |||
| ; RUN: llc -mtriple=armv6-none-eabi -O3 %s -o - | FileCheck %s | |||
There was a problem hiding this comment.
| ; RUN: llc -mtriple=armv6-none-eabi -O3 %s -o - | FileCheck %s | |
| ; RUN: llc -mtriple=armv6-none-eabi < %s | FileCheck %s |
| ; CHECK: @ %bb.0: | ||
| ; CHECK-NEXT: sxtb r0, r0, ror #16 | ||
| ; CHECK-NEXT: bx lr | ||
| %2 = shl i32 %0, 8 |
| if ((ShiftAmt > 0) && | ||
| TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, TruncVT) == | ||
| TargetLowering::Legal) { |
There was a problem hiding this comment.
| if ((ShiftAmt > 0) && | |
| TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, TruncVT) == | |
| TargetLowering::Legal) { | |
| if (ShiftAmt > 0 && | |
| TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, TruncVT)) { |
There was a problem hiding this comment.
That does not do the same thing: isOperationLegal also checks that the type is legal, not just the operation.
a5c5246 to
5c7cb38
Compare
6ab3f67 to
cd2ab13
Compare
c251778 to
15110be
Compare
Mitigate changes in RISCV
|
I know this could in theory live in SelectionDAG, but because it only benefits ARM and causes issues elsewhere, I'm probably going to have to move it to the ARM backend for now... |
No description provided.