[Peephole] rewrite INSERT_SUBREG to SUBREG_TO_REG if upper bits zero

vfdff · vfdff · commit b6655333c255 · 2022-09-09T09:00:54.000+08:00
Restrict the rewrite to the 32-bit form of integer instructions, because otherwise too many test cases would be clobbered as the register numbers are updated. The rewrite goes from `%reg = INSERT_SUBREG %reg, %subreg, subidx` to `%reg:subidx = SUBREG_TO_REG 0, %subreg, subidx`. This tries to fix, ahead of D132325, the redundant mov instruction seen there, as SUBREG_TO_REG should not generate code. Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D132939
diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
@@ -32,6 +32,9 @@
 //    ORRWrs, we can remove the ORRWrs because the upper 32 bits of the source
 //    operand are set to zero.
 //
+// 5. %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx
+//     ==> %reg:subidx =  SUBREG_TO_REG 0, %subreg, subidx
+//
 //===----------------------------------------------------------------------===//
 
 #include "AArch64ExpandImm.h"
@@ -97,6 +100,7 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
   template <typename T>
   bool visitAND(unsigned Opc, MachineInstr &MI);
   bool visitORR(MachineInstr &MI);
+  bool visitINSERT(MachineInstr &MI);
   bool runOnMachineFunction(MachineFunction &MF) override;
 
   StringRef getPassName() const override {
@@ -250,6 +254,50 @@ bool AArch64MIPeepholeOpt::visitORR(MachineInstr &MI) {
   return true;
 }
 
+bool AArch64MIPeepholeOpt::visitINSERT(MachineInstr &MI) {
+  // Check this INSERT_SUBREG comes from the zero-extend pattern below and, if
+  // so, rewrite it. Returns true when MI was replaced (and erased).
+  //
+  // From %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx
+  // To   %reg:subidx =  SUBREG_TO_REG 0, %subreg, subidx
+  //
+  // We're assuming the first operand to INSERT_SUBREG is irrelevant because a
+  // COPY would destroy the upper part of the register anyway
+  if (!MI.isRegTiedToDefOperand(1))
+    return false;
+
+  Register DstReg = MI.getOperand(0).getReg();
+  const TargetRegisterClass *RC = MRI->getRegClass(DstReg);
+  MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
+  if (!SrcMI)
+    return false;
+
+  // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC:
+  // When you use the 32-bit form of an instruction, the upper 32 bits of the
+  // source registers are ignored and the upper 32 bits of the destination
+  // register are set to zero.
+  //
+  // If AArch64's 32-bit form of instruction defines the source operand of
+  // zero-extend, we do not need the zero-extend. Conservatively bail out
+  // unless SrcMI is a real AArch64 (non-generic) instruction and the
+  // destination register class is GPR64all or one of its sub-classes.
+  if ((SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END) ||
+      !AArch64::GPR64allRegClass.hasSubClassEq(RC))
+    return false;
+
+  // Build a SUBREG_TO_REG instruction
+  MachineInstr *SubregMI =
+      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
+              TII->get(TargetOpcode::SUBREG_TO_REG), DstReg)
+          .addImm(0)
+          .add(MI.getOperand(2))
+          .add(MI.getOperand(3));
+  LLVM_DEBUG(dbgs() << MI << "  replace by:\n: " << *SubregMI << "\n");
+  (void)SubregMI; // Only read inside LLVM_DEBUG, which may compile away.
+  MI.eraseFromParent();
+  return true;
+}
+
 template <typename T>
 static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) {
   // The immediate must be in the form of ((imm0 << 12) + imm1), in which both
@@ -493,6 +541,9 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
       switch (MI.getOpcode()) {
       default:
         break;
+      case AArch64::INSERT_SUBREG:
+        Changed = visitINSERT(MI);
+        break;
       case AArch64::ANDWrr:
         Changed = visitAND<uint32_t>(AArch64::ANDWri, MI);
         break;
diff --git a/llvm/test/CodeGen/AArch64/peephole-insert-subreg.mir b/llvm/test/CodeGen/AArch64/peephole-insert-subreg.mir
@@ -0,0 +1,47 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -run-pass=aarch64-mi-peephole-opt -o - -mtriple=aarch64-unknown-linux -verify-machineinstrs %s | FileCheck %s
+
+--- |
+  define i64 @loop2(i32 noundef %width) {
+  entry:
+    %add = add i32 %width, -1
+    %zext = zext i32 %add to i64
+    %shl = shl nuw nsw i64 %zext, 1
+    ret i64 %shl
+  }
+
+...
+---
+---
+name:            loop2
+alignment:       4
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr32common, preferred-register: '' }
+  - { id: 1, class: gpr32common, preferred-register: '' }
+  - { id: 2, class: gpr64, preferred-register: '' }
+  - { id: 3, class: gpr64all, preferred-register: '' }
+  - { id: 4, class: gpr64, preferred-register: '' }
+liveins:
+  - { reg: '$w0', virtual-reg: '%0' }
+body: |
+  bb.0.entry:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: loop2
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32common = COPY $w0
+    ; CHECK-NEXT: [[SUBWri:%[0-9]+]]:gpr32common = SUBWri [[COPY]], 1, 0
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF
+    ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, killed [[SUBWri]], %subreg.sub_32
+    ; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64 = nuw nsw UBFMXri killed [[SUBREG_TO_REG]], 63, 31
+    ; CHECK-NEXT: $x0 = COPY [[UBFMXri]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %0:gpr32common = COPY $w0
+    %1:gpr32common = SUBWri %0, 1, 0
+    %3:gpr64all = IMPLICIT_DEF
+    %2:gpr64 = INSERT_SUBREG %3, killed %1, %subreg.sub_32
+    %4:gpr64 = nuw nsw UBFMXri killed %2, 63, 31
+    $x0 = COPY %4
+    RET_ReallyLR implicit $x0