[AArch64][GlobalISel] Fold shifts into G_ICMP
Since G_ICMP can be selected to a SUBS, we can fold shifts into such compares.

E.g.

```
cmp	w1, w0, lsl #3
cmp	w1, w0, lsr #3
cmp	w1, w0, asr #3
```
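
To make the effect concrete, here is a small, hypothetical C++ example (not part of this patch) whose shifted compare could plausibly be selected into the first form above when built for AArch64 with GlobalISel; the function name is made up for illustration.

```
// Hypothetical illustration, not from the patch. Before the fold, the shift
// and the compare are selected separately, e.g.:
//   lsl w8, w0, #3
//   cmp w1, w8
// With the fold, the shift is absorbed into the compare:
//   cmp w1, w0, lsl #3
bool cmp_shifted(int x, int y) {
  return y > (x << 3);
}
```

The signed greater-than compare becomes a G_ICMP whose right-hand operand is a G_SHL, which is exactly the pattern `tryOptArithShiftedCompare` below looks for.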

This is done the same way as for adds and subtracts, using
`selectShiftedRegister`.

This gives some minor code size savings on CTMark.

https://reviews.llvm.org/D79365
Jessica Paquette committed May 6, 2020
1 parent 17fc651 commit b1b86d1
Showing 2 changed files with 813 additions and 0 deletions.
38 changes: 38 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -309,6 +309,9 @@ class AArch64InstructionSelector : public InstructionSelector {
                                               MachineOperand &RHS,
                                               MachineOperand &Predicate,
                                               MachineIRBuilder &MIB) const;
  MachineInstr *tryOptArithShiftedCompare(MachineOperand &LHS,
                                          MachineOperand &RHS,
                                          MachineIRBuilder &MIB) const;

  /// Return true if \p MI is a load or store of \p NumBytes bytes.
  bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
@@ -3710,6 +3713,12 @@ AArch64InstructionSelector::emitIntegerCompare(
  if (ImmedCmp)
    return {ImmedCmp, (CmpInst::Predicate)Predicate.getPredicate()};

  // If we don't have an immediate, we may have a shift which can be folded
  // into the compare.
  MachineInstr *ShiftedCmp = tryOptArithShiftedCompare(LHS, RHS, MIRBuilder);
  if (ShiftedCmp)
    return {ShiftedCmp, (CmpInst::Predicate)Predicate.getPredicate()};

  auto CmpMI =
      MIRBuilder.buildInstr(CmpOpc, {ZReg}, {LHS.getReg(), RHS.getReg()});
  // Make sure that we can constrain the compare that we emitted.
@@ -4142,6 +4151,35 @@ MachineInstr *AArch64InstructionSelector::tryOptArithImmedIntegerCompare(
  return &*CmpMI;
}

MachineInstr *AArch64InstructionSelector::tryOptArithShiftedCompare(
    MachineOperand &LHS, MachineOperand &RHS, MachineIRBuilder &MIB) const {
  // We are looking for the following pattern:
  //
  // shift = G_SHL/G_ASHR/G_LSHR y, c
  // ...
  // cmp = G_ICMP pred, something, shift
  //
  // Since we will select the G_ICMP to a SUBS, we can potentially fold the
  // shift into the subtract.
  static const unsigned OpcTable[2] = {AArch64::SUBSWrs, AArch64::SUBSXrs};
  static const Register ZRegTable[2] = {AArch64::WZR, AArch64::XZR};
  auto ImmFns = selectShiftedRegister(RHS);
  if (!ImmFns)
    return nullptr;
  MachineRegisterInfo &MRI = *MIB.getMRI();
  auto Ty = MRI.getType(LHS.getReg());
  assert(!Ty.isVector() && "Expected scalar or pointer only?");
  unsigned Size = Ty.getSizeInBits();
  bool Idx = (Size == 64);
  Register ZReg = ZRegTable[Idx];
  unsigned Opc = OpcTable[Idx];
  auto CmpMI = MIB.buildInstr(Opc, {ZReg}, {LHS.getReg()});
  for (auto &RenderFn : *ImmFns)
    RenderFn(CmpMI);
  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
  return &*CmpMI;
}

bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const {
  // Try to match a vector splat operation into a dup instruction.
  // We're looking for this pattern:
