Skip to content

Commit

Permalink
[X86FixupLEAs] Turn optIncDec into a generic two address LEA optimize…
Browse files Browse the repository at this point in the history
…r. Support LEA64_32r properly.

INC/DEC is really a special case of a more generic issue. We should also turn LEAs into `add reg,reg` or `add reg,imm` regardless of the slow-LEA flags.

This also supports LEA64_32r, which has 64-bit input registers and a 32-bit output register. So we need to convert the 64-bit inputs to their 32-bit equivalents to check whether they are equal to the base reg.

One thing to note: the original code preserved the kill flags by adding operands to the new instruction instead of using addReg. But I think tied operands aren't supposed to have the kill flag set. I dropped the kill flags, but I could probably try to preserve them in the add reg/reg case if we think it's important. I'm not sure which operand it's supposed to go on for the LEA64_32r instruction due to the super-reg implicit uses. Though I'm also not sure those are needed, since they were probably just created by an INSERT_SUBREG from a 32-bit input.

Differential Revision: https://reviews.llvm.org/D61472

llvm-svn: 361691
  • Loading branch information
topperc committed May 25, 2019
1 parent 4b08fcd commit 46e5052
Show file tree
Hide file tree
Showing 31 changed files with 176 additions and 132 deletions.
154 changes: 106 additions & 48 deletions llvm/lib/Target/X86/X86FixupLEAs.cpp
Expand Up @@ -8,7 +8,7 @@
//
// This file defines the pass that finds instructions that can be
// re-written as LEA instructions in order to reduce pipeline delays.
// When optimizing for size it replaces suitable LEAs with INC or DEC.
// It replaces LEAs with ADD/INC/DEC when that is better for size/speed.
//
//===----------------------------------------------------------------------===//

Expand Down Expand Up @@ -70,10 +70,11 @@ class FixupLEAPass : public MachineFunctionPass {
MachineInstr *processInstrForSlow3OpLEA(MachineInstr &MI,
MachineBasicBlock &MBB);

/// Look for LEAs that add 1 to reg or subtract 1 from reg
/// and convert them to INC or DEC respectively.
bool fixupIncDec(MachineBasicBlock::iterator &I,
MachineBasicBlock &MBB) const;
/// Look for LEAs that are really two address LEAs that we might be able to
/// turn into regular ADD instructions.
bool optTwoAddrLEA(MachineBasicBlock::iterator &I,
MachineBasicBlock &MBB, bool OptIncDec,
bool UseLEAForSP) const;

/// Determine if an instruction references a machine register
/// and, if so, whether it reads or writes the register.
Expand Down Expand Up @@ -114,7 +115,8 @@ class FixupLEAPass : public MachineFunctionPass {

private:
TargetSchedModel TSM;
const X86InstrInfo *TII; // Machine instruction info.
const X86InstrInfo *TII;
const X86RegisterInfo *TRI;
};
}

Expand Down Expand Up @@ -197,13 +199,11 @@ bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) {
bool LEAUsesAG = ST.LEAusesAG();

bool OptIncDec = !ST.slowIncDec() || MF.getFunction().hasOptSize();
bool OptLEA = LEAUsesAG || IsSlowLEA || IsSlow3OpsLEA;

if (!OptLEA && !OptIncDec)
return false;
bool UseLEAForSP = ST.useLeaForSP();

TSM.init(&ST);
TII = ST.getInstrInfo();
TRI = ST.getRegisterInfo();

LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";);
for (MachineBasicBlock &MBB : MF) {
Expand All @@ -212,7 +212,7 @@ bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) {
if (!isLEA(I->getOpcode()))
continue;

if (OptIncDec && fixupIncDec(I, MBB))
if (optTwoAddrLEA(I, MBB, OptIncDec, UseLEAForSP))
continue;

if (IsSlowLEA) {
Expand Down Expand Up @@ -323,8 +323,8 @@ static inline unsigned getADDrrFromLEA(unsigned LEAOpcode) {
default:
llvm_unreachable("Unexpected LEA instruction");
case X86::LEA32r:
return X86::ADD32rr;
case X86::LEA64_32r:
return X86::ADD32rr;
case X86::LEA64r:
return X86::ADD64rr;
}
Expand All @@ -344,48 +344,106 @@ static inline unsigned getADDriFromLEA(unsigned LEAOpcode,
}
}

/// isLEASimpleIncOrDec - Does this LEA have one of these forms:
///   lea %reg, 1(%reg)
///   lea %reg, -1(%reg)
/// i.e. destination equals base, no index, no segment, and an immediate
/// displacement of exactly +1 or -1.
/// NOTE(review): this is the removed side of the diff (replaced by
/// getINCDECFromLEA below); its closing brace is not visible in this view.
static inline bool isLEASimpleIncOrDec(MachineInstr &LEA) {
unsigned SrcReg = LEA.getOperand(1 + X86::AddrBaseReg).getReg();
unsigned DstReg = LEA.getOperand(0).getReg();
const MachineOperand &AddrDisp = LEA.getOperand(1 + X86::AddrDisp);
// Register 0 means "no register" for both the index and segment operands.
return SrcReg == DstReg &&
LEA.getOperand(1 + X86::AddrIndexReg).getReg() == 0 &&
LEA.getOperand(1 + X86::AddrSegmentReg).getReg() == 0 &&
AddrDisp.isImm() &&
(AddrDisp.getImm() == 1 || AddrDisp.getImm() == -1);
/// Map an LEA opcode to the INC or DEC opcode of the matching register width.
/// \param LEAOpcode one of X86::LEA32r, X86::LEA64_32r, or X86::LEA64r.
/// \param IsINC true selects the INC form, false selects the DEC form.
static inline unsigned getINCDECFromLEA(unsigned LEAOpcode, bool IsINC) {
  // LEA32r and LEA64_32r both write a 32-bit result, so they share the
  // 32-bit INC/DEC opcodes; LEA64r maps to the 64-bit opcodes.
  if (LEAOpcode == X86::LEA32r || LEAOpcode == X86::LEA64_32r)
    return IsINC ? X86::INC32r : X86::DEC32r;
  if (LEAOpcode == X86::LEA64r)
    return IsINC ? X86::INC64r : X86::DEC64r;
  llvm_unreachable("Unexpected LEA instruction");
}

// Try to replace a "two address" LEA (destination equals base or index) with
// a plain ADD, or with INC/DEC when the displacement is +/-1.
// Returns true and advances the iterator I to the replacement instruction on
// success; returns false and leaves the LEA untouched otherwise.
// NOTE(review): this span is a unified-diff rendering that interleaves the
// removed fixupIncDec body with the added optTwoAddrLEA body without +/-
// markers; all lines are preserved verbatim below and the removed-side
// fragments are flagged. Verify against the upstream commit before reuse.
bool FixupLEAPass::fixupIncDec(MachineBasicBlock::iterator &I,
MachineBasicBlock &MBB) const {
bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I,
MachineBasicBlock &MBB, bool OptIncDec,
bool UseLEAForSP) const {
MachineInstr &MI = *I;

// NOTE(review): the block from here through the "return true;" below looks
// like the removed fixupIncDec body left in place by the diff rendering.
if (isLEASimpleIncOrDec(MI) && TII->isSafeToClobberEFLAGS(MBB, I)) {
unsigned NewOpcode;
bool isINC = MI.getOperand(1 + X86::AddrDisp).getImm() == 1;
switch (MI.getOpcode()) {
default:
llvm_unreachable("Unexpected LEA instruction");
case X86::LEA32r:
case X86::LEA64_32r:
NewOpcode = isINC ? X86::INC32r : X86::DEC32r;
break;
case X86::LEA64r:
NewOpcode = isINC ? X86::INC64r : X86::DEC64r;
break;
}
// Cache the five LEA memory-address operands: base, scale, index,
// displacement, segment.
const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
const MachineOperand &Disp = MI.getOperand(1 + X86::AddrDisp);
const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);

// NOTE(review): removed-side fragment (old INC/DEC rewrite and epilogue).
MachineInstr *NewMI =
BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode))
.add(MI.getOperand(0))
.add(MI.getOperand(1 + X86::AddrBaseReg));
MBB.erase(I);
I = static_cast<MachineBasicBlock::iterator>(NewMI);
return true;
// Only handle simple LEAs: no segment register, an immediate displacement,
// a scale of at most 1, and only where EFLAGS is safe to clobber (the
// replacement instructions write flags).
if (Segment.getReg() != 0 || !Disp.isImm() || Scale.getImm() > 1 ||
!TII->isSafeToClobberEFLAGS(MBB, I))
return false;

unsigned DestReg = MI.getOperand(0).getReg();
unsigned BaseReg = Base.getReg();
unsigned IndexReg = Index.getReg();

// Don't change stack adjustment LEAs.
if (UseLEAForSP && (DestReg == X86::ESP || DestReg == X86::RSP))
return false;

// LEA64_32 has 64-bit operands but 32-bit result. Convert base/index to
// their 32-bit sub-registers so they can be compared against DestReg.
if (MI.getOpcode() == X86::LEA64_32r) {
if (BaseReg != 0)
BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
if (IndexReg != 0)
IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
}
// NOTE(review): stray "return false;" — appears to be the removed
// function's tail from the diff rendering; verify against upstream.
return false;

MachineInstr *NewMI = nullptr;

// Look for lea(%reg1, %reg2), %reg1 or lea(%reg2, %reg1), %reg1
// which can be turned into add %reg2, %reg1
if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0 &&
(DestReg == BaseReg || DestReg == IndexReg)) {
unsigned NewOpcode = getADDrrFromLEA(MI.getOpcode());
// Normalize so BaseReg is the register the destination matches (the tied
// operand of the two-address ADD).
if (DestReg != BaseReg)
std::swap(BaseReg, IndexReg);

if (MI.getOpcode() == X86::LEA64_32r) {
// TODO: Do we need the super register implicit use?
NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
.addReg(BaseReg).addReg(IndexReg)
.addReg(Base.getReg(), RegState::Implicit)
.addReg(Index.getReg(), RegState::Implicit);
} else {
NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
.addReg(BaseReg).addReg(IndexReg);
}
} else if (DestReg == BaseReg && IndexReg == 0) {
// This is an LEA with only a base register and a displacement,
// We can use ADDri or INC/DEC.

// Does this LEA have one these forms:
// lea %reg, 1(%reg)
// lea %reg, -1(%reg)
if (OptIncDec && (Disp.getImm() == 1 || Disp.getImm() == -1)) {
bool IsINC = Disp.getImm() == 1;
unsigned NewOpcode = getINCDECFromLEA(MI.getOpcode(), IsINC);

if (MI.getOpcode() == X86::LEA64_32r) {
// TODO: Do we need the super register implicit use?
NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
.addReg(BaseReg).addReg(Base.getReg(), RegState::Implicit);
} else {
NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
.addReg(BaseReg);
}
} else {
// General case: fold the displacement into an ADD-register-immediate.
unsigned NewOpcode = getADDriFromLEA(MI.getOpcode(), Disp);
if (MI.getOpcode() == X86::LEA64_32r) {
// TODO: Do we need the super register implicit use?
NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
.addReg(BaseReg).addImm(Disp.getImm())
.addReg(Base.getReg(), RegState::Implicit);
} else {
NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
.addReg(BaseReg).addImm(Disp.getImm());
}
}
} else
return false;

// Remove the LEA and leave the iterator pointing at its replacement so the
// caller's scan continues from the new instruction.
MBB.erase(I);
I = NewMI;
return true;
}

void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I,
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/X86/GlobalISel/add-ext.ll
Expand Up @@ -79,7 +79,7 @@ define i8* @gep8(i32 %i, i8* %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: addl $5, %edi
; CHECK-NEXT: movslq %edi, %rax
; CHECK-NEXT: leaq (%rsi,%rax), %rax
; CHECK-NEXT: addq %rsi, %rax
; CHECK-NEXT: retq

%add = add nsw i32 %i, 5
Expand Down Expand Up @@ -166,16 +166,16 @@ define void @PR20134(i32* %a, i32 %i) {
; CHECK-NEXT: cltq
; CHECK-NEXT: movq $4, %rcx
; CHECK-NEXT: imulq %rcx, %rax
; CHECK-NEXT: leaq (%rdi,%rax), %rax
; CHECK-NEXT: addq %rdi, %rax
; CHECK-NEXT: leal 2(%rsi), %edx
; CHECK-NEXT: movslq %edx, %rdx
; CHECK-NEXT: imulq %rcx, %rdx
; CHECK-NEXT: leaq (%rdi,%rdx), %rdx
; CHECK-NEXT: addq %rdi, %rdx
; CHECK-NEXT: movl (%rdx), %edx
; CHECK-NEXT: addl (%rax), %edx
; CHECK-NEXT: movslq %esi, %rax
; CHECK-NEXT: imulq %rcx, %rax
; CHECK-NEXT: leaq (%rdi,%rax), %rax
; CHECK-NEXT: addq %rdi, %rax
; CHECK-NEXT: movl %edx, (%rax)
; CHECK-NEXT: retq

Expand Down Expand Up @@ -204,10 +204,10 @@ define void @PR20134_zext(i32* %a, i32 %i) {
; CHECK-NEXT: leal 1(%rsi), %eax
; CHECK-NEXT: movq $4, %rcx
; CHECK-NEXT: imulq %rcx, %rax
; CHECK-NEXT: leaq (%rdi,%rax), %rax
; CHECK-NEXT: addq %rdi, %rax
; CHECK-NEXT: leal 2(%rsi), %edx
; CHECK-NEXT: imulq %rcx, %rdx
; CHECK-NEXT: leaq (%rdi,%rdx), %rdx
; CHECK-NEXT: addq %rdi, %rdx
; CHECK-NEXT: movl (%rdx), %edx
; CHECK-NEXT: addl (%rax), %edx
; CHECK-NEXT: imulq %rcx, %rsi
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/GlobalISel/callingconv.ll
Expand Up @@ -409,7 +409,7 @@ define void @test_variadic_call_2(i8** %addr_ptr, double* %val_ptr) {
; X32-NEXT: movl 4(%ecx), %ecx
; X32-NEXT: movl %eax, (%esp)
; X32-NEXT: movl $4, %eax
; X32-NEXT: leal (%esp,%eax), %eax
; X32-NEXT: addl %esp, %eax
; X32-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X32-NEXT: movl %ecx, 4(%eax)
; X32-NEXT: calll variadic_callee
Expand Down
14 changes: 7 additions & 7 deletions llvm/test/CodeGen/X86/GlobalISel/gep.ll
Expand Up @@ -12,7 +12,7 @@ define i32* @test_gep_i8(i32 *%arr, i8 %ind) {
; X64_GISEL-NEXT: sarq %cl, %rsi
; X64_GISEL-NEXT: movq $4, %rax
; X64_GISEL-NEXT: imulq %rsi, %rax
; X64_GISEL-NEXT: leaq (%rdi,%rax), %rax
; X64_GISEL-NEXT: addq %rdi, %rax
; X64_GISEL-NEXT: retq
;
; X64-LABEL: test_gep_i8:
Expand All @@ -29,7 +29,7 @@ define i32* @test_gep_i8_const(i32 *%arr) {
; X64_GISEL-LABEL: test_gep_i8_const:
; X64_GISEL: # %bb.0:
; X64_GISEL-NEXT: movq $80, %rax
; X64_GISEL-NEXT: leaq (%rdi,%rax), %rax
; X64_GISEL-NEXT: addq %rdi, %rax
; X64_GISEL-NEXT: retq
;
; X64-LABEL: test_gep_i8_const:
Expand All @@ -50,7 +50,7 @@ define i32* @test_gep_i16(i32 *%arr, i16 %ind) {
; X64_GISEL-NEXT: sarq %cl, %rsi
; X64_GISEL-NEXT: movq $4, %rax
; X64_GISEL-NEXT: imulq %rsi, %rax
; X64_GISEL-NEXT: leaq (%rdi,%rax), %rax
; X64_GISEL-NEXT: addq %rdi, %rax
; X64_GISEL-NEXT: retq
;
; X64-LABEL: test_gep_i16:
Expand All @@ -67,7 +67,7 @@ define i32* @test_gep_i16_const(i32 *%arr) {
; X64_GISEL-LABEL: test_gep_i16_const:
; X64_GISEL: # %bb.0:
; X64_GISEL-NEXT: movq $80, %rax
; X64_GISEL-NEXT: leaq (%rdi,%rax), %rax
; X64_GISEL-NEXT: addq %rdi, %rax
; X64_GISEL-NEXT: retq
;
; X64-LABEL: test_gep_i16_const:
Expand Down Expand Up @@ -100,7 +100,7 @@ define i32* @test_gep_i32_const(i32 *%arr) {
; X64_GISEL-LABEL: test_gep_i32_const:
; X64_GISEL: # %bb.0:
; X64_GISEL-NEXT: movq $20, %rax
; X64_GISEL-NEXT: leaq (%rdi,%rax), %rax
; X64_GISEL-NEXT: addq %rdi, %rax
; X64_GISEL-NEXT: retq
;
; X64-LABEL: test_gep_i32_const:
Expand All @@ -116,7 +116,7 @@ define i32* @test_gep_i64(i32 *%arr, i64 %ind) {
; X64_GISEL: # %bb.0:
; X64_GISEL-NEXT: movq $4, %rax
; X64_GISEL-NEXT: imulq %rsi, %rax
; X64_GISEL-NEXT: leaq (%rdi,%rax), %rax
; X64_GISEL-NEXT: addq %rdi, %rax
; X64_GISEL-NEXT: retq
;
; X64-LABEL: test_gep_i64:
Expand All @@ -131,7 +131,7 @@ define i32* @test_gep_i64_const(i32 *%arr) {
; X64_GISEL-LABEL: test_gep_i64_const:
; X64_GISEL: # %bb.0:
; X64_GISEL-NEXT: movq $20, %rax
; X64_GISEL-NEXT: leaq (%rdi,%rax), %rax
; X64_GISEL-NEXT: addq %rdi, %rax
; X64_GISEL-NEXT: retq
;
; X64-LABEL: test_gep_i64_const:
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/GlobalISel/memop-scalar.ll
Expand Up @@ -181,7 +181,7 @@ define i32 @test_gep_folding_largeGepIndex(i32* %arr, i32 %val) {
; ALL-LABEL: test_gep_folding_largeGepIndex:
; ALL: # %bb.0:
; ALL-NEXT: movabsq $228719476720, %rax # imm = 0x3540BE3FF0
; ALL-NEXT: leaq (%rdi,%rax), %rax
; ALL-NEXT: addq %rdi, %rax
; ALL-NEXT: movl %esi, (%rax)
; ALL-NEXT: movl (%rax), %eax
; ALL-NEXT: retq
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/MergeConsecutiveStores.ll
Expand Up @@ -632,7 +632,7 @@ define void @loadStoreBaseIndexOffsetSextNoSex(i8* %a, i8* %b, i8* %c, i32 %n) {
; BWON-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
; BWON-NEXT: movsbq (%rdi,%rcx), %rax
; BWON-NEXT: movzbl (%rdx,%rax), %r9d
; BWON-NEXT: leal 1(%rax), %eax
; BWON-NEXT: incl %eax
; BWON-NEXT: movsbq %al, %rax
; BWON-NEXT: movzbl (%rdx,%rax), %eax
; BWON-NEXT: movb %r9b, (%rsi,%rcx,2)
Expand All @@ -651,7 +651,7 @@ define void @loadStoreBaseIndexOffsetSextNoSex(i8* %a, i8* %b, i8* %c, i32 %n) {
; BWOFF-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
; BWOFF-NEXT: movsbq (%rdi,%rcx), %rax
; BWOFF-NEXT: movb (%rdx,%rax), %r9b
; BWOFF-NEXT: leal 1(%rax), %eax
; BWOFF-NEXT: incl %eax
; BWOFF-NEXT: movsbq %al, %rax
; BWOFF-NEXT: movb (%rdx,%rax), %al
; BWOFF-NEXT: movb %r9b, (%rsi,%rcx,2)
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/atomic-unordered.ll
Expand Up @@ -772,7 +772,7 @@ define i64 @load_fold_sdiv1(i64* %p) {
; CHECK-O3-NEXT: movq %rdx, %rax
; CHECK-O3-NEXT: shrq $63, %rax
; CHECK-O3-NEXT: sarq $3, %rdx
; CHECK-O3-NEXT: leaq (%rdx,%rax), %rax
; CHECK-O3-NEXT: addq %rdx, %rax
; CHECK-O3-NEXT: retq
%v = load atomic i64, i64* %p unordered, align 8
%ret = sdiv i64 %v, 15
Expand Down

0 comments on commit 46e5052

Please sign in to comment.