[TwoAddressInstructionPass] Improve the SrcRegMap and DstRegMap compu…

…tation This patch contains the following enhancements to SrcRegMap and DstRegMap: 1. In findOnlyInterestingUse, check not only whether Reg is used as a two-address operand, but also whether it can become one after commutation. 2. If a physical register is clobbered, remove the SrcRegMap entries that are mapped to it. 3. In processTiedPairs, when creating a new COPY instruction, add a SrcRegMap entry only when the COPY instruction is coalescable (i.e. the COPY source is killed). With these enhancements isProfitableToCommute can make better commute decisions, and ultimately more register copies are removed. Differential Revision: https://reviews.llvm.org/D108731
llvm · Oct 11, 2021 · 6599961 · 6599961
1 parent c3dcf39
commit 6599961
Show file tree

Hide file tree

Showing 132 changed files with 2,894 additions and 3,163 deletions.
diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -118,6 +118,8 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
   // registers. e.g. r1 = move v1024.
   DenseMap<Register, Register> DstRegMap;
 
+  void removeClobberedSrcRegMap(MachineInstr *MI);
+
   bool isRevCopyChain(Register FromReg, Register ToReg, int Maxlen);
 
   bool noUseAfterLastDef(Register Reg, unsigned Dist, unsigned &LastDef);
@@ -380,7 +382,8 @@ findOnlyInterestingUse(Register Reg, MachineBasicBlock *MBB,
   if (!MRI->hasOneNonDBGUse(Reg))
     // None or more than one use.
     return nullptr;
-  MachineInstr &UseMI = *MRI->use_instr_nodbg_begin(Reg);
+  MachineOperand &UseOp = *MRI->use_nodbg_begin(Reg);
+  MachineInstr &UseMI = *UseOp.getParent();
   if (UseMI.getParent() != MBB)
     return nullptr;
   Register SrcReg;
@@ -394,6 +397,18 @@ findOnlyInterestingUse(Register Reg, MachineBasicBlock *MBB,
     IsDstPhys = DstReg.isPhysical();
     return &UseMI;
   }
+  if (UseMI.isCommutable()) {
+    unsigned Src1 = TargetInstrInfo::CommuteAnyOperandIndex;
+    unsigned Src2 = UseMI.getOperandNo(&UseOp);
+    if (TII->findCommutedOpIndices(UseMI, Src1, Src2)) {
+      MachineOperand &MO = UseMI.getOperand(Src1);
+      if (MO.isReg() && MO.isUse() &&
+          isTwoAddrUse(UseMI, MO.getReg(), DstReg)) {
+        IsDstPhys = DstReg.isPhysical();
+        return &UseMI;
+      }
+    }
+  }
   return nullptr;
 }
 
@@ -422,6 +437,76 @@ static bool regsAreCompatible(Register RegA, Register RegB,
   return TRI->regsOverlap(RegA, RegB);
 }
 
+/// Remove RegMap entries mapped to a physreg that MO overlaps or clobbers.
+static void removeMapRegEntry(const MachineOperand &MO,
+                              DenseMap<Register, Register> &RegMap,
+                              const TargetRegisterInfo *TRI) {
+  assert(
+      (MO.isReg() || MO.isRegMask()) &&
+      "removeMapRegEntry must be called with a register or regmask operand.");
+
+  SmallVector<Register, 2> Srcs; // Keys to erase, applied after the scan.
+  for (auto SI : RegMap) {
+    Register ToReg = SI.second;
+    if (ToReg.isVirtual())
+      continue; // Entries mapped to a virtual register are kept.
+
+    if (MO.isReg()) {
+      Register Reg = MO.getReg();
+      if (TRI->regsOverlap(ToReg, Reg))
+        Srcs.push_back(SI.first);
+    } else if (MO.clobbersPhysReg(ToReg)) // MO is a regmask here (see assert).
+      Srcs.push_back(SI.first);
+  }
+
+  for (auto SrcReg : Srcs)
+    RegMap.erase(SrcReg);
+}
+
+/// If a physical register is clobbered, old entries mapped to it should be
+/// deleted. For example
+///
+///     %2:gr64 = COPY killed $rdx
+///     MUL64r %3:gr64, implicit-def $rax, implicit-def $rdx
+///
+/// After the MUL instruction, $rdx contains a different value than in the COPY
+/// instruction. So %2 should not map to $rdx after MUL.
+void TwoAddressInstructionPass::removeClobberedSrcRegMap(MachineInstr *MI) {
+  if (MI->isCopy()) {
+    // If a virtual register is copied to its mapped physical register, it
+    // doesn't change the potential coalescing between them, so we don't remove
+    // entries mapped to the physical register. For example
+    //
+    // %100 = COPY $r8
+    //     ...
+    // $r8  = COPY %100
+    //
+    // The first copy constructs SrcRegMap[%100] = $r8, the second copy doesn't
+    // destroy the content of $r8, and should not impact SrcRegMap.
+    Register Dst = MI->getOperand(0).getReg();
+    if (!Dst || Dst.isVirtual())
+      return; // COPY destination is null or a virtual register.
+
+    Register Src = MI->getOperand(1).getReg();
+    if (regsAreCompatible(Dst, getMappedReg(Src, SrcRegMap), TRI))
+      return; // The case described above: keep the existing mapping.
+  }
+
+  for (unsigned i = 0, NumOps = MI->getNumOperands(); i != NumOps; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (MO.isRegMask()) {
+      removeMapRegEntry(MO, SrcRegMap, TRI);
+      continue;
+    }
+    if (!MO.isReg() || !MO.isDef())
+      continue; // Skip non-register operands and register uses.
+    Register Reg = MO.getReg();
+    if (!Reg || Reg.isVirtual())
+      continue; // Skip null and virtual registers.
+    removeMapRegEntry(MO, SrcRegMap, TRI);
+  }
+}
+
 // Returns true if Reg is equal or aliased to at least one register in Set.
 static bool regOverlapsSet(const SmallVectorImpl<Register> &Set, Register Reg,
                            const TargetRegisterInfo *TRI) {
@@ -670,9 +755,7 @@ void TwoAddressInstructionPass::scanUses(Register DstReg) {
       VirtRegPairs.push_back(NewReg);
       break;
     }
-    bool isNew = SrcRegMap.insert(std::make_pair(NewReg, Reg)).second;
-    if (!isNew)
-      assert(SrcRegMap[NewReg] == Reg && "Can't map to two src registers!");
+    SrcRegMap[NewReg] = Reg;
     VirtRegPairs.push_back(NewReg);
     Reg = NewReg;
   }
@@ -1479,9 +1562,6 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
     // by SubRegB is compatible with RegA with no subregister. So regardless of
     // whether the dest oper writes a subreg, the source oper should not.
     MO.setSubReg(0);
-
-    // Propagate SrcRegMap.
-    SrcRegMap[RegA] = RegB;
   }
 
   if (AllUsesCopied) {
@@ -1513,6 +1593,9 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
       LV->addVirtualRegisterKilled(RegB, *PrevMI);
     }
 
+    if (RemovedKillFlag && ReplacedAllUntiedUses)
+      SrcRegMap[LastCopiedReg] = RegB;
+
     // Update LiveIntervals.
     if (LIS) {
       LiveInterval &LI = LIS->getInterval(RegB);
@@ -1599,6 +1682,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
       // First scan through all the tied register uses in this instruction
       // and record a list of pairs of tied operands for each register.
       if (!collectTiedOperands(&*mi, TiedOperands)) {
+        removeClobberedSrcRegMap(&*mi);
         mi = nmi;
         continue;
       }
@@ -1623,6 +1707,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
             // The tied operands have been eliminated or shifted further down
             // the block to ease elimination. Continue processing with 'nmi'.
             TiedOperands.clear();
+            removeClobberedSrcRegMap(&*mi);
             mi = nmi;
             continue;
           }
@@ -1652,6 +1737,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
       // Clear TiedOperands here instead of at the top of the loop
       // since most instructions do not have tied operands.
       TiedOperands.clear();
+      removeClobberedSrcRegMap(&*mi);
       mi = nmi;
     }
   }

diff --git a/llvm/test/CodeGen/ARM/ssat.ll b/llvm/test/CodeGen/ARM/ssat.ll
@@ -394,14 +394,14 @@ entry:
 define i32 @no_sat_incorrect_constant(i32 %x) #0 {
 ; V4T-LABEL: no_sat_incorrect_constant:
 ; V4T:       @ %bb.0: @ %entry
-; V4T-NEXT:    mov r2, #1065353216
+; V4T-NEXT:    mov r1, #1065353216
 ; V4T-NEXT:    cmn r0, #8388608
-; V4T-NEXT:    orr r2, r2, #-1073741824
-; V4T-NEXT:    mov r1, r0
-; V4T-NEXT:    orrlt r1, r2, #1
-; V4T-NEXT:    ldr r2, .LCPI11_0
+; V4T-NEXT:    orr r1, r1, #-1073741824
+; V4T-NEXT:    mov r2, r0
+; V4T-NEXT:    orrlt r2, r1, #1
+; V4T-NEXT:    ldr r1, .LCPI11_0
 ; V4T-NEXT:    cmp r0, #8388608
-; V4T-NEXT:    movge r1, r2
+; V4T-NEXT:    movlt r1, r2
 ; V4T-NEXT:    mov r0, r1
 ; V4T-NEXT:    bx lr
 ; V4T-NEXT:    .p2align 2

diff --git a/llvm/test/CodeGen/ARM/usat.ll b/llvm/test/CodeGen/ARM/usat.ll
@@ -566,12 +566,12 @@ entry:
 define i32 @no_unsigned_sat_incorrect_compare(i32 %x, i32 %y) #0 {
 ; V4T-LABEL: no_unsigned_sat_incorrect_compare:
 ; V4T:       @ %bb.0: @ %entry
-; V4T-NEXT:    ldr r2, .LCPI14_0
 ; V4T-NEXT:    cmp r1, #0
-; V4T-NEXT:    mov r1, r0
-; V4T-NEXT:    movmi r1, #0
+; V4T-NEXT:    mov r2, r0
+; V4T-NEXT:    movmi r2, #0
+; V4T-NEXT:    ldr r1, .LCPI14_0
 ; V4T-NEXT:    cmp r0, #8388608
-; V4T-NEXT:    movge r1, r2
+; V4T-NEXT:    movlt r1, r2
 ; V4T-NEXT:    mov r0, r1
 ; V4T-NEXT:    bx lr
 ; V4T-NEXT:    .p2align 2
@@ -581,12 +581,12 @@ define i32 @no_unsigned_sat_incorrect_compare(i32 %x, i32 %y) #0 {
 ;
 ; V6-LABEL: no_unsigned_sat_incorrect_compare:
 ; V6:       @ %bb.0: @ %entry
-; V6-NEXT:    ldr r2, .LCPI14_0
 ; V6-NEXT:    cmp r1, #0
-; V6-NEXT:    mov r1, r0
-; V6-NEXT:    movmi r1, #0
+; V6-NEXT:    mov r2, r0
+; V6-NEXT:    movmi r2, #0
+; V6-NEXT:    ldr r1, .LCPI14_0
 ; V6-NEXT:    cmp r0, #8388608
-; V6-NEXT:    movge r1, r2
+; V6-NEXT:    movlt r1, r2
 ; V6-NEXT:    mov r0, r1
 ; V6-NEXT:    bx lr
 ; V6-NEXT:    .p2align 2

diff --git a/llvm/test/CodeGen/AVR/ctpop.ll b/llvm/test/CodeGen/AVR/ctpop.ll
@@ -21,7 +21,6 @@ declare i8 @llvm.ctpop.i8(i8)
 ; CHECK: add    {{.*}}[[RESULT]], {{.*}}[[SCRATCH]]
 ; CHECK: mov    {{.*}}[[SCRATCH]], {{.*}}[[RESULT]]
 ; CHECK: swap   {{.*}}[[SCRATCH]]
-; CHECK: add    {{.*}}[[SCRATCH]], {{.*}}[[RESULT]]
-; CHECK: andi   {{.*}}[[SCRATCH]], 15
-; CHECK: mov    {{.*}}[[RESULT]], {{.*}}[[SCRATCH]]
+; CHECK: add    {{.*}}[[RESULT]], {{.*}}[[SCRATCH]]
+; CHECK: andi   {{.*}}[[RESULT]], 15
 ; CHECK: ret
diff --git a/llvm/test/CodeGen/AVR/hardware-mul.ll b/llvm/test/CodeGen/AVR/hardware-mul.ll
@@ -14,16 +14,15 @@ define i8 @mult8(i8 %a, i8 %b) {
 define i16 @mult16(i16 %a, i16 %b) {
 ; CHECK-LABEL: mult16:
 ; CHECK: muls r22, r25
-; CHECK: mov  r18, r0
+; CHECK: mov  r20, r0
 ; CHECK: mul  r22, r24
-; CHECK: mov  r19, r0
-; CHECK: mov  r20, r1
+; CHECK: mov  r21, r0
+; CHECK: mov  r18, r1
 ; CHECK: clr r1
-; CHECK: add  r20, r18
+; CHECK: add  r18, r20
 ; CHECK: muls r23, r24
 ; CHECK: clr r1
-; CHECK: mov  r22, r0
-; CHECK: add  r22, r20
+; CHECK: add  r18, r0
 ; :TODO: finish after reworking shift instructions
   %mul = mul nsw i16 %b, %a
   ret i16 %mul

diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-57.ll b/llvm/test/CodeGen/SystemZ/int-cmp-57.ll
@@ -82,7 +82,7 @@ define i32 @fun6(i32 %arg) {
 ; CHECK-LABEL: fun6:
 ; CHECK: afi
 ; CHECK-NEXT: chi
-; CHECK-NEXT: locrlh
+; CHECK-NEXT: locre
 bb:
   %tmp = add i32 %arg, -2147483648
   %tmp1 = icmp eq i32 %tmp, 0
@@ -94,7 +94,7 @@ define i32 @fun7(i32 %arg) {
 ; CHECK-LABEL: fun7:
 ; CHECK: afi
 ; CHECK-NEXT: chi
-; CHECK-NEXT: locrle
+; CHECK-NEXT: locrh
 bb:
   %tmp = add i32 %arg, -2147483648
   %tmp1 = icmp sgt i32 %tmp, 0

diff --git a/llvm/test/CodeGen/Thumb/pr35836_2.ll b/llvm/test/CodeGen/Thumb/pr35836_2.ll
@@ -36,20 +36,20 @@ entry:
   %mul = mul i128 %add18, %add
   ret i128 %mul
 }
-; CHECK: adds r5, r1, r7
+; CHECK: adds r5, r1, r6
 ; CHECK: mov r5, r4
-; CHECK: adcs r5, r6
+; CHECK: adcs r5, r7
 ; CHECK: ldr r5, [sp, #12]                   @ 4-byte Reload
 ; CHECK: adcs r2, r5
 ; CHECK: ldr r5, [sp, #16]                   @ 4-byte Reload
 ; CHECK: adcs r3, r5
-; CHECK: adds r5, r1, r7
-; CHECK: adcs r4, r6
+; CHECK: adds r6, r1, r6
+; CHECK: adcs r4, r7
 ; CHECK: ldr r1, [r0, #20]
 ; CHECK: str r1, [sp, #16]                   @ 4-byte Spill
-; CHECK: ldr r6, [r0, #28]
+; CHECK: ldr r5, [r0, #28]
 ; CHECK: ldr r1, [r0, #16]
 ; CHECK: ldr r7, [r0, #24]
 ; CHECK: adcs r7, r1
 ; CHECK: ldr r0, [sp, #16]                   @ 4-byte Reload
-; CHECK: adcs r6, r0
+; CHECK: adcs r5, r0
diff --git a/llvm/test/CodeGen/X86/DynamicCalleeSavedRegisters.ll b/llvm/test/CodeGen/X86/DynamicCalleeSavedRegisters.ll
@@ -28,8 +28,9 @@ define cc 11 i32 @caller(i32 %a0, i32 %b0, i32 %c0, i32 %d0, i32 %e0) nounwind {
 ; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
 ; CHECK-NEXT:    movl %ebp, %esi
 ; CHECK-NEXT:    calll callee@PLT
-; CHECK-NEXT:    leal (%eax,%ebx), %esi
-; CHECK-NEXT:    addl %ebp, %esi
+; CHECK-NEXT:    addl %eax, %ebx
+; CHECK-NEXT:    addl %ebp, %ebx
+; CHECK-NEXT:    movl %ebx, %esi
 ; CHECK-NEXT:    addl $12, %esp
 ; CHECK-NEXT:    retl
   %b1 = call x86_regcallcc i32 @callee(i32 %a0, i32 %b0, i32 %c0, i32 %d0, i32 %e0)

diff --git a/llvm/test/CodeGen/X86/abs.ll b/llvm/test/CodeGen/X86/abs.ll
@@ -132,14 +132,13 @@ define i64 @test_i64(i64 %a) nounwind {
 define i128 @test_i128(i128 %a) nounwind {
 ; X64-LABEL: test_i128:
 ; X64:       # %bb.0:
-; X64-NEXT:    movq %rsi, %rdx
 ; X64-NEXT:    movq %rdi, %rax
-; X64-NEXT:    movq %rsi, %rcx
-; X64-NEXT:    sarq $63, %rcx
-; X64-NEXT:    addq %rcx, %rax
-; X64-NEXT:    adcq %rcx, %rdx
-; X64-NEXT:    xorq %rcx, %rax
-; X64-NEXT:    xorq %rcx, %rdx
+; X64-NEXT:    movq %rsi, %rdx
+; X64-NEXT:    sarq $63, %rdx
+; X64-NEXT:    addq %rdx, %rax
+; X64-NEXT:    adcq %rdx, %rsi
+; X64-NEXT:    xorq %rdx, %rax
+; X64-NEXT:    xorq %rsi, %rdx
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: test_i128:

diff --git a/llvm/test/CodeGen/X86/add-cmov.ll b/llvm/test/CodeGen/X86/add-cmov.ll
@@ -136,9 +136,9 @@ define i64 @select_max32_2_i64(i64 %offset, i64 %x) {
 ; CHECK-LABEL: select_max32_2_i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    leaq 2(%rdi), %rax
-; CHECK-NEXT:    addq $2147483647, %rdi # imm = 0x7FFFFFFF
+; CHECK-NEXT:    leaq 2147483647(%rdi), %rcx
 ; CHECK-NEXT:    cmpq $41, %rsi
-; CHECK-NEXT:    cmovneq %rdi, %rax
+; CHECK-NEXT:    cmovneq %rcx, %rax
 ; CHECK-NEXT:    retq
   %b = icmp ne i64 %x, 41
   %s = select i1 %b, i64 2147483647, i64 2
@@ -209,10 +209,10 @@ define i32 @select_20_43_i32(i32 %offset, i64 %x) {
 ; CHECK-LABEL: select_20_43_i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT:    leal 43(%rdi), %eax
-; CHECK-NEXT:    addl $20, %edi
+; CHECK-NEXT:    leal 43(%rdi), %ecx
+; CHECK-NEXT:    leal 20(%rdi), %eax
 ; CHECK-NEXT:    cmpq $42, %rsi
-; CHECK-NEXT:    cmovgel %edi, %eax
+; CHECK-NEXT:    cmovll %ecx, %eax
 ; CHECK-NEXT:    retq
   %b = icmp sgt i64 %x, 41
   %s = select i1 %b, i32 20, i32 43
@@ -224,10 +224,10 @@ define i16 @select_n2_17_i16(i16 %offset, i1 %b) {
 ; CHECK-LABEL: select_n2_17_i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT:    leal 17(%rdi), %eax
-; CHECK-NEXT:    addl $65534, %edi # imm = 0xFFFE
+; CHECK-NEXT:    leal 17(%rdi), %ecx
+; CHECK-NEXT:    leal 65534(%rdi), %eax
 ; CHECK-NEXT:    testb $1, %sil
-; CHECK-NEXT:    cmovnel %edi, %eax
+; CHECK-NEXT:    cmovel %ecx, %eax
 ; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    retq
   %s = select i1 %b, i16 -2, i16 17