Skip to content

Commit

Permalink
[TwoAddressInstructionPass] Improve the SrcRegMap and DstRegMap compu…
Browse files Browse the repository at this point in the history
…tation

This patch contains the following enhancements to SrcRegMap and DstRegMap:

  1 In findOnlyInterestingUse, check not only whether Reg is already a
    two-address use, but also whether it becomes one after commutation.

  2 If a physical register is clobbered, remove SrcRegMap entries that are
    mapped to it.

  3 In processTiedPairs, when creating a new COPY instruction, add a SrcRegMap
    entry only when the COPY instruction is coalescable (i.e. the COPY src is
    killed).

With these enhancements, isProfitableToCommute can make better commute
decisions, and ultimately more register copies are removed.

Differential Revision: https://reviews.llvm.org/D108731
  • Loading branch information
weiguozhi committed Oct 11, 2021
1 parent c3dcf39 commit 6599961
Show file tree
Hide file tree
Showing 132 changed files with 2,894 additions and 3,163 deletions.
100 changes: 93 additions & 7 deletions llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
Expand Up @@ -118,6 +118,8 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
// registers. e.g. r1 = move v1024.
DenseMap<Register, Register> DstRegMap;

void removeClobberedSrcRegMap(MachineInstr *MI);

bool isRevCopyChain(Register FromReg, Register ToReg, int Maxlen);

bool noUseAfterLastDef(Register Reg, unsigned Dist, unsigned &LastDef);
Expand Down Expand Up @@ -380,7 +382,8 @@ findOnlyInterestingUse(Register Reg, MachineBasicBlock *MBB,
if (!MRI->hasOneNonDBGUse(Reg))
// None or more than one use.
return nullptr;
MachineInstr &UseMI = *MRI->use_instr_nodbg_begin(Reg);
MachineOperand &UseOp = *MRI->use_nodbg_begin(Reg);
MachineInstr &UseMI = *UseOp.getParent();
if (UseMI.getParent() != MBB)
return nullptr;
Register SrcReg;
Expand All @@ -394,6 +397,18 @@ findOnlyInterestingUse(Register Reg, MachineBasicBlock *MBB,
IsDstPhys = DstReg.isPhysical();
return &UseMI;
}
if (UseMI.isCommutable()) {
unsigned Src1 = TargetInstrInfo::CommuteAnyOperandIndex;
unsigned Src2 = UseMI.getOperandNo(&UseOp);
if (TII->findCommutedOpIndices(UseMI, Src1, Src2)) {
MachineOperand &MO = UseMI.getOperand(Src1);
if (MO.isReg() && MO.isUse() &&
isTwoAddrUse(UseMI, MO.getReg(), DstReg)) {
IsDstPhys = DstReg.isPhysical();
return &UseMI;
}
}
}
return nullptr;
}

Expand Down Expand Up @@ -422,6 +437,76 @@ static bool regsAreCompatible(Register RegA, Register RegB,
return TRI->regsOverlap(RegA, RegB);
}

/// From RegMap remove entries mapped to a physical register which overlaps MO.
static void removeMapRegEntry(const MachineOperand &MO,
DenseMap<Register, Register> &RegMap,
const TargetRegisterInfo *TRI) {
assert(
(MO.isReg() || MO.isRegMask()) &&
"removeMapRegEntry must be called with a register or regmask operand.");

SmallVector<Register, 2> Srcs;
for (auto SI : RegMap) {
Register ToReg = SI.second;
if (ToReg.isVirtual())
continue;

if (MO.isReg()) {
Register Reg = MO.getReg();
if (TRI->regsOverlap(ToReg, Reg))
Srcs.push_back(SI.first);
} else if (MO.clobbersPhysReg(ToReg))
Srcs.push_back(SI.first);
}

for (auto SrcReg : Srcs)
RegMap.erase(SrcReg);
}

/// Drop SrcRegMap entries that point at a physical register once MI
/// overwrites that register. For example
///
///   %2:gr64 = COPY killed $rdx
///   MUL64r %3:gr64, implicit-def $rax, implicit-def $rdx
///
/// Once the MUL has executed, $rdx no longer holds the value that the COPY
/// read, so %2 must stop being mapped to $rdx.
void TwoAddressInstructionPass::removeClobberedSrcRegMap(MachineInstr *MI) {
  if (MI->isCopy()) {
    // Copying a virtual register back into the physical register it is mapped
    // to leaves their coalescing opportunity intact, so such a copy must not
    // invalidate anything. For example
    //
    //   %100 = COPY $r8
    //   ...
    //   $r8 = COPY %100
    //
    // The first copy records SrcRegMap[%100] = $r8; the second one does not
    // change what $r8 holds and therefore leaves SrcRegMap alone.
    const Register CopyDst = MI->getOperand(0).getReg();
    // A COPY that does not define a physical register clobbers nothing.
    if (CopyDst.isVirtual() || !CopyDst)
      return;

    const Register CopySrc = MI->getOperand(1).getReg();
    const Register MappedSrc = getMappedReg(CopySrc, SrcRegMap);
    if (regsAreCompatible(CopyDst, MappedSrc, TRI))
      return;
  }

  // Invalidate the map for every regmask operand and for every operand that
  // defines a non-null, non-virtual register.
  for (unsigned Idx = 0, End = MI->getNumOperands(); Idx != End; ++Idx) {
    const MachineOperand &Op = MI->getOperand(Idx);
    const bool DefinesPhysReg =
        Op.isReg() && Op.isDef() && Op.getReg() && !Op.getReg().isVirtual();
    if (Op.isRegMask() || DefinesPhysReg)
      removeMapRegEntry(Op, SrcRegMap, TRI);
  }
}

// Returns true if Reg is equal or aliased to at least one register in Set.
static bool regOverlapsSet(const SmallVectorImpl<Register> &Set, Register Reg,
const TargetRegisterInfo *TRI) {
Expand Down Expand Up @@ -670,9 +755,7 @@ void TwoAddressInstructionPass::scanUses(Register DstReg) {
VirtRegPairs.push_back(NewReg);
break;
}
bool isNew = SrcRegMap.insert(std::make_pair(NewReg, Reg)).second;
if (!isNew)
assert(SrcRegMap[NewReg] == Reg && "Can't map to two src registers!");
SrcRegMap[NewReg] = Reg;
VirtRegPairs.push_back(NewReg);
Reg = NewReg;
}
Expand Down Expand Up @@ -1479,9 +1562,6 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
// by SubRegB is compatible with RegA with no subregister. So regardless of
// whether the dest oper writes a subreg, the source oper should not.
MO.setSubReg(0);

// Propagate SrcRegMap.
SrcRegMap[RegA] = RegB;
}

if (AllUsesCopied) {
Expand Down Expand Up @@ -1513,6 +1593,9 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
LV->addVirtualRegisterKilled(RegB, *PrevMI);
}

if (RemovedKillFlag && ReplacedAllUntiedUses)
SrcRegMap[LastCopiedReg] = RegB;

// Update LiveIntervals.
if (LIS) {
LiveInterval &LI = LIS->getInterval(RegB);
Expand Down Expand Up @@ -1599,6 +1682,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
// First scan through all the tied register uses in this instruction
// and record a list of pairs of tied operands for each register.
if (!collectTiedOperands(&*mi, TiedOperands)) {
removeClobberedSrcRegMap(&*mi);
mi = nmi;
continue;
}
Expand All @@ -1623,6 +1707,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
// The tied operands have been eliminated or shifted further down
// the block to ease elimination. Continue processing with 'nmi'.
TiedOperands.clear();
removeClobberedSrcRegMap(&*mi);
mi = nmi;
continue;
}
Expand Down Expand Up @@ -1652,6 +1737,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
// Clear TiedOperands here instead of at the top of the loop
// since most instructions do not have tied operands.
TiedOperands.clear();
removeClobberedSrcRegMap(&*mi);
mi = nmi;
}
}
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/ARM/ssat.ll
Expand Up @@ -394,14 +394,14 @@ entry:
define i32 @no_sat_incorrect_constant(i32 %x) #0 {
; V4T-LABEL: no_sat_incorrect_constant:
; V4T: @ %bb.0: @ %entry
; V4T-NEXT: mov r2, #1065353216
; V4T-NEXT: mov r1, #1065353216
; V4T-NEXT: cmn r0, #8388608
; V4T-NEXT: orr r2, r2, #-1073741824
; V4T-NEXT: mov r1, r0
; V4T-NEXT: orrlt r1, r2, #1
; V4T-NEXT: ldr r2, .LCPI11_0
; V4T-NEXT: orr r1, r1, #-1073741824
; V4T-NEXT: mov r2, r0
; V4T-NEXT: orrlt r2, r1, #1
; V4T-NEXT: ldr r1, .LCPI11_0
; V4T-NEXT: cmp r0, #8388608
; V4T-NEXT: movge r1, r2
; V4T-NEXT: movlt r1, r2
; V4T-NEXT: mov r0, r1
; V4T-NEXT: bx lr
; V4T-NEXT: .p2align 2
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/ARM/usat.ll
Expand Up @@ -566,12 +566,12 @@ entry:
define i32 @no_unsigned_sat_incorrect_compare(i32 %x, i32 %y) #0 {
; V4T-LABEL: no_unsigned_sat_incorrect_compare:
; V4T: @ %bb.0: @ %entry
; V4T-NEXT: ldr r2, .LCPI14_0
; V4T-NEXT: cmp r1, #0
; V4T-NEXT: mov r1, r0
; V4T-NEXT: movmi r1, #0
; V4T-NEXT: mov r2, r0
; V4T-NEXT: movmi r2, #0
; V4T-NEXT: ldr r1, .LCPI14_0
; V4T-NEXT: cmp r0, #8388608
; V4T-NEXT: movge r1, r2
; V4T-NEXT: movlt r1, r2
; V4T-NEXT: mov r0, r1
; V4T-NEXT: bx lr
; V4T-NEXT: .p2align 2
Expand All @@ -581,12 +581,12 @@ define i32 @no_unsigned_sat_incorrect_compare(i32 %x, i32 %y) #0 {
;
; V6-LABEL: no_unsigned_sat_incorrect_compare:
; V6: @ %bb.0: @ %entry
; V6-NEXT: ldr r2, .LCPI14_0
; V6-NEXT: cmp r1, #0
; V6-NEXT: mov r1, r0
; V6-NEXT: movmi r1, #0
; V6-NEXT: mov r2, r0
; V6-NEXT: movmi r2, #0
; V6-NEXT: ldr r1, .LCPI14_0
; V6-NEXT: cmp r0, #8388608
; V6-NEXT: movge r1, r2
; V6-NEXT: movlt r1, r2
; V6-NEXT: mov r0, r1
; V6-NEXT: bx lr
; V6-NEXT: .p2align 2
Expand Down
5 changes: 2 additions & 3 deletions llvm/test/CodeGen/AVR/ctpop.ll
Expand Up @@ -21,7 +21,6 @@ declare i8 @llvm.ctpop.i8(i8)
; CHECK: add {{.*}}[[RESULT]], {{.*}}[[SCRATCH]]
; CHECK: mov {{.*}}[[SCRATCH]], {{.*}}[[RESULT]]
; CHECK: swap {{.*}}[[SCRATCH]]
; CHECK: add {{.*}}[[SCRATCH]], {{.*}}[[RESULT]]
; CHECK: andi {{.*}}[[SCRATCH]], 15
; CHECK: mov {{.*}}[[RESULT]], {{.*}}[[SCRATCH]]
; CHECK: add {{.*}}[[RESULT]], {{.*}}[[SCRATCH]]
; CHECK: andi {{.*}}[[RESULT]], 15
; CHECK: ret
11 changes: 5 additions & 6 deletions llvm/test/CodeGen/AVR/hardware-mul.ll
Expand Up @@ -14,16 +14,15 @@ define i8 @mult8(i8 %a, i8 %b) {
define i16 @mult16(i16 %a, i16 %b) {
; CHECK-LABEL: mult16:
; CHECK: muls r22, r25
; CHECK: mov r18, r0
; CHECK: mov r20, r0
; CHECK: mul r22, r24
; CHECK: mov r19, r0
; CHECK: mov r20, r1
; CHECK: mov r21, r0
; CHECK: mov r18, r1
; CHECK: clr r1
; CHECK: add r20, r18
; CHECK: add r18, r20
; CHECK: muls r23, r24
; CHECK: clr r1
; CHECK: mov r22, r0
; CHECK: add r22, r20
; CHECK: add r18, r0
; :TODO: finish after reworking shift instructions
%mul = mul nsw i16 %b, %a
ret i16 %mul
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/SystemZ/int-cmp-57.ll
Expand Up @@ -82,7 +82,7 @@ define i32 @fun6(i32 %arg) {
; CHECK-LABEL: fun6:
; CHECK: afi
; CHECK-NEXT: chi
; CHECK-NEXT: locrlh
; CHECK-NEXT: locre
bb:
%tmp = add i32 %arg, -2147483648
%tmp1 = icmp eq i32 %tmp, 0
Expand All @@ -94,7 +94,7 @@ define i32 @fun7(i32 %arg) {
; CHECK-LABEL: fun7:
; CHECK: afi
; CHECK-NEXT: chi
; CHECK-NEXT: locrle
; CHECK-NEXT: locrh
bb:
%tmp = add i32 %arg, -2147483648
%tmp1 = icmp sgt i32 %tmp, 0
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/Thumb/pr35836_2.ll
Expand Up @@ -36,20 +36,20 @@ entry:
%mul = mul i128 %add18, %add
ret i128 %mul
}
; CHECK: adds r5, r1, r7
; CHECK: adds r5, r1, r6
; CHECK: mov r5, r4
; CHECK: adcs r5, r6
; CHECK: adcs r5, r7
; CHECK: ldr r5, [sp, #12] @ 4-byte Reload
; CHECK: adcs r2, r5
; CHECK: ldr r5, [sp, #16] @ 4-byte Reload
; CHECK: adcs r3, r5
; CHECK: adds r5, r1, r7
; CHECK: adcs r4, r6
; CHECK: adds r6, r1, r6
; CHECK: adcs r4, r7
; CHECK: ldr r1, [r0, #20]
; CHECK: str r1, [sp, #16] @ 4-byte Spill
; CHECK: ldr r6, [r0, #28]
; CHECK: ldr r5, [r0, #28]
; CHECK: ldr r1, [r0, #16]
; CHECK: ldr r7, [r0, #24]
; CHECK: adcs r7, r1
; CHECK: ldr r0, [sp, #16] @ 4-byte Reload
; CHECK: adcs r6, r0
; CHECK: adcs r5, r0
5 changes: 3 additions & 2 deletions llvm/test/CodeGen/X86/DynamicCalleeSavedRegisters.ll
Expand Up @@ -28,8 +28,9 @@ define cc 11 i32 @caller(i32 %a0, i32 %b0, i32 %c0, i32 %d0, i32 %e0) nounwind {
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; CHECK-NEXT: movl %ebp, %esi
; CHECK-NEXT: calll callee@PLT
; CHECK-NEXT: leal (%eax,%ebx), %esi
; CHECK-NEXT: addl %ebp, %esi
; CHECK-NEXT: addl %eax, %ebx
; CHECK-NEXT: addl %ebp, %ebx
; CHECK-NEXT: movl %ebx, %esi
; CHECK-NEXT: addl $12, %esp
; CHECK-NEXT: retl
%b1 = call x86_regcallcc i32 @callee(i32 %a0, i32 %b0, i32 %c0, i32 %d0, i32 %e0)
Expand Down
13 changes: 6 additions & 7 deletions llvm/test/CodeGen/X86/abs.ll
Expand Up @@ -132,14 +132,13 @@ define i64 @test_i64(i64 %a) nounwind {
define i128 @test_i128(i128 %a) nounwind {
; X64-LABEL: test_i128:
; X64: # %bb.0:
; X64-NEXT: movq %rsi, %rdx
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: movq %rsi, %rcx
; X64-NEXT: sarq $63, %rcx
; X64-NEXT: addq %rcx, %rax
; X64-NEXT: adcq %rcx, %rdx
; X64-NEXT: xorq %rcx, %rax
; X64-NEXT: xorq %rcx, %rdx
; X64-NEXT: movq %rsi, %rdx
; X64-NEXT: sarq $63, %rdx
; X64-NEXT: addq %rdx, %rax
; X64-NEXT: adcq %rdx, %rsi
; X64-NEXT: xorq %rdx, %rax
; X64-NEXT: xorq %rsi, %rdx
; X64-NEXT: retq
;
; X86-LABEL: test_i128:
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/X86/add-cmov.ll
Expand Up @@ -136,9 +136,9 @@ define i64 @select_max32_2_i64(i64 %offset, i64 %x) {
; CHECK-LABEL: select_max32_2_i64:
; CHECK: # %bb.0:
; CHECK-NEXT: leaq 2(%rdi), %rax
; CHECK-NEXT: addq $2147483647, %rdi # imm = 0x7FFFFFFF
; CHECK-NEXT: leaq 2147483647(%rdi), %rcx
; CHECK-NEXT: cmpq $41, %rsi
; CHECK-NEXT: cmovneq %rdi, %rax
; CHECK-NEXT: cmovneq %rcx, %rax
; CHECK-NEXT: retq
%b = icmp ne i64 %x, 41
%s = select i1 %b, i64 2147483647, i64 2
Expand Down Expand Up @@ -209,10 +209,10 @@ define i32 @select_20_43_i32(i32 %offset, i64 %x) {
; CHECK-LABEL: select_20_43_i32:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NEXT: leal 43(%rdi), %eax
; CHECK-NEXT: addl $20, %edi
; CHECK-NEXT: leal 43(%rdi), %ecx
; CHECK-NEXT: leal 20(%rdi), %eax
; CHECK-NEXT: cmpq $42, %rsi
; CHECK-NEXT: cmovgel %edi, %eax
; CHECK-NEXT: cmovll %ecx, %eax
; CHECK-NEXT: retq
%b = icmp sgt i64 %x, 41
%s = select i1 %b, i32 20, i32 43
Expand All @@ -224,10 +224,10 @@ define i16 @select_n2_17_i16(i16 %offset, i1 %b) {
; CHECK-LABEL: select_n2_17_i16:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NEXT: leal 17(%rdi), %eax
; CHECK-NEXT: addl $65534, %edi # imm = 0xFFFE
; CHECK-NEXT: leal 17(%rdi), %ecx
; CHECK-NEXT: leal 65534(%rdi), %eax
; CHECK-NEXT: testb $1, %sil
; CHECK-NEXT: cmovnel %edi, %eax
; CHECK-NEXT: cmovel %ecx, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
%s = select i1 %b, i16 -2, i16 17
Expand Down

0 comments on commit 6599961

Please sign in to comment.