diff --git a/llvm/include/llvm/CodeGen/TailDuplicator.h b/llvm/include/llvm/CodeGen/TailDuplicator.h
index 8b1f67c416c22..9383ba6ce8671 100644
--- a/llvm/include/llvm/CodeGen/TailDuplicator.h
+++ b/llvm/include/llvm/CodeGen/TailDuplicator.h
@@ -121,9 +121,10 @@ class TailDuplicator {
                      SmallVectorImpl<MachineBasicBlock *> &TDBBs,
                      SmallVectorImpl<MachineInstr *> &Copies,
                      SmallVectorImpl<MachineBasicBlock *> *CandidatePtr);
-  void appendCopies(MachineBasicBlock *MBB,
-                    SmallVectorImpl<std::pair<Register, RegSubRegPair>> &CopyInfos,
-                    SmallVectorImpl<MachineInstr *> &Copies);
+  void
+  appendCopies(MachineBasicBlock *MBB, MachineBasicBlock *SuccMBB,
+               SmallVectorImpl<std::pair<Register, RegSubRegPair>> &CopyInfos,
+               SmallVectorImpl<MachineInstr *> &Copies);
 
   void removeDeadBlock(
       MachineBasicBlock *MBB,
diff --git a/llvm/lib/CodeGen/PHIEliminationUtils.cpp b/llvm/lib/CodeGen/PHIEliminationUtils.cpp
index f4562f437788e..cf3391a1bc158 100644
--- a/llvm/lib/CodeGen/PHIEliminationUtils.cpp
+++ b/llvm/lib/CodeGen/PHIEliminationUtils.cpp
@@ -32,7 +32,12 @@ llvm::findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB,
   // instructions that are Calls with EHPad successors or INLINEASM_BR in a
   // block.
   bool EHPadSuccessor = SuccMBB->isEHPad();
-  if (!EHPadSuccessor && !SuccMBB->isInlineAsmBrIndirectTarget())
+  // Bypass fast path if the block itself contains INLINEASM_BR.
+  bool HasInlineAsmBr = llvm::any_of(*MBB, [](const MachineInstr &MI) {
+    return MI.getOpcode() == TargetOpcode::INLINEASM_BR;
+  });
+
+  if (!EHPadSuccessor && !HasInlineAsmBr)
     return MBB->getFirstTerminator();
 
   // Discover any defs in this basic block.
diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp
index 5d720fbbf1c61..b957115bb67e6 100644
--- a/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/TailDuplicator.h"
+#include "PHIEliminationUtils.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/STLExtras.h"
@@ -651,14 +652,6 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
     if (PreRegAlloc && MI.isCall())
       return false;
 
-    // TailDuplicator::appendCopies will erroneously place COPYs after
-    // INLINEASM_BR instructions after 4b0aa5724fea, which demonstrates the same
-    // bug that was fixed in f7a53d82c090.
-    // FIXME: Use findPHICopyInsertPoint() to find the correct insertion point
-    // for the COPY when replacing PHIs.
-    if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
-      return false;
-
     if (MI.isBundle())
       InstrCount += MI.getBundleSize();
     else if (!MI.isPHI() && !MI.isMetaInstruction())
@@ -926,7 +919,7 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
         duplicateInstruction(&MI, TailBB, PredBB, LocalVRMap, UsedByPhi);
       }
     }
-    appendCopies(PredBB, CopyInfos, Copies);
+    appendCopies(PredBB, TailBB, CopyInfos, Copies);
 
     NumTailDupAdded += TailBB->size() - 1; // subtract one for removed branch
 
@@ -994,7 +987,7 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
       duplicateInstruction(MI, TailBB, PrevBB, LocalVRMap, UsedByPhi);
       MI->eraseFromParent();
     }
-    appendCopies(PrevBB, CopyInfos, Copies);
+    appendCopies(PrevBB, TailBB, CopyInfos, Copies);
   } else {
     TII->removeBranch(*PrevBB);
     // No PHIs to worry about, just splice the instructions over.
@@ -1058,7 +1051,7 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
       // from PredBB.
       processPHI(&MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false);
     }
-    appendCopies(PredBB, CopyInfos, Copies);
+    appendCopies(PredBB, TailBB, CopyInfos, Copies);
   }
 
   return Changed;
@@ -1066,12 +1059,16 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
 
 /// At the end of the block \p MBB generate COPY instructions between registers
 /// described by \p CopyInfos. Append resulting instructions to \p Copies.
-void TailDuplicator::appendCopies(MachineBasicBlock *MBB,
-      SmallVectorImpl<std::pair<Register, RegSubRegPair>> &CopyInfos,
-      SmallVectorImpl<MachineInstr *> &Copies) {
-  MachineBasicBlock::iterator Loc = MBB->getFirstTerminator();
+void TailDuplicator::appendCopies(
+    MachineBasicBlock *MBB, MachineBasicBlock *SuccMBB,
+    SmallVectorImpl<std::pair<Register, RegSubRegPair>> &CopyInfos,
+    SmallVectorImpl<MachineInstr *> &Copies) {
   const MCInstrDesc &CopyD = TII->get(TargetOpcode::COPY);
+
   for (auto &CI : CopyInfos) {
+    MachineBasicBlock::iterator Loc =
+        findPHICopyInsertPoint(MBB, SuccMBB, CI.second.Reg);
+
     auto C = BuildMI(*MBB, Loc, DebugLoc(), CopyD, CI.first)
                  .addReg(CI.second.Reg, 0, CI.second.SubReg);
     Copies.push_back(C);
diff --git a/llvm/test/CodeGen/X86/tail-dup-asm-goto.ll b/llvm/test/CodeGen/X86/tail-dup-asm-goto.ll
index 7ce983869ce7d..481c884428850 100644
--- a/llvm/test/CodeGen/X86/tail-dup-asm-goto.ll
+++ b/llvm/test/CodeGen/X86/tail-dup-asm-goto.ll
@@ -2,7 +2,7 @@
 ; RUN: llc -mtriple=x86_64-linux -stop-after=early-tailduplication \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
-; Ensure that we don't duplicate a block with an "INLINEASM_BR" instruction
+; Ensure that we don't insert a copy after an "INLINEASM_BR" instruction
 ; during code gen.
 
 declare dso_local void @foo()
@@ -20,30 +20,31 @@ define ptr @test1(ptr %arg1, ptr %arg2) {
   ; CHECK-NEXT:   JMP_1 %bb.1
   ; CHECK-NEXT: {{ $}}
   ; CHECK-NEXT: bb.1.bb100:
-  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT:   successors: %bb.5(0x80000000), %bb.4(0x00000000)
   ; CHECK-NEXT: {{ $}}
   ; CHECK-NEXT:   MOV64mi32 [[COPY1]], 1, $noreg, 0, $noreg, 0 :: (store (s64) into %ir.arg1)
-  ; CHECK-NEXT:   JMP_1 %bb.3
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:gr64 = COPY [[MOV64rm]]
+  ; CHECK-NEXT:   INLINEASM_BR &"#$0 $1 $2", 9 /* sideeffect mayload attdialect */, 13 /* imm */, 42, 13 /* imm */, 0, 13 /* imm */, %bb.4, 12 /* clobber */, implicit-def early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def early-clobber $eflags
+  ; CHECK-NEXT:   JMP_1 %bb.5
   ; CHECK-NEXT: {{ $}}
   ; CHECK-NEXT: bb.2.bb106:
-  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT:   successors: %bb.5(0x80000000), %bb.4(0x00000000)
   ; CHECK-NEXT: {{ $}}
   ; CHECK-NEXT:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
   ; CHECK-NEXT:   CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp
   ; CHECK-NEXT:   ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
-  ; CHECK-NEXT: {{ $}}
-  ; CHECK-NEXT: bb.3.bb110:
-  ; CHECK-NEXT:   successors: %bb.5(0x80000000), %bb.4(0x00000000)
-  ; CHECK-NEXT: {{ $}}
-  ; CHECK-NEXT:   [[PHI:%[0-9]+]]:gr64 = PHI [[COPY]], %bb.2, [[MOV64rm]], %bb.1
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:gr64 = COPY [[COPY]]
   ; CHECK-NEXT:   INLINEASM_BR &"#$0 $1 $2", 9 /* sideeffect mayload attdialect */, 13 /* imm */, 42, 13 /* imm */, 0, 13 /* imm */, %bb.4, 12 /* clobber */, implicit-def early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def early-clobber $eflags
   ; CHECK-NEXT:   JMP_1 %bb.5
   ; CHECK-NEXT: {{ $}}
   ; CHECK-NEXT: bb.4.bb17.i.i.i (inlineasm-br-indirect-target):
   ; CHECK-NEXT:   successors: %bb.5(0x80000000)
   ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT:   [[PHI:%[0-9]+]]:gr64 = PHI [[COPY2]], %bb.1, [[COPY3]], %bb.2
+  ; CHECK-NEXT: {{ $}}
   ; CHECK-NEXT: bb.5.kmem_cache_has_cpu_partial.exit:
-  ; CHECK-NEXT:   $rax = COPY [[PHI]]
+  ; CHECK-NEXT:   [[PHI1:%[0-9]+]]:gr64 = PHI [[PHI]], %bb.4, [[COPY2]], %bb.1, [[COPY3]], %bb.2
+  ; CHECK-NEXT:   $rax = COPY [[PHI1]]
   ; CHECK-NEXT:   RET 0, $rax
 bb:
   %i28.i = load ptr, ptr %arg1, align 8