diff --git a/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp b/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp index 3a64d519d105d..4c72fa2359750 100644 --- a/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp +++ b/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp @@ -90,7 +90,7 @@ bool SIPostRABundler::runOnMachineFunction(MachineFunction &MF) { TRI = MF.getSubtarget().getRegisterInfo(); bool Changed = false; - const unsigned MemFlags = SIInstrFlags::MTBUF | SIInstrFlags::MUBUF | + const uint64_t MemFlags = SIInstrFlags::MTBUF | SIInstrFlags::MUBUF | SIInstrFlags::SMRD | SIInstrFlags::DS | SIInstrFlags::FLAT | SIInstrFlags::MIMG; @@ -101,10 +101,11 @@ bool SIPostRABundler::runOnMachineFunction(MachineFunction &MF) { for (auto I = B; I != E; I = Next) { Next = std::next(I); - if (I->isBundled() || !I->mayLoadOrStore() || + const uint64_t IMemFlags = I->getDesc().TSFlags & MemFlags; + + if (IMemFlags == 0 || I->isBundled() || !I->mayLoadOrStore() || B->mayLoad() != I->mayLoad() || B->mayStore() != I->mayStore() || - (B->getDesc().TSFlags & MemFlags) != - (I->getDesc().TSFlags & MemFlags) || + ((B->getDesc().TSFlags & MemFlags) != IMemFlags) || isDependentLoad(*I)) { if (B != I) { diff --git a/llvm/test/CodeGen/AMDGPU/no-bundle-asm.ll b/llvm/test/CodeGen/AMDGPU/no-bundle-asm.ll new file mode 100644 index 0000000000000..8e68c462960eb --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/no-bundle-asm.ll @@ -0,0 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s + +; Regression test: SIPostRABundler used to incorrectly try to form a bundle +; containing inline asm; instructions with no memory TSFlags are now excluded. 
+ +define amdgpu_kernel void @no_bundle_asm_sideeffect() { +; CHECK-LABEL: no_bundle_asm_sideeffect: +; CHECK: ; %bb.0: +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b64 s[0:1], 42 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_endpgm + call void asm sideeffect "s_mov_b32 s0, $0", "i,~{s0}"(i32 42) + call void asm sideeffect "s_mov_b64 s[0:1], $0", "i,~{s[0:1]}"(i64 42) + ret void +} + +define amdgpu_kernel void @no_bundle_asm() { +; CHECK-LABEL: no_bundle_asm: +; CHECK: ; %bb.0: +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b32 s0, 42 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: s_mov_b64 s[0:1], 42 +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_endpgm + call void asm "s_mov_b32 s0, $0", "i,~{s0}"(i32 42) + call void asm "s_mov_b64 s[0:1], $0", "i,~{s[0:1]}"(i64 42) + ret void +}