Skip to content

Commit

Permalink
AMDGPU: Insert wait at start of callee functions
Browse files Browse the repository at this point in the history
llvm-svn: 300000
  • Loading branch information
arsenm committed Apr 11, 2017
1 parent efa9f4b commit 9ac4002
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 1 deletion.
14 changes: 14 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -690,5 +690,19 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
for (MachineInstr *I : RemoveMI)
I->eraseFromParent();

if (!MFI->isEntryFunction()) {
// Wait for any outstanding memory operations that the input registers may
// depend on. We can't track them and it's better to do the wait after the
// costly call sequence.

// TODO: Could insert earlier and schedule more liberally with operations
// that only use caller preserved registers.
MachineBasicBlock &EntryBB = MF.front();
BuildMI(EntryBB, EntryBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
.addImm(0);

Changes = true;
}

return Changes;
}
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/hsa-func.ll
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

; ELF: Symbol {
; ELF: Name: simple
; ELF: Size: 288
; ELF: Size: 292
; ELF: Type: Function (0x2)
; ELF: }

Expand Down
25 changes: 25 additions & 0 deletions llvm/test/CodeGen/AMDGPU/insert-waits-callee.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs -run-pass si-insert-waits -o - %s | FileCheck %s
#
# Runs only the si-insert-waits pass over a single-block function and expects
# an `S_WAITCNT 0` to be the first instruction of bb.0, immediately followed
# by the V_ADD_F32 and S_SETPC_B64 instructions from the input body.
# NOTE(review): presumably this function is not classified as an entry
# function, which is what would trigger the inserted wait -- confirm against
# the pass.
--- |
define float @entry_callee_wait(float %arg) #0 {
ret float %arg
}

attributes #0 = { nounwind }
...
---
# CHECK-LABEL: name: entry_callee_wait{{$}}
# CHECK: bb.0:
# CHECK-NEXT: S_WAITCNT 0{{$}}
# CHECK-NEXT: V_ADD_F32
# CHECK-NEXT: S_SETPC_B64
liveins:
- { reg: '%sgpr0_sgpr1' }
- { reg: '%vgpr0' }

name: entry_callee_wait
body: |
bb.0:
%vgpr0 = V_ADD_F32_e32 %vgpr0, %vgpr0, implicit %exec
S_SETPC_B64 killed %sgpr0_sgpr1
...

0 comments on commit 9ac4002

Please sign in to comment.