Skip to content

Commit

Permalink
[Statepoint] Enable cross block relocates w/vreg lowering
Browse files Browse the repository at this point in the history
This change is mechanical, it just removes the restriction and updates tests.  The key building blocks were submitted in 31342eb and 8fe2abc.

Note that this (and preceeding changes) entirely subsumes D83965.  I did includes a couple of it's tests.

From the codegen changes, an interesting observation: this doesn't actual reduce spilling, it just let's the register allocator do it's job.  That results in a slightly different overall result which has both pros and cons over the eager spill lowering.  (i.e. We'll have some perf tuning to do once this is stable.)
  • Loading branch information
preames committed Jul 29, 2020
1 parent 1397864 commit 755f91f
Show file tree
Hide file tree
Showing 3 changed files with 179 additions and 30 deletions.
10 changes: 0 additions & 10 deletions llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
Expand Up @@ -545,16 +545,6 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
const unsigned MaxTiedRegs = 15; // Max number of tied regs MI can have.
unsigned MaxVRegPtrs =
std::min(MaxTiedRegs, MaxRegistersForGCPointers.getValue());
// Use old spill scheme for cross-block relocates.
if (SI.StatepointInstr) {
const BasicBlock *BB = SI.StatepointInstr->getParent();
bool NonLocalReloc =
llvm::any_of(SI.GCRelocates, [BB](const GCRelocateInst *R) {
return R->getParent() != BB;
});
if (NonLocalReloc)
MaxVRegPtrs = 0;
}

LLVM_DEBUG(dbgs() << "Desiding how to lower GC Pointers:\n");
unsigned CurNumVRegs = 0;
Expand Down
20 changes: 9 additions & 11 deletions llvm/test/CodeGen/X86/statepoint-vreg-details.ll
Expand Up @@ -215,27 +215,25 @@ define i1 @test_cross_bb(i32 addrspace(1)* %a, i1 %external_cond) gc "statepoint
; CHECK-VREG: bb.0.entry:
; CHECK-VREG: %1:gr32 = COPY $esi
; CHECK-VREG-NEXT: %0:gr64 = COPY $rdi
; CHECK-VREG-NEXT: %3:gr8 = COPY %1.sub_8bit
; CHECK-VREG-NEXT: MOV64mr %stack.0, 1, $noreg, 0, $noreg, %0 :: (store 8 into %stack.0)
; CHECK-VREG-NEXT: %4:gr8 = COPY %1.sub_8bit
; CHECK-VREG-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
; CHECK-VREG-NEXT: STATEPOINT 0, 0, 0, @return_i1, 2, 0, 2, 0, 2, 0, 1, 8, %stack.0, 0, 1, 8, %stack.0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def $al :: (volatile load store 8 on %stack.0)
; CHECK-VREG-NEXT: %2:gr64 = STATEPOINT 0, 0, 0, @return_i1, 2, 0, 2, 0, 2, 0, %0, %0(tied-def 0), csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def $al
; CHECK-VREG-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
; CHECK-VREG-NEXT: %4:gr8 = COPY $al
; CHECK-VREG-NEXT: %2:gr8 = COPY %4
; CHECK-VREG-NEXT: TEST8ri killed %3, 1, implicit-def $eflags
; CHECK-VREG-NEXT: %5:gr8 = COPY $al
; CHECK-VREG-NEXT: %3:gr8 = COPY %5
; CHECK-VREG-NEXT: TEST8ri killed %4, 1, implicit-def $eflags
; CHECK-VREG-NEXT: JCC_1 %bb.2, 4, implicit $eflags
; CHECK-VREG-NEXT: JMP_1 %bb.1
; CHECK-VREG: bb.1.left:
; CHECK-VREG-NEXT: %6:gr64 = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load 8 from %stack.0)
; CHECK-VREG-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
; CHECK-VREG-NEXT: $rdi = COPY %6
; CHECK-VREG-NEXT: $rdi = COPY %2
; CHECK-VREG-NEXT: CALL64pcrel32 @consume, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp
; CHECK-VREG-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
; CHECK-VREG-NEXT: $al = COPY %2
; CHECK-VREG-NEXT: $al = COPY %3
; CHECK-VREG-NEXT: RET 0, $al
; CHECK-VREG: bb.2.right:
; CHECK-VREG-NEXT: %5:gr8 = MOV8ri 1
; CHECK-VREG-NEXT: $al = COPY %5
; CHECK-VREG-NEXT: %6:gr8 = MOV8ri 1
; CHECK-VREG-NEXT: $al = COPY %6
; CHECK-VREG-NEXT: RET 0, $al

entry:
Expand Down
179 changes: 170 additions & 9 deletions llvm/test/CodeGen/X86/statepoint-vreg.ll
Expand Up @@ -6,10 +6,12 @@ target triple = "x86_64-pc-linux-gnu"

declare i1 @return_i1()
declare void @func()
declare void @"some_call"(i64 addrspace(1)*)
declare void @consume(i32 addrspace(1)*)
declare void @consume2(i32 addrspace(1)*, i32 addrspace(1)*)
declare void @consume5(i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*)
declare void @use1(i32 addrspace(1)*, i8 addrspace(1)*)
declare i32 @"personality_function"()

; test most simple relocate
define i1 @test_relocate(i32 addrspace(1)* %a) gc "statepoint-example" {
Expand Down Expand Up @@ -231,30 +233,31 @@ define i1 @test_cross_bb(i32 addrspace(1)* %a, i1 %external_cond) gc "statepoint
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset %rbx, -24
; CHECK-NEXT: .cfi_offset %rbx, -32
; CHECK-NEXT: .cfi_offset %r14, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movl %esi, %ebp
; CHECK-NEXT: movq %rdi, (%rsp)
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: callq return_i1
; CHECK-NEXT: .Ltmp7:
; CHECK-NEXT: testb $1, %bpl
; CHECK-NEXT: je .LBB7_2
; CHECK-NEXT: # %bb.1: # %left
; CHECK-NEXT: movl %eax, %ebx
; CHECK-NEXT: movq (%rsp), %rdi
; CHECK-NEXT: movl %eax, %r14d
; CHECK-NEXT: movq %rbx, %rdi
; CHECK-NEXT: callq consume
; CHECK-NEXT: movl %ebx, %eax
; CHECK-NEXT: movl %r14d, %eax
; CHECK-NEXT: jmp .LBB7_3
; CHECK-NEXT: .LBB7_2: # %right
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: .LBB7_3: # %right
; CHECK-NEXT: addq $8, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
Expand Down Expand Up @@ -374,9 +377,167 @@ entry:
ret void
}

define i64 addrspace(1)* @test_basic_invoke(i64 addrspace(1)* %obj,
; CHECK-LABEL: test_basic_invoke:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset %rbx, -24
; CHECK-NEXT: .cfi_offset %r14, -16
; CHECK-NEXT: movq %rsi, %r14
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: .Ltmp12:
; CHECK-NEXT: callq some_call
; CHECK-NEXT: .Ltmp15:
; CHECK-NEXT: .Ltmp13:
; CHECK-NEXT: # %bb.1: # %normal_return
; CHECK-NEXT: movq %rbx, %rax
; CHECK-NEXT: .LBB11_2: # %normal_return
; CHECK-NEXT: addq $8, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB11_3: # %exceptional_return
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .Ltmp14:
; CHECK-NEXT: movq %r14, %rax
; CHECK-NEXT: jmp .LBB11_2
i64 addrspace(1)* %obj1) gc "statepoint-example" personality i32 ()* @"personality_function" {
entry:
%0 = invoke token (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %obj, i32 0, i32 0) ["gc-live" (i64 addrspace(1)* %obj, i64 addrspace(1)* %obj1), "deopt" (i32 0, i32 -1, i32 0, i32 0, i32 0)]
to label %invoke_safepoint_normal_dest unwind label %exceptional_return

invoke_safepoint_normal_dest:
%obj.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %0, i32 0, i32 0)
%obj1.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %0, i32 1, i32 1)
br label %normal_return

normal_return:
ret i64 addrspace(1)* %obj.relocated

exceptional_return:
%landing_pad = landingpad token
cleanup
%obj.relocated1 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %landing_pad, i32 0, i32 0)
%obj1.relocated1 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %landing_pad, i32 1, i32 1)
ret i64 addrspace(1)* %obj1.relocated1
}

define i64 addrspace(1)* @test_invoke_same_val(i1 %cond, i64 addrspace(1)* %val1, i64 addrspace(1)* %val2, i64 addrspace(1)* %val3) gc "statepoint-example" personality i32 ()* @"personality_function" {
; CHECK-LABEL: test_invoke_same_val:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: .cfi_offset %rbx, -40
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rdx, %rbx
; CHECK-NEXT: movq %rsi, %rbp
; CHECK-NEXT: movl %edi, %r14d
; CHECK-NEXT: testb $1, %r14b
; CHECK-NEXT: je .LBB12_2
; CHECK-NEXT: # %bb.1: # %left
; CHECK-NEXT: .Ltmp19:
; CHECK-NEXT: movq %rbp, %rdi
; CHECK-NEXT: callq some_call
; CHECK-NEXT: .Ltmp22:
; CHECK-NEXT: .Ltmp20:
; CHECK-NEXT: jmp .LBB12_4
; CHECK-NEXT: .LBB12_2: # %right
; CHECK-NEXT: movq %rcx, %r15
; CHECK-NEXT: .Ltmp16:
; CHECK-NEXT: movq %rbp, %rdi
; CHECK-NEXT: callq some_call
; CHECK-NEXT: .Ltmp23:
; CHECK-NEXT: .Ltmp17:
; CHECK-NEXT: # %bb.3: # %right.relocs
; CHECK-NEXT: movq %r15, %rbp
; CHECK-NEXT: .LBB12_4: # %normal_return
; CHECK-NEXT: testb $1, %r14b
; CHECK-NEXT: cmoveq %rbx, %rbp
; CHECK-NEXT: .LBB12_6: # %exceptional_return.left
; CHECK-NEXT: movq %rbp, %rax
; CHECK-NEXT: .LBB12_7: # %exceptional_return.left
; CHECK-NEXT: addq $8, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 40
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: popq %r15
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB12_8: # %exceptional_return.right
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: .Ltmp18:
; CHECK-NEXT: movq %rbx, %rax
; CHECK-NEXT: jmp .LBB12_7
; CHECK-NEXT: .LBB12_5: # %exceptional_return.left
; CHECK-NEXT: .Ltmp21:
; CHECK-NEXT: jmp .LBB12_6
entry:
br i1 %cond, label %left, label %right

left:
%sp1 = invoke token (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %val1, i32 0, i32 0) ["gc-live"(i64 addrspace(1)* %val1, i64 addrspace(1)* %val2)]
to label %left.relocs unwind label %exceptional_return.left

left.relocs:
%val1.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %sp1, i32 0, i32 0)
%val2.relocated_left = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %sp1, i32 1, i32 1)
br label %normal_return

right:
%sp2 = invoke token (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %val1, i32 0, i32 0) ["gc-live"(i64 addrspace(1)* %val2, i64 addrspace(1)* %val3)]
to label %right.relocs unwind label %exceptional_return.right

right.relocs:
%val2.relocated_right = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %sp2, i32 0, i32 0)
%val3.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %sp2, i32 1, i32 1)
br label %normal_return

normal_return:
%a1 = phi i64 addrspace(1)* [%val1.relocated, %left.relocs], [%val3.relocated, %right.relocs]
%a2 = phi i64 addrspace(1)* [%val2.relocated_left, %left.relocs], [%val2.relocated_right, %right.relocs]
%ret = select i1 %cond, i64 addrspace(1)* %a1, i64 addrspace(1)* %a2
ret i64 addrspace(1)* %ret

exceptional_return.left:
%landing_pad = landingpad token
cleanup
%val.relocated2 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %landing_pad, i32 0, i32 0)
ret i64 addrspace(1)* %val.relocated2

exceptional_return.right:
%landing_pad1 = landingpad token
cleanup
%val.relocated3 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %landing_pad1, i32 0, i32 0)
ret i64 addrspace(1)* %val.relocated3
}

declare token @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...)
declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
declare token @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...)
declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32, i32)
declare i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token, i32, i32)
declare i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token, i32, i32)
declare <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token, i32, i32)
declare i1 @llvm.experimental.gc.result.i1(token)

0 comments on commit 755f91f

Please sign in to comment.