diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 8301489df9a4..40033db4e26d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -973,10 +973,13 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { unsigned Index = *DerivedPtrLocation; SDValue SpillSlot = DAG.getTargetFrameIndex(Index, getFrameIndexTy()); - // Note: We know all of these reloads are independent, but don't bother to - // exploit that chain wise. DAGCombine will happily do so as needed, so - // doing it here would be a small compile time win at most. - SDValue Chain = getRoot(); + // All the reloads are independent and are reading memory only modified by + // statepoints (i.e. no other aliasing stores); informing SelectionDAG of + // this this let's CSE kick in for free and allows reordering of instructions + // if possible. The lowering for statepoint sets the root, so this is + // ordering all reloads with the either a) the statepoint node itself, or b) + // the entry of the current block for an invoke statepoint. + const SDValue Chain = DAG.getRoot(); // != Builder.getRoot() auto &MF = DAG.getMachineFunction(); auto &MFI = MF.getFrameInfo(); @@ -991,8 +994,7 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { SDValue SpillLoad = DAG.getLoad(LoadVT, getCurSDLoc(), Chain, SpillSlot, LoadMMO); - - DAG.setRoot(SpillLoad.getValue(1)); + PendingLoads.push_back(SpillLoad.getValue(1)); assert(SpillLoad.getNode()); setValue(&Relocate, SpillLoad); diff --git a/llvm/test/CodeGen/X86/statepoint-duplicates-export.ll b/llvm/test/CodeGen/X86/statepoint-duplicates-export.ll index ccc0d61b3c67..f61604b169f2 100644 --- a/llvm/test/CodeGen/X86/statepoint-duplicates-export.ll +++ b/llvm/test/CodeGen/X86/statepoint-duplicates-export.ll @@ -44,19 +44,16 @@ next: define i1 @test2(i32 addrspace(1)* %arg) gc "statepoint-example" { ; CHECK-LABEL: test2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subq $24, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: movq %rdi, {{[0-9]+}}(%rsp) +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: movq %rdi, (%rsp) ; CHECK-NEXT: callq func ; CHECK-NEXT: .Ltmp2: -; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax -; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp) ; CHECK-NEXT: callq func ; CHECK-NEXT: .Ltmp3: -; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax -; CHECK-NEXT: orq {{[0-9]+}}(%rsp), %rax +; CHECK-NEXT: cmpq $0, (%rsp) ; CHECK-NEXT: sete %al -; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: popq %rcx ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq entry: