640 changes: 640 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp

Large diffs are not rendered by default.

138 changes: 138 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h
@@ -0,0 +1,138 @@
//===-- StatepointLowering.h - SDAGBuilder's statepoint code -*- C++ -*---===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file includes support code used by SelectionDAGBuilder when lowering a
// statepoint sequence in SelectionDAG IR.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H
#define LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H

#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include <vector>

namespace llvm {
class SelectionDAGBuilder;

/// This class tracks both per-statepoint and per-SelectionDAG information.
/// For each statepoint it tracks the locations of its GC values (incoming and
/// relocated) and the list of gc.relocate calls scheduled for visiting (the
/// latter is used only for a debug-mode consistency check). The spill slot
/// tracking works in concert with information in FunctionLoweringInfo.
class StatepointLoweringState {
public:
StatepointLoweringState() : NextSlotToAllocate(0) {
}

/// Reset all state tracking for a newly encountered safepoint. Also
/// performs some consistency checking.
void startNewStatepoint(SelectionDAGBuilder &Builder);

/// Clear the memory usage of this object. This is called from
/// SelectionDAGBuilder::clear. We require this is never called in the
/// midst of processing a statepoint sequence.
void clear();

/// Returns the spill location of a value incoming to the current
/// statepoint. Will return SDValue() if this value hasn't been
/// spilled. Otherwise, the value has already been spilled and no
/// further action is required by the caller.
SDValue getLocation(SDValue val) {
if (!Locations.count(val))
return SDValue();
return Locations[val];
}
void setLocation(SDValue val, SDValue Location) {
assert(!Locations.count(val) &&
"Trying to allocate already allocated location");
Locations[val] = Location;
}

/// Returns the relocated value for a given input pointer. Will
/// return SDValue() if this value hasn't yet been reloaded from
/// its stack slot after the statepoint. Otherwise, the value
/// has already been reloaded and the SDValue of that reload will
/// be returned. Note that VMState values are spilled but not
/// reloaded (since they don't change at the safepoint unless
/// also listed in the GC pointer section) and will thus never
/// be in this map.
SDValue getRelocLocation(SDValue val) {
if (!RelocLocations.count(val))
return SDValue();
return RelocLocations[val];
}
void setRelocLocation(SDValue val, SDValue Location) {
assert(!RelocLocations.count(val) &&
"Trying to allocate already allocated location");
RelocLocations[val] = Location;
}

/// Record the fact that we expect to encounter a given gc_relocate
/// before the next statepoint. If we don't see it, an assertion
/// will fire.
void scheduleRelocCall(const CallInst &RelocCall) {
PendingGCRelocateCalls.push_back(&RelocCall);
}
/// Remove this gc_relocate from the list we're expecting to see
/// before the next statepoint. If we weren't expecting to see
/// it, an assertion will fire.
void relocCallVisited(const CallInst &RelocCall) {
SmallVectorImpl<const CallInst *>::iterator itr =
std::find(PendingGCRelocateCalls.begin(), PendingGCRelocateCalls.end(),
&RelocCall);
assert(itr != PendingGCRelocateCalls.end() &&
"Visited unexpected gcrelocate call");
PendingGCRelocateCalls.erase(itr);
}

// TODO: Should add consistency tracking to ensure we encounter
// expected gc_result calls too.

/// Get a stack slot we can use to store a value of type ValueType. This
/// will hopefully be a recycled slot from another statepoint.
SDValue allocateStackSlot(EVT ValueType, SelectionDAGBuilder &Builder);

void reserveStackSlot(int Offset) {
assert(Offset >= 0 && Offset < (int)AllocatedStackSlots.size() &&
"out of bounds");
assert(!AllocatedStackSlots[Offset] && "already reserved!");
assert(NextSlotToAllocate <= (unsigned)Offset && "consistency!");
AllocatedStackSlots[Offset] = true;
}
bool isStackSlotAllocated(int Offset) {
assert(Offset >= 0 && Offset < (int)AllocatedStackSlots.size() &&
"out of bounds");
return AllocatedStackSlots[Offset];
}

private:
/// Maps a pre-relocation value (a GC pointer directly incoming into the
/// statepoint) to its location (currently only stack slots).
DenseMap<SDValue, SDValue> Locations;
/// Maps a pre-relocation value to its new, relocated location.
DenseMap<SDValue, SDValue> RelocLocations;

/// A boolean indicator for each slot listed in the FunctionLoweringInfo as
/// to whether it has been used in the current statepoint. Since we try to
/// preserve stack slots across safepoints, there can be gaps in which
/// slots have been allocated.
SmallVector<bool, 50> AllocatedStackSlots;

/// Points just beyond the last slot known to have been allocated
unsigned NextSlotToAllocate;

/// Keeps track of pending gc.relocate calls for the consistency check.
SmallVector<const CallInst *, 10> PendingGCRelocateCalls;
};
} // end namespace llvm

#endif // LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H
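For context, here is a minimal sketch of how the tracking API above is intended to be driven from the statepoint lowering code. The helper name lowerIncomingValue and the elided store emission are illustrative assumptions, not part of this patch; only the StatepointLoweringState methods it calls are defined here.

// Illustrative sketch only. The helper itself is hypothetical; only the
// StatepointLoweringState methods used below come from this patch.
#include "SelectionDAGBuilder.h"
#include "StatepointLowering.h"

using namespace llvm;

static SDValue lowerIncomingValue(SDValue Incoming, SelectionDAGBuilder &Builder,
                                  StatepointLoweringState &StatepointLowering) {
  // Reuse the spill location if this value was already spilled for an
  // earlier statepoint; otherwise allocate (or recycle) a stack slot and
  // record it so later statepoints can find it again.
  SDValue Loc = StatepointLowering.getLocation(Incoming);
  if (!Loc.getNode()) {
    Loc = StatepointLowering.allocateStackSlot(Incoming.getValueType(), Builder);
    StatepointLowering.setLocation(Incoming, Loc);
    // ... emit the actual store of Incoming into Loc here ...
  }
  return Loc;
}

In this scheme, startNewStatepoint would be called once before a statepoint's operands are processed, and relocCallVisited as each gc.relocate is lowered, which is what allows the debug-mode consistency check to catch relocates that were scheduled but never visited.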
1 change: 1 addition & 0 deletions llvm/lib/IR/CMakeLists.txt
@@ -36,6 +36,7 @@ add_llvm_library(LLVMCore
Pass.cpp
PassManager.cpp
PassRegistry.cpp
Statepoint.cpp
Type.cpp
TypeFinder.cpp
Use.cpp
62 changes: 62 additions & 0 deletions llvm/lib/IR/Statepoint.cpp
@@ -0,0 +1,62 @@
//===-- IR/Statepoint.cpp -- gc.statepoint utilities ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains utility functions to identify and work with the
// gc.statepoint, gc.relocate, and gc.result intrinsic calls.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/Function.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/Support/CommandLine.h"

#include "llvm/IR/Statepoint.h"

using namespace std;
using namespace llvm;

bool llvm::isStatepoint(const ImmutableCallSite &CS) {
const Function *F = CS.getCalledFunction();
return (F && F->getIntrinsicID() == Intrinsic::experimental_gc_statepoint);
}
bool llvm::isStatepoint(const Instruction *inst) {
if (isa<InvokeInst>(inst) || isa<CallInst>(inst)) {
ImmutableCallSite CS(inst);
return isStatepoint(CS);
}
return false;
}
bool llvm::isStatepoint(const Instruction &inst) {
return isStatepoint(&inst);
}

bool llvm::isGCRelocate(const ImmutableCallSite &CS) {
return isGCRelocate(CS.getInstruction());
}
bool llvm::isGCRelocate(const Instruction *inst) {
if (const CallInst *call = dyn_cast<CallInst>(inst)) {
if (const Function *F = call->getCalledFunction()) {
return F->getIntrinsicID() == Intrinsic::experimental_gc_relocate;
}
}
return false;
}

bool llvm::isGCResult(const ImmutableCallSite &CS) {
return isGCResult(CS.getInstruction());
}
bool llvm::isGCResult(const Instruction *inst) {
if (const CallInst *call = dyn_cast<CallInst>(inst)) {
if (const Function *F = call->getCalledFunction()) {
return (F->getIntrinsicID() == Intrinsic::experimental_gc_result_int ||
F->getIntrinsicID() == Intrinsic::experimental_gc_result_float ||
F->getIntrinsicID() == Intrinsic::experimental_gc_result_ptr);
}
}
return false;
}
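As a small usage illustration, these predicates make it easy to classify statepoint-related calls while walking a function. The helper below is hypothetical and not part of this patch; it only exercises the three predicates defined above.

// Illustrative sketch only: counts statepoint-related calls in a function
// using the predicates defined above. The helper itself is hypothetical.
#include "llvm/IR/Function.h"
#include "llvm/IR/Statepoint.h"

using namespace llvm;

static void countStatepointCalls(const Function &F, unsigned &NumStatepoints,
                                 unsigned &NumRelocates, unsigned &NumResults) {
  NumStatepoints = NumRelocates = NumResults = 0;
  for (const BasicBlock &BB : F)
    for (const Instruction &I : BB) {
      if (isStatepoint(&I))
        ++NumStatepoints;
      else if (isGCRelocate(&I))
        ++NumRelocates;
      else if (isGCResult(&I))
        ++NumResults;
    }
}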
9 changes: 9 additions & 0 deletions llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -18,6 +18,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -1165,6 +1166,14 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS,
if (!CI->isLosslessCast())
return false;

// If this is a GC intrinsic, avoid munging types. We need types for
// statepoint reconstruction in SelectionDAG.
// TODO: This is probably something which should be expanded to all
// intrinsics since the entire point of intrinsics is that
// they are understandable by the optimizer.
if (isStatepoint(CS) || isGCRelocate(CS) || isGCResult(CS))
return false;

// The size of ByVal or InAlloca arguments is derived from the type, so we
// can't change to a type with a different size. If the size were
// passed explicitly we could avoid this check.
74 changes: 74 additions & 0 deletions llvm/test/CodeGen/X86/statepoint-call-lowering.ll
@@ -0,0 +1,74 @@
; RUN: llc < %s | FileCheck %s
; This file contains a collection of basic tests to ensure we don't break
; normal call lowering when there are no deopt or gc arguments.

target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"

declare zeroext i1 @return_i1()
declare zeroext i32 @return_i32()
declare i32* @return_i32ptr()
declare float @return_float()

define i1 @test_i1_return() {
; CHECK-LABEL: test_i1_return
; This is just checking that an i1 gets lowered normally when there are no
; extra state arguments to the statepoint.
; CHECK: pushq %rax
; CHECK: callq return_i1
; CHECK: popq %rdx
; CHECK: retq
entry:
%safepoint_token = tail call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0)
%call1 = call zeroext i1 @llvm.experimental.gc.result.int.i1(i32 %safepoint_token)
ret i1 %call1
}

define i32 @test_i32_return() {
; CHECK-LABEL: test_i32_return
; CHECK: pushq %rax
; CHECK: callq return_i32
; CHECK: popq %rdx
; CHECK: retq
entry:
%safepoint_token = tail call i32 (i32 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i32f(i32 ()* @return_i32, i32 0, i32 0, i32 0)
%call1 = call zeroext i32 @llvm.experimental.gc.result.int.i32(i32 %safepoint_token)
ret i32 %call1
}

define i32* @test_i32ptr_return() {
; CHECK-LABEL: test_i32ptr_return
; CHECK: pushq %rax
; CHECK: callq return_i32ptr
; CHECK: popq %rdx
; CHECK: retq
entry:
%safepoint_token = tail call i32 (i32* ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_p0i32f(i32* ()* @return_i32ptr, i32 0, i32 0, i32 0)
%call1 = call i32* @llvm.experimental.gc.result.ptr.p0i32(i32 %safepoint_token)
ret i32* %call1
}

define float @test_float_return() {
; CHECK-LABEL: test_float_return
; CHECK: pushq %rax
; CHECK: callq return_float
; CHECK: popq %rax
; CHECK: retq
entry:
%safepoint_token = tail call i32 (float ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_f32f(float ()* @return_float, i32 0, i32 0, i32 0)
%call1 = call float @llvm.experimental.gc.result.float.f32(i32 %safepoint_token)
ret float %call1
}

declare i32 @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()*, i32, i32, ...)
declare i1 @llvm.experimental.gc.result.int.i1(i32)

declare i32 @llvm.experimental.gc.statepoint.p0f_i32f(i32 ()*, i32, i32, ...)
declare i32 @llvm.experimental.gc.result.int.i32(i32)

declare i32 @llvm.experimental.gc.statepoint.p0f_p0i32f(i32* ()*, i32, i32, ...)
declare i32* @llvm.experimental.gc.result.ptr.p0i32(i32)

declare i32 @llvm.experimental.gc.statepoint.p0f_f32f(float ()*, i32, i32, ...)
declare float @llvm.experimental.gc.result.float.f32(i32)

60 changes: 60 additions & 0 deletions llvm/test/CodeGen/X86/statepoint-stack-usage.ll
@@ -0,0 +1,60 @@
; RUN: llc < %s | FileCheck %s

target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"

; This test is checking to make sure that we reuse the same stack slots
; for GC values spilled over two different call sites. Since the order
; of the GC arguments differs, naive lowering code would insert loads and
; stores to rearrange items on the stack. We need to make sure (for
; performance) that this doesn't happen.
define i32 @back_to_back_calls(i32* %a, i32* %b, i32* %c) #1 {
; CHECK-LABEL: back_to_back_calls
; The exact stores don't matter, but there need to be three stack slots created
; CHECK: movq %rdx, 16(%rsp)
; CHECK: movq %rdi, 8(%rsp)
; CHECK: movq %rsi, (%rsp)
%safepoint_token = tail call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* undef, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32* %a, i32* %b, i32* %c)
%a1 = tail call coldcc i32* @llvm.experimental.gc.relocate.p0i32(i32 %safepoint_token, i32 9, i32 9)
%b1 = tail call coldcc i32* @llvm.experimental.gc.relocate.p0i32(i32 %safepoint_token, i32 9, i32 10)
%c1 = tail call coldcc i32* @llvm.experimental.gc.relocate.p0i32(i32 %safepoint_token, i32 9, i32 11)
; CHECK: callq
; This is the key check. There should NOT be any memory moves here
; CHECK-NOT: movq
%safepoint_token2 = tail call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* undef, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32* %c1, i32* %b1, i32* %a1)
%a2 = tail call coldcc i32* @llvm.experimental.gc.relocate.p0i32(i32 %safepoint_token2, i32 9, i32 11)
%b2 = tail call coldcc i32* @llvm.experimental.gc.relocate.p0i32(i32 %safepoint_token2, i32 9, i32 10)
%c2 = tail call coldcc i32* @llvm.experimental.gc.relocate.p0i32(i32 %safepoint_token2, i32 9, i32 9)
; CHECK: callq
ret i32 1
}

; This test simply checks that minor changes in vm state don't prevent slots
; being reused for gc values.
define i32 @reserve_first(i32* %a, i32* %b, i32* %c) #1 {
; CHECK-LABEL: reserve_first
; The exact stores don't matter, but there need to be three stack slots created
; CHECK: movq %rdx, 16(%rsp)
; CHECK: movq %rdi, 8(%rsp)
; CHECK: movq %rsi, (%rsp)
%safepoint_token = tail call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* undef, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32* %a, i32* %b, i32* %c)
%a1 = tail call coldcc i32* @llvm.experimental.gc.relocate.p0i32(i32 %safepoint_token, i32 9, i32 9)
%b1 = tail call coldcc i32* @llvm.experimental.gc.relocate.p0i32(i32 %safepoint_token, i32 9, i32 10)
%c1 = tail call coldcc i32* @llvm.experimental.gc.relocate.p0i32(i32 %safepoint_token, i32 9, i32 11)
; CHECK: callq
; This is the key check. There should NOT be any memory moves here
; CHECK-NOT: movq
%safepoint_token2 = tail call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* undef, i32 0, i32 0, i32 5, i32* %a1, i32 0, i32* %c1, i32 0, i32 0, i32* %c1, i32* %b1, i32* %a1)
%a2 = tail call coldcc i32* @llvm.experimental.gc.relocate.p0i32(i32 %safepoint_token2, i32 9, i32 11)
%b2 = tail call coldcc i32* @llvm.experimental.gc.relocate.p0i32(i32 %safepoint_token2, i32 9, i32 10)
%c2 = tail call coldcc i32* @llvm.experimental.gc.relocate.p0i32(i32 %safepoint_token2, i32 9, i32 9)
; CHECK: callq
ret i32 1
}

; Function Attrs: nounwind
declare i32* @llvm.experimental.gc.relocate.p0i32(i32, i32, i32) #3

declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()*, i32, i32, ...)

attributes #1 = { uwtable }
111 changes: 111 additions & 0 deletions llvm/test/CodeGen/X86/statepoint-stackmap-format.ll
@@ -0,0 +1,111 @@
; RUN: llc < %s | FileCheck %s
; This test is a sanity check to ensure statepoints are generating StackMap
; sections correctly. This is not intended to be a rigorous test of the
; StackMap format (see the stackmap tests for that).

target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"

declare zeroext i1 @return_i1()

define i1 @test(i32 addrspace(1)* %ptr) {
; CHECK-LABEL: test
; Do we see one spill for the local value and the store to the
; alloca?
; CHECK: subq $24, %rsp
; CHECK: movq $0, 8(%rsp)
; CHECK: movq %rdi, (%rsp)
; CHECK: callq return_i1
; CHECK: addq $24, %rsp
; CHECK: retq
entry:
%metadata1 = alloca i32 addrspace(1)*, i32 2, align 8
store i32 addrspace(1)* null, i32 addrspace(1)** %metadata1
; NOTE: Currently NOT testing alloca lowering in the StackMap format. It's
; known to be broken.
%safepoint_token = tail call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 2, i32 addrspace(1)* %ptr, i32 addrspace(1)* null)
%call1 = call zeroext i1 @llvm.experimental.gc.result.int.i1(i32 %safepoint_token)
%a = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 4, i32 4)
%b = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 5, i32 5)
;
ret i1 %call1
}

declare i32 @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()*, i32, i32, ...)
declare i1 @llvm.experimental.gc.result.int.i1(i32)
declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32, i32, i32) #3


; CHECK-LABEL: .section .llvm_stackmaps
; CHECK-NEXT: __LLVM_StackMaps:
; Header
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 0
; Num Functions
; CHECK-NEXT: .long 1
; Num LargeConstants
; CHECK-NEXT: .long 0
; Num Callsites
; CHECK-NEXT: .long 1

; Functions and stack size
; CHECK-NEXT: .quad test
; CHECK-NEXT: .quad 24

; Large Constants
; Statepoint ID only
; CHECK: .quad 2882400000

; Callsites
; Constant arguments
; CHECK: .long .Ltmp1-test
; CHECK: .short 0
; CHECK: .short 8
; SmallConstant (0)
; CHECK: .byte 4
; CHECK: .byte 8
; CHECK: .short 0
; CHECK: .long 0
; SmallConstant (2)
; CHECK: .byte 4
; CHECK: .byte 8
; CHECK: .short 0
; CHECK: .long 2
; Direct Spill Slot [RSP+0]
; CHECK: .byte 2
; CHECK: .byte 8
; CHECK: .short 7
; CHECK: .long 0
; SmallConstant (0)
; CHECK: .byte 4
; CHECK: .byte 8
; CHECK: .short 0
; CHECK: .long 0
; SmallConstant (0)
; CHECK: .byte 4
; CHECK: .byte 8
; CHECK: .short 0
; CHECK: .long 0
; SmallConstant (0)
; CHECK: .byte 4
; CHECK: .byte 8
; CHECK: .short 0
; CHECK: .long 0
; Direct Spill Slot [RSP+0]
; CHECK: .byte 2
; CHECK: .byte 8
; CHECK: .short 7
; CHECK: .long 0
; Direct Spill Slot [RSP+0]
; CHECK: .byte 2
; CHECK: .byte 8
; CHECK: .short 7
; CHECK: .long 0

; No Padding or LiveOuts
; CHECK: .short 0
; CHECK: .short 0
; CHECK: .align 8