[GC] Remove buggy untested optimization from statepoint lowering
A downstream test case (see included reduced test) revealed that we have a bug in how we handle duplicate relocations. If we have the same SDValue relocated twice, and that value happens to be a constant (such as null), we only export one of the two llvm::Values. Exporting on a per llvm::Value basis is required to allow lowering of gc.relocates in following basic blocks (e.g. invokes). Without it, we end up with a use of an undefined vreg and bad things happen.
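The failing pattern, condensed (with shortened value names) from the @test function in the included test file: %base and %derived are distinct llvm::Values, but both fold to the same constant-null SDValue, and their relocations at the second statepoint are used in a following basic block.

entry:
  %tok = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* null, i32 addrspace(1)* null)
  ; Distinct llvm::Values, identical (constant null) SDValue.
  %base = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %tok, i32 7, i32 7)
  %derived = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %tok, i32 7, i32 8)
  %tok2 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %base, i32 addrspace(1)* %derived)
  br label %next

next:
  ; Lowering these relocates outside the defining block requires both %base and
  ; %derived to have been exported; with the de-duplication only one of them was.
  %base_reloc = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %tok2, i32 7, i32 7)
  %derived_reloc = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %tok2, i32 7, i32 8)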

Rather than fixing the optimization - which appears to be hard - I propose we simply remove it. There are no tests in tree that change with this code removed. If we find out later that this did matter for something, we can reimplement a variation of this in CodeGenPrepare to catch the easy cases without complicating the lowering code.

Thanks to Denis and Serguei who did all the hard work of figuring out what went wrong here. The patch is by far the easy part. :)

Differential Revision: https://reviews.llvm.org/D75964
preames committed Mar 11, 2020
1 parent 0396aa4 commit e671641
Showing 3 changed files with 90 additions and 81 deletions.
35 changes: 6 additions & 29 deletions llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
@@ -92,35 +92,12 @@ class FunctionLoweringInfo {
DenseMap<const Value *, unsigned> CatchPadExceptionPointers;

/// Keep track of frame indices allocated for statepoints as they could be
/// used across basic block boundaries. This struct is more complex than a
/// simple map because the statepoint lowering code de-duplicates gc pointers
/// based on their SDValue (so %p and (bitcast %p to T) will get the same
/// slot), and we track that here.

struct StatepointSpillMap {
using SlotMapTy = DenseMap<const Value *, Optional<int>>;

/// Maps uniqued llvm IR values to the slots they were spilled in. If a
/// value is mapped to None it means we visited the value but didn't spill
/// it (because it was a constant, for instance).
SlotMapTy SlotMap;

/// Maps llvm IR values to the values they were de-duplicated to.
DenseMap<const Value *, const Value *> DuplicateMap;

SlotMapTy::const_iterator find(const Value *V) const {
auto DuplIt = DuplicateMap.find(V);
if (DuplIt != DuplicateMap.end())
V = DuplIt->second;
return SlotMap.find(V);
}

SlotMapTy::const_iterator end() const { return SlotMap.end(); }
};

/// Maps gc.statepoint instructions to their corresponding StatepointSpillMap
/// instances.
DenseMap<const Instruction *, StatepointSpillMap> StatepointSpillMaps;
/// used across basic block boundaries (e.g. for an invoke). For each
/// gc.statepoint instruction, maps uniqued llvm IR values to the slots they
/// were spilled in. If a value is mapped to None it means we visited the
/// value but didn't spill it (because it was a constant, for instance).
using StatepointSpillMapTy = DenseMap<const Value *, Optional<int>>;
DenseMap<const Instruction *, StatepointSpillMapTy> StatepointSpillMaps;

/// StaticAllocaMap - Keep track of frame indices for fixed sized allocas in
/// the entry block. This allows the allocas to be efficiently referenced
56 changes: 4 additions & 52 deletions llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -268,45 +268,6 @@ static void reservePreviousStackSlotForValue(const Value *IncomingValue,
Builder.StatepointLowering.setLocation(Incoming, Loc);
}

/// Remove any duplicate (as SDValues) from the derived pointer pairs. This
/// is not required for correctness. Its purpose is to reduce the size of the
/// StackMap section. It has no effect on the number of spill slots required
/// or the actual lowering.
static void
removeDuplicateGCPtrs(SmallVectorImpl<const Value *> &Bases,
SmallVectorImpl<const Value *> &Ptrs,
SmallVectorImpl<const GCRelocateInst *> &Relocs,
SelectionDAGBuilder &Builder,
FunctionLoweringInfo::StatepointSpillMap &SSM) {
DenseMap<SDValue, const Value *> Seen;

SmallVector<const Value *, 64> NewBases, NewPtrs;
SmallVector<const GCRelocateInst *, 64> NewRelocs;
for (size_t i = 0, e = Ptrs.size(); i < e; i++) {
SDValue SD = Builder.getValue(Ptrs[i]);
auto SeenIt = Seen.find(SD);

if (SeenIt == Seen.end()) {
// Only add non-duplicates
NewBases.push_back(Bases[i]);
NewPtrs.push_back(Ptrs[i]);
NewRelocs.push_back(Relocs[i]);
Seen[SD] = Ptrs[i];
} else {
// Duplicate pointer found, note in SSM and move on:
SSM.DuplicateMap[Ptrs[i]] = SeenIt->second;
}
}
assert(Bases.size() >= NewBases.size());
assert(Ptrs.size() >= NewPtrs.size());
assert(Relocs.size() >= NewRelocs.size());
Bases = NewBases;
Ptrs = NewPtrs;
Relocs = NewRelocs;
assert(Ptrs.size() == Bases.size());
assert(Ptrs.size() == Relocs.size());
}

/// Extract call from statepoint, lower it and return pointer to the
/// call node. Also update NodeMap so that getValue(statepoint) will
/// reference lowered call result
@@ -610,15 +571,15 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
SDValue Loc = Builder.StatepointLowering.getLocation(SDV);

if (Loc.getNode()) {
SpillMap.SlotMap[V] = cast<FrameIndexSDNode>(Loc)->getIndex();
SpillMap[V] = cast<FrameIndexSDNode>(Loc)->getIndex();
} else {
// Record the value as visited, but not spilled. This is the case for allocas
// and constants. For these values we can avoid emitting a spill load while
// visiting the corresponding gc_relocate.
// Actually we do not need to record them in this map at all.
// We do this only to check that we are not relocating any unvisited
// value.
SpillMap.SlotMap[V] = None;
SpillMap[V] = None;

// Default llvm mechanisms for exporting values which are used in
// different basic blocks do not work for gc relocates.
@@ -641,24 +602,15 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
NumOfStatepoints++;
// Clear state
StatepointLowering.startNewStatepoint(*this);
assert(SI.Bases.size() == SI.Ptrs.size() &&
SI.Ptrs.size() == SI.GCRelocates.size());

#ifndef NDEBUG
// We schedule gc relocates before removeDuplicateGCPtrs since we _will_
// encounter the duplicate gc relocates we elide in removeDuplicateGCPtrs.
for (auto *Reloc : SI.GCRelocates)
if (Reloc->getParent() == SI.StatepointInstr->getParent())
StatepointLowering.scheduleRelocCall(*Reloc);
#endif

// Remove any redundant llvm::Values which map to the same SDValue as another
// input. Also has the effect of removing duplicates in the original
// llvm::Value input list as well. This is a useful optimization for
// reducing the size of the StackMap section. It has no other impact.
removeDuplicateGCPtrs(SI.Bases, SI.Ptrs, SI.GCRelocates, *this,
FuncInfo.StatepointSpillMaps[SI.StatepointInstr]);
assert(SI.Bases.size() == SI.Ptrs.size() &&
SI.Ptrs.size() == SI.GCRelocates.size());

// Lower statepoint vmstate and gcstate arguments
SmallVector<SDValue, 10> LoweredMetaArgs;
SmallVector<MachineMemOperand*, 16> MemRefs;
80 changes: 80 additions & 0 deletions llvm/test/CodeGen/X86/statepoint-duplicates-export.ll
@@ -0,0 +1,80 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs < %s | FileCheck %s

; Check that we can export the values of "duplicated" gc.relocates without a crash.
; "Duplicated" here means mapping to the same SDValue. We previously had an
; optimization which tried to reduce the number of spills/stackmap entries in such
; cases, but it incorrectly handled exports across basic block boundaries,
; leading to a crash in this case.

target triple = "x86_64-pc-linux-gnu"

declare void @func()

define i1 @test() gc "statepoint-example" {
; CHECK-LABEL: test:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: callq func
; CHECK-NEXT: .Ltmp0:
; CHECK-NEXT: callq func
; CHECK-NEXT: .Ltmp1:
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: popq %rcx
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
entry:
%safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* null, i32 addrspace(1)* null)
%base = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 7, i32 7)
%derived = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 7, i32 8)
%safepoint_token2 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %base, i32 addrspace(1)* %derived)
br label %next

next:
%base_reloc = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token2, i32 7, i32 7)
%derived_reloc = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token2, i32 7, i32 8)
%cmp1 = icmp eq i32 addrspace(1)* %base_reloc, null
%cmp2 = icmp eq i32 addrspace(1)* %derived_reloc, null
%cmp = and i1 %cmp1, %cmp2
ret i1 %cmp
}

; Shows the redundant fill on the first statepoint and the spill/fill on the second one.
define i1 @test2(i32 addrspace(1)* %arg) gc "statepoint-example" {
; CHECK-LABEL: test2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: movq %rdi, {{[0-9]+}}(%rsp)
; CHECK-NEXT: callq func
; CHECK-NEXT: .Ltmp2:
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax
; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp)
; CHECK-NEXT: callq func
; CHECK-NEXT: .Ltmp3:
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax
; CHECK-NEXT: orq {{[0-9]+}}(%rsp), %rax
; CHECK-NEXT: sete %al
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
entry:
%safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %arg, i32 addrspace(1)* %arg)
%base = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 7, i32 7)
%derived = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 7, i32 8)
%safepoint_token2 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %base, i32 addrspace(1)* %derived)
br label %next

next:
%base_reloc = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token2, i32 7, i32 7)
%derived_reloc = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token2, i32 7, i32 8)
%cmp1 = icmp eq i32 addrspace(1)* %base_reloc, null
%cmp2 = icmp eq i32 addrspace(1)* %derived_reloc, null
%cmp = and i1 %cmp1, %cmp2
ret i1 %cmp
}


declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32, i32)
