Skip to content

Commit

Permalink
ADT: SmallVector size/capacity use word-size integers when elements a…
Browse files Browse the repository at this point in the history
…re small

SmallVector currently uses 32bit integers for size and capacity to reduce
sizeof(SmallVector). This limits the number of elements to UINT32_MAX.

For a SmallVector<char>, this limits the SmallVector size to only 4GB.
Buffering bitcode output uses SmallVector<char>, but needs >4GB output.

This changes SmallVector size and capacity to conditionally use word-size
integers if the element type is small (<4 bytes). For larger elements types,
the vector size can reach ~16GB with 32bit size.

Making this conditional on the element type provides both the smaller
sizeof(SmallVector) for larger types which are unlikely to grow so large,
and supports larger capacities for smaller element types.
  • Loading branch information
browneee committed Apr 25, 2020
1 parent a0919ac commit b5f0eae
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 26 deletions.
60 changes: 44 additions & 16 deletions llvm/include/llvm/ADT/SmallVector.h
Expand Up @@ -16,29 +16,42 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/AlignOf.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemAlloc.h"
#include "llvm/Support/type_traits.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <initializer_list>
#include <iterator>
#include <limits>
#include <memory>
#include <new>
#include <type_traits>
#include <utility>

namespace llvm {

/// This is all the non-templated stuff common to all SmallVectors.
class SmallVectorBase {
/// This is all the stuff common to all SmallVectors.
///
/// The template parameter specifies the type which should be used to hold the
/// Size and Capacity of the SmallVector, so it can be adjusted.
/// Using 32 bit size is desirable to shink the size of the SmallVector.
/// Using 64 bit size is desirable for cases like SmallVector<char>, where a
/// 32 bit size would limit the vector to ~4GB. SmallVectors are used for
/// buffering bitcode output - which can exceed 4GB.
template <class Size_T> class SmallVectorBase {
protected:
void *BeginX;
unsigned Size = 0, Capacity;
Size_T Size = 0, Capacity;

/// The maximum value of the Size_T used.
static constexpr size_t SizeTypeMax() {
return std::numeric_limits<Size_T>::max();
}

SmallVectorBase() = delete;
SmallVectorBase(void *FirstEl, size_t TotalCapacity)
Expand Down Expand Up @@ -70,17 +83,24 @@ class SmallVectorBase {
}
};

template <class T>
using SmallVectorSizeType =
typename std::conditional<sizeof(T) < 4, uintptr_t, uint32_t>::type;

/// Figure out the offset of the first element.
template <class T, typename = void> struct SmallVectorAlignmentAndSize {
AlignedCharArrayUnion<SmallVectorBase> Base;
AlignedCharArrayUnion<SmallVectorBase<SmallVectorSizeType<T>>> Base;
AlignedCharArrayUnion<T> FirstEl;
};

/// This is the part of SmallVectorTemplateBase which does not depend on whether
/// the type T is a POD. The extra dummy template argument is used by ArrayRef
/// to avoid unnecessarily requiring T to be complete.
template <typename T, typename = void>
class SmallVectorTemplateCommon : public SmallVectorBase {
class SmallVectorTemplateCommon
: public SmallVectorBase<SmallVectorSizeType<T>> {
using Base = SmallVectorBase<SmallVectorSizeType<T>>;

/// Find the address of the first element. For this pointer math to be valid
/// with small-size of 0 for T with lots of alignment, it's important that
/// SmallVectorStorage is properly-aligned even for small-size of 0.
Expand All @@ -92,21 +112,20 @@ class SmallVectorTemplateCommon : public SmallVectorBase {
// Space after 'FirstEl' is clobbered, do not add any instance vars after it.

protected:
SmallVectorTemplateCommon(size_t Size)
: SmallVectorBase(getFirstEl(), Size) {}
SmallVectorTemplateCommon(size_t Size) : Base(getFirstEl(), Size) {}

void grow_pod(size_t MinCapacity, size_t TSize) {
SmallVectorBase::grow_pod(getFirstEl(), MinCapacity, TSize);
Base::grow_pod(getFirstEl(), MinCapacity, TSize);
}

/// Return true if this is a smallvector which has not had dynamic
/// memory allocated for it.
bool isSmall() const { return BeginX == getFirstEl(); }
bool isSmall() const { return this->BeginX == getFirstEl(); }

/// Put this vector in a state of being small.
void resetToSmall() {
BeginX = getFirstEl();
Size = Capacity = 0; // FIXME: Setting Capacity to 0 is suspect.
this->BeginX = getFirstEl();
this->Size = this->Capacity = 0; // FIXME: Setting Capacity to 0 is suspect.
}

public:
Expand All @@ -124,6 +143,10 @@ class SmallVectorTemplateCommon : public SmallVectorBase {
using pointer = T *;
using const_pointer = const T *;

using Base::capacity;
using Base::empty;
using Base::size;

// forward iterator creation methods.
iterator begin() { return (iterator)this->BeginX; }
const_iterator begin() const { return (const_iterator)this->BeginX; }
Expand All @@ -137,7 +160,9 @@ class SmallVectorTemplateCommon : public SmallVectorBase {
const_reverse_iterator rend() const { return const_reverse_iterator(begin());}

size_type size_in_bytes() const { return size() * sizeof(T); }
size_type max_size() const { return size_type(-1) / sizeof(T); }
size_type max_size() const {
return std::min(this->SizeTypeMax(), size_type(-1) / sizeof(T));
}

size_t capacity_in_bytes() const { return capacity() * sizeof(T); }

Expand Down Expand Up @@ -232,18 +257,21 @@ class SmallVectorTemplateBase : public SmallVectorTemplateCommon<T> {
// Define this out-of-line to dissuade the C++ compiler from inlining it.
template <typename T, bool TriviallyCopyable>
void SmallVectorTemplateBase<T, TriviallyCopyable>::grow(size_t MinSize) {
if (MinSize > UINT32_MAX)
// Ensure we can fit the new capacity.
// This is only going to be applicable when the capacity is 32 bit.
if (MinSize > this->SizeTypeMax())
report_bad_alloc_error("SmallVector capacity overflow during allocation");

// Ensure we can meet the guarantee of space for at least one more element.
// The above check alone will not catch the case where grow is called with a
// default MinCapacity of 0, but the current capacity cannot be increased.
if (this->capacity() == size_t(UINT32_MAX))
// This is only going to be applicable when the capacity is 32 bit.
if (this->capacity() == this->SizeTypeMax())
report_bad_alloc_error("SmallVector capacity unable to grow");

// Always grow, even from zero.
size_t NewCapacity = size_t(NextPowerOf2(this->capacity() + 2));
NewCapacity = std::min(std::max(NewCapacity, MinSize), size_t(UINT32_MAX));
NewCapacity = std::min(std::max(NewCapacity, MinSize), this->SizeTypeMax());
T *NewElts = static_cast<T*>(llvm::safe_malloc(NewCapacity*sizeof(T)));

// Move the elements over.
Expand Down
29 changes: 19 additions & 10 deletions llvm/lib/Support/SmallVector.cpp
Expand Up @@ -37,24 +37,30 @@ static_assert(sizeof(SmallVector<void *, 1>) ==
sizeof(unsigned) * 2 + sizeof(void *) * 2,
"wasted space in SmallVector size 1");

/// grow_pod - This is an implementation of the grow() method which only works
/// on POD-like datatypes and is out of line to reduce code duplication.
/// This function will report a fatal error if it cannot increase capacity.
void SmallVectorBase::grow_pod(void *FirstEl, size_t MinCapacity,
size_t TSize) {
// Ensure we can fit the new capacity in 32 bits.
if (MinCapacity > UINT32_MAX)
static_assert(sizeof(SmallVector<char, 0>) ==
sizeof(void *) * 2 + sizeof(void *),
"1 byte elements have word-sized type for size and capacity");

// Note: Moving this function into the header may cause performance regression.
template <class Size_T>
void SmallVectorBase<Size_T>::grow_pod(void *FirstEl, size_t MinCapacity,
size_t TSize) {
// Ensure we can fit the new capacity.
// This is only going to be applicable when the capacity is 32 bit.
if (MinCapacity > SizeTypeMax())
report_bad_alloc_error("SmallVector capacity overflow during allocation");

// Ensure we can meet the guarantee of space for at least one more element.
// The above check alone will not catch the case where grow is called with a
// default MinCapacity of 0, but the current capacity cannot be increased.
if (capacity() == size_t(UINT32_MAX))
// This is only going to be applicable when the capacity is 32 bit.
if (capacity() == SizeTypeMax())
report_bad_alloc_error("SmallVector capacity unable to grow");

// In theory 2*capacity can overflow if the capacity is 64 bit, but the
// original capacity would never be large enough for this to be a problem.
size_t NewCapacity = 2 * capacity() + 1; // Always grow.
NewCapacity =
std::min(std::max(NewCapacity, MinCapacity), size_t(UINT32_MAX));
NewCapacity = std::min(std::max(NewCapacity, MinCapacity), SizeTypeMax());

void *NewElts;
if (BeginX == FirstEl) {
Expand All @@ -70,3 +76,6 @@ void SmallVectorBase::grow_pod(void *FirstEl, size_t MinCapacity,
this->BeginX = NewElts;
this->Capacity = NewCapacity;
}

template class llvm::SmallVectorBase<uint32_t>;
template class llvm::SmallVectorBase<uintptr_t>;

0 comments on commit b5f0eae

Please sign in to comment.