Skip to content

Commit dda3c19

Browse files
committed
ADT: SmallVector size/capacity use word-size integers when elements are small
SmallVector currently uses 32-bit integers for size and capacity to reduce sizeof(SmallVector). This limits the number of elements to UINT32_MAX. For a SmallVector<char>, this limits the SmallVector size to only 4GB. Buffering bitcode output uses SmallVector<char>, but the output can exceed 4GB. This changes SmallVector size and capacity to conditionally use word-size integers if the element type is small (<4 bytes). For larger element types, the vector size can still reach ~16GB with a 32-bit size. Making this conditional on the element type both keeps the smaller sizeof(SmallVector) for larger types, which are unlikely to grow so large, and supports larger capacities for smaller element types. This recommit fixes the same template being instantiated twice on platforms where uintptr_t is the same as uint32_t.
1 parent f8990fe commit dda3c19

2 files changed

Lines changed: 64 additions & 26 deletions

File tree

llvm/include/llvm/ADT/SmallVector.h

Lines changed: 45 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -16,29 +16,42 @@
1616
#include "llvm/ADT/iterator_range.h"
1717
#include "llvm/Support/AlignOf.h"
1818
#include "llvm/Support/Compiler.h"
19+
#include "llvm/Support/ErrorHandling.h"
1920
#include "llvm/Support/MathExtras.h"
2021
#include "llvm/Support/MemAlloc.h"
2122
#include "llvm/Support/type_traits.h"
22-
#include "llvm/Support/ErrorHandling.h"
2323
#include <algorithm>
2424
#include <cassert>
2525
#include <cstddef>
2626
#include <cstdlib>
2727
#include <cstring>
2828
#include <initializer_list>
2929
#include <iterator>
30+
#include <limits>
3031
#include <memory>
3132
#include <new>
3233
#include <type_traits>
3334
#include <utility>
3435

3536
namespace llvm {
3637

37-
/// This is all the non-templated stuff common to all SmallVectors.
38-
class SmallVectorBase {
38+
/// This is all the stuff common to all SmallVectors.
39+
///
40+
/// The template parameter specifies the type which should be used to hold the
41+
/// Size and Capacity of the SmallVector, so it can be adjusted.
42+
/// Using 32 bit size is desirable to shrink the size of the SmallVector.
43+
/// Using 64 bit size is desirable for cases like SmallVector<char>, where a
44+
/// 32 bit size would limit the vector to ~4GB. SmallVectors are used for
45+
/// buffering bitcode output - which can exceed 4GB.
46+
template <class Size_T> class SmallVectorBase {
3947
protected:
4048
void *BeginX;
41-
unsigned Size = 0, Capacity;
49+
Size_T Size = 0, Capacity;
50+
51+
/// The maximum value of the Size_T used.
52+
static constexpr size_t SizeTypeMax() {
53+
return std::numeric_limits<Size_T>::max();
54+
}
4255

4356
SmallVectorBase() = delete;
4457
SmallVectorBase(void *FirstEl, size_t TotalCapacity)
@@ -70,17 +83,25 @@ class SmallVectorBase {
7083
}
7184
};
7285

86+
template <class T>
87+
using SmallVectorSizeType =
88+
typename std::conditional<sizeof(T) < 4 && sizeof(void *) >= 8, uint64_t,
89+
uint32_t>::type;
90+
7391
/// Figure out the offset of the first element.
7492
template <class T, typename = void> struct SmallVectorAlignmentAndSize {
75-
AlignedCharArrayUnion<SmallVectorBase> Base;
93+
AlignedCharArrayUnion<SmallVectorBase<SmallVectorSizeType<T>>> Base;
7694
AlignedCharArrayUnion<T> FirstEl;
7795
};
7896

7997
/// This is the part of SmallVectorTemplateBase which does not depend on whether
8098
/// the type T is a POD. The extra dummy template argument is used by ArrayRef
8199
/// to avoid unnecessarily requiring T to be complete.
82100
template <typename T, typename = void>
83-
class SmallVectorTemplateCommon : public SmallVectorBase {
101+
class SmallVectorTemplateCommon
102+
: public SmallVectorBase<SmallVectorSizeType<T>> {
103+
using Base = SmallVectorBase<SmallVectorSizeType<T>>;
104+
84105
/// Find the address of the first element. For this pointer math to be valid
85106
/// with small-size of 0 for T with lots of alignment, it's important that
86107
/// SmallVectorStorage is properly-aligned even for small-size of 0.
@@ -92,21 +113,20 @@ class SmallVectorTemplateCommon : public SmallVectorBase {
92113
// Space after 'FirstEl' is clobbered, do not add any instance vars after it.
93114

94115
protected:
95-
SmallVectorTemplateCommon(size_t Size)
96-
: SmallVectorBase(getFirstEl(), Size) {}
116+
SmallVectorTemplateCommon(size_t Size) : Base(getFirstEl(), Size) {}
97117

98118
void grow_pod(size_t MinCapacity, size_t TSize) {
99-
SmallVectorBase::grow_pod(getFirstEl(), MinCapacity, TSize);
119+
Base::grow_pod(getFirstEl(), MinCapacity, TSize);
100120
}
101121

102122
/// Return true if this is a smallvector which has not had dynamic
103123
/// memory allocated for it.
104-
bool isSmall() const { return BeginX == getFirstEl(); }
124+
bool isSmall() const { return this->BeginX == getFirstEl(); }
105125

106126
/// Put this vector in a state of being small.
107127
void resetToSmall() {
108-
BeginX = getFirstEl();
109-
Size = Capacity = 0; // FIXME: Setting Capacity to 0 is suspect.
128+
this->BeginX = getFirstEl();
129+
this->Size = this->Capacity = 0; // FIXME: Setting Capacity to 0 is suspect.
110130
}
111131

112132
public:
@@ -124,6 +144,10 @@ class SmallVectorTemplateCommon : public SmallVectorBase {
124144
using pointer = T *;
125145
using const_pointer = const T *;
126146

147+
using Base::capacity;
148+
using Base::empty;
149+
using Base::size;
150+
127151
// forward iterator creation methods.
128152
iterator begin() { return (iterator)this->BeginX; }
129153
const_iterator begin() const { return (const_iterator)this->BeginX; }
@@ -137,7 +161,9 @@ class SmallVectorTemplateCommon : public SmallVectorBase {
137161
const_reverse_iterator rend() const { return const_reverse_iterator(begin());}
138162

139163
size_type size_in_bytes() const { return size() * sizeof(T); }
140-
size_type max_size() const { return size_type(-1) / sizeof(T); }
164+
size_type max_size() const {
165+
return std::min(this->SizeTypeMax(), size_type(-1) / sizeof(T));
166+
}
141167

142168
size_t capacity_in_bytes() const { return capacity() * sizeof(T); }
143169

@@ -232,18 +258,21 @@ class SmallVectorTemplateBase : public SmallVectorTemplateCommon<T> {
232258
// Define this out-of-line to dissuade the C++ compiler from inlining it.
233259
template <typename T, bool TriviallyCopyable>
234260
void SmallVectorTemplateBase<T, TriviallyCopyable>::grow(size_t MinSize) {
235-
if (MinSize > UINT32_MAX)
261+
// Ensure we can fit the new capacity.
262+
// This is only going to be applicable when the capacity is 32 bit.
263+
if (MinSize > this->SizeTypeMax())
236264
report_bad_alloc_error("SmallVector capacity overflow during allocation");
237265

238266
// Ensure we can meet the guarantee of space for at least one more element.
239267
// The above check alone will not catch the case where grow is called with a
240268
// default MinCapacity of 0, but the current capacity cannot be increased.
241-
if (this->capacity() == size_t(UINT32_MAX))
269+
// This is only going to be applicable when the capacity is 32 bit.
270+
if (this->capacity() == this->SizeTypeMax())
242271
report_bad_alloc_error("SmallVector capacity unable to grow");
243272

244273
// Always grow, even from zero.
245274
size_t NewCapacity = size_t(NextPowerOf2(this->capacity() + 2));
246-
NewCapacity = std::min(std::max(NewCapacity, MinSize), size_t(UINT32_MAX));
275+
NewCapacity = std::min(std::max(NewCapacity, MinSize), this->SizeTypeMax());
247276
T *NewElts = static_cast<T*>(llvm::safe_malloc(NewCapacity*sizeof(T)));
248277

249278
// Move the elements over.

llvm/lib/Support/SmallVector.cpp

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -37,24 +37,30 @@ static_assert(sizeof(SmallVector<void *, 1>) ==
3737
sizeof(unsigned) * 2 + sizeof(void *) * 2,
3838
"wasted space in SmallVector size 1");
3939

40-
/// grow_pod - This is an implementation of the grow() method which only works
41-
/// on POD-like datatypes and is out of line to reduce code duplication.
42-
/// This function will report a fatal error if it cannot increase capacity.
43-
void SmallVectorBase::grow_pod(void *FirstEl, size_t MinCapacity,
44-
size_t TSize) {
45-
// Ensure we can fit the new capacity in 32 bits.
46-
if (MinCapacity > UINT32_MAX)
40+
static_assert(sizeof(SmallVector<char, 0>) ==
41+
sizeof(void *) * 2 + sizeof(void *),
42+
"1 byte elements have word-sized type for size and capacity");
43+
44+
// Note: Moving this function into the header may cause performance regression.
45+
template <class Size_T>
46+
void SmallVectorBase<Size_T>::grow_pod(void *FirstEl, size_t MinCapacity,
47+
size_t TSize) {
48+
// Ensure we can fit the new capacity.
49+
// This is only going to be applicable when the capacity is 32 bit.
50+
if (MinCapacity > SizeTypeMax())
4751
report_bad_alloc_error("SmallVector capacity overflow during allocation");
4852

4953
// Ensure we can meet the guarantee of space for at least one more element.
5054
// The above check alone will not catch the case where grow is called with a
5155
// default MinCapacity of 0, but the current capacity cannot be increased.
52-
if (capacity() == size_t(UINT32_MAX))
56+
// This is only going to be applicable when the capacity is 32 bit.
57+
if (capacity() == SizeTypeMax())
5358
report_bad_alloc_error("SmallVector capacity unable to grow");
5459

60+
// In theory 2*capacity can overflow if the capacity is 64 bit, but the
61+
// original capacity would never be large enough for this to be a problem.
5562
size_t NewCapacity = 2 * capacity() + 1; // Always grow.
56-
NewCapacity =
57-
std::min(std::max(NewCapacity, MinCapacity), size_t(UINT32_MAX));
63+
NewCapacity = std::min(std::max(NewCapacity, MinCapacity), SizeTypeMax());
5864

5965
void *NewElts;
6066
if (BeginX == FirstEl) {
@@ -70,3 +76,6 @@ void SmallVectorBase::grow_pod(void *FirstEl, size_t MinCapacity,
7076
this->BeginX = NewElts;
7177
this->Capacity = NewCapacity;
7278
}
79+
80+
template class llvm::SmallVectorBase<uint32_t>;
81+
template class llvm::SmallVectorBase<uint64_t>;

0 commit comments

Comments
 (0)