Skip to content

Commit

Permalink
Fix benchmark & tune heuristic
Browse files Browse the repository at this point in the history
  • Loading branch information
pitrou committed Mar 25, 2024
1 parent 429d060 commit 9774e89
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 20 deletions.
9 changes: 6 additions & 3 deletions cpp/src/arrow/memory_pool.cc
Original file line number Diff line number Diff line change
Expand Up @@ -537,6 +537,9 @@ class BaseMemoryPoolImpl : public MemoryPool {
if (new_size == old_size) {
return Status::OK();
}
if (old_size == 0 || new_size == 0) {
return Reallocate(old_size, new_size, alignment, ptr);
}
if (new_size < 0) {
return Status::Invalid("negative realloc size");
}
Expand All @@ -545,9 +548,9 @@ class BaseMemoryPoolImpl : public MemoryPool {
}
// First try resizing in place
if (!Allocator::ResizeInPlace(old_size, new_size, *ptr)) {
// TODO comment
if (std::max(old_size, new_size) >= 32 * 1024) {
// Deallocate then allocate (faster than copying data?)
// For non-small allocations, we prefer deallocate-then-allocate
// over reallocation, because it saves the cost of copying data.
if (std::max(old_size, new_size) >= 256) {
Allocator::DeallocateAligned(*ptr, old_size, alignment);
RETURN_NOT_OK(Allocator::AllocateAligned(new_size, alignment, ptr));
} else {
Expand Down
50 changes: 33 additions & 17 deletions cpp/src/arrow/memory_pool_benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -115,30 +115,39 @@ static void AllocateTouchDeallocate(
state.SetBytesProcessed(state.iterations() * nbytes);
}

// 256 kiB: typical max size for a scratch space (L2-sized)
static constexpr int64_t kMaxReallocationSize = 256 << 10;
// 4 kiB: typical increment when resizing a scratch space
static constexpr int64_t kReallocationIncrement = 4096;

template <typename Alloc, bool Copy>
static void BenchmarkReallocateGrowing(benchmark::State& state) {
// 256 kiB: typical max size for a scratch space (L2-sized)
const int64_t max_size = 256 << 10;
// 4 kiB: typical increment when resizing a scratch space
const int64_t increment = 4096;
MemoryPool* pool = *Alloc::GetAllocator();
pool->ReleaseUnused();
int64_t nb_reallocs = 0;

int64_t nb_inplace_reallocs = 0;
for (auto _ : state) {
const auto alloc_before = pool->bytes_allocated();
uint8_t* data;
int64_t size = 0;
ARROW_CHECK_OK(pool->Allocate(size, &data));
for (; size < max_size; size += increment) {
while (size < kMaxReallocationSize) {
const auto old_data = data;
int64_t new_size = size + kReallocationIncrement;
if constexpr (Copy) {
ARROW_CHECK_OK(pool->Reallocate(size - increment, size, &data));
ARROW_CHECK_OK(pool->Reallocate(size, new_size, &data));
} else {
ARROW_CHECK_OK(pool->ReallocateNoCopy(size - increment, size, &data));
ARROW_CHECK_OK(pool->ReallocateNoCopy(size, new_size, &data));
}
++nb_reallocs;
nb_inplace_reallocs += (data == old_data);
size = new_size;
}
pool->Free(data, size - increment);
pool->Free(data, size);
ARROW_CHECK_EQ(pool->bytes_allocated(), alloc_before);
}
state.SetItemsProcessed(nb_reallocs);
state.counters["percent_in_place"] = 100.0 * nb_inplace_reallocs / nb_reallocs;
}

template <typename Alloc>
Expand All @@ -153,26 +162,33 @@ static void ReallocateGrowingNoCopy(benchmark::State& state) {

template <typename Alloc, bool Copy>
static void BenchmarkReallocateShrinking(benchmark::State& state) {
const int64_t max_size = 256 << 10; // 256 kiB
const int64_t increment = 4096;
MemoryPool* pool = *Alloc::GetAllocator();
int64_t nb_reallocs = 0;
pool->ReleaseUnused();

int64_t nb_reallocs = 0;
int64_t nb_inplace_reallocs = 0;
for (auto _ : state) {
const auto alloc_before = pool->bytes_allocated();
uint8_t* data;
int64_t size = max_size;
int64_t size = kMaxReallocationSize;
ARROW_CHECK_OK(pool->Allocate(size, &data));
for (; size >= 0; size -= increment) {
while (size >= kReallocationIncrement) {
const auto old_data = data;
int64_t new_size = size - kReallocationIncrement;
if constexpr (Copy) {
ARROW_CHECK_OK(pool->Reallocate(size + increment, size, &data));
ARROW_CHECK_OK(pool->Reallocate(size, new_size, &data));
} else {
ARROW_CHECK_OK(pool->ReallocateNoCopy(size + increment, size, &data));
ARROW_CHECK_OK(pool->ReallocateNoCopy(size, new_size, &data));
}
++nb_reallocs;
nb_inplace_reallocs += (data == old_data);
size = new_size;
}
pool->Free(data, size + increment);
pool->Free(data, size);
ARROW_CHECK_EQ(pool->bytes_allocated(), alloc_before);
}
state.SetItemsProcessed(nb_reallocs);
state.counters["percent_in_place"] = 100.0 * nb_inplace_reallocs / nb_reallocs;
}

template <typename Alloc>
Expand Down
4 changes: 4 additions & 0 deletions cpp/src/arrow/memory_pool_jemalloc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,10 @@ bool JemallocAllocator::ResizeInPlace(int64_t old_size, int64_t new_size, uint8_
// No need to pass any alignment since this doesn't move the base pointer
int64_t got_size = static_cast<int64_t>(xallocx(ptr, static_cast<size_t>(new_size),
/*extra=*/0, /*flags=*/0));
// FIXME: xallocx resizes to an actual size that's not the actualy requested size.
// Using an exact equality check here is therefore pessimal. The alternative is to
// return the actual allocation size to the caller, as it should be passed
// accurately when deallocating the area.
return got_size == new_size;
}

Expand Down

0 comments on commit 9774e89

Please sign in to comment.