Revert "Revert "Improve / refactor anonymous mmap capabilities (facebook#10810)""

This reverts commit e0cf5cd.
seckcoder committed Jul 21, 2023
1 parent a746236 commit 780711c
Showing 11 changed files with 285 additions and 121 deletions.
2 changes: 1 addition & 1 deletion .circleci/config.yml
@@ -584,7 +584,7 @@ jobs:
       name: "Test RocksDB"
       shell: powershell.exe
       command: |
-        build_tools\run_ci_db_test.ps1 -SuiteRun db_basic_test,db_test,db_test2,db_merge_operand_test,bloom_test,c_test,coding_test,crc32c_test,dynamic_bloom_test,env_basic_test,env_test,hash_test,random_test -Concurrency 16
+        build_tools\run_ci_db_test.ps1 -SuiteRun arena_test,db_basic_test,db_test,db_test2,db_merge_operand_test,bloom_test,c_test,coding_test,crc32c_test,dynamic_bloom_test,env_basic_test,env_test,hash_test,random_test -Concurrency 16
   build-linux-java:
     executor: linux-docker
1 change: 1 addition & 0 deletions CMakeLists.txt
@@ -804,6 +804,7 @@ set(SOURCES
         options/options.cc
         options/options_helper.cc
         options/options_parser.cc
+        port/mmap.cc
         port/stack_trace.cc
         table/adaptive/adaptive_table_factory.cc
         table/block_based/binary_search_index_reader.cc
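Both build systems now compile the new `port/mmap.cc`. Its interface is only visible through the call sites in this commit (`MemMapping::kHugePageSupported`, `MemMapping::AllocateHuge()`, `Get()`, and move-only ownership in a `std::deque`), so the following is a hypothetical, POSIX-only sketch of such a wrapper, not RocksDB's actual implementation:

```cpp
// Hypothetical minimal sketch of an RAII anonymous-mmap wrapper with the
// interface this commit relies on; the real port/mmap.{h,cc} is more general.
#include <sys/mman.h>

#include <cstddef>

class MemMapping {
 public:
  // Whether huge pages are available at compile time (assumption: the real
  // constant may be derived differently).
  static constexpr bool kHugePageSupported =
#ifdef MAP_HUGETLB
      true;
#else
      false;
#endif

  // Returns an empty mapping (Get() == nullptr) on failure.
  static MemMapping AllocateHuge(size_t length) {
    MemMapping mm;
#ifdef MAP_HUGETLB
    void* addr = mmap(nullptr, length, PROT_READ | PROT_WRITE,
                      MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
    if (addr != MAP_FAILED) {
      mm.addr_ = addr;
      mm.length_ = length;
    }
#else
    (void)length;
#endif
    return mm;
  }

  void* Get() const { return addr_; }

  // Move-only: exactly one object owns (and eventually unmaps) the region.
  MemMapping(MemMapping&& other) noexcept
      : addr_(other.addr_), length_(other.length_) {
    other.addr_ = nullptr;
    other.length_ = 0;
  }
  MemMapping(const MemMapping&) = delete;
  MemMapping& operator=(const MemMapping&) = delete;

  ~MemMapping() {
    if (addr_ != nullptr) {
      munmap(addr_, length_);  // release on destruction; error ignored here
    }
  }

 private:
  MemMapping() = default;
  void* addr_ = nullptr;
  size_t length_ = 0;
};
```

The design point, visible in the arena changes below, is that unmapping moves out of `Arena::~Arena()` and into this destructor, so every exit path releases the mapping.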
2 changes: 2 additions & 0 deletions TARGETS
@@ -181,6 +181,7 @@ cpp_library_wrapper(name="rocksdb_lib", srcs=[
         "options/options.cc",
         "options/options_helper.cc",
         "options/options_parser.cc",
+        "port/mmap.cc",
         "port/port_posix.cc",
         "port/stack_trace.cc",
         "port/win/env_default.cc",
@@ -538,6 +539,7 @@ cpp_library_wrapper(name="rocksdb_whole_archive_lib", srcs=[
         "options/options.cc",
         "options/options_helper.cc",
         "options/options_parser.cc",
+        "port/mmap.cc",
         "port/port_posix.cc",
         "port/stack_trace.cc",
         "port/win/env_default.cc",
3 changes: 3 additions & 0 deletions db/db_test_util.h
@@ -49,6 +49,9 @@
#include "util/string_util.h"
#include "utilities/merge_operators.h"

+// In case defined by Windows headers
+#undef small

namespace ROCKSDB_NAMESPACE {
class MockEnv;

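The `#undef` guards against a Windows quirk: `rpcndr.h`, pulled in transitively by `<windows.h>`, defines `small` as a macro, which would mangle any test code using `small` as an identifier. An illustration only, with a hypothetical variable name:

```cpp
#include <string>

// On Windows, rpcndr.h effectively does:
//   #define small char
// Without the #undef above, a test-local declaration like this one
// would macro-expand into ill-formed code:
void Example() {
  std::string small = "key";  // becomes: std::string char = "key";
}
```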
2 changes: 1 addition & 1 deletion db/memtable.cc
@@ -76,7 +76,7 @@ MemTable::MemTable(const InternalKeyComparator& cmp,
     : comparator_(cmp),
       moptions_(ioptions, mutable_cf_options),
       refs_(0),
-      kArenaBlockSize(OptimizeBlockSize(moptions_.arena_block_size)),
+      kArenaBlockSize(Arena::OptimizeBlockSize(moptions_.arena_block_size)),
       mem_tracker_(write_buffer_manager),
       arena_(moptions_.arena_block_size,
              (write_buffer_manager != nullptr &&
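Only the call site changes here: `OptimizeBlockSize()` moves from a free function into the `Arena` class (see the `arena.h` hunks below). Its documented contract is to clamp into `[kMinBlockSize, kMaxBlockSize]` and make the result a multiple of the align unit; a sketch of the effect, with the rounding step written out as an assumption consistent with that contract:

```cpp
#include <algorithm>
#include <cstddef>

// Sketch of the documented behavior of Arena::OptimizeBlockSize(); the
// rounding step is an assumption, not the verbatim RocksDB code.
size_t OptimizeBlockSizeSketch(size_t block_size) {
  constexpr size_t kMinBlockSize = 4096;      // from arena.h in this commit
  constexpr size_t kMaxBlockSize = 2u << 30;  // 2 GiB, from arena.h
  constexpr size_t kAlignUnit = alignof(std::max_align_t);
  block_size = std::max(kMinBlockSize, block_size);  // 1. clamp into range
  block_size = std::min(kMaxBlockSize, block_size);
  if (block_size % kAlignUnit != 0) {  // 2. round up to a multiple of the unit
    block_size = (block_size / kAlignUnit + 1) * kAlignUnit;
  }
  return block_size;
}
// e.g. OptimizeBlockSizeSketch(1000)  == 4096 (clamped up)
//      OptimizeBlockSizeSketch(65537) == 65552 when kAlignUnit == 16
```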
111 changes: 23 additions & 88 deletions memory/arena.cc
@@ -8,9 +8,7 @@
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include "memory/arena.h"
-#ifndef OS_WIN
-#include <sys/mman.h>
-#endif

#include <algorithm>

#include "logging/logging.h"
@@ -22,16 +20,7 @@

namespace ROCKSDB_NAMESPACE {

-// MSVC complains that it is already defined since it is static in the header.
-#ifndef _MSC_VER
-const size_t Arena::kInlineSize;
-#endif
-
-const size_t Arena::kMinBlockSize = 4096;
-const size_t Arena::kMaxBlockSize = 2u << 30;
-static const int kAlignUnit = alignof(max_align_t);
-
-size_t OptimizeBlockSize(size_t block_size) {
+size_t Arena::OptimizeBlockSize(size_t block_size) {
// Make sure block_size is in optimal range
block_size = std::max(Arena::kMinBlockSize, block_size);
block_size = std::min(Arena::kMaxBlockSize, block_size);
@@ -53,14 +42,12 @@ Arena::Arena(size_t block_size, AllocTracker* tracker, size_t huge_page_size)
blocks_memory_ += alloc_bytes_remaining_;
aligned_alloc_ptr_ = inline_block_;
unaligned_alloc_ptr_ = inline_block_ + alloc_bytes_remaining_;
-#ifdef MAP_HUGETLB
-  hugetlb_size_ = huge_page_size;
-  if (hugetlb_size_ && kBlockSize > hugetlb_size_) {
-    hugetlb_size_ = ((kBlockSize - 1U) / hugetlb_size_ + 1U) * hugetlb_size_;
+  if (MemMapping::kHugePageSupported) {
+    hugetlb_size_ = huge_page_size;
+    if (hugetlb_size_ && kBlockSize > hugetlb_size_) {
+      hugetlb_size_ = ((kBlockSize - 1U) / hugetlb_size_ + 1U) * hugetlb_size_;
+    }
  }
-#else
-  (void)huge_page_size;
-#endif
if (tracker_ != nullptr) {
tracker_->Allocate(kInlineSize);
}
@@ -71,21 +58,6 @@ Arena::~Arena() {
assert(tracker_->is_freed());
tracker_->FreeMem();
}
-  for (const auto& block : blocks_) {
-    delete[] block;
-  }
-
-#ifdef MAP_HUGETLB
-  for (const auto& mmap_info : huge_blocks_) {
-    if (mmap_info.addr_ == nullptr) {
-      continue;
-    }
-    auto ret = munmap(mmap_info.addr_, mmap_info.length_);
-    if (ret != 0) {
-      // TODO(sdong): Better handling
-    }
-  }
-#endif
}

char* Arena::AllocateFallback(size_t bytes, bool aligned) {
@@ -99,12 +71,10 @@ char* Arena::AllocateFallback(size_t bytes, bool aligned) {
// We waste the remaining space in the current block.
size_t size = 0;
char* block_head = nullptr;
-#ifdef MAP_HUGETLB
-  if (hugetlb_size_) {
+  if (MemMapping::kHugePageSupported && hugetlb_size_ > 0) {
size = hugetlb_size_;
block_head = AllocateFromHugePage(size);
}
-#endif
if (!block_head) {
size = kBlockSize;
block_head = AllocateNewBlock(size);
@@ -123,45 +93,22 @@ char* Arena::AllocateFallback(size_t bytes, bool aligned) {
}

char* Arena::AllocateFromHugePage(size_t bytes) {
-#ifdef MAP_HUGETLB
-  if (hugetlb_size_ == 0) {
-    return nullptr;
-  }
-  // Reserve space in `huge_blocks_` before calling `mmap`.
-  // Use `emplace_back()` instead of `reserve()` to let std::vector manage its
-  // own memory and do fewer reallocations.
-  //
-  // - If `emplace_back` throws, no memory leaks because we haven't called
-  //   `mmap` yet.
-  // - If `mmap` throws, no memory leaks because the vector will be cleaned up
-  //   via RAII.
-  huge_blocks_.emplace_back(nullptr /* addr */, 0 /* length */);
-
-  void* addr = mmap(nullptr, bytes, (PROT_READ | PROT_WRITE),
-                    (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB), -1, 0);
-
-  if (addr == MAP_FAILED) {
-    return nullptr;
-  }
-  huge_blocks_.back() = MmapInfo(addr, bytes);
-  blocks_memory_ += bytes;
-  if (tracker_ != nullptr) {
-    tracker_->Allocate(bytes);
+  MemMapping mm = MemMapping::AllocateHuge(bytes);
+  auto addr = static_cast<char*>(mm.Get());
+  if (addr) {
+    huge_blocks_.push_back(std::move(mm));
+    blocks_memory_ += bytes;
+    if (tracker_ != nullptr) {
+      tracker_->Allocate(bytes);
+    }
  }
-  return reinterpret_cast<char*>(addr);
-#else
-  (void)bytes;
-  return nullptr;
-#endif
+  return addr;
}

char* Arena::AllocateAligned(size_t bytes, size_t huge_page_size,
Logger* logger) {
-  assert((kAlignUnit & (kAlignUnit - 1)) ==
-         0);  // Pointer size should be a power of 2
-
-#ifdef MAP_HUGETLB
-  if (huge_page_size > 0 && bytes > 0) {
+  if (MemMapping::kHugePageSupported && hugetlb_size_ > 0 &&
+      huge_page_size > 0 && bytes > 0) {
// Allocate from a huge page TLB table.
size_t reserved_size =
((bytes - 1U) / huge_page_size + 1U) * huge_page_size;
@@ -177,10 +124,6 @@ char* Arena::AllocateAligned(size_t bytes, size_t huge_page_size,
return addr;
}
}
-#else
-  (void)huge_page_size;
-  (void)logger;
-#endif

size_t current_mod =
reinterpret_cast<uintptr_t>(aligned_alloc_ptr_) & (kAlignUnit - 1);
@@ -200,17 +143,10 @@ char* Arena::AllocateAligned(size_t bytes, size_t huge_page_size,
}

char* Arena::AllocateNewBlock(size_t block_bytes) {
-  // Reserve space in `blocks_` before allocating memory via new.
-  // Use `emplace_back()` instead of `reserve()` to let std::vector manage its
-  // own memory and do fewer reallocations.
-  //
-  // - If `emplace_back` throws, no memory leaks because we haven't called `new`
-  //   yet.
-  // - If `new` throws, no memory leaks because the vector will be cleaned up
-  //   via RAII.
-  blocks_.emplace_back(nullptr);
-
-  char* block = new char[block_bytes];
+  auto uniq = std::make_unique<char[]>(block_bytes);
+  char* block = uniq.get();
+  blocks_.push_back(std::move(uniq));

size_t allocated_size;
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
allocated_size = malloc_usable_size(block);
@@ -227,7 +163,6 @@ char* Arena::AllocateNewBlock(size_t block_bytes) {
if (tracker_ != nullptr) {
tracker_->Allocate(allocated_size);
}
-  blocks_.back() = block;
return block;
}

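The deleted comment blocks show the motivation: the old code pre-reserved a null slot (`emplace_back(nullptr)`) so that a later `new` or `mmap` failure could not leak, then patched the slot afterwards. With owning element types the same guarantee falls out directly. A condensed sketch of the pattern, with illustrative names:

```cpp
#include <cstddef>
#include <deque>
#include <memory>

std::deque<std::unique_ptr<char[]>> blocks;

// Ownership is established before the container insertion, so no exit path
// leaks: if push_back throws, `uniq` frees the block during unwinding; if it
// succeeds, the deque frees the block when the arena is destroyed.
char* AllocateNewBlockSketch(size_t block_bytes) {
  auto uniq = std::make_unique<char[]>(block_bytes);
  char* block = uniq.get();
  blocks.push_back(std::move(uniq));
  return block;
}
```

The same reasoning applies to `huge_blocks_`: a failed `MemMapping::AllocateHuge()` yields an empty mapping that is never inserted, and an inserted mapping is unmapped by `MemMapping`'s destructor, replacing the manual `munmap` loop deleted from `~Arena()`.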
56 changes: 25 additions & 31 deletions memory/arena.h
@@ -12,16 +12,13 @@
// size, it uses malloc to directly get the requested size.

#pragma once
-#ifndef OS_WIN
-#include <sys/mman.h>
-#endif
-#include <assert.h>
-#include <stdint.h>
-#include <cerrno>

#include <cstddef>
-#include <vector>
+#include <deque>

#include "memory/allocator.h"
-#include "util/mutexlock.h"
+#include "port/mmap.h"
#include "rocksdb/env.h"

namespace ROCKSDB_NAMESPACE {

@@ -31,9 +28,13 @@ class Arena : public Allocator {
Arena(const Arena&) = delete;
void operator=(const Arena&) = delete;

-  static const size_t kInlineSize = 2048;
-  static const size_t kMinBlockSize;
-  static const size_t kMaxBlockSize;
+  static constexpr size_t kInlineSize = 2048;
+  static constexpr size_t kMinBlockSize = 4096;
+  static constexpr size_t kMaxBlockSize = 2u << 30;
+
+  static constexpr unsigned kAlignUnit = alignof(std::max_align_t);
+  static_assert((kAlignUnit & (kAlignUnit - 1)) == 0,
+                "Pointer size should be power of 2");

// huge_page_size: if 0, don't use huge page TLB. If > 0 (should set to the
// supported hugepage size of the system), block allocation will try huge
@@ -63,7 +64,7 @@ class Arena : public Allocator {
// by the arena (exclude the space allocated but not yet used for future
// allocations).
size_t ApproximateMemoryUsage() const {
-    return blocks_memory_ + blocks_.capacity() * sizeof(char*) -
+    return blocks_memory_ + blocks_.size() * sizeof(char*) -
alloc_bytes_remaining_;
}

@@ -81,21 +82,19 @@
return blocks_.empty() && huge_blocks_.empty();
}

+  // check and adjust the block_size so that the return value is
+  // 1. in the range of [kMinBlockSize, kMaxBlockSize].
+  // 2. the multiple of align unit.
+  static size_t OptimizeBlockSize(size_t block_size);

private:
-  char inline_block_[kInlineSize] __attribute__((__aligned__(alignof(max_align_t))));
+  alignas(std::max_align_t) char inline_block_[kInlineSize];
  // Number of bytes allocated in one block
  const size_t kBlockSize;
-  // Array of new[] allocated memory blocks
-  using Blocks = std::vector<char*>;
-  Blocks blocks_;
-
-  struct MmapInfo {
-    void* addr_;
-    size_t length_;
-
-    MmapInfo(void* addr, size_t length) : addr_(addr), length_(length) {}
-  };
-  std::vector<MmapInfo> huge_blocks_;
+  // Allocated memory blocks
+  std::deque<std::unique_ptr<char[]>> blocks_;
+  // Huge page allocations
+  std::deque<MemMapping> huge_blocks_;
  size_t irregular_block_num = 0;

// Stats for current active block.
@@ -108,15 +107,15 @@
// How many bytes left in currently active block?
size_t alloc_bytes_remaining_ = 0;

-#ifdef MAP_HUGETLB
  size_t hugetlb_size_ = 0;
-#endif  // MAP_HUGETLB

char* AllocateFromHugePage(size_t bytes);
char* AllocateFallback(size_t bytes, bool aligned);
char* AllocateNewBlock(size_t block_bytes);

// Bytes of memory in blocks allocated so far
size_t blocks_memory_ = 0;
+  // Non-owned
AllocTracker* tracker_;
};

@@ -133,9 +132,4 @@ inline char* Arena::Allocate(size_t bytes) {
return AllocateFallback(bytes, false /* unaligned */);
}

-// check and adjust the block_size so that the return value is
-// 1. in the range of [kMinBlockSize, kMaxBlockSize].
-// 2. the multiple of align unit.
-extern size_t OptimizeBlockSize(size_t block_size);

} // namespace ROCKSDB_NAMESPACE
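Taken together, the public surface after this change can be exercised as below. This is a sketch only: the constructor signature is taken from the `arena.cc` hunk above, and all arguments are passed explicitly rather than assuming defaults not shown in this diff. Passing `huge_page_size = 0` keeps everything on the plain `new[]` path.

```cpp
#include "memory/arena.h"

// Usage sketch of the refactored Arena.
void ArenaUsageSketch() {
  ROCKSDB_NAMESPACE::Arena arena(ROCKSDB_NAMESPACE::Arena::kMinBlockSize,
                                 /*tracker=*/nullptr, /*huge_page_size=*/0);
  char* p = arena.Allocate(100);  // bump-pointer, unaligned
  char* q =
      arena.AllocateAligned(256, /*huge_page_size=*/0, /*logger=*/nullptr);
  (void)p;
  (void)q;
  // No per-allocation frees: blocks_ (unique_ptr<char[]>) and huge_blocks_
  // (MemMapping) release everything when `arena` leaves scope.
}
```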