The current pagemap on Windows has several conditionals on the critical path of deallocation, because it checks for initialisation as it goes. This also leads to less optimal codegen when combining the fast and slow paths after the pagemap check.

We can implement a pagemap with no conditionals on the fast path if each level has a default initialisation. I have been experimenting with a type-safe, pre-initialised multi-level tree:
```cpp
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <new>
#include <type_traits>
static constexpr size_t block_size = 65536;

template<typename T, size_t total_entries, void* alloc_block()>
class Node
{
public:
  static constexpr bool is_leaf = (total_entries * sizeof(T)) <= block_size;

  /**
   * Calculate the number of entries that are stored at this level of the
   * tree.
   *
   * `TT` is used to allow the change of representation from `T` at the
   * bottom level to pointers at the higher levels.
   * `entries` is used to say how many entries are required at this level.
   **/
  template<typename TT, size_t entries>
  static constexpr size_t calc_entries()
  {
    if constexpr (entries * sizeof(TT) <= block_size)
    {
      return entries;
    }
    else
    {
      constexpr size_t next = (entries * sizeof(TT)) / block_size;
      return calc_entries<void*, next>();
    }
  }

  /// The number of elements at this level of the tree.
  static constexpr size_t entries = calc_entries<T, total_entries>();

  /// The number of entries each sub-entry should contain.
  static constexpr size_t sub_entries = total_entries / entries;

  using SubT = Node<T, sub_entries, alloc_block>;

  /**
   * Type of entries in the tree: these are either the leaf entries or the
   * pointers to the next level down.
   */
  struct Ptr
  {
    std::conditional_t<is_leaf, T, SubT*> value;

    constexpr static std::conditional_t<is_leaf, T, SubT*> init()
    {
      if constexpr (is_leaf)
      {
        return T();
      }
      else
      {
        return SubT::original();
      }
    }

    constexpr Ptr() noexcept : value(init()) {}
    Ptr(std::conditional_t<is_leaf, T, SubT*> v) noexcept : value(v) {}
  };
#if !defined(_MSC_VER) || defined(__clang__)
  /// The address used as the default address for this level in the tree.
  inline static Node original_block{};
#endif

  constexpr static Node* original()
  {
#if defined(_MSC_VER) && !defined(__clang__)
    // The address used as the default address for this level in the tree.
    // MSVC does not support `inline static` members of the enclosing type.
    constexpr static Node original_block{};
#endif
    return const_cast<Node*>(&original_block);
  }

#if !defined(_MSC_VER) || defined(__clang__)
  /// The address used for the lock at this level in the tree.
  inline static Node lock_block{};
#endif

  constexpr static Node* lock()
  {
#if defined(_MSC_VER) && !defined(__clang__)
    // The address used for the lock at this level in the tree.
    // MSVC does not support `inline static` members of the enclosing type.
    constexpr static Node lock_block{};
#endif
    return const_cast<Node*>(&lock_block);
  }
  constexpr Node() noexcept {}

  /// Data store for this level of the tree. Value-initialised so that every
  /// entry starts as the default `Ptr` (pointing at `original()` on interior
  /// levels) rather than being left indeterminate.
  std::atomic<Ptr> array[entries]{};

  /// Get the element at this index.
  T get(size_t index)
  {
    if constexpr (is_leaf)
    {
      return array[index].load().value;
    }
    else
    {
      return array[index / sub_entries].load().value->get(index % sub_entries);
    }
  }

  /// Set the element at this index.
  void set(size_t index, T v)
  {
    if constexpr (is_leaf)
    {
      array[index] = Ptr(v);
    }
    else
    {
      auto next = array[index / sub_entries].load().value;
      if ((next != SubT::original()) && (next != SubT::lock()))
      {
        next->set(index % sub_entries, v);
        return;
      }
      set_slow(index, v);
    }
  }
  __attribute__((noinline))
  void set_slow(size_t index, T v)
  {
    auto expected = Ptr(SubT::original());
    if (array[index / sub_entries].compare_exchange_strong(
          expected, Ptr(SubT::lock())))
    {
      // Allocate a new node.
      void* new_block = alloc_block();
      // Initialise the block.
      auto new_block_typed = new (new_block) SubT();
      // Unlock the node by publishing the initialised block.
      array[index / sub_entries].store(Ptr(new_block_typed));
    }
    else
    {
      // Another thread is initialising this node; wait for it to finish.
      while (array[index / sub_entries].load().value == SubT::lock())
      {
        // Aal::pause();
      }
    }
    array[index / sub_entries].load().value->set(index % sub_entries, v);
  }
};
void* alloc_block()
{
  return malloc(block_size);
}

Node<uint8_t, 1ULL << 24, alloc_block> tree{};

extern "C" uint8_t get(size_t c) { return tree.get(c); }
extern "C" void set(size_t c, uint8_t v) { tree.set(c,v); }This generates great ASM (haven't actually tested it yet). I was just looking at integrating this into snmalloc, but there are two challenges.
This generates great ASM (haven't actually tested it yet). I was just looking at integrating this into snmalloc, but there are two challenges:

- `struct PagemapConfig` and its uses
- `page_for_address`
I think for the `PagemapConfig` it should be something like:

```cpp
static constexpr PagemapConfig config = {
  2, false, sizeof(uintptr_t), GRANULARITY_BITS, sizeof(T), BLOCK_SIZE};
```
Update the version, and add a new field for the block size, though I don't actually know whether it is necessary to expose that detail. This new implementation degenerates to the flat map when `block_size` is equal to `total_entries * sizeof(T)`, so by exposing `block_size` the second field becomes redundant.
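To make the degeneracy concrete (using the `Node` sketch above, again untested): if the whole payload fits in a single block, the root is itself a leaf and the structure is just a flat array with no indirection.

```cpp
// When total_entries * sizeof(T) == block_size, the root is a leaf:
using Flat = Node<uint8_t, block_size, alloc_block>;
static_assert(Flat::is_leaf);
static_assert(Flat::entries == block_size); // a single flat 64 KiB array
```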
I think `page_for_address` only ever exposes the page containing the `T`, and we have always made that page part of the pagemap if we return it.
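If that understanding is right, the equivalent on the new structure would be to walk to the leaf and return the leaf block itself. A hypothetical sketch (`page_for` is an invented name, not the existing `page_for_address` signature):

```cpp
// Member of Node: return the block that holds the entry for `index`.
// On an uninitialised path this returns `original()`'s block, which is
// still memory owned by the pagemap.
void* page_for(size_t index)
{
  if constexpr (is_leaf)
  {
    // The leaf block is the page containing the T.
    return this;
  }
  else
  {
    return array[index / sub_entries].load().value->page_for(index % sub_entries);
  }
}
```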
@davidchisnall, can you check my understanding before I work on the integration?