[WIP] Add Apple Silicon support to Mesh #104

Open · wants to merge 8 commits into base: master
4 changes: 3 additions & 1 deletion Makefile
@@ -3,7 +3,9 @@
# Version 2.0, that can be found in the LICENSE file.

PREFIX = /usr
BAZEL_CONFIG = --config=modern-amd64
# FIXME: put the arch flag back for x86
# FIXME: revert temp flags
BAZEL_CONFIG = --config=disable-meshing --config=debugsymbols
LIB_SUFFIX =

UNAME_S = $(shell uname -s)
17 changes: 11 additions & 6 deletions WORKSPACE
@@ -40,11 +40,16 @@ http_archive(
],
)

http_archive(
# FIXME: temporary
local_repository(
name = "org_heaplayers",
sha256 = "c8a9f7589e13112515ba1ac8647b4e80462f18a6773f7f5f132a7d7602fe2aec",
strip_prefix = "Heap-Layers-{}".format(commit["heap_layers"]),
urls = [
"https://github.com/emeryberger/Heap-Layers/archive/{}.zip".format(commit["heap_layers"]),
],
path = "../Heap-Layers",
)
# http_archive(
# name = "org_heaplayers",
# sha256 = "c8a9f7589e13112515ba1ac8647b4e80462f18a6773f7f5f132a7d7602fe2aec",
# strip_prefix = "Heap-Layers-{}".format(commit["heap_layers"]),
# urls = [
# "https://github.com/emeryberger/Heap-Layers/archive/{}.zip".format(commit["heap_layers"]),
# ],
# )
4 changes: 3 additions & 1 deletion bazel
@@ -324,7 +324,7 @@ def determine_bazel_filename(version):
machine = normalized_machine_arch_name()
if machine != "x86_64" and machine != 'arm64':
raise Exception(
'Unsupported machine architecture "{}". Bazel currently only supports x86_64.'.format(
'Unsupported machine architecture "{}". Bazel currently only supports x86_64 and arm64.'.format(
machine
)
)
@@ -342,6 +342,8 @@ def normalized_machine_arch_name():
machine = platform.machine().lower()
if machine == "amd64":
machine = "x86_64"
if machine == "aarch64":
machine = "arm64"
return machine


39 changes: 18 additions & 21 deletions src/bitmap.h
@@ -86,28 +86,21 @@ class AtomicBitmapBase {
AtomicBitmapBase(size_t bitCount) {
d_assert_msg(bitCount <= maxBits, "max bits (%zu) exceeded: %zu", maxBits, bitCount);

static_assert(wordCount(representationSize(maxBits)) == 4, "unexpected representation size");
// for (size_t i = 0; i < wordCount(representationSize(maxBits)); i++) {
// _bits[i].store(0, std::memory_order_relaxed);
// }
_bits[0].store(0, std::memory_order_relaxed);
_bits[1].store(0, std::memory_order_relaxed);
_bits[2].store(0, std::memory_order_relaxed);
_bits[3].store(0, std::memory_order_relaxed);
// FIXME: this used to be manually unrolled. hopefully clang can unroll it for us?
for (size_t i = 0; i < wordCount(representationSize(maxBits)); i++) {
_bits[i].store(0, std::memory_order_relaxed);
}
std::atomic_thread_fence(std::memory_order_release);
}

~AtomicBitmapBase() {
}

inline void ATTRIBUTE_ALWAYS_INLINE setAndExchangeAll(size_t *oldBits, const size_t *newBits) {
// for (size_t i = 0; i < wordCount(representationSize(maxBits)); i++) {
// oldBits[i] = _bits[i].exchange(newBits[i]);
// }
oldBits[0] = _bits[0].exchange(newBits[0], std::memory_order_acq_rel);
oldBits[1] = _bits[1].exchange(newBits[1], std::memory_order_acq_rel);
oldBits[2] = _bits[2].exchange(newBits[2], std::memory_order_acq_rel);
oldBits[3] = _bits[3].exchange(newBits[3], std::memory_order_acq_rel);
// FIXME: this used to be manually unrolled. hopefully clang can unroll it for us?
for (size_t i = 0; i < wordCount(representationSize(maxBits)); i++) {
oldBits[i] = _bits[i].exchange(newBits[i], std::memory_order_acq_rel);
}
}

public:
@@ -140,8 +133,12 @@ class AtomicBitmapBase {
}

inline uint32_t ATTRIBUTE_ALWAYS_INLINE inUseCount() const {
return __builtin_popcountl(_bits[0]) + __builtin_popcountl(_bits[1]) + __builtin_popcountl(_bits[2]) +
__builtin_popcountl(_bits[3]);
// FIXME: this used to be manually unrolled. hopefully clang can unroll it for us?
uint32_t sum = 0;
for (size_t i = 0; i < wordCount(representationSize(maxBits)); i++) {
sum += __builtin_popcountl(_bits[i]);
}
return sum;
}

protected:
@@ -577,12 +574,12 @@ class BitmapBase : public Super {
} // namespace bitmap

namespace internal {
typedef bitmap::BitmapBase<bitmap::AtomicBitmapBase<256>> Bitmap;
typedef bitmap::BitmapBase<bitmap::RelaxedFixedBitmapBase<256>> RelaxedFixedBitmap;
typedef bitmap::BitmapBase<bitmap::AtomicBitmapBase<kMaxShuffleVectorLength>> Bitmap;
typedef bitmap::BitmapBase<bitmap::RelaxedFixedBitmapBase<kMaxShuffleVectorLength>> RelaxedFixedBitmap;
typedef bitmap::BitmapBase<bitmap::RelaxedBitmapBase> RelaxedBitmap;

static_assert(sizeof(Bitmap) == sizeof(size_t) * 4, "Bitmap unexpected size");
static_assert(sizeof(RelaxedFixedBitmap) == sizeof(size_t) * 4, "Bitmap unexpected size");
static_assert(sizeof(Bitmap) <= sizeof(size_t) * 16, "Bitmap unexpected size");
static_assert(sizeof(RelaxedFixedBitmap) <= sizeof(size_t) * 16, "Bitmap unexpected size");
static_assert(sizeof(RelaxedBitmap) == sizeof(size_t) * 2, "Bitmap unexpected size");
} // namespace internal
} // namespace mesh
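Note on the bitmap.h change above: the hand-unrolled four-word loops assumed a 256-bit bitmap (4 x 64-bit words); with kMaxShuffleVectorLength raised to 1024 on Apple Silicon the word count becomes 16, so the diff switches to loops whose bound, wordCount(representationSize(maxBits)), is a compile-time constant. A minimal standalone sketch of the pattern the FIXMEs rely on (the helper names and ToyAtomicBitmap type are illustrative, not Mesh's actual code):

#include <atomic>
#include <cstddef>
#include <cstdint>

// Illustrative stand-ins for Mesh's helpers: bits live in 64-bit words.
constexpr size_t representationSize(size_t bitCount) { return ((bitCount + 63) / 64) * sizeof(size_t); }
constexpr size_t wordCount(size_t byteCount) { return byteCount / sizeof(size_t); }

template <size_t maxBits>
class ToyAtomicBitmap {
public:
  ToyAtomicBitmap() {
    // The trip count is a constant expression (4 for 256 bits, 16 for 1024),
    // so the optimizer can fully unroll this, like the old hand-written code.
    for (size_t i = 0; i < wordCount(representationSize(maxBits)); i++) {
      _bits[i].store(0, std::memory_order_relaxed);
    }
    std::atomic_thread_fence(std::memory_order_release);
  }

  uint32_t inUseCount() const {
    uint32_t sum = 0;
    for (size_t i = 0; i < wordCount(representationSize(maxBits)); i++) {
      sum += __builtin_popcountl(_bits[i].load(std::memory_order_relaxed));
    }
    return sum;
  }

private:
  std::atomic<size_t> _bits[wordCount(representationSize(maxBits))];
};

Inspecting the generated assembly (e.g. clang++ -O2 -S) is probably the easiest way to confirm the unrolling actually happens.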
39 changes: 31 additions & 8 deletions src/common.h
@@ -58,6 +58,13 @@
#define MAP_NORESERVE 0
#endif

#if __APPLE__
#include <TargetConditionals.h>
#if TARGET_CPU_ARM64
#define MESH_APPLE_SILICON
#endif
#endif

namespace mesh {

static constexpr bool kMeshingEnabled = MESHING_ENABLED == 1;
@@ -69,17 +76,28 @@ static constexpr int kMapShared = 1;
static constexpr int kMapShared = kMeshingEnabled ? MAP_SHARED : MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
#endif

// we have to define this here for use in meshable_arena's CheapHeap we allocate
// MiniHeaps out of. We validate (and fail compilation) if this gets out of date
// with a static_assert at the bottom of mini_heap.h
static constexpr size_t kMiniHeapSize = 64;

static constexpr size_t kMinObjectSize = 16;
static constexpr size_t kMaxSize = 16384;
static constexpr size_t kClassSizesMax = 25;
static constexpr size_t kAlignment = 8;
static constexpr int kMinAlign = 16;

#ifdef MESH_APPLE_SILICON

static constexpr uint64_t kPageSize = 16384;
// we have to define this here for use in meshable_arena's CheapHeap we allocate
// MiniHeaps out of. We validate (and fail compilation) if this gets out of date
// with a static_assert at the bottom of mini_heap.h
static constexpr size_t kMiniHeapSize = 160;

#else

static constexpr uint64_t kPageSize = 4096;
// see comment in other branch
static constexpr size_t kMiniHeapSize = 64;

#endif

static constexpr size_t kMaxFastLargeSize = 256 * 1024; // 256Kb

static constexpr size_t kMaxSplitListSize = 16384;
@@ -113,10 +131,15 @@ static constexpr size_t kMinArenaExpansion = 4096; // 16 MB in pages
// ensures we amortize the cost of going to the global heap enough
static constexpr uint64_t kMinStringLen = 8;
static constexpr size_t kMiniheapRefillGoalSize = 4 * 1024;
// this must be kept below 2^6 because it's used as the max value in a bitfield; see sv::Entry
static constexpr size_t kMaxMiniheapsPerShuffleVector = 24;

// shuffle vector features
static constexpr int16_t kMaxShuffleVectorLength = 256; // sizeof(uint8_t) << 8
#ifdef MESH_APPLE_SILICON
static constexpr int16_t kMaxShuffleVectorLength = 1024;
#else
static constexpr int16_t kMaxShuffleVectorLength = 256;
#endif
static constexpr bool kEnableShuffleOnInit = SHUFFLE_ON_INIT == 1;
static constexpr bool kEnableShuffleOnFree = SHUFFLE_ON_FREE == 1;

@@ -127,7 +150,7 @@ static constexpr std::chrono::milliseconds kZeroMs{0};
static constexpr std::chrono::milliseconds kMeshPeriodMs{100}; // 100 ms

// controls aspects of miniheaps
static constexpr size_t kMaxMeshes = 256; // 1 per bit
static constexpr size_t kMaxMeshes = kMaxShuffleVectorLength; // 1 per bit
#ifdef __APPLE__
static constexpr size_t kArenaSize = 32ULL * 1024ULL * 1024ULL * 1024ULL; // 16 GB
#else
@@ -182,7 +205,7 @@ using std::unique_lock;
#define ATTRIBUTE_ALIGNED(s) __attribute__((aligned(s)))
#define ATTRIBUTE_MALLOC __attribute__((malloc))
#define ATTRIBUTE_ALLOC_SIZE(x) __attribute__((alloc_size(x)))
#define ATTRIBUTE_ALLOC_SIZE2(x,y) __attribute__((alloc_size(x, y)))
#define ATTRIBUTE_ALLOC_SIZE2(x, y) __attribute__((alloc_size(x, y)))
#define CACHELINE_SIZE 64
#define CACHELINE_ALIGNED ATTRIBUTE_ALIGNED(CACHELINE_SIZE)
#define CACHELINE_ALIGNED_FN CACHELINE_ALIGNED
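The common.h changes above are the core of the port: Apple Silicon uses 16 KB pages, so kPageSize, kMiniHeapSize, and kMaxShuffleVectorLength (16384 / 16 = 1024 objects per page) become architecture-dependent. A small, hypothetical startup check (not part of this PR) that the binary's compile-time page size matches what the kernel reports might look like this:

#include <unistd.h>

#include <cstdint>
#include <cstdio>
#include <cstdlib>

// Mirrors the constants from the diff; the real definitions live in common.h.
#ifdef MESH_APPLE_SILICON
static constexpr uint64_t kPageSize = 16384;  // 16 KB pages on Apple Silicon
#else
static constexpr uint64_t kPageSize = 4096;
#endif

// Hypothetical sanity check: fail fast if the allocator was built for the
// wrong page size instead of misbehaving later in the arena code.
static void checkPageSize() {
  const long reported = sysconf(_SC_PAGESIZE);
  if (reported > 0 && static_cast<uint64_t>(reported) != kPageSize) {
    fprintf(stderr, "mesh: built for %llu-byte pages, kernel reports %ld-byte pages\n",
            static_cast<unsigned long long>(kPageSize), reported);
    abort();
  }
}

kMiniHeapSize grows from 64 to 160 bytes because the Bitmap member grows from 32 to 128 bytes; see the MiniHeap layout comment in mini_heap.h below.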
2 changes: 1 addition & 1 deletion src/global_heap.cc
@@ -505,7 +505,7 @@ shiftedSplitting(MWC &prng, MiniHeapListEntry *miniheaps, SplitArray &left, Spli
return;
}

constexpr size_t nBytes = 32;
constexpr size_t nBytes = kMaxShuffleVectorLength / 8;
const size_t limit = rightSize < t ? rightSize : t;
d_assert(nBytes == left[0]->bitmap().byteCount());

1 change: 1 addition & 0 deletions src/mac_wrapper.cc
@@ -306,6 +306,7 @@ MESH_EXPORT void replace_malloc_destroy_zone(malloc_zone_t *) {
MESH_EXPORT kern_return_t replace_malloc_get_all_zones(task_t, memory_reader_t, vm_address_t **addresses,
unsigned *count) {
*addresses = 0;
// FIXME: this produces a warning by clang, so maybe this should have a *?
count = 0;
return KERN_SUCCESS;
}
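On the FIXME above: count is the out-parameter pointer itself, so count = 0; only nulls the local copy, which is presumably what clang is flagging; writing through the pointer (*count = 0;), mirroring *addresses = 0; on the previous line, is most likely the intent. A tiny self-contained illustration (hypothetical names, not the PR's code):

#include <cstdio>

// Assigning to the parameter only changes the local pointer ...
static void zeroParameter(unsigned *count) {
  count = nullptr;  // the caller's variable is untouched
}

// ... writing through it changes the caller's value.
static void zeroPointee(unsigned *count) {
  *count = 0;
}

int main() {
  unsigned zones = 7;
  zeroParameter(&zones);
  printf("after zeroParameter: %u\n", zones);  // prints 7
  zeroPointee(&zones);
  printf("after zeroPointee: %u\n", zones);    // prints 0
  return 0;
}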
35 changes: 21 additions & 14 deletions src/mini_heap.h
@@ -31,10 +31,16 @@ class Flags {
static inline constexpr uint32_t ATTRIBUTE_ALWAYS_INLINE getSingleBitMask(uint32_t pos) {
return 1UL << pos;
}
// FIXME: these need to be updated so offset has enough space (10 bits)
static constexpr uint32_t SizeClassShift = 0;
static constexpr uint32_t FreelistIdShift = 6;
// max value is (16K / 16 - 1) - 1 = 1022
// so needs 10 bits
static constexpr uint32_t ShuffleVectorOffsetShift = 8;
static constexpr uint32_t MaxCountShift = 16;
// max value is 16K / 16 = 1024 = 2^10 (for 16K pages)
// so needs 11 bits (10 for 0-1023 + 1)
// we give it one more in case we need more later
static constexpr uint32_t MaxCountShift = 18;
static constexpr uint32_t MeshedOffset = 30;

inline void ATTRIBUTE_ALWAYS_INLINE setMasked(uint32_t mask, uint32_t newVal) {
@@ -53,10 +59,10 @@
(freelistId << FreelistIdShift)} {
d_assert((freelistId & 0x3) == freelistId);
d_assert((sizeClass & ((1 << FreelistIdShift) - 1)) == sizeClass);
d_assert(svOffset < 255);
d_assert(svOffset < (kPageSize / kMinObjectSize - 1));
d_assert_msg(sizeClass < 255, "sizeClass: %u", sizeClass);
d_assert(maxCount <= 256);
d_assert(this->maxCount() == maxCount);
d_assert(maxCount <= (kPageSize / kMinObjectSize));
d_assert_msg(this->maxCount() == maxCount, "maxCount() (%u) != maxCount (%u)", this->maxCount(), maxCount);
}

inline uint32_t freelistId() const {
@@ -73,7 +79,7 @@ class Flags {

inline uint32_t maxCount() const {
// XXX: does this assume little endian?
return (_flags.load(std::memory_order_seq_cst) >> MaxCountShift) & 0x1ff;
return (_flags.load(std::memory_order_seq_cst) >> MaxCountShift) & 0x7ff;
}

inline uint32_t sizeClass() const {
@@ -512,20 +518,21 @@ class MiniHeap {
return spanptr;
}

internal::Bitmap _bitmap; // 32 bytes 32
const Span _span; // 8 40
MiniHeapListEntry _freelist{}; // 8 48
atomic<pid_t> _current{0}; // 4 52
Flags _flags; // 4 56
const float _objectSizeReciprocal; // 4 60
MiniHeapID _nextMeshed{}; // 4 64
// The comments are for the max size, since it is architecture-dependent.
internal::Bitmap _bitmap; // 128 128 bytes
const Span _span; // 8 136
MiniHeapListEntry _freelist{}; // 8 144
atomic<pid_t> _current{0}; // 4 148
Flags _flags; // 4 152
const float _objectSizeReciprocal; // 4 156
MiniHeapID _nextMeshed{}; // 4 160
};

typedef FixedArray<MiniHeap, 63> MiniHeapArray;

static_assert(sizeof(pid_t) == 4, "pid_t not 32-bits!");
static_assert(sizeof(mesh::internal::Bitmap) == 32, "Bitmap too big!");
static_assert(sizeof(MiniHeap) == 64, "MiniHeap too big!");
static_assert(sizeof(mesh::internal::Bitmap) == kMaxShuffleVectorLength / 8, "Bitmap too big!");
static_assert(sizeof(MiniHeap) <= 160, "MiniHeap too big!");
static_assert(sizeof(MiniHeap) == kMiniHeapSize, "MiniHeap size mismatch");
static_assert(sizeof(MiniHeapArray) == 64 * sizeof(void *), "MiniHeapArray too big!");
} // namespace mesh
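The Flags changes above repack the 32-bit flags word for 16 KB pages: the shuffle-vector offset can now reach 1022 (10 bits) and maxCount can reach 1024 (11 bits, mask 0x7ff), which is why MaxCountShift moves from 16 to 18. A quick compile-time check of that layout, written as a standalone sketch rather than Mesh's actual assertions:

#include <cstdint>

// Bit layout from the diff: [0..5] size class, [6..7] freelist id,
// [8..17] shuffle-vector offset, [18..28] max count, bit 30 = "meshed".
static constexpr uint32_t SizeClassShift = 0;
static constexpr uint32_t FreelistIdShift = 6;
static constexpr uint32_t ShuffleVectorOffsetShift = 8;
static constexpr uint32_t MaxCountShift = 18;
static constexpr uint32_t MeshedOffset = 30;

static constexpr uint32_t kPageSize = 16384;  // Apple Silicon
static constexpr uint32_t kMinObjectSize = 16;
static constexpr uint32_t kMaxObjectsPerPage = kPageSize / kMinObjectSize;  // 1024

// 10 bits for offsets up to 1022, 11 bits for counts up to 1024, and the
// meshed bit must not be clobbered by either field.
static_assert(ShuffleVectorOffsetShift + 10 <= MaxCountShift, "offset field overlaps max count");
static_assert(MaxCountShift + 11 <= MeshedOffset, "max count field overlaps meshed bit");
static_assert(kMaxObjectsPerPage < (1u << 11), "max count does not fit in 11 bits");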
2 changes: 1 addition & 1 deletion src/runtime.cc
@@ -358,7 +358,7 @@ void Runtime::segfaultHandler(int sig, siginfo_t *siginfo, void *context) {

// okToProceed is a barrier that ensures any in-progress meshing has
// completed, and the reason for the fault was 'just' a meshing
if (siginfo->si_code == SEGV_ACCERR && runtime().heap().okToProceed(siginfo->si_addr)) {
if ((siginfo->si_code == SEGV_ACCERR || siginfo->si_code == SEGV_MAPERR) && runtime().heap().okToProceed(siginfo->si_addr)) {
// debug("TODO: trapped access violation from meshing, log stat\n");
return;
}
11 changes: 6 additions & 5 deletions src/shuffle_vector.h
@@ -27,7 +27,7 @@ class Entry {
Entry() noexcept : _mhOffset{0}, _bitOffset{0} {
}

explicit Entry(uint8_t mhOff, uint8_t bitOff) : _mhOffset{mhOff}, _bitOffset{bitOff} {
explicit Entry(uint16_t mhOff, uint16_t bitOff) : _mhOffset{mhOff}, _bitOffset{bitOff} {
}

Entry(const Entry &rhs) = default;
@@ -40,17 +40,18 @@
return _mhOffset == rhs._mhOffset && _bitOffset == rhs._bitOffset;
}

inline uint8_t ATTRIBUTE_ALWAYS_INLINE miniheapOffset() const {
// FIXME: should this cast to uint8_t?
inline uint16_t ATTRIBUTE_ALWAYS_INLINE miniheapOffset() const {
return _mhOffset;
}

inline uint8_t ATTRIBUTE_ALWAYS_INLINE bit() const {
inline uint16_t ATTRIBUTE_ALWAYS_INLINE bit() const {
return _bitOffset;
}

private:
uint8_t _mhOffset;
uint8_t _bitOffset;
uint16_t _mhOffset : 6;
uint16_t _bitOffset : 10;
};
static_assert(sizeof(Entry) == 2, "Entry too big!");
} // namespace sv
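For the Entry change above: the two fields still pack into 16 bits, but now as a 6-bit miniheap offset (kMaxMiniheapsPerShuffleVector is 24, below 2^6 = 64, per the comment in common.h) and a 10-bit bit offset (1024 objects per 16 KB page means indices 0..1023, exactly 10 bits). A minimal sketch of the same packing with illustrative names:

#include <cstdint>

// Mirrors the diff's sv::Entry: two bit-fields sharing one 16-bit word.
struct ToyEntry {
  uint16_t mhOffset : 6;    // miniheap index within the shuffle vector (0..63; only 0..23 used)
  uint16_t bitOffset : 10;  // object index within that miniheap's bitmap (0..1023)
};
static_assert(sizeof(ToyEntry) == 2, "ToyEntry should stay 2 bytes");

One thing to double-check in review: the constructor still takes plain uint16_t arguments, so values wider than the bit-fields would silently truncate.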
5 changes: 5 additions & 0 deletions src/testing/unit/concurrent_mesh_test.cc
@@ -11,6 +11,7 @@

#include "gtest/gtest.h"

#include "common.h"
#include "internal.h"
#include "meshing.h"
#include "runtime.h"
@@ -20,7 +21,11 @@
using namespace mesh;

static constexpr uint32_t StrLen = 128;
#ifdef MESH_APPLE_SILICON
static constexpr uint32_t ObjCount = 128;
#else
static constexpr uint32_t ObjCount = 32;
#endif

static char *s1;
static char *s2;
3 changes: 2 additions & 1 deletion src/thread_local_heap.cc
@@ -44,7 +44,8 @@ void ThreadLocalHeap::InitTLH() {
ThreadLocalHeap *ThreadLocalHeap::NewHeap(pthread_t current) {
// we just allocate out of our internal heap
void *buf = mesh::internal::Heap().malloc(sizeof(ThreadLocalHeap));
static_assert(sizeof(ThreadLocalHeap) < 4096 * 8, "tlh should have a reasonable size");
// FIXME: is this size too big?
static_assert(sizeof(ThreadLocalHeap) < 4096 * 16, "tlh should have a reasonable size");
hard_assert(buf != nullptr);
hard_assert(reinterpret_cast<uintptr_t>(buf) % CACHELINE_SIZE == 0);
