Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions extension/memory_allocator/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Please this file formatted by running:
# ~~~
# cmake-format -i CMakeLists.txt
# ~~~

cmake_minimum_required(VERSION 3.19)

# Source root directory for executorch.
if(NOT EXECUTORCH_ROOT)
set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..)
endif()

list(TRANSFORM _extension_module__srcs PREPEND "${EXECUTORCH_ROOT}/")
if(CMAKE_TOOLCHAIN_IOS
OR CMAKE_TOOLCHAIN_ANDROID
OR APPLE
)
# Building a share library on iOS requires code signing On Android we see
# duplicated registration when using shared lib
add_library(extension_memory_allocator STATIC ${_extension_memory_allocator__srcs})
else()
add_library(extension_memory_allocator ${_extension_memory_allocator__srcs})
endif()
target_link_libraries(
extension_memory_allocator PRIVATE executorch_core)
target_include_directories(
extension_memory_allocator PUBLIC ${_common_include_directories}
)
target_compile_options(
extension_memory_allocator
PUBLIC $<$<CXX_COMPILER_ID:MSVC>:/wd4996>
$<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-Wno-deprecated-declarations -fPIC>
)

# Install libraries
install(
TARGETS extension_memory_allocator
EXPORT ExecuTorchTargets
DESTINATION ${CMAKE_INSTALL_LIBDIR}
INCLUDES
DESTINATION ${_common_include_directories}
)
89 changes: 89 additions & 0 deletions extension/memory_allocator/cpu_caching_malloc_allocator.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#include <cstdlib>

#include <executorch/extension/memory_allocator/cpu_caching_malloc_allocator.h>

namespace executorch::extension {

namespace {
size_t get_alignment_adjusted_size(size_t size, size_t alignment) {
alignment = std::max(alignment, kDefaultAlignment);
if (size % alignment != 0) {
// Adjust size to the next multiple of alignment
// This is needed for aligned_alloc to work
return (size + alignment) & ~(alignment - 1);
} else {
return size;
}
}
} // namespace

CPUCachingAllocator::CPUCachingAllocator(uint32_t max_size)
: MemoryAllocator(0, nullptr) {
max_size_ = max_size;
current_size_ = 0;
}

void* CPUCachingAllocator::allocate(size_t size, size_t alignment) {
EXECUTORCH_TRACK_ALLOCATION(prof_id(), size);

if (!isPowerOf2(alignment)) {
ET_LOG(Error, "Alignment %zu is not a power of 2", alignment);
return nullptr;
}
size = get_alignment_adjusted_size(size, alignment);

std::lock_guard<std::mutex> guard(mutex_);
const auto& it = available_map_.find(size);
if (it == available_map_.end() || it->second.empty()) {
if (current_size_ + size > max_size_) {
// Freeing while holding the lock will cause performance issues
// we probably should log how often this happens so as to allow
// for calling site to adjust the max_size_ parameter
free_cached();
}
void* ptr = std::aligned_alloc(alignment, size);
current_size_ += size;
if (ptr == nullptr) {
ET_LOG(Error, "Failed to allocate memory");
return nullptr;
}
allocation_map_[ptr] = size;
return ptr;
}
void* ptr = it->second.back();
it->second.pop_back();
allocation_map_[ptr] = size;
return ptr;
}

void CPUCachingAllocator::free_cached() {
// We dont lock mutex_ here because it will cause deadlock otherwise
// we could use recursive_mutex but we just design this differently since
// free_cache is not a public API anyways
for (const auto& it : available_map_) {
for (const auto ptr : it.second) {
std::free(ptr);
}
}
available_map_.clear();
}

void CPUCachingAllocator::reset() {
std::lock_guard<std::mutex> guard(mutex_);
for (auto& it : allocation_map_) {
void* ptr = it.first;
size_t alloc_size = it.second;
// Cache the memory
available_map_[alloc_size].push_back(ptr);
current_size_ -= alloc_size;
}
allocation_map_.clear();
}

CPUCachingAllocator::~CPUCachingAllocator() {
// destructor must be called in thread safe manner
reset();
free_cached();
}

} // namespace executorch::extension
81 changes: 81 additions & 0 deletions extension/memory_allocator/cpu_caching_malloc_allocator.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#pragma once

#include <cstddef>
#include <mutex>

#include <executorch/runtime/core/memory_allocator.h>

#ifdef USE_C10_SMALL_VECTOR
#include <c10/util/SmallVector.h>
#else
#include <vector>
#endif

#ifdef USE_C10_FLAT_HASH_MAP
#include <c10/util/flat_hash_map.h>
#else
#include <unordered_map>
#endif

/*
* CPUCachingAllocator:
* This file is copied over from c10/mobile/CPUCachingAllocator.h
* It is a thread safe caching allocator.
*/

namespace executorch::extension {

#ifdef USE_C10_SMALL_VECTOR
template <typename T, unsigned N>
using SmallVector = c10::SmallVector<T, N>;
#else
template <typename T, unsigned N>
using SmallVector = std::vector<T>;
#endif

#ifdef USE_C10_FLAT_HASH_MAP
template <typename KeyType, typename ValueType>
using FlatHashMap = ska::flat_hash_map<KeyType, ValueType>;
#else
template <typename KeyType, typename ValueType>
using FlatHashMap = std::unordered_map<KeyType, ValueType>;
#endif

constexpr size_t kDefaultAlignment = 64;
class CPUCachingAllocator : public executorch::runtime::MemoryAllocator {
/*
* What it does:
* Caches all the allocations carried out by this allocator.
* Cache key is the size of the allocation.
* If requested size is found in the cache returns the cached pointer.
* What it does not do:
* No speculative allocation for any future allocations.
*/
private:
void free_cached();

protected:
// Invariants.
// New invariants must be written.
FlatHashMap<size_t, SmallVector<void*, 16>> available_map_;
FlatHashMap<void*, size_t> allocation_map_;
// Since allocation_map, which is a global instance, is mutated/read via
// all public APIs we need a global mutex.
std::mutex mutex_;
size_t max_size_;
size_t current_size_;

public:
/*
max_size: Maximum size of memory to cache. Never cache more than that.
*/
CPUCachingAllocator(uint32_t max_size);
// Checks the cache to see if allocation of size bytes can be found.
// If so return cached memory, else
// allocates memory, records it for caching and returns.
void* allocate(size_t size, size_t alignment = kDefaultAlignment) override;
void reset() override;
~CPUCachingAllocator();
};

} // namespace executorch::extension
17 changes: 17 additions & 0 deletions extension/memory_allocator/targets.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,20 @@ def define_common_targets():
"@EXECUTORCH_CLIENTS",
],
)

runtime.cxx_library(
name = "cpu_caching_allocator",
srcs = [
"cpu_caching_malloc_allocator.cpp",
],
exported_headers = [
"cpu_caching_malloc_allocator.h",
],
exported_deps = [
"//executorch/runtime/core:memory_allocator",
],
visibility = [
"//executorch/extension/memory_allocator/test/...",
"@EXECUTORCH_CLIENTS",
],
)
Loading
Loading