pytorch · kimishpatel · Dec 7, 2025 · Dec 5, 2025
@@ -0,0 +1,49 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Please keep this file formatted by running:
+# ~~~
+# cmake-format -i CMakeLists.txt
+# ~~~
+
+cmake_minimum_required(VERSION 3.19)
+
+# Source root directory for executorch.
+if(NOT EXECUTORCH_ROOT)
+  set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..)
+endif()
+
+# Prefix every entry of the source list with the ExecuTorch root directory.
+# NOTE(review): _extension_memory_allocator__srcs is not set in this file; it
+# is presumably populated by an enclosing scope before this runs -- confirm.
+list(TRANSFORM _extension_memory_allocator__srcs PREPEND "${EXECUTORCH_ROOT}/")
+if(CMAKE_TOOLCHAIN_IOS
+   OR CMAKE_TOOLCHAIN_ANDROID
+   OR APPLE
+)
+  # Building a shared library on iOS requires code signing. On Android we see
+  # duplicated registration when using a shared lib. Force a static library
+  # on these platforms.
+  add_library(
+    extension_memory_allocator STATIC ${_extension_memory_allocator__srcs}
+  )
+else()
+  # No explicit library type here, so CMake chooses STATIC or SHARED from
+  # BUILD_SHARED_LIBS.
+  add_library(extension_memory_allocator ${_extension_memory_allocator__srcs})
+endif()
+target_link_libraries(extension_memory_allocator PRIVATE executorch_core)
+target_include_directories(
+  extension_memory_allocator PUBLIC ${_common_include_directories}
+)
+# PUBLIC options: these also apply to targets that link against this library.
+# MSVC gets /wd4996; every other compiler gets -Wno-deprecated-declarations
+# and -fPIC.
+target_compile_options(
+  extension_memory_allocator
+  PUBLIC $<$<CXX_COMPILER_ID:MSVC>:/wd4996>
+         $<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-Wno-deprecated-declarations -fPIC>
+)
+
+# Install libraries
+install(
+  TARGETS extension_memory_allocator
+  EXPORT ExecuTorchTargets
+  DESTINATION ${CMAKE_INSTALL_LIBDIR}
+  INCLUDES
+  DESTINATION ${_common_include_directories}
+)
@@ -0,0 +1,97 @@
+#include <cstdlib>
+
+#include <executorch/extension/memory_allocator/cpu_caching_malloc_allocator.h>
+#include <executorch/extension/memory_allocator/memory_allocator_utils.h>
+
+namespace executorch::extension {
+
+// Creates an allocator with an empty cache. The MemoryAllocator base is
+// constructed with (0, nullptr). max_size is the threshold that reset()
+// compares current_size_ against, and current_size_ starts at zero.
+CPUCachingAllocator::CPUCachingAllocator(uint32_t max_size)
+    : MemoryAllocator(0, nullptr), max_size_(max_size), current_size_(0) {}
+
+// Returns a block of at least `size` bytes aligned to `alignment`, or nullptr
+// when the alignment is not a power of two, the padded size cannot be
+// computed, or malloc fails. Alignments below
+// kCachingAllocatorDefaultAlignment are raised to that value. A cached block
+// of the same padded size is reused when one is available; otherwise a new
+// block is malloc'ed.
+void* CPUCachingAllocator::allocate(size_t size, size_t alignment) {
+  EXECUTORCH_TRACK_ALLOCATION(prof_id(), size);
+
+  if (!isPowerOf2(alignment)) {
+    ET_LOG(Error, "Alignment %zu is not a power of 2", alignment);
+    return nullptr;
+  }
+  alignment = std::max(alignment, kCachingAllocatorDefaultAlignment);
+
+  // From here on `size` is the padded size; it is also the cache key.
+  auto padded_size =
+      executorch::extension::utils::get_aligned_size(size, alignment);
+  if (!padded_size.ok()) {
+    return nullptr;
+  }
+  size = padded_size.get();
+
+  std::lock_guard<std::mutex> guard(mutex_);
+  void* block = nullptr;
+  auto bucket = available_map_.find(size);
+  if (bucket != available_map_.end() && !bucket->second.empty()) {
+    // Cache hit: hand back the most recently cached block of this size.
+    block = bucket->second.back();
+    bucket->second.pop_back();
+  } else {
+    // Cache miss: allocate fresh memory. This can push current_size_ past
+    // max_size_; reset() is where that is dealt with.
+    block = std::malloc(size);
+    if (block == nullptr) {
+      ET_LOG(Error, "Failed to allocate memory");
+      return nullptr;
+    }
+    current_size_ += size;
+  }
+
+  // Track the unaligned pointer, since that is what must be passed to free();
+  // the caller receives the aligned address inside the block.
+  allocation_map_[block] = size;
+  return alignPointer(block, alignment);
+}
+
+// Frees every block held by this allocator, both cached and outstanding, and
+// resets the size accounting.
+void CPUCachingAllocator::free_everything() {
+  // mutex_ is intentionally not taken here: reset() calls this helper while
+  // already holding it, and locking a std::mutex twice from the same thread
+  // would deadlock. A recursive_mutex would also work, but since this is a
+  // private helper the caller is made responsible for synchronization.
+  for (const auto& bucket : available_map_) {
+    for (void* cached_block : bucket.second) {
+      std::free(cached_block);
+    }
+  }
+  available_map_.clear();
+
+  for (const auto& outstanding : allocation_map_) {
+    std::free(outstanding.first);
+  }
+  allocation_map_.clear();
+
+  // Both maps are now empty, so no memory is tracked any more.
+  current_size_ = 0;
+}
+
+// Ends the lifetime of every outstanding allocation. If the total malloc'ed
+// size exceeds max_size_, all memory (cached and outstanding) is freed;
+// otherwise the outstanding blocks are moved into the cache for reuse.
+void CPUCachingAllocator::reset() {
+  std::lock_guard<std::mutex> guard(mutex_);
+
+  // Every block handed out by allocate() was either taken from the cache or
+  // freshly malloc'ed and not yet cached. When the malloc'ed total is over
+  // budget, for now we simply deallocate everything.
+  if (current_size_ > max_size_) {
+    free_everything();
+    return;
+  }
+
+  // Within budget: cache each outstanding block under its padded size.
+  for (const auto& outstanding : allocation_map_) {
+    available_map_[outstanding.second].push_back(outstanding.first);
+  }
+  allocation_map_.clear();
+}
+
+// Releases every block this allocator still tracks.
+CPUCachingAllocator::~CPUCachingAllocator() {
+  // The destructor must not run concurrently with other calls on this object:
+  // reset() takes mutex_, but free_everything() below runs without it.
+  // reset() either caches or frees the outstanding blocks; free_everything()
+  // then frees whatever is left in both maps.
+  reset();
+  free_everything();
+}
+
+} // namespace executorch::extension
@@ -0,0 +1,90 @@
+#pragma once
+
+#include <cstddef>
+#include <mutex>
+
+#include <executorch/runtime/core/memory_allocator.h>
+
+#ifdef USE_C10_SMALL_VECTOR
+#include <c10/util/SmallVector.h>
+#else
+#include <vector>
+#endif
+
+#ifdef USE_C10_FLAT_HASH_MAP
+#include <c10/util/flat_hash_map.h>
+#else
+#include <unordered_map>
+#endif
+
+/*
+ * CPUCachingAllocator:
+ * This file is copied over from c10/mobile/CPUCachingAllocator.h
+ * It is a thread safe caching allocator.
+ */
+
+namespace executorch::extension {
+
+// Container aliases used by CPUCachingAllocator. Each one resolves to a c10
+// container when the corresponding USE_C10_* macro is defined and to the
+// standard-library equivalent otherwise.
+#ifdef USE_C10_SMALL_VECTOR
+template <typename T, unsigned N>
+using SmallVector = c10::SmallVector<T, N>;
+#else
+// N is accepted so both branches share one signature; std::vector ignores it.
+template <typename T, unsigned N>
+using SmallVector = std::vector<T>;
+#endif
+
+#ifdef USE_C10_FLAT_HASH_MAP
+template <typename KeyType, typename ValueType>
+using FlatHashMap = ska::flat_hash_map<KeyType, ValueType>;
+#else
+template <typename KeyType, typename ValueType>
+using FlatHashMap = std::unordered_map<KeyType, ValueType>;
+#endif
+
+// Default value of the `alignment` argument of CPUCachingAllocator::allocate.
+constexpr size_t kCachingAllocatorDefaultAlignment = 64;
+class CPUCachingAllocator : public executorch::runtime::MemoryAllocator {
+  /*
+   * What it does:
+   * Caches all the allocations carried out by this allocator.
+   * Cache key is the size of the allocation.
+   * If requested size is found in the cache returns the cached pointer.
+   * What it does not do:
+   * No speculative allocation for any future allocations.
+   */
+ private:
+  // Frees every cached and outstanding block and zeroes current_size_.
+  // Does not lock mutex_; the caller is responsible for synchronization.
+  void free_everything();
+
+ protected:
+  // Bookkeeping maintained by allocate() and reset():
+  // - available_map_: allocation size -> blocks currently cached and free to
+  //   be handed out again.
+  // - allocation_map_: block pointer -> allocation size, for blocks handed
+  //   out by allocate() since the last reset().
+  FlatHashMap<size_t, SmallVector<void*, 16>> available_map_;
+  FlatHashMap<void*, size_t> allocation_map_;
+  // Since allocation_map_ and other member variables are mutated/read via
+  // all public APIs, we need a mutex to protect concurrent access to these
+  // instance members.
+  std::mutex mutex_;
+  // Threshold set from the constructor's max_size argument.
+  size_t max_size_;
+  // Running total of bytes obtained from malloc and not yet freed.
+  size_t current_size_;
+
+ public:
+  /*
+    max_size: Maximum size of memory to cache. Never cache more than that.
+  */
+  explicit CPUCachingAllocator(uint32_t max_size);
+  // No copies allowed
+  CPUCachingAllocator(const CPUCachingAllocator&) = delete;
+  CPUCachingAllocator& operator=(const CPUCachingAllocator&) = delete;
+  // No moves allowed
+  CPUCachingAllocator(CPUCachingAllocator&&) = delete;
+  CPUCachingAllocator& operator=(CPUCachingAllocator&&) = delete;
+  // Checks the cache to see if allocation of size bytes can be found.
+  // If so return cached memory, else
+  // allocates memory, records it for caching and returns.
+  // Returns nullptr on failure.
+  void* allocate(
+      size_t size,
+      size_t alignment = kCachingAllocatorDefaultAlignment) override;
+  // Invalidates every pointer previously returned by allocate(): the blocks
+  // are either moved into the cache or freed.
+  void reset() override;
+  // Frees all memory still held by the allocator.
+  ~CPUCachingAllocator();
+};
+
+} // namespace executorch::extension
@@ -13,6 +13,7 @@
 #include <cstdlib>
 #include <vector>
 
+#include <executorch/extension/memory_allocator/memory_allocator_utils.h>
 #include <executorch/runtime/core/memory_allocator.h>
 
 namespace executorch {
@@ -51,20 +52,12 @@ class MallocMemoryAllocator : public executorch::runtime::MemoryAllocator {
       return nullptr;
     }
 
-    // The minimum alignment that malloc() is guaranteed to provide.
-    static constexpr size_t kMallocAlignment = alignof(std::max_align_t);
-    if (alignment > kMallocAlignment) {
-      // To get higher alignments, allocate extra and then align the returned
-      // pointer. This will waste an extra `alignment - 1` bytes every time, but
-      // this is the only portable way to get aligned memory from the heap.
-      const size_t extra = alignment - 1;
-      if ET_UNLIKELY (extra >= SIZE_MAX - size) {
-        ET_LOG(
-            Error, "Malloc size overflow: size=%zu + extra=%zu", size, extra);
-        return nullptr;
-      }
-      size += extra;
+    auto adjusted_size_value =
+        executorch::extension::utils::get_aligned_size(size, alignment);
+    if (!adjusted_size_value.ok()) {
+      return nullptr;
     }
+    size = adjusted_size_value.get();
     void* mem_ptr = std::malloc(size);
     if (!mem_ptr) {
       ET_LOG(Error, "Malloc failed to allocate %zu bytes", size);

@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+
+#include <executorch/runtime/core/error.h>
+#include <executorch/runtime/core/result.h>
+#include <executorch/runtime/platform/compiler.h>
+
+using executorch::runtime::Error;
+using executorch::runtime::Result;
+namespace executorch::extension::utils {
+
+// Computes how many bytes to request from malloc() so that a block of `size`
+// bytes aligned to `alignment` fits inside the returned memory.
+//
+// Returns `size` unchanged when `alignment` does not exceed what malloc()
+// already guarantees, `size + alignment - 1` otherwise, and
+// Error::InvalidArgument when that sum would overflow.
+inline Result<size_t> get_aligned_size(size_t size, size_t alignment) {
+  // The minimum alignment that malloc() is guaranteed to provide.
+  static constexpr size_t kMallocAlignment = alignof(std::max_align_t);
+  if (alignment <= kMallocAlignment) {
+    return size;
+  }
+
+  // Higher alignments are obtained by over-allocating and aligning the
+  // returned pointer afterwards. This wastes up to `alignment - 1` bytes per
+  // allocation, but it is the only portable way to get aligned heap memory.
+  const size_t extra = alignment - 1;
+  if ET_UNLIKELY (extra >= SIZE_MAX - size) {
+    ET_LOG(Error, "Malloc size overflow: size=%zu + extra=%zu", size, extra);
+    return Result<size_t>(Error::InvalidArgument);
+  }
+  return size + extra;
+}
+
+} // namespace executorch::extension::utils
@@ -11,6 +11,25 @@ def define_common_targets():
         name = "malloc_memory_allocator",
         exported_headers = [
             "malloc_memory_allocator.h",
+            "memory_allocator_utils.h",
+        ],
+        exported_deps = [
+            "//executorch/runtime/core:memory_allocator",
+        ],
+        visibility = [
+            "//executorch/extension/memory_allocator/test/...",
+            "@EXECUTORCH_CLIENTS",
+        ],
+    )
+
+    runtime.cxx_library(
+        name = "cpu_caching_allocator",
+        srcs = [
+            "cpu_caching_malloc_allocator.cpp",
+        ],
+        exported_headers = [
+            "cpu_caching_malloc_allocator.h",
+            "memory_allocator_utils.h",
         ],
         exported_deps = [
             "//executorch/runtime/core:memory_allocator",