From d8e6d136a953f183551ac8d9b82ca8d15cdac366 Mon Sep 17 00:00:00 2001 From: Manuel Candales Date: Tue, 21 Oct 2025 21:15:30 -0700 Subject: [PATCH 1/3] Update [ghstack-poisoned] --- backends/apple/metal/metal_backend.py | 38 ++++++++++++++++--- .../apple/metal/runtime/metal_backend.cpp | 26 +++++++++++++ 2 files changed, 59 insertions(+), 5 deletions(-) diff --git a/backends/apple/metal/metal_backend.py b/backends/apple/metal/metal_backend.py index 13a3534004b..ffc1e6e6d7d 100644 --- a/backends/apple/metal/metal_backend.py +++ b/backends/apple/metal/metal_backend.py @@ -108,8 +108,11 @@ def preprocess( options: dict[str, typing.Any] = { # Do not link against the full PyTorch/libtorch library "aot_inductor.link_libtorch": False, - # Package model constants and other generated files directly in the shared object (.so) file - "aot_inductor.package_constants_in_so": True, + # Separate weight constants from the .so file + "aot_inductor.package": True, + "aot_inductor.package_constants_in_so": False, + # Store weight constants on disk in a binary blob + "aot_inductor.package_constants_on_disk_format": "binary_blob", # Enable maximum automatic tuning for optimal performance "max_autotune": True, # "aot_inductor.debug_compile": True, @@ -117,7 +120,7 @@ def preprocess( } with collect_unsupported_fallback_kernels(): - so_path = torch._inductor.aot_compile(edge_program_module, tuple(user_input_placeholders), options=options) # type: ignore[arg-type] + paths = torch._inductor.aot_compile(edge_program_module, tuple(user_input_placeholders), options=options) # type: ignore[arg-type] if len(missing_fallback_kernels) > 0: formatted_kernels = "\n - ".join(sorted(missing_fallback_kernels)) raise RuntimeError( @@ -125,17 +128,42 @@ def preprocess( "Please add them to the AOTI backend." ) + # Extract the .so and .blob paths from the returned list + so_path = None + blob_path = None + for path in paths: + if path.endswith(".wrapper.so"): + so_path = path + elif path.endswith(".wrapper_weights.blob"): + blob_path = path + + if so_path is None or blob_path is None: + raise RuntimeError( + f"Could not find required files in compiled paths, got {paths}" + ) + # pyre-ignorep[6]: Incompatible parameter type with open(so_path, "rb") as f: so_data = f.read() named_data_store = NamedDataStore() method_name = MetalBackend.method_name_from_compile_specs(compile_specs) + + # Keep the so file in the NamedDataStore, so that it can be packaged into the .pte file. 
+ named_data_store.add_named_data(method_name + "_so_blob", so_data, 1, None) + + # Add weights blob to named data store + with open(blob_path, "rb") as f: + blob_data = f.read() + named_data_store.add_named_data( - method_name + "_so_blob", so_data, 1, "aoti_metal_blob" + method_name + "_weights_blob", blob_data, 1, "aoti_metal_blob" ) - # Clean up the generated so file; it has been packaged into the NamdeDataStore + # Clean up the weights blob file + os.remove(blob_path) + + # Clean up the generated so file; it has been packaged into the NamedDataStore # pyre-ignorep[6]: Incompatible parameter type os.remove(so_path) diff --git a/backends/apple/metal/runtime/metal_backend.cpp b/backends/apple/metal/runtime/metal_backend.cpp index 97b273d428f..f79a2a67b6f 100644 --- a/backends/apple/metal/runtime/metal_backend.cpp +++ b/backends/apple/metal/runtime/metal_backend.cpp @@ -106,6 +106,15 @@ class ET_EXPERIMENTAL MetalBackend final Debug, "MetalBackend::load_function_pointers_into_handle - Loaded AOTInductorModelContainerRun"); + LOAD_SYMBOL( + handle, + update_constants_from_blob, + AOTInductorModelUpdateConstantsFromBlob, + so_handle); + ET_LOG( + Debug, + "MetalBackend::load_function_pointers_into_handle - Loaded AOTInductorModelUpdateConstantsFromBlob"); + ET_LOG( Debug, "MetalBackend::load_function_pointers_into_handle - All symbols loaded successfully"); @@ -203,6 +212,9 @@ class ET_EXPERIMENTAL MetalBackend final outfile.close(); ET_LOG(Info, "MetalBackend::init - File closed successfully"); + // Free the buffer immediately after writing to disk + aoti_metal_buffer->Free(); + // Load the ELF using dlopen void* so_handle = dlopen(so_path.c_str(), RTLD_LAZY | RTLD_LOCAL); ET_CHECK_OR_RETURN_ERROR( @@ -234,6 +246,20 @@ class ET_EXPERIMENTAL MetalBackend final handle->container_handle = container_handle; + // Look into named data map for constant data + std::string weights_blob_key = + method_name.empty() ? "weights_blob" : method_name + "_weights_blob"; + auto buffer_res = named_data_map->get_data(weights_blob_key.c_str()); + if (buffer_res.ok() && handle->update_constants_from_blob != nullptr) { + ET_LOG(Info, "Found %s in named data map", weights_blob_key.c_str()); + const void* weights_blob = buffer_res->data(); + // Feed the weights blob into the container. Under the hood it's copying + // weights, so we should free the buffer immediately. 
+ ET_CHECK_OK_OR_RETURN_ERROR(handle->update_constants_from_blob( + handle->container_handle, static_cast(weights_blob))); + buffer_res->Free(); + } + ET_LOG(Info, "MetalBackend::init - Initialization completed successfully"); return (DelegateHandle*)handle; // Return the handle post-processing } From 00ba52242cdd965c2b263862be67dd72f32a292a Mon Sep 17 00:00:00 2001 From: Manuel Candales Date: Tue, 21 Oct 2025 21:15:34 -0700 Subject: [PATCH 2/3] Update [ghstack-poisoned] --- backends/apple/metal/runtime/shims/et_metal.h | 1 + .../apple/metal/runtime/shims/et_metal.mm | 15 ++ .../apple/metal/runtime/shims/et_metal_ops.mm | 16 +- backends/apple/metal/runtime/shims/memory.cpp | 230 ++++++++++++------ backends/apple/metal/runtime/shims/memory.h | 2 +- 5 files changed, 181 insertions(+), 83 deletions(-) diff --git a/backends/apple/metal/runtime/shims/et_metal.h b/backends/apple/metal/runtime/shims/et_metal.h index 75f79e5139c..0e012d18c8f 100644 --- a/backends/apple/metal/runtime/shims/et_metal.h +++ b/backends/apple/metal/runtime/shims/et_metal.h @@ -354,6 +354,7 @@ extern "C" { // Memory management functions for Metal void* metal_allocate_buffer(long bytes); +void metal_deallocate_buffer(void* ptr); bool metal_is_device_pointer(void* ptr); int metal_copy_memory( void* dst, diff --git a/backends/apple/metal/runtime/shims/et_metal.mm b/backends/apple/metal/runtime/shims/et_metal.mm index fdca0a28cf3..cae8f96c0d2 100644 --- a/backends/apple/metal/runtime/shims/et_metal.mm +++ b/backends/apple/metal/runtime/shims/et_metal.mm @@ -86,6 +86,21 @@ void dispatch_sync_with_rethrow(dispatch_queue_t queue, void (^block)()) { } } +void metal_deallocate_buffer(void* ptr) { + @autoreleasepool { + auto it = ptr_to_mtl_buffer.find(ptr); + if (it != ptr_to_mtl_buffer.end()) { + id buffer = it->second; + [buffer release]; + ptr_to_mtl_buffer.erase(it); + ET_LOG(Debug, "Deallocated Metal buffer for pointer %p", ptr); + ptr = nullptr; + } else { + ET_LOG(Error, "Failed to find Metal buffer for pointer %p", ptr); + } + } +} + void metal_cleanup_resources() { if (!ptr_to_mtl_buffer.empty()) { @autoreleasepool { diff --git a/backends/apple/metal/runtime/shims/et_metal_ops.mm b/backends/apple/metal/runtime/shims/et_metal_ops.mm index 0aa90650a1d..94bc3219306 100644 --- a/backends/apple/metal/runtime/shims/et_metal_ops.mm +++ b/backends/apple/metal/runtime/shims/et_metal_ops.mm @@ -736,9 +736,12 @@ AOTITorchError aoti_torch_mps_convolution( throw std::runtime_error("Tensor size mismatch"); } - // Store the tensor handle - mark that we own the memory since we manually allocated it with malloc + // Store the tensor handle - mark that we own the memory since we manually allocated it *ret0 = output_tensor_handle; - is_tensor_own_memory[et_tensor] = true; // We allocated the GPU memory + // Note: memory_to_n_tensor is managed automatically in aoti_torch_create_tensor_from_blob_v2 + // The function sets it to NOT_OWN, but we need to change it to 1 since we allocated it + extern std::unordered_map memory_to_n_tensor; + memory_to_n_tensor[tensor_data] = 1; ET_LOG(Debug, "aoti_torch_mps_convolution: Created output tensor with %zu elements using MPSGraph", actual_numel); @@ -1327,10 +1330,11 @@ AOTITorchError aoti_torch_mps__scaled_dot_product_attention_math_for_mps( } // Mark that we own the memory for these tensors - auto* out_et_tensor = reinterpret_cast(out_tensor_handle); - auto* attn_et_tensor = reinterpret_cast(attn_tensor_handle); - is_tensor_own_memory[out_et_tensor] = true; - is_tensor_own_memory[attn_et_tensor] = 
true;
+  // Note: memory_to_n_tensor is managed automatically in aoti_torch_create_tensor_from_blob_v2
+  // The function sets it to NOT_OWN, but we need to change it to 1 since we allocated it
+  extern std::unordered_map<void*, int32_t> memory_to_n_tensor;
+  memory_to_n_tensor[out_contents_ptr] = 1;
+  memory_to_n_tensor[attn_contents_ptr] = 1;
 
   // Set output tensor handles
   *ret0 = out_tensor_handle;
diff --git a/backends/apple/metal/runtime/shims/memory.cpp b/backends/apple/metal/runtime/shims/memory.cpp
index 83250f308bb..50f8d4e1cd0 100644
--- a/backends/apple/metal/runtime/shims/memory.cpp
+++ b/backends/apple/metal/runtime/shims/memory.cpp
@@ -31,7 +31,12 @@ using namespace executorch::backends::aoti;
 
 // Global storage for tensors and their metadata
 std::unordered_set<std::shared_ptr<Tensor>> tensors;
-std::unordered_map<Tensor*, bool> is_tensor_own_memory;
+
+// Reference counting for memory addresses
+// Maps memory address to number of tensors using it
+// Special value: NOT_OWN (-1) means tensor never owns the memory
+constexpr int32_t NOT_OWN = -1;
+std::unordered_map<void*, int32_t> memory_to_n_tensor;
 
 extern "C" {
 
@@ -110,7 +115,18 @@ AOTITorchError aoti_torch_create_tensor_from_blob_v2(
   // Store the tensor so it doesn't get destroyed
   tensors.insert(tensor);
   *ret_new_tensor = tensor.get();
-  is_tensor_own_memory[tensor.get()] = false;
+
+  // Check if this memory address is already being tracked
+  auto memory_it = memory_to_n_tensor.find(adjusted_data);
+  ET_CHECK_OR_RETURN_ERROR(
+      memory_it == memory_to_n_tensor.end(),
+      InvalidArgument,
+      "Memory address %p is already being tracked by another tensor",
+      adjusted_data);
+
+  // Mark this memory as NOT_OWN since tensor created from blob never owns
+  // memory
+  memory_to_n_tensor[adjusted_data] = NOT_OWN;
 
   ET_LOG(Debug, "aoti_torch_create_tensor_from_blob_v2: successfull");
   return Error::Ok;
@@ -192,7 +208,9 @@ AOTITorchError aoti_torch_empty_strided(
   // Store the tensor so it doesn't get destroyed
   tensors.insert(tensor);
   *ret_new_tensor = tensor.get();
-  is_tensor_own_memory[tensor.get()] = true;
+
+  // This tensor owns the memory it allocated, set reference count to 1
+  memory_to_n_tensor[ptr] = 1;
 
   ET_LOG(Debug, "aoti_torch_empty_strided: successfull");
   return Error::Ok;
@@ -200,51 +218,81 @@ AOTITorchError aoti_torch_empty_strided(
 
 AOTITorchError aoti_torch_delete_tensor_object(AOTITensorHandle tensor) {
   ET_LOG(Debug, "aoti_torch_delete_tensor_object: entered");
-  // Find tensor in the set
+
+  // Handle null tensor pointer
+  if (tensor == nullptr) {
+    ET_LOG(Debug, "aoti_torch_delete_tensor_object: null tensor");
+    return Error::Ok;
+  }
+
+  // Check if tensor exists in our tracking
+  bool found_in_tensors = false;
   for (auto it = tensors.begin(); it != tensors.end(); ++it) {
     if (it->get() == tensor) {
-      auto tensor_ptr = *it;
+      found_in_tensors = true;
+      break;
+    }
+  }
 
-      // Check ownership before cleaning up
-      auto ownership_it = is_tensor_own_memory.find(tensor);
-      bool owns_memory = (ownership_it != is_tensor_own_memory.end())
-          ?
ownership_it->second - : false; + // If tensor not found in our tracking, it's invalid + ET_CHECK_OR_RETURN_ERROR( + found_in_tensors, InvalidArgument, "Didn't find tensor %p", tensor); - // Clean up ownership metadata - is_tensor_own_memory.erase(tensor); + // Find and delete the tensor + for (auto it = tensors.begin(); it != tensors.end(); ++it) { + if (it->get() == tensor) { + // Get the tensor before erasing + auto tensor_ptr = *it; + void* data_ptr = tensor_ptr->mutable_data_ptr(); - if (owns_memory) { - // et tensor owns the memory; need to free it manually - void* data_ptr = tensor_ptr->mutable_data_ptr(); + // Find the reference count for this memory address + auto memory_it = memory_to_n_tensor.find(data_ptr); + if (memory_it != memory_to_n_tensor.end()) { + int32_t ref_count = memory_it->second; - // Check if it's Metal GPU memory - if (metal_is_device_pointer(data_ptr)) { - // This is Metal GPU memory - the Metal helper will handle cleanup - // Metal buffers are automatically managed by ARC when the buffer is - // released + if (ref_count == NOT_OWN) { + // Tensor never owned the memory, skip freeing + // Just remove tensor from tracking tensors.erase(it); - ET_LOG( - Debug, - "aoti_torch_delete_tensor_object: successfull (Metal GPU memory)"); + ET_LOG(Debug, "aoti_torch_delete_tensor_object: tensor doesn't own memory, skipping free"); return Error::Ok; + } else if (ref_count == 1) { + // Only current tensor using this memory, free it + // Check if it's Metal GPU memory + if (metal_is_device_pointer(data_ptr)) { + metal_deallocate_buffer(data_ptr); + } else { + // This is CPU memory - free immediately + free(data_ptr); + data_ptr = nullptr; + ET_LOG(Debug, "aoti_torch_delete_tensor_object: freeing CPU memory"); + } + + // Remove from memory tracking + memory_to_n_tensor.erase(memory_it); + } else if (ref_count > 1) { + // Other tensors still using this memory, just decrement count + memory_to_n_tensor[data_ptr] = ref_count - 1; + ET_LOG(Debug, "aoti_torch_delete_tensor_object: decremented ref count from %d to %d", ref_count, ref_count - 1); } - - // This is CPU memory - free immediately - free(data_ptr); + } else { + ET_CHECK_OR_RETURN_ERROR( + false, + Internal, + "Internal error: memory not found during deletion"); } - // else: Don't free memory since the tensor doesn't own it - // Remove from set (this will call the destructor if it's the last + // Remove tensor from set (this will call the destructor if it's the last // reference) tensors.erase(it); - ET_LOG( - Debug, "aoti_torch_delete_tensor_object: successfull (CPU memory)"); + ET_LOG(Debug, "aoti_torch_delete_tensor_object: successfull"); return Error::Ok; } } - ET_LOG(Error, "Didn't find tensor %p", tensor); - return Error::InvalidArgument; + + // This should never be reached since we found it above + ET_CHECK_OR_RETURN_ERROR( + false, Internal, "Internal error: tensor not found after validation"); } AOTITorchError aoti_torch_copy_( @@ -375,6 +423,24 @@ AOTITorchError aoti_torch__reinterpret_tensor( InvalidArgument, "aoti_torch__reinterpret_tensor failed: ret_new_tensor is null"); + // Check if storage_offset is not 0 - return error if not + ET_CHECK_OK_OR_RETURN_ERROR(validate_storage_offset(storage_offset)); + + // Get the device info from the source tensor to perform device_index + // validation + int32_t device_type = 0; + int32_t device_index = 0; + ET_CHECK_OK_OR_RETURN_ERROR(aoti_torch_get_device_type(self, &device_type)); + + ET_CHECK_OK_OR_RETURN_ERROR(aoti_torch_get_device_index(self, &device_index)); + + // 
Ensure device_index is always 0
+  ET_CHECK_OR_RETURN_ERROR(
+      device_index == 0,
+      InvalidArgument,
+      "device_index must be 0, got: %d",
+      device_index);
+
   // Get the dtype from the source tensor
   int32_t dtype = 0;
   ET_CHECK_OK_OR_RETURN_ERROR(aoti_torch_get_dtype(self, &dtype));
@@ -382,54 +448,52 @@ AOTITorchError aoti_torch__reinterpret_tensor(
   // Validate dtype using SupportedDTypes
   ET_CHECK_OK_OR_RETURN_ERROR(validate_dtype(dtype));
 
-  int32_t device_type = 0;
-  ET_CHECK_OK_OR_RETURN_ERROR(aoti_torch_get_device_type(self, &device_type));
+  // Get the original data pointer from the source tensor
+  void* data_ptr = self->mutable_data_ptr();
+  ET_CHECK_OR_RETURN_ERROR(
+      data_ptr != nullptr,
+      InvalidArgument,
+      "Source tensor has null data pointer");
 
-  int32_t device_index = 0;
-  ET_CHECK_OK_OR_RETURN_ERROR(aoti_torch_get_device_index(self, &device_index));
+  // Check if the given memory is in the map, if not return error
+  auto memory_it = memory_to_n_tensor.find(data_ptr);
+  ET_CHECK_OR_RETURN_ERROR(
+      memory_it != memory_to_n_tensor.end(),
+      InvalidArgument,
+      "Memory address %p is not being tracked by reference counting system",
+      data_ptr);
+
+  // Convert sizes using utility function from utils.h
+  std::vector<SizesType> sizes = convert_sizes_to_vector(ndim, sizes_ptr);
+
+  // Convert strides using utility function from utils.h
+  std::vector<StridesType> strides =
+      convert_strides_to_vector(ndim, sizes_ptr, strides_ptr);
+
+  // Create new tensor view that reinterprets the same memory with different
+  // shape/strides This creates a view, not a copy - the data pointer is shared
+  std::shared_ptr<Tensor> tensor = executorch::extension::from_blob(
+      data_ptr, // Reuse the same memory from source tensor
+      sizes, // New sizes with explicit SizesType
+      strides, // New strides with explicit StridesType
+      dtype_to_scalar_type(dtype) // Convert dtype with explicit type casting
+  );
 
-  // Get the base data pointer from the source tensor
-  void* base_data_ptr = self->mutable_data_ptr();
   ET_CHECK_OR_RETURN_ERROR(
-      base_data_ptr != nullptr,
+      tensor != nullptr,
       InvalidArgument,
-      "Source tensor has null data pointer");
+      "Failed to create reinterpreted tensor view");
 
-  // Calculate new tensor size in elements for logging
-  int64_t new_numel = 1;
-  for (int64_t i = 0; i < ndim; i++) {
-    new_numel *= sizes_ptr[i];
-  }
+  // Store the tensor so it doesn't get destroyed
+  tensors.insert(tensor);
 
-  ET_LOG(
-      Debug,
-      "aoti_torch__reinterpret_tensor: base_data_ptr=%p, new_numel=%lld, storage_offset=%lld",
-      base_data_ptr,
-      new_numel,
-      storage_offset);
-
-  // Create a new tensor view that shares the same underlying storage
-  // This is the correct way to implement reinterpret_tensor - as a view, not a
-  // copy
-  AOTITorchError create_err = aoti_torch_create_tensor_from_blob_v2(
-      base_data_ptr, // Same underlying data pointer
-      ndim, // New dimensions
-      sizes_ptr, // New sizes
-      strides_ptr, // New strides
-      storage_offset, // Storage offset (will be handled properly now)
-      dtype,
-      device_type,
-      device_index,
-      ret_new_tensor,
-      0, // layout (default)
-      nullptr, // opaque_metadata
-      0 // opaque_metadata_size
-  );
+  *ret_new_tensor = tensor.get();
 
-  if (create_err != Error::Ok) {
-    ET_LOG(Error, "failed to create reinterpreted tensor view");
-    return create_err;
-  }
+  // Increment the reference count for this memory address only if it is owned
+  // by tensor
+  memory_to_n_tensor[data_ptr] = memory_to_n_tensor[data_ptr] == NOT_OWN
+      ? NOT_OWN
+      : memory_to_n_tensor[data_ptr] + 1;
 
   ET_LOG(Debug, "aoti_torch__reinterpret_tensor: successfull");
   return Error::Ok;
@@ -437,13 +501,27 @@ AOTITorchError aoti_torch__reinterpret_tensor(
 
 // Cleanup function for clearing global state
 void cleanup_memory() {
-  is_tensor_own_memory.clear();
-  if (!tensors.empty()) {
-    ET_LOG(Error, "Warning: tensors not empty during cleanup");
+  // Use aoti_torch_delete_tensor_object to properly delete each tensor
+  // Note: We need to collect tensor pointers first since deletion modifies the
+  // set
+  std::vector<Tensor*> tensor_ptrs;
+  tensor_ptrs.reserve(tensors.size());
+  for (const auto& tensor_shared : tensors) {
+    tensor_ptrs.push_back(tensor_shared.get());
+  }
+
+  // Now delete each tensor - this will modify the global tensors set
+  for (Tensor* tensor_ptr : tensor_ptrs) {
+    aoti_torch_delete_tensor_object(tensor_ptr);
   }
 
+  // tensors set should now be empty, but ensure it's cleared
+  tensors.clear();
+
   // Clean up Metal resources
   metal_cleanup_resources();
+
+  ET_LOG(Info, "Cleared all tensors and Metal resources");
 }
 
 } // extern "C"
diff --git a/backends/apple/metal/runtime/shims/memory.h b/backends/apple/metal/runtime/shims/memory.h
index 47fb6352b50..5f48fd921c6 100644
--- a/backends/apple/metal/runtime/shims/memory.h
+++ b/backends/apple/metal/runtime/shims/memory.h
@@ -22,7 +22,7 @@ namespace metal {
 extern "C" {
 
 // Global storage declarations
-extern std::unordered_map<Tensor*, bool> is_tensor_own_memory;
+extern std::unordered_map<void*, int32_t> memory_to_n_tensor;
 extern std::unordered_set<std::shared_ptr<Tensor>> tensors;
 
 // Memory-related operations

From 5bd34ee8ed29147ae2171071eb1f00109144e4cd Mon Sep 17 00:00:00 2001
From: Manuel Candales
Date: Tue, 21 Oct 2025 21:19:21 -0700
Subject: [PATCH 3/3] Update

[ghstack-poisoned]
---
 backends/apple/metal/runtime/shims/memory.cpp | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/backends/apple/metal/runtime/shims/memory.cpp b/backends/apple/metal/runtime/shims/memory.cpp
index 50f8d4e1cd0..b5d2d3161ae 100644
--- a/backends/apple/metal/runtime/shims/memory.cpp
+++ b/backends/apple/metal/runtime/shims/memory.cpp
@@ -254,7 +254,9 @@ AOTITorchError aoti_torch_delete_tensor_object(AOTITensorHandle tensor) {
           // Tensor never owned the memory, skip freeing
           // Just remove tensor from tracking
           tensors.erase(it);
-          ET_LOG(Debug, "aoti_torch_delete_tensor_object: tensor doesn't own memory, skipping free");
+          ET_LOG(
+              Debug,
+              "aoti_torch_delete_tensor_object: tensor doesn't own memory, skipping free");
           return Error::Ok;
         } else if (ref_count == 1) {
           // Only current tensor using this memory, free it
@@ -265,7 +267,8 @@ AOTITorchError aoti_torch_delete_tensor_object(AOTITensorHandle tensor) {
             // This is CPU memory - free immediately
            free(data_ptr);
            data_ptr = nullptr;
-            ET_LOG(Debug, "aoti_torch_delete_tensor_object: freeing CPU memory");
+            ET_LOG(
+                Debug, "aoti_torch_delete_tensor_object: freeing CPU memory");
          }
 
          // Remove from memory tracking
@@ -273,7 +276,11 @@ AOTITorchError aoti_torch_delete_tensor_object(AOTITensorHandle tensor) {
          memory_to_n_tensor.erase(memory_it);
        } else if (ref_count > 1) {
          // Other tensors still using this memory, just decrement count
          memory_to_n_tensor[data_ptr] = ref_count - 1;
-          ET_LOG(Debug, "aoti_torch_delete_tensor_object: decremented ref count from %d to %d", ref_count, ref_count - 1);
+          ET_LOG(
+              Debug,
+              "aoti_torch_delete_tensor_object: decremented ref count from %d to %d",
+              ref_count,
+              ref_count - 1);
        } else {
          ET_CHECK_OR_RETURN_ERROR(