diff --git a/.ci/docker/ci_commit_pins/pytorch.txt b/.ci/docker/ci_commit_pins/pytorch.txt
index b291722c3f0..14422e45d7c 100644
--- a/.ci/docker/ci_commit_pins/pytorch.txt
+++ b/.ci/docker/ci_commit_pins/pytorch.txt
@@ -1 +1 @@
-00e3eea170ce5db8ea9c62ce5e48f13886cd6d20
+e4cd76cf8283c8ddbf95674b020fbfcff467cb4b
diff --git a/backends/vulkan/runtime/api/containers/StagingBuffer.h b/backends/vulkan/runtime/api/containers/StagingBuffer.h
index ab650c09a43..a24728470b0 100644
--- a/backends/vulkan/runtime/api/containers/StagingBuffer.h
+++ b/backends/vulkan/runtime/api/containers/StagingBuffer.h
@@ -29,15 +29,13 @@ class StagingBuffer final {
   StagingBuffer(
       Context* context_p,
       const vkapi::ScalarType dtype,
-      const size_t numel,
-      const bool gpuonly = false)
+      const size_t numel)
       : context_p_(context_p),
         dtype_(dtype),
         numel_(numel),
         nbytes_(element_size(dtype_) * numel_),
-        vulkan_buffer_(context_p_->adapter_ptr()->vma().create_storage_buffer(
-            nbytes_,
-            gpuonly)) {}
+        vulkan_buffer_(
+            context_p_->adapter_ptr()->vma().create_staging_buffer(nbytes_)) {}
 
   StagingBuffer(const StagingBuffer&) = delete;
   StagingBuffer& operator=(const StagingBuffer&) = delete;
diff --git a/backends/vulkan/runtime/api/containers/Tensor.cpp b/backends/vulkan/runtime/api/containers/Tensor.cpp
index 578898ad194..7b9d30ef658 100644
--- a/backends/vulkan/runtime/api/containers/Tensor.cpp
+++ b/backends/vulkan/runtime/api/containers/Tensor.cpp
@@ -540,7 +540,7 @@ vkapi::VulkanBuffer allocate_buffer(
   }
 
   return adapter_ptr->vma().create_storage_buffer(
-      element_size(dtype) * numel, /*gpu_only = */ true, allocate_memory);
+      element_size(dtype) * numel, allocate_memory);
 }
 
 vTensorStorage::vTensorStorage(
diff --git a/backends/vulkan/runtime/vk_api/memory/Allocator.cpp b/backends/vulkan/runtime/vk_api/memory/Allocator.cpp
index f7428f12b67..b990cf6a119 100644
--- a/backends/vulkan/runtime/vk_api/memory/Allocator.cpp
+++ b/backends/vulkan/runtime/vk_api/memory/Allocator.cpp
@@ -132,9 +132,27 @@ VulkanImage Allocator::create_image(
       allocate_memory);
 }
 
+// Create a storage buffer for CPU<->GPU staging; its memory must be
+// host-visible, and host-coherent + host-cached memory is preferred.
+VulkanBuffer Allocator::create_staging_buffer(const VkDeviceSize size) {
+  const VkBufferUsageFlags buffer_usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
+
+  // Staging buffers are accessed by both the CPU and GPU, so set the
+  // appropriate flags to indicate that the host device will be accessing
+  // the data from this buffer.
+  VmaAllocationCreateInfo alloc_create_info = {};
+  alloc_create_info.flags = DEFAULT_ALLOCATION_STRATEGY;
+  alloc_create_info.flags |= VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT;
+  alloc_create_info.usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST;
+  alloc_create_info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
+  alloc_create_info.preferredFlags =
+      VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
+
+  return VulkanBuffer(allocator_, size, alloc_create_info, buffer_usage);
+}
+
 VulkanBuffer Allocator::create_storage_buffer(
     const VkDeviceSize size,
-    const bool gpu_only,
     const bool allocate_memory) {
   const VkBufferUsageFlags buffer_usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
 
@@ -142,22 +160,6 @@ VulkanBuffer Allocator::create_storage_buffer(
   alloc_create_info.flags = DEFAULT_ALLOCATION_STRATEGY;
   alloc_create_info.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE;
 
-  // The create storage buffer will be accessed by both the CPU and GPU, so set
-  // the appropriate flags to indicate that the host device will be accessing
-  // the data from this buffer.
-  if (!gpu_only) {
-    // Deferred memory allocation should only be used for GPU only buffers.
-    VK_CHECK_COND(
-        allocate_memory,
-        "Only GPU-only buffers should use deferred memory allocation");
-
-    alloc_create_info.flags |= VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT;
-    alloc_create_info.usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST;
-    alloc_create_info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
-    alloc_create_info.preferredFlags = VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
-        VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
-  }
-
   return VulkanBuffer(
       allocator_, size, alloc_create_info, buffer_usage, allocate_memory);
 }
@@ -170,9 +172,7 @@ VulkanBuffer Allocator::create_uniform_buffer(const VkDeviceSize size) {
 
   VkBufferUsageFlags buffer_usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
 
-  VulkanBuffer uniform_buffer(
-      allocator_, size, alloc_create_info, buffer_usage);
-  return uniform_buffer;
+  return VulkanBuffer(allocator_, size, alloc_create_info, buffer_usage);
 }
 
 } // namespace vkapi
diff --git a/backends/vulkan/runtime/vk_api/memory/Allocator.h b/backends/vulkan/runtime/vk_api/memory/Allocator.h
index 6d8ee09ae5d..7d02ffe54e3 100644
--- a/backends/vulkan/runtime/vk_api/memory/Allocator.h
+++ b/backends/vulkan/runtime/vk_api/memory/Allocator.h
@@ -62,9 +62,10 @@ class Allocator final {
       const bool allow_transfer = false,
       const bool allocate_memory = true);
 
+  VulkanBuffer create_staging_buffer(const VkDeviceSize);
+
   VulkanBuffer create_storage_buffer(
       const VkDeviceSize,
-      const bool gpu_only = true,
       const bool allocate_memory = true);
 
   /*
diff --git a/install_requirements.py b/install_requirements.py
index 64243ec6943..1f5982c80e0 100644
--- a/install_requirements.py
+++ b/install_requirements.py
@@ -94,7 +94,7 @@ def python_is_compatible():
 # NOTE: If a newly-fetched version of the executorch repo changes the value of
 # NIGHTLY_VERSION, you should re-run this script to install the necessary
 # package versions.
-NIGHTLY_VERSION = "dev20240901"
+NIGHTLY_VERSION = "dev20240829"
 
 # The pip repository that hosts nightly torch packages.
 TORCH_NIGHTLY_URL = "https://download.pytorch.org/whl/nightly/cpu"