diff --git a/compiler-rt/lib/scudo/standalone/CMakeLists.txt b/compiler-rt/lib/scudo/standalone/CMakeLists.txt
index 2b7a613066b83..c4d3ea1e4f05b 100644
--- a/compiler-rt/lib/scudo/standalone/CMakeLists.txt
+++ b/compiler-rt/lib/scudo/standalone/CMakeLists.txt
@@ -56,6 +56,7 @@ if(ANDROID)
 endif()
 
 set(SCUDO_HEADERS
+  allocator_common.h
   allocator_config.h
   atomic_helpers.h
   bytemap.h
diff --git a/compiler-rt/lib/scudo/standalone/allocator_common.h b/compiler-rt/lib/scudo/standalone/allocator_common.h
new file mode 100644
index 0000000000000..95f4776ac596d
--- /dev/null
+++ b/compiler-rt/lib/scudo/standalone/allocator_common.h
@@ -0,0 +1,85 @@
+//===-- allocator_common.h --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SCUDO_ALLOCATOR_COMMON_H_
+#define SCUDO_ALLOCATOR_COMMON_H_
+
+#include "common.h"
+#include "list.h"
+
+namespace scudo {
+
+template <class SizeClassAllocator> struct TransferBatch {
+  typedef typename SizeClassAllocator::SizeClassMap SizeClassMap;
+  typedef typename SizeClassAllocator::CompactPtrT CompactPtrT;
+
+  static const u16 MaxNumCached = SizeClassMap::MaxNumCachedHint;
+  void setFromArray(CompactPtrT *Array, u16 N) {
+    DCHECK_LE(N, MaxNumCached);
+    Count = N;
+    memcpy(Batch, Array, sizeof(Batch[0]) * Count);
+  }
+  void appendFromArray(CompactPtrT *Array, u16 N) {
+    DCHECK_LE(N, MaxNumCached - Count);
+    memcpy(Batch + Count, Array, sizeof(Batch[0]) * N);
+    // u16 will be promoted to int by arithmetic type conversion.
+    Count = static_cast<u16>(Count + N);
+  }
+  void appendFromTransferBatch(TransferBatch *B, u16 N) {
+    DCHECK_LE(N, MaxNumCached - Count);
+    DCHECK_GE(B->Count, N);
+    // Append from the back of `B`.
+    memcpy(Batch + Count, B->Batch + (B->Count - N), sizeof(Batch[0]) * N);
+    // u16 will be promoted to int by arithmetic type conversion.
+    Count = static_cast<u16>(Count + N);
+    B->Count = static_cast<u16>(B->Count - N);
+  }
+  void clear() { Count = 0; }
+  void add(CompactPtrT P) {
+    DCHECK_LT(Count, MaxNumCached);
+    Batch[Count++] = P;
+  }
+  void moveToArray(CompactPtrT *Array) {
+    memcpy(Array, Batch, sizeof(Batch[0]) * Count);
+    clear();
+  }
+  u16 getCount() const { return Count; }
+  bool isEmpty() const { return Count == 0U; }
+  CompactPtrT get(u16 I) const {
+    DCHECK_LE(I, Count);
+    return Batch[I];
+  }
+  TransferBatch *Next;
+
+private:
+  CompactPtrT Batch[MaxNumCached];
+  u16 Count;
+};
+
+// A BatchGroup is used to collect blocks. Each group has a group id to
+// identify the group kind of contained blocks.
+template <class SizeClassAllocator> struct BatchGroup {
+  // `Next` is used by IntrusiveList.
+  BatchGroup *Next;
+  // The compact base address of each group
+  uptr CompactPtrGroupBase;
+  // Cache value of SizeClassAllocatorLocalCache::getMaxCached()
+  u16 MaxCachedPerBatch;
+  // Number of blocks pushed into this group. This is an increment-only
+  // counter.
+  uptr PushedBlocks;
+  // This is used to track how many bytes are not in-use since last time we
+  // tried to release pages.
+  uptr BytesInBGAtLastCheckpoint;
+  // Blocks are managed by TransferBatch in a list.
+  SinglyLinkedList<TransferBatch<SizeClassAllocator>> Batches;
+};
+
+} // namespace scudo
+
+#endif // SCUDO_ALLOCATOR_COMMON_H_
diff --git a/compiler-rt/lib/scudo/standalone/local_cache.h b/compiler-rt/lib/scudo/standalone/local_cache.h
index 1095eb5f186d1..8cb02abd16fd7 100644
--- a/compiler-rt/lib/scudo/standalone/local_cache.h
+++ b/compiler-rt/lib/scudo/standalone/local_cache.h
@@ -22,74 +22,6 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache {
   typedef typename SizeClassAllocator::SizeClassMap SizeClassMap;
   typedef typename SizeClassAllocator::CompactPtrT CompactPtrT;
 
-  struct TransferBatch {
-    static const u16 MaxNumCached = SizeClassMap::MaxNumCachedHint;
-    void setFromArray(CompactPtrT *Array, u16 N) {
-      DCHECK_LE(N, MaxNumCached);
-      Count = N;
-      memcpy(Batch, Array, sizeof(Batch[0]) * Count);
-    }
-    void appendFromArray(CompactPtrT *Array, u16 N) {
-      DCHECK_LE(N, MaxNumCached - Count);
-      memcpy(Batch + Count, Array, sizeof(Batch[0]) * N);
-      // u16 will be promoted to int by arithmetic type conversion.
-      Count = static_cast<u16>(Count + N);
-    }
-    void appendFromTransferBatch(TransferBatch *B, u16 N) {
-      DCHECK_LE(N, MaxNumCached - Count);
-      DCHECK_GE(B->Count, N);
-      // Append from the back of `B`.
-      memcpy(Batch + Count, B->Batch + (B->Count - N), sizeof(Batch[0]) * N);
-      // u16 will be promoted to int by arithmetic type conversion.
-      Count = static_cast<u16>(Count + N);
-      B->Count = static_cast<u16>(B->Count - N);
-    }
-    void clear() { Count = 0; }
-    void add(CompactPtrT P) {
-      DCHECK_LT(Count, MaxNumCached);
-      Batch[Count++] = P;
-    }
-    void copyToArray(CompactPtrT *Array) const {
-      memcpy(Array, Batch, sizeof(Batch[0]) * Count);
-    }
-    u16 getCount() const { return Count; }
-    bool isEmpty() const { return Count == 0U; }
-    CompactPtrT get(u16 I) const {
-      DCHECK_LE(I, Count);
-      return Batch[I];
-    }
-    static u16 getMaxCached(uptr Size) {
-      return Min(MaxNumCached, SizeClassMap::getMaxCachedHint(Size));
-    }
-    TransferBatch *Next;
-
-  private:
-    CompactPtrT Batch[MaxNumCached];
-    u16 Count;
-  };
-
-  // A BatchGroup is used to collect blocks. Each group has a group id to
-  // identify the group kind of contained blocks.
-  struct BatchGroup {
-    // `Next` is used by IntrusiveList.
-    BatchGroup *Next;
-    // The compact base address of each group
-    uptr CompactPtrGroupBase;
-    // Cache value of TransferBatch::getMaxCached()
-    u16 MaxCachedPerBatch;
-    // Number of blocks pushed into this group. This is an increment-only
-    // counter.
-    uptr PushedBlocks;
-    // This is used to track how many bytes are not in-use since last time we
-    // tried to release pages.
-    uptr BytesInBGAtLastCheckpoint;
-    // Blocks are managed by TransferBatch in a list.
-    SinglyLinkedList<TransferBatch> Batches;
-  };
-
-  static_assert(sizeof(BatchGroup) <= sizeof(TransferBatch),
-                "BatchGroup uses the same class size as TransferBatch");
-
   void init(GlobalStats *S, SizeClassAllocator *A) {
     DCHECK(isEmpty());
     Stats.init();
@@ -151,7 +83,7 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache {
   }
 
   void drain() {
-    // Drain BatchClassId last as createBatch can refill it.
+    // Drain BatchClassId last as it may be needed while draining normal blocks.
     for (uptr I = 0; I < NumClasses; ++I) {
       if (I == BatchClassId)
         continue;
@@ -163,19 +95,11 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache {
     DCHECK(isEmpty());
   }
 
-  TransferBatch *createBatch(uptr ClassId, void *B) {
-    if (ClassId != BatchClassId)
-      B = allocate(BatchClassId);
+  void *getBatchClassBlock() {
+    void *B = allocate(BatchClassId);
     if (UNLIKELY(!B))
       reportOutOfMemory(SizeClassAllocator::getSizeByClassId(BatchClassId));
-    return reinterpret_cast<TransferBatch *>(B);
-  }
-
-  BatchGroup *createGroup() {
-    void *Ptr = allocate(BatchClassId);
-    if (UNLIKELY(!Ptr))
-      reportOutOfMemory(SizeClassAllocator::getSizeByClassId(BatchClassId));
-    return reinterpret_cast<BatchGroup *>(Ptr);
+    return B;
   }
 
   LocalStats &getStats() { return Stats; }
@@ -203,6 +127,11 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache {
       Str->append("    No block is cached.\n");
   }
 
+  static u16 getMaxCached(uptr Size) {
+    return Min(SizeClassMap::MaxNumCachedHint,
+               SizeClassMap::getMaxCachedHint(Size));
+  }
+
 private:
   static const uptr NumClasses = SizeClassMap::NumClasses;
   static const uptr BatchClassId = SizeClassMap::BatchClassId;
@@ -211,7 +140,7 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache {
     u16 MaxCount;
     // Note: ClassSize is zero for the transfer batch.
     uptr ClassSize;
-    CompactPtrT Chunks[2 * TransferBatch::MaxNumCached];
+    CompactPtrT Chunks[2 * SizeClassMap::MaxNumCachedHint];
   };
   PerClass PerClassArray[NumClasses] = {};
   LocalStats Stats;
@@ -228,7 +157,7 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache {
     for (uptr I = 0; I < NumClasses; I++) {
       PerClass *P = &PerClassArray[I];
       const uptr Size = SizeClassAllocator::getSizeByClassId(I);
-      P->MaxCount = static_cast<u16>(2 * TransferBatch::getMaxCached(Size));
+      P->MaxCount = static_cast<u16>(2 * getMaxCached(Size));
       if (I != BatchClassId) {
         P->ClassSize = Size;
       } else {
@@ -246,15 +175,14 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache {
 
   NOINLINE bool refill(PerClass *C, uptr ClassId) {
     initCacheMaybe(C);
-    TransferBatch *B = Allocator->popBatch(this, ClassId);
-    if (UNLIKELY(!B))
-      return false;
-    DCHECK_GT(B->getCount(), 0);
-    C->Count = B->getCount();
-    B->copyToArray(C->Chunks);
-    B->clear();
-    destroyBatch(ClassId, B);
-    return true;
+
+    // TODO(chiahungduan): Pass the max number cached for each size class.
+    const u16 NumBlocksRefilled =
+        Allocator->popBlocks(this, ClassId, C->Chunks);
+    DCHECK_LE(NumBlocksRefilled,
+              getMaxCached(SizeClassAllocator::getSizeByClassId(ClassId)));
+    C->Count += NumBlocksRefilled;
+    return NumBlocksRefilled != 0;
   }
 
   NOINLINE void drain(PerClass *C, uptr ClassId) {
diff --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h
index 533615ad3816d..e153a3bf871cb 100644
--- a/compiler-rt/lib/scudo/standalone/primary32.h
+++ b/compiler-rt/lib/scudo/standalone/primary32.h
@@ -9,6 +9,7 @@
 #ifndef SCUDO_PRIMARY32_H_
 #define SCUDO_PRIMARY32_H_
 
+#include "allocator_common.h"
 #include "bytemap.h"
 #include "common.h"
 #include "list.h"
@@ -53,8 +54,11 @@ template <typename Config> class SizeClassAllocator32 {
                 "");
   typedef SizeClassAllocator32<Config> ThisT;
   typedef SizeClassAllocatorLocalCache<ThisT> CacheT;
-  typedef typename CacheT::TransferBatch TransferBatch;
-  typedef typename CacheT::BatchGroup BatchGroup;
+  typedef TransferBatch<ThisT> TransferBatch;
+  typedef BatchGroup<ThisT> BatchGroup;
+
+  static_assert(sizeof(BatchGroup) <= sizeof(TransferBatch),
+                "BatchGroup uses the same class size as TransferBatch");
 
   static uptr getSizeByClassId(uptr ClassId) {
     return (ClassId == SizeClassMap::BatchClassId)
@@ -187,6 +191,21 @@ template <typename Config> class SizeClassAllocator32 {
     return BlockSize > PageSize;
   }
 
+  u16 popBlocks(CacheT *C, uptr ClassId, CompactPtrT *ToArray) {
+    TransferBatch *B = popBatch(C, ClassId);
+    if (!B)
+      return 0;
+
+    const u16 Count = B->getCount();
+    DCHECK_GT(Count, 0U);
+    B->moveToArray(ToArray);
+
+    if (ClassId != SizeClassMap::BatchClassId)
+      C->deallocate(SizeClassMap::BatchClassId, B);
+
+    return Count;
+  }
+
   TransferBatch *popBatch(CacheT *C, uptr ClassId) {
     DCHECK_LT(ClassId, NumClasses);
     SizeClassInfo *Sci = getSizeClassInfo(ClassId);
@@ -520,8 +539,8 @@ template <typename Config> class SizeClassAllocator32 {
       // from `CreateGroup` in `pushBlocksImpl`
       BG->PushedBlocks = 1;
       BG->BytesInBGAtLastCheckpoint = 0;
-      BG->MaxCachedPerBatch = TransferBatch::getMaxCached(
-          getSizeByClassId(SizeClassMap::BatchClassId));
+      BG->MaxCachedPerBatch =
+          CacheT::getMaxCached(getSizeByClassId(SizeClassMap::BatchClassId));
 
       Sci->FreeListInfo.BlockList.push_front(BG);
     }
@@ -600,17 +619,17 @@ template <typename Config> class SizeClassAllocator32 {
     DCHECK_GT(Size, 0U);
 
     auto CreateGroup = [&](uptr CompactPtrGroupBase) {
-      BatchGroup *BG = C->createGroup();
+      BatchGroup *BG = reinterpret_cast<BatchGroup *>(C->getBatchClassBlock());
       BG->Batches.clear();
-      TransferBatch *TB = C->createBatch(ClassId, nullptr);
+      TransferBatch *TB =
+          reinterpret_cast<TransferBatch *>(C->getBatchClassBlock());
       TB->clear();
 
       BG->CompactPtrGroupBase = CompactPtrGroupBase;
       BG->Batches.push_front(TB);
       BG->PushedBlocks = 0;
       BG->BytesInBGAtLastCheckpoint = 0;
-      BG->MaxCachedPerBatch =
-          TransferBatch::getMaxCached(getSizeByClassId(ClassId));
+      BG->MaxCachedPerBatch = CacheT::getMaxCached(getSizeByClassId(ClassId));
 
       return BG;
     };
@@ -625,9 +644,7 @@ template <typename Config> class SizeClassAllocator32 {
         u16 UnusedSlots =
             static_cast<u16>(BG->MaxCachedPerBatch - CurBatch->getCount());
         if (UnusedSlots == 0) {
-          CurBatch = C->createBatch(
-              ClassId,
-              reinterpret_cast<void *>(decompactPtr(ClassId, Array[I])));
+          CurBatch = reinterpret_cast<TransferBatch *>(C->getBatchClassBlock());
           CurBatch->clear();
           Batches.push_front(CurBatch);
           UnusedSlots = BG->MaxCachedPerBatch;
@@ -775,7 +792,7 @@ template <typename Config> class SizeClassAllocator32 {
     }
 
     const uptr Size = getSizeByClassId(ClassId);
-    const u16 MaxCount = TransferBatch::getMaxCached(Size);
+    const u16 MaxCount = CacheT::getMaxCached(Size);
     DCHECK_GT(MaxCount, 0U);
     // The maximum number of blocks we should carve in the region is dictated
     // by the maximum number of batches we want to fill, and the amount of
diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h
index ed0c4deaac2c8..688dc32fd52ab 100644
--- a/compiler-rt/lib/scudo/standalone/primary64.h
+++ b/compiler-rt/lib/scudo/standalone/primary64.h
@@ -9,6 +9,7 @@
 #ifndef SCUDO_PRIMARY64_H_
 #define SCUDO_PRIMARY64_H_
 
+#include "allocator_common.h"
 #include "bytemap.h"
 #include "common.h"
 #include "list.h"
@@ -55,8 +56,11 @@ template <typename Config> class SizeClassAllocator64 {
   static const uptr GroupScale = GroupSizeLog - CompactPtrScale;
   typedef SizeClassAllocator64<Config> ThisT;
   typedef SizeClassAllocatorLocalCache<ThisT> CacheT;
-  typedef typename CacheT::TransferBatch TransferBatch;
-  typedef typename CacheT::BatchGroup BatchGroup;
+  typedef TransferBatch<ThisT> TransferBatch;
+  typedef BatchGroup<ThisT> BatchGroup;
+
+  static_assert(sizeof(BatchGroup) <= sizeof(TransferBatch),
+                "BatchGroup uses the same class size as TransferBatch");
 
   static uptr getSizeByClassId(uptr ClassId) {
     return (ClassId == SizeClassMap::BatchClassId)
@@ -203,6 +207,21 @@ template <typename Config> class SizeClassAllocator64 {
     DCHECK_EQ(BlocksInUse, BatchClassUsedInFreeLists);
   }
 
+  u16 popBlocks(CacheT *C, uptr ClassId, CompactPtrT *ToArray) {
+    TransferBatch *B = popBatch(C, ClassId);
+    if (!B)
+      return 0;
+
+    const u16 Count = B->getCount();
+    DCHECK_GT(Count, 0U);
+    B->moveToArray(ToArray);
+
+    if (ClassId != SizeClassMap::BatchClassId)
+      C->deallocate(SizeClassMap::BatchClassId, B);
+
+    return Count;
+  }
+
   TransferBatch *popBatch(CacheT *C, uptr ClassId) {
     DCHECK_LT(ClassId, NumClasses);
     RegionInfo *Region = getRegionInfo(ClassId);
@@ -636,8 +655,8 @@ template <typename Config> class SizeClassAllocator64 {
       // from `CreateGroup` in `pushBlocksImpl`
       BG->PushedBlocks = 1;
       BG->BytesInBGAtLastCheckpoint = 0;
-      BG->MaxCachedPerBatch = TransferBatch::getMaxCached(
-          getSizeByClassId(SizeClassMap::BatchClassId));
+      BG->MaxCachedPerBatch =
+          CacheT::getMaxCached(getSizeByClassId(SizeClassMap::BatchClassId));
 
       Region->FreeListInfo.BlockList.push_front(BG);
     }
@@ -715,17 +734,17 @@ template <typename Config> class SizeClassAllocator64 {
     DCHECK_GT(Size, 0U);
 
     auto CreateGroup = [&](uptr CompactPtrGroupBase) {
-      BatchGroup *BG = C->createGroup();
+      BatchGroup *BG = reinterpret_cast<BatchGroup *>(C->getBatchClassBlock());
       BG->Batches.clear();
-      TransferBatch *TB = C->createBatch(ClassId, nullptr);
+      TransferBatch *TB =
+          reinterpret_cast<TransferBatch *>(C->getBatchClassBlock());
       TB->clear();
 
       BG->CompactPtrGroupBase = CompactPtrGroupBase;
       BG->Batches.push_front(TB);
       BG->PushedBlocks = 0;
       BG->BytesInBGAtLastCheckpoint = 0;
-      BG->MaxCachedPerBatch =
-          TransferBatch::getMaxCached(getSizeByClassId(ClassId));
+      BG->MaxCachedPerBatch = CacheT::getMaxCached(getSizeByClassId(ClassId));
 
       return BG;
     };
@@ -740,9 +759,7 @@ template <typename Config> class SizeClassAllocator64 {
         u16 UnusedSlots =
             static_cast<u16>(BG->MaxCachedPerBatch - CurBatch->getCount());
         if (UnusedSlots == 0) {
-          CurBatch = C->createBatch(
-              ClassId,
-              reinterpret_cast<void *>(decompactPtr(ClassId, Array[I])));
+          CurBatch = reinterpret_cast<TransferBatch *>(C->getBatchClassBlock());
           CurBatch->clear();
           Batches.push_front(CurBatch);
           UnusedSlots = BG->MaxCachedPerBatch;
@@ -873,7 +890,7 @@ template <typename Config> class SizeClassAllocator64 {
                                                       RegionInfo *Region)
       REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) {
     const uptr Size = getSizeByClassId(ClassId);
-    const u16 MaxCount = TransferBatch::getMaxCached(Size);
+    const u16 MaxCount = CacheT::getMaxCached(Size);
 
     const uptr RegionBeg = Region->RegionBeg;
     const uptr MappedUser = Region->MemMapInfo.MappedUser;
diff --git a/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp b/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp
index 074977ff27e65..e368f521bea71 100644
--- a/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp
@@ -207,7 +207,7 @@ struct SmallRegionsConfig {
 // For the 32-bit one, it requires actually exhausting memory, so we skip it.
 TEST(ScudoPrimaryTest, Primary64OOM) {
   using Primary = scudo::SizeClassAllocator64<SmallRegionsConfig>;
-  using TransferBatch = Primary::CacheT::TransferBatch;
+  using TransferBatch = Primary::TransferBatch;
   Primary Allocator;
   Allocator.init(/*ReleaseToOsInterval=*/-1);
   typename Primary::CacheT Cache;
@@ -233,8 +233,9 @@ TEST(ScudoPrimaryTest, Primary64OOM) {
   while (!Batches.empty()) {
     TransferBatch *B = Batches.back();
     Batches.pop_back();
-    B->copyToArray(Blocks);
-    Allocator.pushBlocks(&Cache, ClassId, Blocks, B->getCount());
+    const scudo::u16 Count = B->getCount();
+    B->moveToArray(Blocks);
+    Allocator.pushBlocks(&Cache, ClassId, Blocks, Count);
     Cache.deallocate(Primary::SizeClassMap::BatchClassId, B);
   }
   Cache.destroy(nullptr);