diff --git a/compiler-rt/lib/scudo/standalone/combined.h b/compiler-rt/lib/scudo/standalone/combined.h
index b1700e5ecef7f..02cd5e80e8736 100644
--- a/compiler-rt/lib/scudo/standalone/combined.h
+++ b/compiler-rt/lib/scudo/standalone/combined.h
@@ -363,9 +363,7 @@ class Allocator {
     if (LIKELY(PrimaryT::canAllocate(NeededSize))) {
       ClassId = SizeClassMap::getClassIdBySize(NeededSize);
       DCHECK_NE(ClassId, 0U);
-      bool UnlockRequired;
-      auto *TSD = TSDRegistry.getTSDAndLock(&UnlockRequired);
-      TSD->assertLocked(/*BypassCheck=*/!UnlockRequired);
+      typename TSDRegistryT::ScopedTSD TSD(TSDRegistry);
       Block = TSD->getCache().allocate(ClassId);
       // If the allocation failed, retry in each successively larger class until
       // it fits. If it fails to fit in the largest class, fallback to the
@@ -376,8 +374,6 @@ class Allocator {
         if (!Block)
           ClassId = 0;
       }
-      if (UnlockRequired)
-        TSD->unlock();
     }
     if (UNLIKELY(ClassId == 0)) {
       Block = Secondary.allocate(Options, Size, Alignment, &SecondaryBlockEnd,
@@ -1148,13 +1144,11 @@ class Allocator {
     void *BlockBegin = getBlockBegin(Ptr, Header);
     const uptr ClassId = Header->ClassId;
     if (LIKELY(ClassId)) {
-      bool UnlockRequired;
-      auto *TSD = TSDRegistry.getTSDAndLock(&UnlockRequired);
-      TSD->assertLocked(/*BypassCheck=*/!UnlockRequired);
-      const bool CacheDrained =
-          TSD->getCache().deallocate(ClassId, BlockBegin);
-      if (UnlockRequired)
-        TSD->unlock();
+      bool CacheDrained;
+      {
+        typename TSDRegistryT::ScopedTSD TSD(TSDRegistry);
+        CacheDrained = TSD->getCache().deallocate(ClassId, BlockBegin);
+      }
       // When we have drained some blocks back to the Primary from TSD, that
       // implies that we may have the chance to release some pages as well.
       // Note that in order not to block other thread's accessing the TSD,
@@ -1168,13 +1162,9 @@ class Allocator {
       Secondary.deallocate(Options, BlockBegin);
     }
   } else {
-    bool UnlockRequired;
-    auto *TSD = TSDRegistry.getTSDAndLock(&UnlockRequired);
-    TSD->assertLocked(/*BypassCheck=*/!UnlockRequired);
+    typename TSDRegistryT::ScopedTSD TSD(TSDRegistry);
     Quarantine.put(&TSD->getQuarantineCache(),
                    QuarantineCallback(*this, TSD->getCache()), Ptr, Size);
-    if (UnlockRequired)
-      TSD->unlock();
   }
 }
 
diff --git a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
index 3dbd93cacefd6..a29463a034473 100644
--- a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
@@ -229,11 +229,25 @@ struct TestConditionVariableConfig {
 #define SCUDO_TYPED_TEST(FIXTURE, NAME)                                        \
   template <class TypeParam>                                                   \
   struct FIXTURE##NAME : public FIXTURE<TypeParam> {                           \
+    using BaseT = FIXTURE<TypeParam>;                                          \
     void Run();                                                                \
   };                                                                           \
   SCUDO_TYPED_TEST_ALL_TYPES(FIXTURE, NAME)                                    \
   template <class TypeParam> void FIXTURE##NAME<TypeParam>::Run()
 
+// Accessing `TSD->getCache()` requires `TSD::Mutex`, which isn't easy to
+// verify through thread-safety analysis. Instead, we verify the thread safety
+// with a runtime check in ScopedTSD and mark the test body with
+// NO_THREAD_SAFETY_ANALYSIS.
+#define SCUDO_TYPED_TEST_SKIP_THREAD_SAFETY(FIXTURE, NAME)                    \
+  template <class TypeParam>                                                  \
+  struct FIXTURE##NAME : public FIXTURE<TypeParam> {                          \
+    using BaseT = FIXTURE<TypeParam>;                                         \
+    void Run() NO_THREAD_SAFETY_ANALYSIS;                                     \
+  };                                                                          \
+  SCUDO_TYPED_TEST_ALL_TYPES(FIXTURE, NAME)                                   \
+  template <class TypeParam> void FIXTURE##NAME<TypeParam>::Run()
+
 SCUDO_TYPED_TEST(ScudoCombinedTest, IsOwned) {
   auto *Allocator = this->Allocator.get();
   static scudo::u8 StaticBuffer[scudo::Chunk::getHeaderSize() + 1];
@@ -547,7 +561,8 @@ SCUDO_TYPED_TEST(ScudoCombinedTest, Stats) {
   EXPECT_NE(Stats.find("Stats: Quarantine"), std::string::npos);
 }
 
-SCUDO_TYPED_TEST(ScudoCombinedTest, CacheDrain) NO_THREAD_SAFETY_ANALYSIS {
+SCUDO_TYPED_TEST_SKIP_THREAD_SAFETY(ScudoCombinedTest, CacheDrain) {
+  using AllocatorT = typename BaseT::AllocatorT;
   auto *Allocator = this->Allocator.get();
   std::vector<void *> V;
 
@@ -559,17 +574,15 @@ SCUDO_TYPED_TEST(ScudoCombinedTest, CacheDrain) NO_THREAD_SAFETY_ANALYSIS {
   for (auto P : V)
     Allocator->deallocate(P, Origin);
 
-  bool UnlockRequired;
-  auto *TSD = Allocator->getTSDRegistry()->getTSDAndLock(&UnlockRequired);
-  TSD->assertLocked(/*BypassCheck=*/!UnlockRequired);
+  typename AllocatorT::TSDRegistryT::ScopedTSD TSD(
+      *Allocator->getTSDRegistry());
   EXPECT_TRUE(!TSD->getCache().isEmpty());
   TSD->getCache().drain();
   EXPECT_TRUE(TSD->getCache().isEmpty());
-  if (UnlockRequired)
-    TSD->unlock();
 }
 
-SCUDO_TYPED_TEST(ScudoCombinedTest, ForceCacheDrain) NO_THREAD_SAFETY_ANALYSIS {
+SCUDO_TYPED_TEST_SKIP_THREAD_SAFETY(ScudoCombinedTest, ForceCacheDrain) {
+  using AllocatorT = typename BaseT::AllocatorT;
   auto *Allocator = this->Allocator.get();
   std::vector<void *> V;
 
@@ -584,14 +597,11 @@
   // `ForceAll` will also drain the caches.
   Allocator->releaseToOS(scudo::ReleaseToOS::ForceAll);
 
-  bool UnlockRequired;
-  auto *TSD = Allocator->getTSDRegistry()->getTSDAndLock(&UnlockRequired);
-  TSD->assertLocked(/*BypassCheck=*/!UnlockRequired);
+  typename AllocatorT::TSDRegistryT::ScopedTSD TSD(
+      *Allocator->getTSDRegistry());
   EXPECT_TRUE(TSD->getCache().isEmpty());
   EXPECT_EQ(TSD->getQuarantineCache().getSize(), 0U);
   EXPECT_TRUE(Allocator->getQuarantine()->isEmpty());
-  if (UnlockRequired)
-    TSD->unlock();
 }
 
 SCUDO_TYPED_TEST(ScudoCombinedTest, ThreadedCombined) {
@@ -892,8 +902,8 @@ TEST(ScudoCombinedTest, BasicTrustyConfig) {
   }
 
   bool UnlockRequired;
-  auto *TSD = Allocator->getTSDRegistry()->getTSDAndLock(&UnlockRequired);
-  TSD->assertLocked(/*BypassCheck=*/!UnlockRequired);
+  typename AllocatorT::TSDRegistryT::ScopedTSD TSD(
+      *Allocator->getTSDRegistry());
   TSD->getCache().drain();
 
   Allocator->releaseToOS(scudo::ReleaseToOS::Force);
diff --git a/compiler-rt/lib/scudo/standalone/tests/tsd_test.cpp b/compiler-rt/lib/scudo/standalone/tests/tsd_test.cpp
index fad8fcf900771..432129c37d424 100644
--- a/compiler-rt/lib/scudo/standalone/tests/tsd_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/tsd_test.cpp
@@ -17,6 +17,7 @@
 #include <mutex>
 #include <set>
 #include <thread>
+#include <type_traits>
 
 // We mock out an allocator with a TSD registry, mostly using empty stubs. The
 // cache contains a single volatile uptr, to be able to test that several
@@ -86,7 +87,8 @@ TEST(ScudoTSDTest, TSDRegistryInit) {
   EXPECT_TRUE(Allocator->isInitialized());
 }
 
-template <class AllocatorT> static void testRegistry() {
+template <class AllocatorT>
+static void testRegistry() NO_THREAD_SAFETY_ANALYSIS {
   auto Deleter = [](AllocatorT *A) {
     A->unmapTestOnly();
     delete A;
@@ -99,22 +101,17 @@ template <class AllocatorT> static void testRegistry() {
   Registry->initThreadMaybe(Allocator.get(), /*MinimalInit=*/true);
   EXPECT_TRUE(Allocator->isInitialized());
 
-  bool UnlockRequired;
-  auto TSD = Registry->getTSDAndLock(&UnlockRequired);
-  TSD->assertLocked(/*BypassCheck=*/!UnlockRequired);
-  EXPECT_NE(TSD, nullptr);
-  EXPECT_EQ(TSD->getCache().Canary, 0U);
-  if (UnlockRequired)
-    TSD->unlock();
+  {
+    typename AllocatorT::TSDRegistryT::ScopedTSD TSD(*Registry);
+    EXPECT_EQ(TSD->getCache().Canary, 0U);
+  }
 
   Registry->initThreadMaybe(Allocator.get(), /*MinimalInit=*/false);
-  TSD = Registry->getTSDAndLock(&UnlockRequired);
-  TSD->assertLocked(/*BypassCheck=*/!UnlockRequired);
-  EXPECT_NE(TSD, nullptr);
-  EXPECT_EQ(TSD->getCache().Canary, 0U);
-  memset(&TSD->getCache(), 0x42, sizeof(TSD->getCache()));
-  if (UnlockRequired)
-    TSD->unlock();
+  {
+    typename AllocatorT::TSDRegistryT::ScopedTSD TSD(*Registry);
+    EXPECT_EQ(TSD->getCache().Canary, 0U);
+    memset(&TSD->getCache(), 0x42, sizeof(TSD->getCache()));
+  }
 }
 
 TEST(ScudoTSDTest, TSDRegistryBasic) {
@@ -129,7 +126,12 @@ static std::mutex Mutex;
 static std::condition_variable Cv;
 static bool Ready;
 
-template <typename AllocatorT> static void stressCache(AllocatorT *Allocator) {
+// Accessing `TSD->getCache()` requires `TSD::Mutex`, which isn't easy to
+// verify through thread-safety analysis. Instead, we verify the thread safety
+// with a runtime check in ScopedTSD and mark this function with
+// NO_THREAD_SAFETY_ANALYSIS.
+template <typename AllocatorT>
+static void stressCache(AllocatorT *Allocator) NO_THREAD_SAFETY_ANALYSIS {
   auto Registry = Allocator->getTSDRegistry();
   {
     std::unique_lock<std::mutex> Lock(Mutex);
@@ -137,14 +139,13 @@ template <typename AllocatorT> static void stressCache(AllocatorT *Allocator) {
     Cv.wait(Lock);
   }
   Registry->initThreadMaybe(Allocator, /*MinimalInit=*/false);
-  bool UnlockRequired;
-  auto TSD = Registry->getTSDAndLock(&UnlockRequired);
-  TSD->assertLocked(/*BypassCheck=*/!UnlockRequired);
-  EXPECT_NE(TSD, nullptr);
+  typename AllocatorT::TSDRegistryT::ScopedTSD TSD(*Registry);
   // For an exclusive TSD, the cache should be empty. We cannot guarantee the
   // same for a shared TSD.
-  if (!UnlockRequired)
+  if (std::is_same<typename AllocatorT::TSDRegistryT,
+                   scudo::TSDRegistryExT<AllocatorT>>()) {
     EXPECT_EQ(TSD->getCache().Canary, 0U);
+  }
   // Transform the thread id to a uptr to use it as canary.
   const scudo::uptr Canary = static_cast<scudo::uptr>(
       std::hash<std::thread::id>{}(std::this_thread::get_id()));
@@ -152,8 +153,6 @@
   // Loop a few times to make sure that a concurrent thread isn't modifying it.
   for (scudo::uptr I = 0; I < 4096U; I++)
     EXPECT_EQ(TSD->getCache().Canary, Canary);
-  if (UnlockRequired)
-    TSD->unlock();
 }
 
 template <class AllocatorT> static void testRegistryThreaded() {
@@ -195,14 +194,10 @@ static void stressSharedRegistry(MockAllocator<SharedCaches> *Allocator) {
     Cv.wait(Lock);
   }
   Registry->initThreadMaybe(Allocator, /*MinimalInit=*/false);
-  bool UnlockRequired;
   for (scudo::uptr I = 0; I < 4096U; I++) {
-    auto TSD = Registry->getTSDAndLock(&UnlockRequired);
-    TSD->assertLocked(/*BypassCheck=*/!UnlockRequired);
-    EXPECT_NE(TSD, nullptr);
-    Set.insert(reinterpret_cast<void *>(TSD));
-    if (UnlockRequired)
-      TSD->unlock();
+    typename MockAllocator<SharedCaches>::TSDRegistryT::ScopedTSD TSD(
+        *Registry);
+    Set.insert(reinterpret_cast<void *>(&*TSD));
   }
   {
     std::unique_lock<std::mutex> Lock(Mutex);
diff --git a/compiler-rt/lib/scudo/standalone/tsd_exclusive.h b/compiler-rt/lib/scudo/standalone/tsd_exclusive.h
index 238367420238d..9a32d03b06139 100644
--- a/compiler-rt/lib/scudo/standalone/tsd_exclusive.h
+++ b/compiler-rt/lib/scudo/standalone/tsd_exclusive.h
@@ -27,6 +27,31 @@ struct ThreadState {
 template <class Allocator> void teardownThread(void *Ptr);
 
 template <class Allocator> struct TSDRegistryExT {
+  using ThisT = TSDRegistryExT<Allocator>;
+
+  struct ScopedTSD {
+    ScopedTSD(ThisT &TSDRegistry) {
+      CurrentTSD = TSDRegistry.getTSDAndLock(&IsFallback);
+      DCHECK_NE(CurrentTSD, nullptr);
+    }
+
+    ~ScopedTSD() {
+      if (UNLIKELY(IsFallback))
+        CurrentTSD->unlock();
+    }
+
+    TSD<Allocator> &operator*() { return *CurrentTSD; }
+
+    TSD<Allocator> *operator->() {
+      CurrentTSD->assertLocked(/*BypassCheck=*/!IsFallback);
+      return CurrentTSD;
+    }
+
+  private:
+    TSD<Allocator> *CurrentTSD;
+    bool IsFallback;
+  };
+
   void init(Allocator *Instance) REQUIRES(Mutex) {
     DCHECK(!Initialized);
     Instance->init();
@@ -74,23 +99,6 @@ template <class Allocator> struct TSDRegistryExT {
     initThread(Instance, MinimalInit);
   }
 
-  // TODO(chiahungduan): Consider removing the argument `UnlockRequired` by
-  // embedding the logic into TSD or always locking the TSD. It will enable us
-  // to properly mark thread annotation here and adding proper runtime
-  // assertions in the member functions of TSD. For example, assert the lock is
-  // acquired before calling TSD::commitBack().
-  ALWAYS_INLINE TSD<Allocator> *
-  getTSDAndLock(bool *UnlockRequired) NO_THREAD_SAFETY_ANALYSIS {
-    if (LIKELY(State.InitState == ThreadState::Initialized &&
-               !atomic_load(&Disabled, memory_order_acquire))) {
-      *UnlockRequired = false;
-      return &ThreadTSD;
-    }
-    FallbackTSD.lock();
-    *UnlockRequired = true;
-    return &FallbackTSD;
-  }
-
   // To disable the exclusive TSD registry, we effectively lock the fallback TSD
   // and force all threads to attempt to use it instead of their local one.
   void disable() NO_THREAD_SAFETY_ANALYSIS {
@@ -123,6 +131,18 @@
   }
 
 private:
+  ALWAYS_INLINE TSD<Allocator> *
+  getTSDAndLock(bool *UnlockRequired) NO_THREAD_SAFETY_ANALYSIS {
+    if (LIKELY(State.InitState == ThreadState::Initialized &&
+               !atomic_load(&Disabled, memory_order_acquire))) {
+      *UnlockRequired = false;
+      return &ThreadTSD;
+    }
+    FallbackTSD.lock();
+    *UnlockRequired = true;
+    return &FallbackTSD;
+  }
+
   // Using minimal initialization allows for global initialization while keeping
   // the thread specific structure untouched. The fallback structure will be
   // used instead.
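
Note on the tsd_exclusive.h hunk above: the exclusive registry's ScopedTSD only pays for a lock when a thread has to fall back to the shared TSD. The constructor records whether the fallback path was taken, and the destructor unlocks only in that case, which is what lets the call sites in combined.h and the tests drop their manual `if (UnlockRequired) TSD->unlock();` cleanups. Below is a minimal standalone sketch of that RAII shape; MockTSD, MockRegistry, and ScopedMockTSD are illustrative stand-ins, not Scudo types.

#include <cassert>
#include <mutex>

// Illustrative stand-ins for scudo's TSD and registry. The registry hands
// out a per-thread TSD when one is available and otherwise falls back to a
// shared, mutex-guarded TSD.
struct MockTSD {
  std::mutex Mutex;
  int Cache = 0;
};

struct MockRegistry {
  MockTSD ThreadTSD;   // stands in for the thread-local TSD
  MockTSD FallbackTSD; // stands in for the shared fallback TSD
  bool UseFallback = false;

  // Mirrors the shape of getTSDAndLock(): only the fallback path locks.
  MockTSD *getTSDAndLock(bool *IsFallback) {
    if (!UseFallback) {
      *IsFallback = false;
      return &ThreadTSD;
    }
    FallbackTSD.Mutex.lock();
    *IsFallback = true;
    return &FallbackTSD;
  }
};

// RAII wrapper in the spirit of ScopedTSD: the constructor acquires the TSD
// (locking only when the fallback is used) and the destructor releases it,
// so no caller can forget the unlock.
class ScopedMockTSD {
public:
  explicit ScopedMockTSD(MockRegistry &Registry) {
    TSDPtr = Registry.getTSDAndLock(&IsFallback);
    assert(TSDPtr != nullptr);
  }
  ~ScopedMockTSD() {
    if (IsFallback)
      TSDPtr->Mutex.unlock();
  }
  MockTSD *operator->() { return TSDPtr; }

private:
  MockTSD *TSDPtr;
  bool IsFallback;
};

int main() {
  MockRegistry Registry;
  {
    ScopedMockTSD TSD(Registry); // acquired here...
    TSD->Cache += 1;
  } // ...and released here, on every exit path
  return 0;
}
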
diff --git a/compiler-rt/lib/scudo/standalone/tsd_shared.h b/compiler-rt/lib/scudo/standalone/tsd_shared.h
index 1bca578ee14be..a2e9bb3d4a663 100644
--- a/compiler-rt/lib/scudo/standalone/tsd_shared.h
+++ b/compiler-rt/lib/scudo/standalone/tsd_shared.h
@@ -26,6 +26,27 @@ namespace scudo {
 
 template <class Allocator, u32 TSDsArraySize, u32 DefaultTSDCount>
 struct TSDRegistrySharedT {
+  using ThisT = TSDRegistrySharedT<Allocator, TSDsArraySize, DefaultTSDCount>;
+
+  struct ScopedTSD {
+    ScopedTSD(ThisT &TSDRegistry) {
+      CurrentTSD = TSDRegistry.getTSDAndLock();
+      DCHECK_NE(CurrentTSD, nullptr);
+    }
+
+    ~ScopedTSD() { CurrentTSD->unlock(); }
+
+    TSD<Allocator> &operator*() { return *CurrentTSD; }
+
+    TSD<Allocator> *operator->() {
+      CurrentTSD->assertLocked(/*BypassCheck=*/false);
+      return CurrentTSD;
+    }
+
+  private:
+    TSD<Allocator> *CurrentTSD;
+  };
+
   void init(Allocator *Instance) REQUIRES(Mutex) {
     DCHECK(!Initialized);
     Instance->init();
@@ -70,26 +91,6 @@ struct TSDRegistrySharedT {
     initThread(Instance);
   }
 
-  // TSDs is an array of locks and which is not supported for marking
-  // thread-safety capability.
-  ALWAYS_INLINE TSD<Allocator> *
-  getTSDAndLock(bool *UnlockRequired) NO_THREAD_SAFETY_ANALYSIS {
-    TSD<Allocator> *TSD = getCurrentTSD();
-    DCHECK(TSD);
-    *UnlockRequired = true;
-    // Try to lock the currently associated context.
-    if (TSD->tryLock())
-      return TSD;
-    // If that fails, go down the slow path.
-    if (TSDsArraySize == 1U) {
-      // Only 1 TSD, not need to go any further.
-      // The compiler will optimize this one way or the other.
-      TSD->lock();
-      return TSD;
-    }
-    return getTSDAndLockSlow(TSD);
-  }
-
   void disable() NO_THREAD_SAFETY_ANALYSIS {
     Mutex.lock();
     for (u32 I = 0; I < TSDsArraySize; I++)
@@ -132,6 +133,23 @@ struct TSDRegistrySharedT {
   }
 
 private:
+  ALWAYS_INLINE TSD<Allocator> *
+  getTSDAndLock() NO_THREAD_SAFETY_ANALYSIS {
+    TSD<Allocator> *TSD = getCurrentTSD();
+    DCHECK(TSD);
+    // Try to lock the currently associated context.
+    if (TSD->tryLock())
+      return TSD;
+    // If that fails, go down the slow path.
+    if (TSDsArraySize == 1U) {
+      // Only 1 TSD, no need to go any further.
+      // The compiler will optimize this one way or the other.
+      TSD->lock();
+      return TSD;
+    }
+    return getTSDAndLockSlow(TSD);
+  }
+
   ALWAYS_INLINE uptr *getTlsPtr() const {
 #if SCUDO_HAS_PLATFORM_TLS_SLOT
     return reinterpret_cast<uptr *>(getPlatformAllocatorTlsSlot());
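
One detail worth calling out in the combined.h hunk is the `bool CacheDrained; { ... }` block: the extra braces end the ScopedTSD's lifetime, and therefore drop the TSD lock, before the opportunistic page release runs, matching the comment about not blocking other threads' access to the TSD. Here is a sketch of the same scoping idea with a plain std::scoped_lock standing in for the TSD lock; TSDMutex, CachedBlocks, deallocateToCache, and releasePagesIfPossible are made-up names for illustration only.

#include <mutex>

std::mutex TSDMutex;   // stands in for the per-thread TSD lock
int CachedBlocks = 64; // stands in for the TSD's local cache

// Returns true when this deallocation drained blocks back to the primary.
bool deallocateToCache() { return --CachedBlocks == 0; }

void releasePagesIfPossible() { /* slow path; must not hold TSDMutex */ }

void deallocateExample() {
  bool CacheDrained;
  {
    // The braces mirror the block added around ScopedTSD in combined.h:
    // the lock lives only as long as the cache update needs it.
    std::scoped_lock<std::mutex> Lock(TSDMutex);
    CacheDrained = deallocateToCache();
  } // TSDMutex is released here, before the page-release attempt
  if (CacheDrained)
    releasePagesIfPossible();
}

int main() {
  deallocateExample();
  return 0;
}
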