Optimize atomic_thread_fence (#740)

Fixes #739.
microsoft · Apr 30, 2020 · a5a9e49
1 parent 2c441c7
commit a5a9e49
Showing 1 changed file with 9 additions and 2 deletions.
diff --git a/stl/inc/atomic b/stl/inc/atomic
@@ -1942,8 +1942,15 @@ extern "C" inline void atomic_thread_fence(const memory_order _Order) noexcept {
 #else // ^^^ ARM32/ARM64 hardware / x86/x64 hardware vvv
     _Compiler_barrier();
     if (_Order == memory_order_seq_cst) {
-        static long _Guard;
-        (void) _InterlockedCompareExchange(&_Guard, 0, 0);
+        volatile long _Guard; // Not initialized to avoid an unnecessary operation; the value does not matter
+
+        // _mm_mfence could have been used, but it is not supported on older x86 CPUs and is slower on some recent CPUs.
+        // The memory fence provided by interlocked operations has some exceptions, but this is fine:
+        // std::atomic_thread_fence works with respect to other atomics only; it may not be a full fence for all ops.
+#pragma warning(suppress : 6001) // "Using uninitialized memory '_Guard'"
+#pragma warning(suppress : 28113) // "Accessing a local variable _Guard via an Interlocked function: This is an unusual
+                                  // usage which could be reconsidered."
+        (void) _InterlockedIncrement(&_Guard);
         _Compiler_barrier();
     }
 #endif // hardware