diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_thread_registry.h b/compiler-rt/lib/sanitizer_common/sanitizer_thread_registry.h index 89e5fefa3408a..a259b324220f3 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_thread_registry.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_thread_registry.h @@ -104,8 +104,6 @@ class MUTEX ThreadRegistry { return threads_.empty() ? nullptr : threads_[tid]; } - u32 NumThreadsLocked() const { return threads_.size(); } - u32 CreateThread(uptr user_id, bool detached, u32 parent_tid, void *arg); typedef void (*ThreadCallback)(ThreadContextBase *tctx, void *arg); diff --git a/compiler-rt/lib/tsan/CMakeLists.txt b/compiler-rt/lib/tsan/CMakeLists.txt index aede54f689aa3..c3284a5dc4220 100644 --- a/compiler-rt/lib/tsan/CMakeLists.txt +++ b/compiler-rt/lib/tsan/CMakeLists.txt @@ -119,6 +119,7 @@ set(TSAN_HEADERS rtl/tsan_symbolize.h rtl/tsan_sync.h rtl/tsan_trace.h + rtl/tsan_update_shadow_word.inc rtl/tsan_vector_clock.h ) diff --git a/compiler-rt/lib/tsan/check_analyze.sh b/compiler-rt/lib/tsan/check_analyze.sh index f507ba0172f34..3bd817c136971 100755 --- a/compiler-rt/lib/tsan/check_analyze.sh +++ b/compiler-rt/lib/tsan/check_analyze.sh @@ -34,27 +34,21 @@ check() { fi } -# All hot functions must contain no PUSH/POP -# and no CALLs (everything is tail-called). for f in write1 write2 write4 write8; do check $f rsp 1 - check $f push 0 - check $f pop 0 - check $f call 0 + check $f push 2 done for f in read1 read2 read4 read8; do check $f rsp 1 - check $f push 0 - check $f pop 0 - check $f call 0 + check $f push 3 done for f in func_entry func_exit; do check $f rsp 0 check $f push 0 check $f pop 0 - check $f call 0 + check $f call 1 # TraceSwitch() done echo LGTM diff --git a/compiler-rt/lib/tsan/go/build.bat b/compiler-rt/lib/tsan/go/build.bat index e834100443140..496e127d95817 100644 --- a/compiler-rt/lib/tsan/go/build.bat +++ b/compiler-rt/lib/tsan/go/build.bat @@ -14,7 +14,6 @@ type ^ ..\rtl\tsan_suppressions.cpp ^ ..\rtl\tsan_sync.cpp ^ ..\rtl\tsan_stack_trace.cpp ^ - ..\rtl\tsan_vector_clock.cpp ^ ..\..\sanitizer_common\sanitizer_allocator.cpp ^ ..\..\sanitizer_common\sanitizer_common.cpp ^ ..\..\sanitizer_common\sanitizer_flags.cpp ^ diff --git a/compiler-rt/lib/tsan/go/buildgo.sh b/compiler-rt/lib/tsan/go/buildgo.sh index ab0db57b27839..8f6ffd4d34c5b 100755 --- a/compiler-rt/lib/tsan/go/buildgo.sh +++ b/compiler-rt/lib/tsan/go/buildgo.sh @@ -19,7 +19,6 @@ SRCS=" ../rtl/tsan_stack_trace.cpp ../rtl/tsan_suppressions.cpp ../rtl/tsan_sync.cpp - ../rtl/tsan_vector_clock.cpp ../../sanitizer_common/sanitizer_allocator.cpp ../../sanitizer_common/sanitizer_common.cpp ../../sanitizer_common/sanitizer_common_libcdep.cpp diff --git a/compiler-rt/lib/tsan/go/tsan_go.cpp b/compiler-rt/lib/tsan/go/tsan_go.cpp index c689a51fb5e1d..104c5b325aee5 100644 --- a/compiler-rt/lib/tsan/go/tsan_go.cpp +++ b/compiler-rt/lib/tsan/go/tsan_go.cpp @@ -214,7 +214,7 @@ void __tsan_malloc(ThreadState *thr, uptr pc, uptr p, uptr sz) { } void __tsan_free(uptr p, uptr sz) { - ctx->metamap.FreeRange(get_cur_proc(), p, sz, false); + ctx->metamap.FreeRange(get_cur_proc(), p, sz); } void __tsan_go_start(ThreadState *parent, ThreadState **pthr, void *pc) { diff --git a/compiler-rt/lib/tsan/rtl/tsan_defs.h b/compiler-rt/lib/tsan/rtl/tsan_defs.h index 752020534ed91..fe0c1da31599b 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_defs.h +++ b/compiler-rt/lib/tsan/rtl/tsan_defs.h @@ -63,13 +63,6 @@ enum class Epoch : u16 {}; constexpr uptr kEpochBits = 14; constexpr Epoch kEpochZero = 
static_cast(0); constexpr Epoch kEpochOver = static_cast(1 << kEpochBits); -constexpr Epoch kEpochLast = static_cast((1 << kEpochBits) - 1); - -inline Epoch EpochInc(Epoch epoch) { - return static_cast(static_cast(epoch) + 1); -} - -inline bool EpochOverflow(Epoch epoch) { return epoch == kEpochOver; } const int kClkBits = 42; const unsigned kMaxTidReuse = (1 << (64 - kClkBits)) - 1; @@ -114,7 +107,7 @@ const uptr kShadowCnt = 4; const uptr kShadowCell = 8; // Single shadow value. -enum class RawShadow : u32 {}; +typedef u64 RawShadow; const uptr kShadowSize = sizeof(RawShadow); // Shadow memory is kShadowMultiplier times larger than user memory. @@ -191,13 +184,10 @@ MD5Hash md5_hash(const void *data, uptr size); struct Processor; struct ThreadState; class ThreadContext; -struct TidSlot; struct Context; struct ReportStack; class ReportDesc; class RegionAlloc; -struct Trace; -struct TracePart; typedef uptr AccessType; @@ -208,8 +198,6 @@ enum : AccessType { kAccessVptr = 1 << 2, // read or write of an object virtual table pointer kAccessFree = 1 << 3, // synthetic memory access during memory freeing kAccessExternalPC = 1 << 4, // access PC can have kExternalPCBit set - kAccessCheckOnly = 1 << 5, // check for races, but don't store - kAccessNoRodata = 1 << 6, // don't check for .rodata marker }; // Descriptor of user's memory block. @@ -231,18 +219,15 @@ enum ExternalTag : uptr { // as 16-bit values, see tsan_defs.h. }; -enum { - MutexTypeReport = MutexLastCommon, +enum MutexType { + MutexTypeTrace = MutexLastCommon, + MutexTypeReport, MutexTypeSyncVar, MutexTypeAnnotations, MutexTypeAtExit, MutexTypeFired, MutexTypeRacy, MutexTypeGlobalProc, - MutexTypeTrace, - MutexTypeSlot, - MutexTypeSlots, - MutexTypeMultiSlot, }; } // namespace __tsan diff --git a/compiler-rt/lib/tsan/rtl/tsan_dense_alloc.h b/compiler-rt/lib/tsan/rtl/tsan_dense_alloc.h index 7a39a39d51de4..9e15f74a06152 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_dense_alloc.h +++ b/compiler-rt/lib/tsan/rtl/tsan_dense_alloc.h @@ -104,15 +104,6 @@ class DenseSlabAlloc { return atomic_load_relaxed(&fillpos_) * kL2Size * sizeof(T); } - template - void ForEach(Func func) { - SpinMutexLock lock(&mtx_); - uptr fillpos = atomic_load_relaxed(&fillpos_); - for (uptr l1 = 0; l1 < fillpos; l1++) { - for (IndexT l2 = l1 == 0 ? 
1 : 0; l2 < kL2Size; l2++) func(&map_[l1][l2]); - } - } - private: T *map_[kL1Size]; SpinMutex mtx_; diff --git a/compiler-rt/lib/tsan/rtl/tsan_flags.cpp b/compiler-rt/lib/tsan/rtl/tsan_flags.cpp index 54bed9f9a6be1..ee89862d17bd8 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_flags.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_flags.cpp @@ -110,6 +110,12 @@ void InitializeFlags(Flags *f, const char *env, const char *env_option_name) { if (common_flags()->help) parser.PrintFlagDescriptions(); + if (f->history_size < 0 || f->history_size > 7) { + Printf("ThreadSanitizer: incorrect value for history_size" + " (must be [0..7])\n"); + Die(); + } + if (f->io_sync < 0 || f->io_sync > 2) { Printf("ThreadSanitizer: incorrect value for io_sync" " (must be [0..2])\n"); diff --git a/compiler-rt/lib/tsan/rtl/tsan_flags.inc b/compiler-rt/lib/tsan/rtl/tsan_flags.inc index 3df180ec68cc9..7954a4307fa1e 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_flags.inc +++ b/compiler-rt/lib/tsan/rtl/tsan_flags.inc @@ -59,10 +59,14 @@ TSAN_FLAG(bool, stop_on_start, false, "Stops on start until __tsan_resume() is called (for debugging).") TSAN_FLAG(bool, running_on_valgrind, false, "Controls whether RunningOnValgrind() returns true or false.") +// There are a lot of goroutines in Go, so we use smaller history. TSAN_FLAG( - uptr, history_size, 0, - "Per-thread history size," - " controls how many extra previous memory accesses are remembered per thread.") + int, history_size, SANITIZER_GO ? 1 : 3, + "Per-thread history size, controls how many previous memory accesses " + "are remembered per thread. Possible values are [0..7]. " + "history_size=0 amounts to 32K memory accesses. Each next value doubles " + "the amount of memory accesses, up to history_size=7 that amounts to " + "4M memory accesses. The default value is 2 (128K memory accesses).") TSAN_FLAG(int, io_sync, 1, "Controls level of synchronization implied by IO operations. 
" "0 - no synchronization " diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp index 9d17e9bbdc06e..25dbe487b2807 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp @@ -1967,7 +1967,6 @@ static void ReportErrnoSpoiling(ThreadState *thr, uptr pc) { static void CallUserSignalHandler(ThreadState *thr, bool sync, bool acquire, int sig, __sanitizer_siginfo *info, void *uctx) { - CHECK(thr->slot); __sanitizer_sigaction *sigactions = interceptor_ctx()->sigactions; if (acquire) Acquire(thr, 0, (uptr)&sigactions[sig]); @@ -2255,7 +2254,7 @@ struct dl_iterate_phdr_data { }; static bool IsAppNotRodata(uptr addr) { - return IsAppMem(addr) && *MemToShadow(addr) != Shadow::kRodata; + return IsAppMem(addr) && *MemToShadow(addr) != kShadowRodata; } static int dl_iterate_phdr_cb(__sanitizer_dl_phdr_info *info, SIZE_T size, diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface_atomic.cpp b/compiler-rt/lib/tsan/rtl/tsan_interface_atomic.cpp index f794a2fcdd0df..24ba3bb1f65df 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_interface_atomic.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_interface_atomic.cpp @@ -235,9 +235,8 @@ static T AtomicLoad(ThreadState *thr, uptr pc, const volatile T *a, morder mo) { T v = NoTsanAtomicLoad(a, mo); SyncVar *s = ctx->metamap.GetSyncIfExists((uptr)a); if (s) { - SlotLocker locker(thr); - ReadLock lock(&s->mtx); - thr->clock.Acquire(s->clock); + ReadLock l(&s->mtx); + AcquireImpl(thr, pc, &s->clock); // Re-read under sync mutex because we need a consistent snapshot // of the value and the clock we acquire. v = NoTsanAtomicLoad(a, mo); @@ -271,14 +270,14 @@ static void AtomicStore(ThreadState *thr, uptr pc, volatile T *a, T v, NoTsanAtomicStore(a, v, mo); return; } - SlotLocker locker(thr); - { - auto s = ctx->metamap.GetSyncOrCreate(thr, pc, (uptr)a, false); - Lock lock(&s->mtx); - thr->clock.ReleaseStore(&s->clock); - NoTsanAtomicStore(a, v, mo); - } - IncrementEpoch(thr); + __sync_synchronize(); + SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, (uptr)a, false); + Lock l(&s->mtx); + thr->fast_state.IncrementEpoch(); + // Can't increment epoch w/o writing to the trace as well. + TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0); + ReleaseStoreImpl(thr, pc, &s->clock); + NoTsanAtomicStore(a, v, mo); } template @@ -286,21 +285,18 @@ static T AtomicRMW(ThreadState *thr, uptr pc, volatile T *a, T v, morder mo) { MemoryAccess(thr, pc, (uptr)a, AccessSize(), kAccessWrite | kAccessAtomic); if (LIKELY(mo == mo_relaxed)) return F(a, v); - SlotLocker locker(thr); - { - auto s = ctx->metamap.GetSyncOrCreate(thr, pc, (uptr)a, false); - RWLock lock(&s->mtx, IsReleaseOrder(mo)); - if (IsAcqRelOrder(mo)) - thr->clock.ReleaseAcquire(&s->clock); - else if (IsReleaseOrder(mo)) - thr->clock.Release(&s->clock); - else if (IsAcquireOrder(mo)) - thr->clock.Acquire(s->clock); - v = F(a, v); - } - if (IsReleaseOrder(mo)) - IncrementEpoch(thr); - return v; + SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, (uptr)a, false); + Lock l(&s->mtx); + thr->fast_state.IncrementEpoch(); + // Can't increment epoch w/o writing to the trace as well. 
+ TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0); + if (IsAcqRelOrder(mo)) + AcquireReleaseImpl(thr, pc, &s->clock); + else if (IsReleaseOrder(mo)) + ReleaseImpl(thr, pc, &s->clock); + else if (IsAcquireOrder(mo)) + AcquireImpl(thr, pc, &s->clock); + return F(a, v); } template @@ -420,28 +416,27 @@ static bool AtomicCAS(ThreadState *thr, uptr pc, volatile T *a, T *c, T v, *c = pr; return false; } - SlotLocker locker(thr); + bool release = IsReleaseOrder(mo); - bool success; - { - auto s = ctx->metamap.GetSyncOrCreate(thr, pc, (uptr)a, false); - RWLock lock(&s->mtx, release); - T cc = *c; - T pr = func_cas(a, cc, v); - success = pr == cc; - if (!success) { - *c = pr; - mo = fmo; - } - if (success && IsAcqRelOrder(mo)) - thr->clock.ReleaseAcquire(&s->clock); - else if (success && IsReleaseOrder(mo)) - thr->clock.Release(&s->clock); - else if (IsAcquireOrder(mo)) - thr->clock.Acquire(s->clock); + SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, (uptr)a, false); + RWLock l(&s->mtx, release); + T cc = *c; + T pr = func_cas(a, cc, v); + bool success = pr == cc; + if (!success) { + *c = pr; + mo = fmo; } - if (success && release) - IncrementEpoch(thr); + thr->fast_state.IncrementEpoch(); + // Can't increment epoch w/o writing to the trace as well. + TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0); + + if (success && IsAcqRelOrder(mo)) + AcquireReleaseImpl(thr, pc, &s->clock); + else if (success && IsReleaseOrder(mo)) + ReleaseImpl(thr, pc, &s->clock); + else if (IsAcquireOrder(mo)) + AcquireImpl(thr, pc, &s->clock); return success; } diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface_java.cpp b/compiler-rt/lib/tsan/rtl/tsan_interface_java.cpp index 7c15a16388268..c090c1f08cbeb 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_interface_java.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_interface_java.cpp @@ -106,7 +106,7 @@ void __tsan_java_free(jptr ptr, jptr size) { DCHECK_GE(ptr, jctx->heap_begin); DCHECK_LE(ptr + size, jctx->heap_begin + jctx->heap_size); - ctx->metamap.FreeRange(thr->proc(), ptr, size, false); + ctx->metamap.FreeRange(thr->proc(), ptr, size); } void __tsan_java_move(jptr src, jptr dst, jptr size) { @@ -133,7 +133,7 @@ void __tsan_java_move(jptr src, jptr dst, jptr size) { // support that anymore as it contains addresses of accesses. RawShadow *d = MemToShadow(dst); RawShadow *dend = MemToShadow(dst + size); - ShadowSet(d, dend, Shadow::kEmpty); + internal_memset(d, 0, (dend - d) * sizeof(*d)); } jptr __tsan_java_find(jptr *from_ptr, jptr to) { diff --git a/compiler-rt/lib/tsan/rtl/tsan_mman.cpp b/compiler-rt/lib/tsan/rtl/tsan_mman.cpp index 9217b72bbb4af..ef97ad0bc94ea 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_mman.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_mman.cpp @@ -219,17 +219,8 @@ void *user_reallocarray(ThreadState *thr, uptr pc, void *p, uptr size, uptr n) { void OnUserAlloc(ThreadState *thr, uptr pc, uptr p, uptr sz, bool write) { DPrintf("#%d: alloc(%zu) = 0x%zx\n", thr->tid, sz, p); - // Note: this can run before thread initialization/after finalization. - // As a result this is not necessarily synchronized with DoReset, - // which iterates over and resets all sync objects, - // but it is fine to create new MBlocks in this context. ctx->metamap.AllocBlock(thr, pc, p, sz); - // If this runs before thread initialization/after finalization - // and we don't have trace initialized, we can't imitate writes. - // In such case just reset the shadow range, it is fine since - // it affects only a small fraction of special objects. 
- if (write && thr->ignore_reads_and_writes == 0 && - atomic_load_relaxed(&thr->trace_pos)) + if (write && thr->ignore_reads_and_writes == 0 && thr->is_inited) MemoryRangeImitateWrite(thr, pc, (uptr)p, sz); else MemoryResetRange(thr, pc, (uptr)p, sz); @@ -237,16 +228,9 @@ void OnUserAlloc(ThreadState *thr, uptr pc, uptr p, uptr sz, bool write) { void OnUserFree(ThreadState *thr, uptr pc, uptr p, bool write) { CHECK_NE(p, (void*)0); - if (!thr->slot) { - // Very early/late in thread lifetime, or during fork. - UNUSED uptr sz = ctx->metamap.FreeBlock(thr->proc(), p, false); - DPrintf("#%d: free(0x%zx, %zu) (no slot)\n", thr->tid, p, sz); - return; - } - SlotLocker locker(thr); - uptr sz = ctx->metamap.FreeBlock(thr->proc(), p, true); + uptr sz = ctx->metamap.FreeBlock(thr->proc(), p); DPrintf("#%d: free(0x%zx, %zu)\n", thr->tid, p, sz); - if (write && thr->ignore_reads_and_writes == 0) + if (write && thr->ignore_reads_and_writes == 0 && thr->is_inited) MemoryRangeFreed(thr, pc, (uptr)p, sz); } @@ -409,6 +393,8 @@ uptr __sanitizer_get_allocated_size(const void *p) { void __tsan_on_thread_idle() { ThreadState *thr = cur_thread(); + thr->clock.ResetCached(&thr->proc()->clock_cache); + thr->last_sleep_clock.ResetCached(&thr->proc()->clock_cache); allocator()->SwallowCache(&thr->proc()->alloc_cache); internal_allocator()->SwallowCache(&thr->proc()->internal_alloc_cache); ctx->metamap.OnProcIdle(thr->proc()); diff --git a/compiler-rt/lib/tsan/rtl/tsan_mutexset.cpp b/compiler-rt/lib/tsan/rtl/tsan_mutexset.cpp index 3a75b80ac30ff..735179686ba95 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_mutexset.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_mutexset.cpp @@ -19,7 +19,57 @@ namespace __tsan { MutexSet::MutexSet() { } -void MutexSet::Reset() { internal_memset(this, 0, sizeof(*this)); } +void MutexSet::Add(u64 id, bool write, u64 epoch) { + // Look up existing mutex with the same id. + for (uptr i = 0; i < size_; i++) { + if (descs_[i].id == id) { + descs_[i].count++; + descs_[i].epoch = epoch; + return; + } + } + // On overflow, find the oldest mutex and drop it. + if (size_ == kMaxSize) { + u64 minepoch = (u64)-1; + u64 mini = (u64)-1; + for (uptr i = 0; i < size_; i++) { + if (descs_[i].epoch < minepoch) { + minepoch = descs_[i].epoch; + mini = i; + } + } + RemovePos(mini); + CHECK_EQ(size_, kMaxSize - 1); + } + // Add new mutex descriptor. + descs_[size_].addr = 0; + descs_[size_].stack_id = kInvalidStackID; + descs_[size_].id = id; + descs_[size_].write = write; + descs_[size_].epoch = epoch; + descs_[size_].seq = seq_++; + descs_[size_].count = 1; + size_++; +} + +void MutexSet::Del(u64 id, bool write) { + for (uptr i = 0; i < size_; i++) { + if (descs_[i].id == id) { + if (--descs_[i].count == 0) + RemovePos(i); + return; + } + } +} + +void MutexSet::Remove(u64 id) { + for (uptr i = 0; i < size_; i++) { + if (descs_[i].id == id) { + RemovePos(i); + return; + } + } +} void MutexSet::AddAddr(uptr addr, StackID stack_id, bool write) { // Look up existing mutex with the same id. @@ -43,7 +93,9 @@ void MutexSet::AddAddr(uptr addr, StackID stack_id, bool write) { // Add new mutex descriptor. 
descs_[size_].addr = addr; descs_[size_].stack_id = stack_id; + descs_[size_].id = 0; descs_[size_].write = write; + descs_[size_].epoch = 0; descs_[size_].seq = seq_++; descs_[size_].count = 1; size_++; diff --git a/compiler-rt/lib/tsan/rtl/tsan_mutexset.h b/compiler-rt/lib/tsan/rtl/tsan_mutexset.h index aabd361e6afd9..93776a6641351 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_mutexset.h +++ b/compiler-rt/lib/tsan/rtl/tsan_mutexset.h @@ -25,6 +25,8 @@ class MutexSet { struct Desc { uptr addr; StackID stack_id; + u64 id; + u64 epoch; u32 seq; u32 count; bool write; @@ -38,7 +40,10 @@ class MutexSet { }; MutexSet(); - void Reset(); + // The 'id' is obtained from SyncVar::GetId(). + void Add(u64 id, bool write, u64 epoch); + void Del(u64 id, bool write); + void Remove(u64 id); // Removes the mutex completely (if it's destroyed). void AddAddr(uptr addr, StackID stack_id, bool write); void DelAddr(uptr addr, bool destroy = false); uptr Size() const; @@ -77,7 +82,9 @@ class DynamicMutexSet { // in different goroutine). #if SANITIZER_GO MutexSet::MutexSet() {} -void MutexSet::Reset() {} +void MutexSet::Add(u64 id, bool write, u64 epoch) {} +void MutexSet::Del(u64 id, bool write) {} +void MutexSet::Remove(u64 id) {} void MutexSet::AddAddr(uptr addr, StackID stack_id, bool write) {} void MutexSet::DelAddr(uptr addr, bool destroy) {} uptr MutexSet::Size() const { return 0; } diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform.h b/compiler-rt/lib/tsan/rtl/tsan_platform.h index e28bac2457aa1..7ff0acace8f6d 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_platform.h +++ b/compiler-rt/lib/tsan/rtl/tsan_platform.h @@ -18,8 +18,8 @@ # error "Only 64-bit is supported" #endif -#include "sanitizer_common/sanitizer_common.h" #include "tsan_defs.h" +#include "tsan_trace.h" namespace __tsan { @@ -45,7 +45,9 @@ C/C++ on linux/x86_64 and freebsd/x86_64 3000 0000 0000 - 4000 0000 0000: metainfo (memory blocks and sync objects) 4000 0000 0000 - 5500 0000 0000: - 5500 0000 0000 - 5680 0000 0000: pie binaries without ASLR or on 4.1+ kernels -5680 0000 0000 - 7d00 0000 0000: - +5680 0000 0000 - 6000 0000 0000: - +6000 0000 0000 - 6200 0000 0000: traces +6200 0000 0000 - 7d00 0000 0000: - 7b00 0000 0000 - 7c00 0000 0000: heap 7c00 0000 0000 - 7e80 0000 0000: - 7e80 0000 0000 - 8000 0000 0000: modules and main thread stack @@ -65,6 +67,8 @@ C/C++ on netbsd/amd64 can reuse the same mapping: struct Mapping48AddressSpace { static const uptr kMetaShadowBeg = 0x300000000000ull; static const uptr kMetaShadowEnd = 0x340000000000ull; + static const uptr kTraceMemBeg = 0x600000000000ull; + static const uptr kTraceMemEnd = 0x620000000000ull; static const uptr kShadowBeg = 0x010000000000ull; static const uptr kShadowEnd = 0x200000000000ull; static const uptr kHeapMemBeg = 0x7b0000000000ull; @@ -85,12 +89,14 @@ struct Mapping48AddressSpace { C/C++ on linux/mips64 (40-bit VMA) 0000 0000 00 - 0100 0000 00: - (4 GB) 0100 0000 00 - 0200 0000 00: main binary (4 GB) -0200 0000 00 - 1200 0000 00: - (120 GB) -1200 0000 00 - 4000 0000 00: shadow (128 GB) +0200 0000 00 - 2000 0000 00: - (120 GB) +2000 0000 00 - 4000 0000 00: shadow (128 GB) 4000 0000 00 - 5000 0000 00: metainfo (memory blocks and sync objects) (64 GB) 5000 0000 00 - aa00 0000 00: - (360 GB) aa00 0000 00 - ab00 0000 00: main binary (PIE) (4 GB) -ab00 0000 00 - fe00 0000 00: - (332 GB) +ab00 0000 00 - b000 0000 00: - (20 GB) +b000 0000 00 - b200 0000 00: traces (8 GB) +b200 0000 00 - fe00 0000 00: - (304 GB) fe00 0000 00 - ff00 0000 00: heap (4 GB) ff00 0000 00 - ff80 0000 00: - (2 
GB) ff80 0000 00 - ffff ffff ff: modules and main thread stack (<2 GB) @@ -98,7 +104,9 @@ ff80 0000 00 - ffff ffff ff: modules and main thread stack (<2 GB) struct MappingMips64_40 { static const uptr kMetaShadowBeg = 0x4000000000ull; static const uptr kMetaShadowEnd = 0x5000000000ull; - static const uptr kShadowBeg = 0x1200000000ull; + static const uptr kTraceMemBeg = 0xb000000000ull; + static const uptr kTraceMemEnd = 0xb200000000ull; + static const uptr kShadowBeg = 0x2000000000ull; static const uptr kShadowEnd = 0x4000000000ull; static const uptr kHeapMemBeg = 0xfe00000000ull; static const uptr kHeapMemEnd = 0xff00000000ull; @@ -123,7 +131,9 @@ C/C++ on Darwin/iOS/ARM64 (36-bit VMA, 64 GB VM) 0400 0000 00 - 0c00 0000 00: shadow memory (32 GB) 0c00 0000 00 - 0d00 0000 00: - (4 GB) 0d00 0000 00 - 0e00 0000 00: metainfo (4 GB) -0e00 0000 00 - 1000 0000 00: - +0e00 0000 00 - 0f00 0000 00: - (4 GB) +0f00 0000 00 - 0fc0 0000 00: traces (3 GB) +0fc0 0000 00 - 1000 0000 00: - */ struct MappingAppleAarch64 { static const uptr kLoAppMemBeg = 0x0100000000ull; @@ -134,11 +144,13 @@ struct MappingAppleAarch64 { static const uptr kShadowEnd = 0x0c00000000ull; static const uptr kMetaShadowBeg = 0x0d00000000ull; static const uptr kMetaShadowEnd = 0x0e00000000ull; + static const uptr kTraceMemBeg = 0x0f00000000ull; + static const uptr kTraceMemEnd = 0x0fc0000000ull; static const uptr kHiAppMemBeg = 0x0fc0000000ull; static const uptr kHiAppMemEnd = 0x0fc0000000ull; static const uptr kShadowMsk = 0x0ull; static const uptr kShadowXor = 0x0ull; - static const uptr kShadowAdd = 0x0200000000ull; + static const uptr kShadowAdd = 0x0ull; static const uptr kVdsoBeg = 0x7000000000000000ull; static const uptr kMidAppMemBeg = 0; static const uptr kMidAppMemEnd = 0; @@ -147,25 +159,29 @@ struct MappingAppleAarch64 { /* C/C++ on linux/aarch64 (39-bit VMA) 0000 0010 00 - 0100 0000 00: main binary -0100 0000 00 - 0400 0000 00: - -0400 0000 00 - 2000 0000 00: shadow memory +0100 0000 00 - 0800 0000 00: - +0800 0000 00 - 2000 0000 00: shadow memory 2000 0000 00 - 3100 0000 00: - 3100 0000 00 - 3400 0000 00: metainfo 3400 0000 00 - 5500 0000 00: - 5500 0000 00 - 5600 0000 00: main binary (PIE) -5600 0000 00 - 7c00 0000 00: - +5600 0000 00 - 6000 0000 00: - +6000 0000 00 - 6200 0000 00: traces +6200 0000 00 - 7d00 0000 00: - 7c00 0000 00 - 7d00 0000 00: heap 7d00 0000 00 - 7fff ffff ff: modules and main thread stack */ struct MappingAarch64_39 { static const uptr kLoAppMemBeg = 0x0000001000ull; static const uptr kLoAppMemEnd = 0x0100000000ull; - static const uptr kShadowBeg = 0x0400000000ull; + static const uptr kShadowBeg = 0x0800000000ull; static const uptr kShadowEnd = 0x2000000000ull; static const uptr kMetaShadowBeg = 0x3100000000ull; static const uptr kMetaShadowEnd = 0x3400000000ull; static const uptr kMidAppMemBeg = 0x5500000000ull; - static const uptr kMidAppMemEnd = 0x5600000000ull; + static const uptr kMidAppMemEnd = 0x5600000000ull; + static const uptr kTraceMemBeg = 0x6000000000ull; + static const uptr kTraceMemEnd = 0x6200000000ull; static const uptr kHeapMemBeg = 0x7c00000000ull; static const uptr kHeapMemEnd = 0x7d00000000ull; static const uptr kHiAppMemBeg = 0x7e00000000ull; @@ -179,13 +195,15 @@ struct MappingAarch64_39 { /* C/C++ on linux/aarch64 (42-bit VMA) 00000 0010 00 - 01000 0000 00: main binary -01000 0000 00 - 08000 0000 00: - -08000 0000 00 - 20000 0000 00: shadow memory +01000 0000 00 - 10000 0000 00: - +10000 0000 00 - 20000 0000 00: shadow memory 20000 0000 00 - 26000 0000 00: - 26000 0000 
00 - 28000 0000 00: metainfo 28000 0000 00 - 2aa00 0000 00: - 2aa00 0000 00 - 2ab00 0000 00: main binary (PIE) -2ab00 0000 00 - 3e000 0000 00: - +2ab00 0000 00 - 36200 0000 00: - +36200 0000 00 - 36240 0000 00: traces +36240 0000 00 - 3e000 0000 00: - 3e000 0000 00 - 3f000 0000 00: heap 3f000 0000 00 - 3ffff ffff ff: modules and main thread stack */ @@ -193,12 +211,14 @@ struct MappingAarch64_42 { static const uptr kBroken = kBrokenReverseMapping; static const uptr kLoAppMemBeg = 0x00000001000ull; static const uptr kLoAppMemEnd = 0x01000000000ull; - static const uptr kShadowBeg = 0x08000000000ull; + static const uptr kShadowBeg = 0x10000000000ull; static const uptr kShadowEnd = 0x20000000000ull; static const uptr kMetaShadowBeg = 0x26000000000ull; static const uptr kMetaShadowEnd = 0x28000000000ull; static const uptr kMidAppMemBeg = 0x2aa00000000ull; - static const uptr kMidAppMemEnd = 0x2ab00000000ull; + static const uptr kMidAppMemEnd = 0x2ab00000000ull; + static const uptr kTraceMemBeg = 0x36200000000ull; + static const uptr kTraceMemEnd = 0x36400000000ull; static const uptr kHeapMemBeg = 0x3e000000000ull; static const uptr kHeapMemEnd = 0x3f000000000ull; static const uptr kHiAppMemBeg = 0x3f000000000ull; @@ -212,12 +232,14 @@ struct MappingAarch64_42 { struct MappingAarch64_48 { static const uptr kLoAppMemBeg = 0x0000000001000ull; static const uptr kLoAppMemEnd = 0x0000200000000ull; - static const uptr kShadowBeg = 0x0001000000000ull; + static const uptr kShadowBeg = 0x0002000000000ull; static const uptr kShadowEnd = 0x0004000000000ull; static const uptr kMetaShadowBeg = 0x0005000000000ull; static const uptr kMetaShadowEnd = 0x0006000000000ull; static const uptr kMidAppMemBeg = 0x0aaaa00000000ull; - static const uptr kMidAppMemEnd = 0x0aaaf00000000ull; + static const uptr kMidAppMemEnd = 0x0aaaf00000000ull; + static const uptr kTraceMemBeg = 0x0f06000000000ull; + static const uptr kTraceMemEnd = 0x0f06200000000ull; static const uptr kHeapMemBeg = 0x0ffff00000000ull; static const uptr kHeapMemEnd = 0x0ffff00000000ull; static const uptr kHiAppMemBeg = 0x0ffff00000000ull; @@ -235,7 +257,9 @@ C/C++ on linux/powerpc64 (44-bit VMA) 0001 0000 0000 - 0b00 0000 0000: shadow 0b00 0000 0000 - 0b00 0000 0000: - 0b00 0000 0000 - 0d00 0000 0000: metainfo (memory blocks and sync objects) -0d00 0000 0000 - 0f00 0000 0000: - +0d00 0000 0000 - 0d00 0000 0000: - +0d00 0000 0000 - 0f00 0000 0000: traces +0f00 0000 0000 - 0f00 0000 0000: - 0f00 0000 0000 - 0f50 0000 0000: heap 0f50 0000 0000 - 0f60 0000 0000: - 0f60 0000 0000 - 1000 0000 0000: modules and main thread stack @@ -245,6 +269,8 @@ struct MappingPPC64_44 { kBrokenMapping | kBrokenReverseMapping | kBrokenLinearity; static const uptr kMetaShadowBeg = 0x0b0000000000ull; static const uptr kMetaShadowEnd = 0x0d0000000000ull; + static const uptr kTraceMemBeg = 0x0d0000000000ull; + static const uptr kTraceMemEnd = 0x0f0000000000ull; static const uptr kShadowBeg = 0x000100000000ull; static const uptr kShadowEnd = 0x0b0000000000ull; static const uptr kLoAppMemBeg = 0x000000000100ull; @@ -269,7 +295,8 @@ C/C++ on linux/powerpc64 (46-bit VMA) 1000 0000 0000 - 1000 0000 0000: - 1000 0000 0000 - 2000 0000 0000: metainfo (memory blocks and sync objects) 2000 0000 0000 - 2000 0000 0000: - -1200 0000 0000 - 3d00 0000 0000: - +2000 0000 0000 - 2200 0000 0000: traces +2200 0000 0000 - 3d00 0000 0000: - 3d00 0000 0000 - 3e00 0000 0000: heap 3e00 0000 0000 - 3e80 0000 0000: - 3e80 0000 0000 - 4000 0000 0000: modules and main thread stack @@ -277,6 +304,8 @@ C/C++ 
on linux/powerpc64 (46-bit VMA) struct MappingPPC64_46 { static const uptr kMetaShadowBeg = 0x100000000000ull; static const uptr kMetaShadowEnd = 0x200000000000ull; + static const uptr kTraceMemBeg = 0x200000000000ull; + static const uptr kTraceMemEnd = 0x220000000000ull; static const uptr kShadowBeg = 0x010000000000ull; static const uptr kShadowEnd = 0x100000000000ull; static const uptr kHeapMemBeg = 0x3d0000000000ull; @@ -300,7 +329,9 @@ C/C++ on linux/powerpc64 (47-bit VMA) 0100 0000 0000 - 1000 0000 0000: shadow 1000 0000 0000 - 1000 0000 0000: - 1000 0000 0000 - 2000 0000 0000: metainfo (memory blocks and sync objects) -2000 0000 0000 - 7d00 0000 0000: - +2000 0000 0000 - 2000 0000 0000: - +2000 0000 0000 - 2200 0000 0000: traces +2200 0000 0000 - 7d00 0000 0000: - 7d00 0000 0000 - 7e00 0000 0000: heap 7e00 0000 0000 - 7e80 0000 0000: - 7e80 0000 0000 - 8000 0000 0000: modules and main thread stack @@ -308,6 +339,8 @@ C/C++ on linux/powerpc64 (47-bit VMA) struct MappingPPC64_47 { static const uptr kMetaShadowBeg = 0x100000000000ull; static const uptr kMetaShadowEnd = 0x200000000000ull; + static const uptr kTraceMemBeg = 0x200000000000ull; + static const uptr kTraceMemEnd = 0x220000000000ull; static const uptr kShadowBeg = 0x010000000000ull; static const uptr kShadowEnd = 0x100000000000ull; static const uptr kHeapMemBeg = 0x7d0000000000ull; @@ -329,17 +362,21 @@ C/C++ on linux/s390x While the kernel provides a 64-bit address space, we have to restrict ourselves to 48 bits due to how e.g. SyncVar::GetId() works. 0000 0000 1000 - 0e00 0000 0000: binary, modules, stacks - 14 TiB -0e00 0000 0000 - 2000 0000 0000: - -2000 0000 0000 - 8000 0000 0000: shadow - 64TiB (4 * app) +0e00 0000 0000 - 4000 0000 0000: - +4000 0000 0000 - 8000 0000 0000: shadow - 64TiB (4 * app) 8000 0000 0000 - 9000 0000 0000: - 9000 0000 0000 - 9800 0000 0000: metainfo - 8TiB (0.5 * app) -9800 0000 0000 - be00 0000 0000: - +9800 0000 0000 - a000 0000 0000: - +a000 0000 0000 - b000 0000 0000: traces - 16TiB (max history * 128k threads) +b000 0000 0000 - be00 0000 0000: - be00 0000 0000 - c000 0000 0000: heap - 2TiB (max supported by the allocator) */ struct MappingS390x { static const uptr kMetaShadowBeg = 0x900000000000ull; static const uptr kMetaShadowEnd = 0x980000000000ull; - static const uptr kShadowBeg = 0x200000000000ull; + static const uptr kTraceMemBeg = 0xa00000000000ull; + static const uptr kTraceMemEnd = 0xb00000000000ull; + static const uptr kShadowBeg = 0x400000000000ull; static const uptr kShadowEnd = 0x800000000000ull; static const uptr kHeapMemBeg = 0xbe0000000000ull; static const uptr kHeapMemEnd = 0xc00000000000ull; @@ -363,12 +400,16 @@ struct MappingS390x { 2000 0000 0000 - 2380 0000 0000: shadow 2380 0000 0000 - 3000 0000 0000: - 3000 0000 0000 - 4000 0000 0000: metainfo (memory blocks and sync objects) -4000 0000 0000 - 8000 0000 0000: - +4000 0000 0000 - 6000 0000 0000: - +6000 0000 0000 - 6200 0000 0000: traces +6200 0000 0000 - 8000 0000 0000: - */ struct MappingGo48 { static const uptr kMetaShadowBeg = 0x300000000000ull; static const uptr kMetaShadowEnd = 0x400000000000ull; + static const uptr kTraceMemBeg = 0x600000000000ull; + static const uptr kTraceMemEnd = 0x620000000000ull; static const uptr kShadowBeg = 0x200000000000ull; static const uptr kShadowEnd = 0x238000000000ull; static const uptr kLoAppMemBeg = 0x000000001000ull; @@ -391,7 +432,7 @@ struct MappingGo48 { 00c0 0000 0000 - 00e0 0000 0000: heap 00e0 0000 0000 - 0100 0000 0000: - 0100 0000 0000 - 0500 0000 0000: shadow -0500 0000 
0000 - 0700 0000 0000: - +0500 0000 0000 - 0700 0000 0000: traces 0700 0000 0000 - 0770 0000 0000: metainfo (memory blocks and sync objects) 07d0 0000 0000 - 8000 0000 0000: - */ @@ -399,6 +440,8 @@ struct MappingGo48 { struct MappingGoWindows { static const uptr kMetaShadowBeg = 0x070000000000ull; static const uptr kMetaShadowEnd = 0x077000000000ull; + static const uptr kTraceMemBeg = 0x050000000000ull; + static const uptr kTraceMemEnd = 0x070000000000ull; static const uptr kShadowBeg = 0x010000000000ull; static const uptr kShadowEnd = 0x050000000000ull; static const uptr kLoAppMemBeg = 0x000000001000ull; @@ -423,12 +466,16 @@ struct MappingGoWindows { 2000 0000 0000 - 2380 0000 0000: shadow 2380 0000 0000 - 2400 0000 0000: - 2400 0000 0000 - 3400 0000 0000: metainfo (memory blocks and sync objects) -3400 0000 0000 - 4000 0000 0000: - +3400 0000 0000 - 3600 0000 0000: - +3600 0000 0000 - 3800 0000 0000: traces +3800 0000 0000 - 4000 0000 0000: - */ struct MappingGoPPC64_46 { static const uptr kMetaShadowBeg = 0x240000000000ull; static const uptr kMetaShadowEnd = 0x340000000000ull; + static const uptr kTraceMemBeg = 0x360000000000ull; + static const uptr kTraceMemEnd = 0x380000000000ull; static const uptr kShadowBeg = 0x200000000000ull; static const uptr kShadowEnd = 0x238000000000ull; static const uptr kLoAppMemBeg = 0x000000001000ull; @@ -453,12 +500,16 @@ struct MappingGoPPC64_46 { 2000 0000 0000 - 3000 0000 0000: shadow 3000 0000 0000 - 3000 0000 0000: - 3000 0000 0000 - 4000 0000 0000: metainfo (memory blocks and sync objects) -4000 0000 0000 - 8000 0000 0000: - +4000 0000 0000 - 6000 0000 0000: - +6000 0000 0000 - 6200 0000 0000: traces +6200 0000 0000 - 8000 0000 0000: - */ struct MappingGoPPC64_47 { static const uptr kMetaShadowBeg = 0x300000000000ull; static const uptr kMetaShadowEnd = 0x400000000000ull; + static const uptr kTraceMemBeg = 0x600000000000ull; + static const uptr kTraceMemEnd = 0x620000000000ull; static const uptr kShadowBeg = 0x200000000000ull; static const uptr kShadowEnd = 0x300000000000ull; static const uptr kLoAppMemBeg = 0x000000001000ull; @@ -483,11 +534,15 @@ struct MappingGoPPC64_47 { 2000 0000 0000 - 3000 0000 0000: shadow 3000 0000 0000 - 3000 0000 0000: - 3000 0000 0000 - 4000 0000 0000: metainfo (memory blocks and sync objects) -4000 0000 0000 - 8000 0000 0000: - +4000 0000 0000 - 6000 0000 0000: - +6000 0000 0000 - 6200 0000 0000: traces +6200 0000 0000 - 8000 0000 0000: - */ struct MappingGoAarch64 { static const uptr kMetaShadowBeg = 0x300000000000ull; static const uptr kMetaShadowEnd = 0x400000000000ull; + static const uptr kTraceMemBeg = 0x600000000000ull; + static const uptr kTraceMemEnd = 0x620000000000ull; static const uptr kShadowBeg = 0x200000000000ull; static const uptr kShadowEnd = 0x300000000000ull; static const uptr kLoAppMemBeg = 0x000000001000ull; @@ -513,11 +568,15 @@ Go on linux/mips64 (47-bit VMA) 2000 0000 0000 - 3000 0000 0000: shadow 3000 0000 0000 - 3000 0000 0000: - 3000 0000 0000 - 4000 0000 0000: metainfo (memory blocks and sync objects) -3200 0000 0000 - 8000 0000 0000: - +4000 0000 0000 - 6000 0000 0000: - +6000 0000 0000 - 6200 0000 0000: traces +6200 0000 0000 - 8000 0000 0000: - */ struct MappingGoMips64_47 { static const uptr kMetaShadowBeg = 0x300000000000ull; static const uptr kMetaShadowEnd = 0x400000000000ull; + static const uptr kTraceMemBeg = 0x600000000000ull; + static const uptr kTraceMemEnd = 0x620000000000ull; static const uptr kShadowBeg = 0x200000000000ull; static const uptr kShadowEnd = 0x300000000000ull; 
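// Illustrative aside (not part of this patch): the per-platform kTraceMemBeg/
// kTraceMemEnd ranges being restored here must hold one kTotalTraceSize-sized
// region per thread; GetThreadTrace() later in this patch computes
// kTraceMemBeg + tid * kTotalTraceSize and DCHECKs it against kTraceMemEnd.
// The sketch below mirrors that arithmetic with plain integers; the 64 KB
// rounding matches the kTotalTraceSize expression ("& ~((64 << 10) - 1)").
// The sample region is taken from the MappingGoMips64_47 values shown nearby;
// the 32 MB per-thread figure is a made-up placeholder, not the real constant.
#include <cstdint>
#include <cstdio>

namespace trace_layout_sketch {

constexpr uint64_t RoundUpTo64K(uint64_t bytes) {
  // Same rounding as kTotalTraceSize: add 64K-1, then clear the low bits.
  return (bytes + (64 << 10) - 1) & ~uint64_t((64 << 10) - 1);
}

constexpr uint64_t MaxTracedThreads(uint64_t trace_mem_beg,
                                    uint64_t trace_mem_end,
                                    uint64_t per_thread_bytes) {
  // GetThreadTrace(tid) = trace_mem_beg + tid * per_thread_bytes must stay
  // below trace_mem_end, so the region holds this many whole traces.
  return (trace_mem_end - trace_mem_beg) / RoundUpTo64K(per_thread_bytes);
}

}  // namespace trace_layout_sketch

int main() {
  using namespace trace_layout_sketch;
  std::printf("threads that fit: %llu\n",
              (unsigned long long)MaxTracedThreads(
                  0x600000000000ull, 0x620000000000ull, 32ull << 20));
}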
static const uptr kLoAppMemBeg = 0x000000001000ull; @@ -541,10 +600,14 @@ Go on linux/s390x 4000 0000 0000 - 8000 0000 0000: shadow - 64TiB (4 * app) 8000 0000 0000 - 9000 0000 0000: - 9000 0000 0000 - 9800 0000 0000: metainfo - 8TiB (0.5 * app) +9800 0000 0000 - a000 0000 0000: - +a000 0000 0000 - b000 0000 0000: traces - 16TiB (max history * 128k threads) */ struct MappingGoS390x { static const uptr kMetaShadowBeg = 0x900000000000ull; static const uptr kMetaShadowEnd = 0x980000000000ull; + static const uptr kTraceMemBeg = 0xa00000000000ull; + static const uptr kTraceMemEnd = 0xb00000000000ull; static const uptr kShadowBeg = 0x400000000000ull; static const uptr kShadowEnd = 0x800000000000ull; static const uptr kLoAppMemBeg = 0x000000001000ull; @@ -652,6 +715,8 @@ enum MappingType { kShadowEnd, kMetaShadowBeg, kMetaShadowEnd, + kTraceMemBeg, + kTraceMemEnd, kVdsoBeg, }; @@ -685,6 +750,10 @@ struct MappingField { return Mapping::kMetaShadowBeg; case kMetaShadowEnd: return Mapping::kMetaShadowEnd; + case kTraceMemBeg: + return Mapping::kTraceMemBeg; + case kTraceMemEnd: + return Mapping::kTraceMemEnd; } Die(); } @@ -723,6 +792,11 @@ uptr MetaShadowBeg(void) { return SelectMapping(kMetaShadowBeg); } ALWAYS_INLINE uptr MetaShadowEnd(void) { return SelectMapping(kMetaShadowEnd); } +ALWAYS_INLINE +uptr TraceMemBeg(void) { return SelectMapping(kTraceMemBeg); } +ALWAYS_INLINE +uptr TraceMemEnd(void) { return SelectMapping(kTraceMemEnd); } + struct IsAppMemImpl { template static bool Apply(uptr mem) { @@ -860,10 +934,43 @@ inline uptr RestoreAddr(uptr addr) { return SelectMapping(addr); } +// The additional page is to catch shadow stack overflow as paging fault. +// Windows wants 64K alignment for mmaps. +const uptr kTotalTraceSize = (kTraceSize * sizeof(Event) + sizeof(Trace) + + (64 << 10) + (64 << 10) - 1) & ~((64 << 10) - 1); + +struct GetThreadTraceImpl { + template + static uptr Apply(uptr tid) { + uptr p = Mapping::kTraceMemBeg + tid * kTotalTraceSize; + DCHECK_LT(p, Mapping::kTraceMemEnd); + return p; + } +}; + +ALWAYS_INLINE +uptr GetThreadTrace(int tid) { return SelectMapping(tid); } + +struct GetThreadTraceHeaderImpl { + template + static uptr Apply(uptr tid) { + uptr p = Mapping::kTraceMemBeg + tid * kTotalTraceSize + + kTraceSize * sizeof(Event); + DCHECK_LT(p, Mapping::kTraceMemEnd); + return p; + } +}; + +ALWAYS_INLINE +uptr GetThreadTraceHeader(int tid) { + return SelectMapping(tid); +} + void InitializePlatform(); void InitializePlatformEarly(); void CheckAndProtect(); void InitializeShadowMemoryPlatform(); +void FlushShadowMemory(); void WriteMemoryProfile(char *buf, uptr buf_size, u64 uptime_ns); int ExtractResolvFDs(void *state, int *fds, int nfd); int ExtractRecvmsgFDs(void *msg, int *fds, int nfd); diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp index 17dbdff8a5393..73ec14892d28f 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp @@ -94,6 +94,7 @@ enum { MemMeta, MemFile, MemMmap, + MemTrace, MemHeap, MemOther, MemCount, @@ -111,6 +112,8 @@ void FillProfileCallback(uptr p, uptr rss, bool file, uptr *mem) { mem[file ? 
MemFile : MemMmap] += rss; else if (p >= HeapMemBeg() && p < HeapMemEnd()) mem[MemHeap] += rss; + else if (p >= TraceMemBeg() && p < TraceMemEnd()) + mem[MemTrace] += rss; else mem[MemOther] += rss; } @@ -123,33 +126,42 @@ void WriteMemoryProfile(char *buf, uptr buf_size, u64 uptime_ns) { StackDepotStats stacks = StackDepotGetStats(); uptr nthread, nlive; ctx->thread_registry.GetNumberOfThreads(&nthread, &nlive); - uptr trace_mem; - { - Lock l(&ctx->slot_mtx); - trace_mem = ctx->trace_part_total_allocated * sizeof(TracePart); - } uptr internal_stats[AllocatorStatCount]; internal_allocator()->GetStats(internal_stats); // All these are allocated from the common mmap region. - mem[MemMmap] -= meta.mem_block + meta.sync_obj + trace_mem + - stacks.allocated + internal_stats[AllocatorStatMapped]; + mem[MemMmap] -= meta.mem_block + meta.sync_obj + stacks.allocated + + internal_stats[AllocatorStatMapped]; if (s64(mem[MemMmap]) < 0) mem[MemMmap] = 0; internal_snprintf( buf, buf_size, - "==%zu== %llus [%zu]: RSS %zd MB: shadow:%zd meta:%zd file:%zd" - " mmap:%zd heap:%zd other:%zd intalloc:%zd memblocks:%zd syncobj:%zu" - " trace:%zu stacks=%zd threads=%zu/%zu\n", - internal_getpid(), uptime_ns / (1000 * 1000 * 1000), ctx->global_epoch, - mem[MemTotal] >> 20, mem[MemShadow] >> 20, mem[MemMeta] >> 20, - mem[MemFile] >> 20, mem[MemMmap] >> 20, mem[MemHeap] >> 20, + "%llus: RSS %zd MB: shadow:%zd meta:%zd file:%zd mmap:%zd" + " trace:%zd heap:%zd other:%zd intalloc:%zd memblocks:%zd syncobj:%zu" + " stacks=%zd[%zd] nthr=%zd/%zd\n", + uptime_ns / (1000 * 1000 * 1000), mem[MemTotal] >> 20, + mem[MemShadow] >> 20, mem[MemMeta] >> 20, mem[MemFile] >> 20, + mem[MemMmap] >> 20, mem[MemTrace] >> 20, mem[MemHeap] >> 20, mem[MemOther] >> 20, internal_stats[AllocatorStatMapped] >> 20, - meta.mem_block >> 20, meta.sync_obj >> 20, trace_mem >> 20, - stacks.allocated >> 20, nlive, nthread); + meta.mem_block >> 20, meta.sync_obj >> 20, stacks.allocated >> 20, + stacks.n_uniq_ids, nlive, nthread); +} + +# if SANITIZER_LINUX +void FlushShadowMemoryCallback( + const SuspendedThreadsList &suspended_threads_list, + void *argument) { + ReleaseMemoryPagesToOS(ShadowBeg(), ShadowEnd()); +} +#endif + +void FlushShadowMemory() { +#if SANITIZER_LINUX + StopTheWorld(FlushShadowMemoryCallback, 0); +#endif } #if !SANITIZER_GO -// Mark shadow for .rodata sections with the special Shadow::kRodata marker. +// Mark shadow for .rodata sections with the special kShadowRodata marker. // Accesses to .rodata can't race, so this saves time, memory and trace space. static void MapRodata() { // First create temp file. @@ -170,13 +182,13 @@ static void MapRodata() { return; internal_unlink(name); // Unlink it now, so that we can reuse the buffer. fd_t fd = openrv; - // Fill the file with Shadow::kRodata. + // Fill the file with kShadowRodata. const uptr kMarkerSize = 512 * 1024 / sizeof(RawShadow); InternalMmapVector marker(kMarkerSize); // volatile to prevent insertion of memset for (volatile RawShadow *p = marker.data(); p < marker.data() + kMarkerSize; p++) - *p = Shadow::kRodata; + *p = kShadowRodata; internal_write(fd, marker.data(), marker.size() * sizeof(RawShadow)); // Map the file into memory. 
uptr page = internal_mmap(0, GetPageSizeCached(), PROT_READ | PROT_WRITE, diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_mac.cpp b/compiler-rt/lib/tsan/rtl/tsan_platform_mac.cpp index 52af76f2f28ce..3faa2d0c6192c 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_platform_mac.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_platform_mac.cpp @@ -112,6 +112,9 @@ void cur_thread_finalize() { } #endif +void FlushShadowMemory() { +} + static void RegionMemUsage(uptr start, uptr end, uptr *res, uptr *dirty) { vm_address_t address = start; vm_address_t end_address = end; @@ -139,8 +142,10 @@ static void RegionMemUsage(uptr start, uptr end, uptr *res, uptr *dirty) { void WriteMemoryProfile(char *buf, uptr buf_size, u64 uptime_ns) { uptr shadow_res, shadow_dirty; uptr meta_res, meta_dirty; + uptr trace_res, trace_dirty; RegionMemUsage(ShadowBeg(), ShadowEnd(), &shadow_res, &shadow_dirty); RegionMemUsage(MetaShadowBeg(), MetaShadowEnd(), &meta_res, &meta_dirty); + RegionMemUsage(TraceMemBeg(), TraceMemEnd(), &trace_res, &trace_dirty); #if !SANITIZER_GO uptr low_res, low_dirty; @@ -161,6 +166,7 @@ void WriteMemoryProfile(char *buf, uptr buf_size, u64 uptime_ns) { buf, buf_size, "shadow (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n" "meta (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n" + "traces (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n" # if !SANITIZER_GO "low app (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n" "high app (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n" @@ -173,6 +179,7 @@ void WriteMemoryProfile(char *buf, uptr buf_size, u64 uptime_ns) { "------------------------------\n", ShadowBeg(), ShadowEnd(), shadow_res / 1024, shadow_dirty / 1024, MetaShadowBeg(), MetaShadowEnd(), meta_res / 1024, meta_dirty / 1024, + TraceMemBeg(), TraceMemEnd(), trace_res / 1024, trace_dirty / 1024, # if !SANITIZER_GO LoAppMemBeg(), LoAppMemEnd(), low_res / 1024, low_dirty / 1024, HiAppMemBeg(), HiAppMemEnd(), high_res / 1024, high_dirty / 1024, diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_posix.cpp b/compiler-rt/lib/tsan/rtl/tsan_platform_posix.cpp index 763a533de525b..763ac444377e0 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_platform_posix.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_platform_posix.cpp @@ -113,20 +113,24 @@ void CheckAndProtect() { # if defined(__aarch64__) && defined(__APPLE__) && SANITIZER_IOS ProtectRange(HeapMemEnd(), ShadowBeg()); ProtectRange(ShadowEnd(), MetaShadowBeg()); - ProtectRange(MetaShadowEnd(), HeapMemBeg()); -# else + ProtectRange(MetaShadowEnd(), TraceMemBeg()); +#else ProtectRange(LoAppMemEnd(), ShadowBeg()); ProtectRange(ShadowEnd(), MetaShadowBeg()); if (MidAppMemBeg()) { ProtectRange(MetaShadowEnd(), MidAppMemBeg()); - ProtectRange(MidAppMemEnd(), HeapMemBeg()); + ProtectRange(MidAppMemEnd(), TraceMemBeg()); } else { - ProtectRange(MetaShadowEnd(), HeapMemBeg()); + ProtectRange(MetaShadowEnd(), TraceMemBeg()); } + // Memory for traces is mapped lazily in MapThreadTrace. + // Protect the whole range for now, so that user does not map something here. + ProtectRange(TraceMemBeg(), TraceMemEnd()); + ProtectRange(TraceMemEnd(), HeapMemBeg()); ProtectRange(HeapEnd(), HiAppMemBeg()); -# endif +#endif -# if defined(__s390x__) +#if defined(__s390x__) // Protect the rest of the address space. 
const uptr user_addr_max_l4 = 0x0020000000000000ull; const uptr user_addr_max_l5 = 0xfffffffffffff000ull; diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_windows.cpp b/compiler-rt/lib/tsan/rtl/tsan_platform_windows.cpp index eb8f354742f4a..fea893768c79f 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_platform_windows.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_platform_windows.cpp @@ -20,6 +20,9 @@ namespace __tsan { +void FlushShadowMemory() { +} + void WriteMemoryProfile(char *buf, uptr buf_size, u64 uptime_ns) {} void InitializePlatformEarly() { diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp index c68a13d718ce9..8126a503ff6dc 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp @@ -57,348 +57,110 @@ Context *ctx; bool OnFinalize(bool failed); void OnInitialize(); #else +#include SANITIZER_WEAK_CXX_DEFAULT_IMPL bool OnFinalize(bool failed) { -# if !SANITIZER_GO +#if !SANITIZER_GO if (on_finalize) return on_finalize(failed); -# endif +#endif return failed; } - SANITIZER_WEAK_CXX_DEFAULT_IMPL void OnInitialize() { -# if !SANITIZER_GO +#if !SANITIZER_GO if (on_initialize) on_initialize(); -# endif +#endif } #endif -static TracePart* TracePartAlloc(ThreadState* thr) { - TracePart* part = nullptr; - { - Lock lock(&ctx->slot_mtx); - uptr max_parts = Trace::kMinParts + flags()->history_size; - Trace* trace = &thr->tctx->trace; - if (trace->parts_allocated == max_parts || - ctx->trace_part_finished_excess) { - part = ctx->trace_part_recycle.PopFront(); - DPrintf("#%d: TracePartAlloc: part=%p\n", thr->tid, part); - if (part && part->trace) { - Trace* trace1 = part->trace; - Lock trace_lock(&trace1->mtx); - part->trace = nullptr; - TracePart* part1 = trace1->parts.PopFront(); - CHECK_EQ(part, part1); - if (trace1->parts_allocated > trace1->parts.Size()) { - ctx->trace_part_finished_excess += - trace1->parts_allocated - trace1->parts.Size(); - trace1->parts_allocated = trace1->parts.Size(); - } - } - } - if (trace->parts_allocated < max_parts) { - trace->parts_allocated++; - if (ctx->trace_part_finished_excess) - ctx->trace_part_finished_excess--; +static ThreadContextBase *CreateThreadContext(Tid tid) { + // Map thread trace when context is created. + char name[50]; + internal_snprintf(name, sizeof(name), "trace %u", tid); + MapThreadTrace(GetThreadTrace(tid), TraceSize() * sizeof(Event), name); + const uptr hdr = GetThreadTraceHeader(tid); + internal_snprintf(name, sizeof(name), "trace header %u", tid); + MapThreadTrace(hdr, sizeof(Trace), name); + new((void*)hdr) Trace(); + // We are going to use only a small part of the trace with the default + // value of history_size. However, the constructor writes to the whole trace. + // Release the unused part. 
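// Illustrative aside (not part of this patch): a minimal sketch of the
// history_size arithmetic referenced by the restored flag description in
// tsan_flags.inc ("history_size=0 amounts to 32K memory accesses, each next
// value doubles ... up to 4M at history_size=7") and by the CreateThreadContext
// comment about releasing the unused part of the trace. The 8-byte event size
// is an assumption made only for a rough memory estimate; the runtime's real
// TraceSize()/TraceParts() helpers are not reproduced here.
#include <cstdint>
#include <cstdio>

// Number of per-thread trace events implied by a history_size in [0..7].
constexpr uint64_t TraceEventsForHistory(int history_size) {
  return uint64_t(32) * 1024 << history_size;  // 32K, doubled history_size times
}

int main() {
  for (int h = 0; h <= 7; h++) {
    uint64_t events = TraceEventsForHistory(h);
    // Assuming 8 bytes per trace event purely for illustration.
    std::printf("history_size=%d -> %llu events (~%llu KB per thread)\n", h,
                (unsigned long long)events,
                (unsigned long long)(events * 8 / 1024));
  }
}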
+ uptr hdr_end = hdr + sizeof(Trace); + hdr_end -= sizeof(TraceHeader) * (kTraceParts - TraceParts()); + hdr_end = RoundUp(hdr_end, GetPageSizeCached()); + if (hdr_end < hdr + sizeof(Trace)) { + ReleaseMemoryPagesToOS(hdr_end, hdr + sizeof(Trace)); + uptr unused = hdr + sizeof(Trace) - hdr_end; + if (hdr_end != (uptr)MmapFixedNoAccess(hdr_end, unused)) { + Report("ThreadSanitizer: failed to mprotect [0x%zx-0x%zx) \n", hdr_end, + unused); + CHECK("unable to mprotect" && 0); } - if (!part) - ctx->trace_part_total_allocated++; - else if (ctx->trace_part_recycle_finished) - ctx->trace_part_recycle_finished--; } - if (!part) - part = new (MmapOrDie(sizeof(*part), "TracePart")) TracePart(); - return part; + return New(tid); } -static void TracePartFree(TracePart* part) REQUIRES(ctx->slot_mtx) { - DCHECK(part->trace); - part->trace = nullptr; - ctx->trace_part_recycle.PushFront(part); -} - -void TraceResetForTesting() { - Lock lock(&ctx->slot_mtx); - while (auto* part = ctx->trace_part_recycle.PopFront()) { - if (auto trace = part->trace) - CHECK_EQ(trace->parts.PopFront(), part); - UnmapOrDie(part, sizeof(*part)); - } - ctx->trace_part_total_allocated = 0; - ctx->trace_part_recycle_finished = 0; - ctx->trace_part_finished_excess = 0; -} - -static void DoResetImpl(uptr epoch) { - ThreadRegistryLock lock0(&ctx->thread_registry); - Lock lock1(&ctx->slot_mtx); - CHECK_EQ(ctx->global_epoch, epoch); - ctx->global_epoch++; - CHECK(!ctx->resetting); - ctx->resetting = true; - for (u32 i = ctx->thread_registry.NumThreadsLocked(); i--;) { - ThreadContext* tctx = (ThreadContext*)ctx->thread_registry.GetThreadLocked( - static_cast(i)); - // Potentially we could purge all ThreadStatusDead threads from the - // registry. Since we reset all shadow, they can't race with anything - // anymore. However, their tid's can still be stored in some aux places - // (e.g. tid of thread that created something). - auto trace = &tctx->trace; - Lock lock(&trace->mtx); - bool attached = tctx->thr && tctx->thr->slot; - auto parts = &trace->parts; - bool local = false; - while (!parts->Empty()) { - auto part = parts->Front(); - local = local || part == trace->local_head; - if (local) - CHECK(!ctx->trace_part_recycle.Queued(part)); - else - ctx->trace_part_recycle.Remove(part); - if (attached && parts->Size() == 1) { - // The thread is running and this is the last/current part. - // Set the trace position to the end of the current part - // to force the thread to call SwitchTracePart and re-attach - // to a new slot and allocate a new trace part. - // Note: the thread is concurrently modifying the position as well, - // so this is only best-effort. The thread can only modify position - // within this part, because switching parts is protected by - // slot/trace mutexes that we hold here. 
- atomic_store_relaxed( - &tctx->thr->trace_pos, - reinterpret_cast(&part->events[TracePart::kSize])); - break; - } - parts->Remove(part); - TracePartFree(part); - } - CHECK_LE(parts->Size(), 1); - trace->local_head = parts->Front(); - if (tctx->thr && !tctx->thr->slot) { - atomic_store_relaxed(&tctx->thr->trace_pos, 0); - tctx->thr->trace_prev_pc = 0; - } - if (trace->parts_allocated > trace->parts.Size()) { - ctx->trace_part_finished_excess += - trace->parts_allocated - trace->parts.Size(); - trace->parts_allocated = trace->parts.Size(); - } - } - while (ctx->slot_queue.PopFront()) { - } - for (auto& slot : ctx->slots) { - slot.SetEpoch(kEpochZero); - slot.journal.Reset(); - slot.thr = nullptr; - ctx->slot_queue.PushBack(&slot); - } - - DPrintf("Resetting shadow...\n"); - if (!MmapFixedSuperNoReserve(ShadowBeg(), ShadowEnd() - ShadowBeg(), - "shadow")) { - Printf("failed to reset shadow memory\n"); - Die(); - } - DPrintf("Resetting meta shadow...\n"); - ctx->metamap.ResetClocks(); - ctx->resetting = false; -} - -// Clang does not understand locking all slots in the loop: -// error: expecting mutex 'slot.mtx' to be held at start of each loop -void DoReset(ThreadState* thr, uptr epoch) NO_THREAD_SAFETY_ANALYSIS { - { - Lock l(&ctx->multi_slot_mtx); - for (auto& slot : ctx->slots) { - slot.mtx.Lock(); - if (UNLIKELY(epoch == 0)) - epoch = ctx->global_epoch; - if (UNLIKELY(epoch != ctx->global_epoch)) { - // Epoch can't change once we've locked the first slot. - CHECK_EQ(slot.sid, 0); - slot.mtx.Unlock(); - return; - } - } - } - DPrintf("#%d: DoReset epoch=%lu\n", thr ? thr->tid : -1, epoch); - DoResetImpl(epoch); - for (auto& slot : ctx->slots) slot.mtx.Unlock(); -} - -void FlushShadowMemory() { DoReset(nullptr, 0); } - -static TidSlot* FindSlotAndLock(ThreadState* thr) - ACQUIRE(thr->slot->mtx) NO_THREAD_SAFETY_ANALYSIS { - CHECK(!thr->slot); - TidSlot* slot = nullptr; - for (;;) { - uptr epoch; - { - Lock lock(&ctx->slot_mtx); - epoch = ctx->global_epoch; - if (slot) { - // This is an exhausted slot from the previous iteration. 
- if (ctx->slot_queue.Queued(slot)) - ctx->slot_queue.Remove(slot); - thr->slot_locked = false; - slot->mtx.Unlock(); - } - for (;;) { - slot = ctx->slot_queue.PopFront(); - if (!slot) - break; - if (slot->epoch() != kEpochLast) { - ctx->slot_queue.PushBack(slot); - break; - } - } - } - if (!slot) { - DoReset(thr, epoch); - continue; - } - slot->mtx.Lock(); - CHECK(!thr->slot_locked); - thr->slot_locked = true; - if (slot->thr) { - DPrintf("#%d: preempting sid=%d tid=%d\n", thr->tid, (u32)slot->sid, - slot->thr->tid); - slot->SetEpoch(slot->thr->fast_state.epoch()); - slot->thr = nullptr; - } - if (slot->epoch() != kEpochLast) - return slot; - } -} - -void SlotAttachAndLock(ThreadState* thr) { - TidSlot* slot = FindSlotAndLock(thr); - DPrintf("#%d: SlotAttach: slot=%u\n", thr->tid, static_cast(slot->sid)); - CHECK(!slot->thr); - CHECK(!thr->slot); - slot->thr = thr; - thr->slot = slot; - Epoch epoch = EpochInc(slot->epoch()); - CHECK(!EpochOverflow(epoch)); - slot->SetEpoch(epoch); - thr->fast_state.SetSid(slot->sid); - thr->fast_state.SetEpoch(epoch); - if (thr->slot_epoch != ctx->global_epoch) { - thr->slot_epoch = ctx->global_epoch; - thr->clock.Reset(); #if !SANITIZER_GO - thr->last_sleep_stack_id = kInvalidStackID; - thr->last_sleep_clock.Reset(); +static const u32 kThreadQuarantineSize = 16; +#else +static const u32 kThreadQuarantineSize = 64; #endif - } - thr->clock.Set(slot->sid, epoch); - slot->journal.PushBack({thr->tid, epoch}); -} - -static void SlotDetachImpl(ThreadState* thr, bool exiting) { - TidSlot* slot = thr->slot; - thr->slot = nullptr; - if (thr != slot->thr) { - slot = nullptr; // we don't own the slot anymore - if (thr->slot_epoch != ctx->global_epoch) { - TracePart* part = nullptr; - auto* trace = &thr->tctx->trace; - { - Lock l(&trace->mtx); - auto* parts = &trace->parts; - // The trace can be completely empty in an unlikely event - // the thread is preempted right after it acquired the slot - // in ThreadStart and did not trace any events yet. 
- CHECK_LE(parts->Size(), 1); - part = parts->PopFront(); - thr->tctx->trace.local_head = nullptr; - atomic_store_relaxed(&thr->trace_pos, 0); - thr->trace_prev_pc = 0; - } - if (part) { - Lock l(&ctx->slot_mtx); - TracePartFree(part); - } - } - return; - } - CHECK(exiting || thr->fast_state.epoch() == kEpochLast); - slot->SetEpoch(thr->fast_state.epoch()); - slot->thr = nullptr; -} - -void SlotDetach(ThreadState* thr) { - Lock lock(&thr->slot->mtx); - SlotDetachImpl(thr, true); -} - -void SlotLock(ThreadState* thr) NO_THREAD_SAFETY_ANALYSIS { - DCHECK(!thr->slot_locked); - TidSlot* slot = thr->slot; - slot->mtx.Lock(); - thr->slot_locked = true; - if (LIKELY(thr == slot->thr && thr->fast_state.epoch() != kEpochLast)) - return; - SlotDetachImpl(thr, false); - thr->slot_locked = false; - slot->mtx.Unlock(); - SlotAttachAndLock(thr); -} - -void SlotUnlock(ThreadState* thr) { - DCHECK(thr->slot_locked); - thr->slot_locked = false; - thr->slot->mtx.Unlock(); -} Context::Context() : initialized(), report_mtx(MutexTypeReport), nreported(), - thread_registry([](Tid tid) -> ThreadContextBase* { - return new (Alloc(sizeof(ThreadContext))) ThreadContext(tid); - }), + thread_registry(CreateThreadContext, kMaxTid, kThreadQuarantineSize, + kMaxTidReuse), racy_mtx(MutexTypeRacy), racy_stacks(), racy_addresses(), fired_suppressions_mtx(MutexTypeFired), - clock_alloc(LINKER_INITIALIZED, "clock allocator"), - slot_mtx(MutexTypeSlots), - multi_slot_mtx(MutexTypeMultiSlot), - resetting() { + clock_alloc(LINKER_INITIALIZED, "clock allocator") { fired_suppressions.reserve(8); - for (uptr i = 0; i < ARRAY_SIZE(slots); i++) { - TidSlot* slot = &slots[i]; - slot->sid = static_cast(i); - slot_queue.PushBack(slot); - } - global_epoch = 1; } -TidSlot::TidSlot() : mtx(MutexTypeSlot) {} - // The objects are allocated in TLS, so one may rely on zero-initialization. -ThreadState::ThreadState(Tid tid) - // Do not touch these, rely on zero initialization, - // they may be accessed before the ctor. - // ignore_reads_and_writes() - // ignore_interceptors() - : tid(tid) { +ThreadState::ThreadState(Context *ctx, Tid tid, int unique_id, u64 epoch, + unsigned reuse_count, uptr stk_addr, uptr stk_size, + uptr tls_addr, uptr tls_size) + : fast_state(tid, epoch) + // Do not touch these, rely on zero initialization, + // they may be accessed before the ctor. + // , ignore_reads_and_writes() + // , ignore_interceptors() + , + clock(tid, reuse_count) +#if !SANITIZER_GO + , + jmp_bufs() +#endif + , + tid(tid), + unique_id(unique_id), + stk_addr(stk_addr), + stk_size(stk_size), + tls_addr(tls_addr), + tls_size(tls_size) +#if !SANITIZER_GO + , + last_sleep_clock(tid) +#endif +{ CHECK_EQ(reinterpret_cast(this) % SANITIZER_CACHE_LINE_SIZE, 0); #if !SANITIZER_GO // C/C++ uses fixed size shadow stack. const int kInitStackSize = kShadowStackSize; - shadow_stack = static_cast( + shadow_stack = static_cast( MmapNoReserveOrDie(kInitStackSize * sizeof(uptr), "shadow stack")); SetShadowRegionHugePageMode(reinterpret_cast(shadow_stack), kInitStackSize * sizeof(uptr)); #else // Go uses malloc-allocated shadow stack with dynamic size. 
const int kInitStackSize = 8; - shadow_stack = static_cast(Alloc(kInitStackSize * sizeof(uptr))); + shadow_stack = static_cast(Alloc(kInitStackSize * sizeof(uptr))); #endif shadow_stack_pos = shadow_stack; shadow_stack_end = shadow_stack + kInitStackSize; @@ -516,8 +278,7 @@ void UnmapShadow(ThreadState *thr, uptr addr, uptr size) { if (size == 0) return; DontNeedShadowFor(addr, size); ScopedGlobalProcessor sgp; - SlotLocker locker(thr, true); - ctx->metamap.ResetRange(thr->proc(), addr, size, true); + ctx->metamap.ResetRange(thr->proc(), addr, size); } #endif @@ -563,6 +324,18 @@ void MapShadow(uptr addr, uptr size) { addr + size, meta_begin, meta_end); } +void MapThreadTrace(uptr addr, uptr size, const char *name) { + DPrintf("#0: Mapping trace at 0x%zx-0x%zx(0x%zx)\n", addr, addr + size, size); + CHECK_GE(addr, TraceMemBeg()); + CHECK_LE(addr + size, TraceMemEnd()); + CHECK_EQ(addr, addr & ~((64 << 10) - 1)); // windows wants 64K alignment + if (!MmapFixedSuperNoReserve(addr, size, name)) { + Printf("FATAL: ThreadSanitizer can not mmap thread trace (0x%zx/0x%zx)\n", + addr, size); + Die(); + } +} + #if !SANITIZER_GO static void OnStackUnwind(const SignalContext &sig, const void *, BufferedStackTrace *stack) { @@ -581,11 +354,8 @@ void CheckUnwind() { // since we are going to die soon. ScopedIgnoreInterceptors ignore; #if !SANITIZER_GO - ThreadState* thr = cur_thread(); - thr->nomalloc = false; - thr->ignore_sync++; - thr->ignore_reads_and_writes++; - atomic_store_relaxed(&thr->in_signal_handler, 0); + cur_thread()->ignore_sync++; + cur_thread()->ignore_reads_and_writes++; #endif PrintCurrentStackSlow(StackTrace::GetCurrentPc()); } @@ -640,22 +410,22 @@ void Initialize(ThreadState *thr) { Symbolizer::GetOrInit()->AddHooks(EnterSymbolizer, ExitSymbolizer); #endif - VPrintf(1, "***** Running under ThreadSanitizer v3 (pid %d) *****\n", + VPrintf(1, "***** Running under ThreadSanitizer v2 (pid %d) *****\n", (int)internal_getpid()); // Initialize thread 0. - Tid tid = ThreadCreate(nullptr, 0, 0, true); + Tid tid = ThreadCreate(thr, 0, 0, true); CHECK_EQ(tid, kMainTid); ThreadStart(thr, tid, GetTid(), ThreadType::Regular); #if TSAN_CONTAINS_UBSAN __ubsan::InitAsPlugin(); #endif + ctx->initialized = true; #if !SANITIZER_GO Symbolizer::LateInitialize(); InitializeMemoryProfiler(); #endif - ctx->initialized = true; if (flags()->stop_on_start) { Printf("ThreadSanitizer is suspended at startup (pid %d)." @@ -681,6 +451,7 @@ void MaybeSpawnBackgroundThread() { #endif } + int Finalize(ThreadState *thr) { bool failed = false; @@ -688,12 +459,12 @@ int Finalize(ThreadState *thr) { DumpProcessMap(); if (flags()->atexit_sleep_ms > 0 && ThreadCount(thr) > 1) - internal_usleep(u64(flags()->atexit_sleep_ms) * 1000); + SleepForMillis(flags()->atexit_sleep_ms); - { - // Wait for pending reports. - ScopedErrorReportLock lock; - } + // Wait for pending reports. + ctx->report_mtx.Lock(); + { ScopedErrorReportLock l; } + ctx->report_mtx.Unlock(); #if !SANITIZER_GO if (Verbosity()) AllocatorPrintStats(); @@ -720,13 +491,8 @@ int Finalize(ThreadState *thr) { #if !SANITIZER_GO void ForkBefore(ThreadState *thr, uptr pc) NO_THREAD_SAFETY_ANALYSIS { - // Detaching from the slot makes OnUserFree skip writing to the shadow. - // The slot will be locked so any attempts to use it will deadlock anyway. 
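For reference, the Go path above uses a malloc-allocated shadow stack that is grown on demand (see GrowShadowStack later in this patch). A hedged sketch of that doubling scheme, with simplified illustrative types rather than the runtime's own:

    // Sketch of a dynamically sized shadow stack of return PCs; illustrative only.
    #include <cstdint>
    #include <cstdlib>
    #include <cstring>
    #include <cstddef>

    struct ShadowStackSketch {
      uintptr_t *begin, *pos, *end;

      explicit ShadowStackSketch(size_t init = 8) {
        begin = static_cast<uintptr_t *>(malloc(init * sizeof(uintptr_t)));
        pos = begin;
        end = begin + init;
      }
      ~ShadowStackSketch() { free(begin); }

      void Push(uintptr_t pc) {
        if (pos == end) Grow();   // grow only when the stack is full
        *pos++ = pc;
      }
      void Pop() { --pos; }

     private:
      void Grow() {
        size_t size = end - begin, used = pos - begin;
        uintptr_t *n =
            static_cast<uintptr_t *>(malloc(2 * size * sizeof(uintptr_t)));
        memcpy(n, begin, used * sizeof(uintptr_t));  // copy live entries
        free(begin);
        begin = n;
        pos = begin + used;
        end = begin + 2 * size;
      }
    };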
- SlotDetach(thr); - ctx->multi_slot_mtx.Lock(); - for (auto& slot : ctx->slots) slot.mtx.Lock(); ctx->thread_registry.Lock(); - ctx->slot_mtx.Lock(); + ctx->report_mtx.Lock(); ScopedErrorReportLock::Lock(); // Suppress all reports in the pthread_atfork callbacks. // Reports will deadlock on the report_mtx. @@ -739,34 +505,30 @@ void ForkBefore(ThreadState *thr, uptr pc) NO_THREAD_SAFETY_ANALYSIS { // On OS X libSystem_atfork_prepare/parent/child callbacks are called // after/before our callbacks and they call free. thr->ignore_interceptors++; - // Disables memory write in OnUserFree. - thr->ignore_reads_and_writes++; __tsan_test_only_on_fork(); } -static void ForkAfter(ThreadState* thr) NO_THREAD_SAFETY_ANALYSIS { +void ForkParentAfter(ThreadState *thr, uptr pc) NO_THREAD_SAFETY_ANALYSIS { thr->suppress_reports--; // Enabled in ForkBefore. thr->ignore_interceptors--; - thr->ignore_reads_and_writes--; ScopedErrorReportLock::Unlock(); - ctx->slot_mtx.Unlock(); + ctx->report_mtx.Unlock(); ctx->thread_registry.Unlock(); - for (auto& slot : ctx->slots) slot.mtx.Unlock(); - ctx->multi_slot_mtx.Unlock(); - SlotAttachAndLock(thr); - SlotUnlock(thr); } -void ForkParentAfter(ThreadState* thr, uptr pc) { ForkAfter(thr); } +void ForkChildAfter(ThreadState *thr, uptr pc, + bool start_thread) NO_THREAD_SAFETY_ANALYSIS { + thr->suppress_reports--; // Enabled in ForkBefore. + thr->ignore_interceptors--; + ScopedErrorReportLock::Unlock(); + ctx->report_mtx.Unlock(); + ctx->thread_registry.Unlock(); -void ForkChildAfter(ThreadState* thr, uptr pc, bool start_thread) { - ForkAfter(thr); - u32 nthread = ThreadCount(thr); - VPrintf(1, - "ThreadSanitizer: forked new process with pid %d," - " parent had %d threads\n", - (int)internal_getpid(), (int)nthread); + uptr nthread = 0; + ctx->thread_registry.GetNumberOfThreads(0, 0, &nthread /* alive threads */); + VPrintf(1, "ThreadSanitizer: forked new process with pid %d," + " parent had %d threads\n", (int)internal_getpid(), (int)nthread); if (nthread == 1) { if (start_thread) StartBackgroundThread(); @@ -776,7 +538,6 @@ void ForkChildAfter(ThreadState* thr, uptr pc, bool start_thread) { // ignores for everything in the hope that we will exec soon. ctx->after_multithreaded_fork = true; thr->ignore_interceptors++; - thr->suppress_reports++; ThreadIgnoreBegin(thr, pc); ThreadIgnoreSyncBegin(thr, pc); } @@ -798,10 +559,8 @@ void GrowShadowStack(ThreadState *thr) { #endif StackID CurrentStackId(ThreadState *thr, uptr pc) { -#if !SANITIZER_GO if (!thr->is_inited) // May happen during bootstrap. return kInvalidStackID; -#endif if (pc != 0) { #if !SANITIZER_GO DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end); @@ -819,72 +578,53 @@ StackID CurrentStackId(ThreadState *thr, uptr pc) { return id; } -static bool TraceSkipGap(ThreadState* thr) { +namespace v3 { + +NOINLINE +void TraceSwitchPart(ThreadState *thr) { Trace *trace = &thr->tctx->trace; Event *pos = reinterpret_cast(atomic_load_relaxed(&thr->trace_pos)); DCHECK_EQ(reinterpret_cast(pos + 1) & TracePart::kAlignment, 0); auto *part = trace->parts.Back(); - DPrintf("#%d: TraceSwitchPart enter trace=%p parts=%p-%p pos=%p\n", thr->tid, - trace, trace->parts.Front(), part, pos); - if (!part) - return false; - // We can get here when we still have space in the current trace part. - // The fast-path check in TraceAcquire has false positives in the middle of - // the part. Check if we are indeed at the end of the current part or not, - // and fill any gaps with NopEvent's. 
- Event* end = &part->events[TracePart::kSize]; - DCHECK_GE(pos, &part->events[0]); - DCHECK_LE(pos, end); - if (pos + 1 < end) { - if ((reinterpret_cast(pos) & TracePart::kAlignment) == - TracePart::kAlignment) + DPrintf("TraceSwitchPart part=%p pos=%p\n", part, pos); + if (part) { + // We can get here when we still have space in the current trace part. + // The fast-path check in TraceAcquire has false positives in the middle of + // the part. Check if we are indeed at the end of the current part or not, + // and fill any gaps with NopEvent's. + Event *end = &part->events[TracePart::kSize]; + DCHECK_GE(pos, &part->events[0]); + DCHECK_LE(pos, end); + if (pos + 1 < end) { + if ((reinterpret_cast(pos) & TracePart::kAlignment) == + TracePart::kAlignment) + *pos++ = NopEvent; *pos++ = NopEvent; - *pos++ = NopEvent; - DCHECK_LE(pos + 2, end); - atomic_store_relaxed(&thr->trace_pos, reinterpret_cast(pos)); - return true; + DCHECK_LE(pos + 2, end); + atomic_store_relaxed(&thr->trace_pos, reinterpret_cast(pos)); + // Ensure we setup trace so that the next TraceAcquire + // won't detect trace part end. + Event *ev; + CHECK(TraceAcquire(thr, &ev)); + return; + } + // We are indeed at the end. + for (; pos < end; pos++) *pos = NopEvent; } - // We are indeed at the end. - for (; pos < end; pos++) *pos = NopEvent; - return false; -} - -NOINLINE -void TraceSwitchPart(ThreadState* thr) { - if (TraceSkipGap(thr)) - return; #if !SANITIZER_GO if (ctx->after_multithreaded_fork) { // We just need to survive till exec. - TracePart* part = thr->tctx->trace.parts.Back(); - if (part) { - atomic_store_relaxed(&thr->trace_pos, - reinterpret_cast(&part->events[0])); - return; - } + CHECK(part); + atomic_store_relaxed(&thr->trace_pos, + reinterpret_cast(&part->events[0])); + return; } #endif - TraceSwitchPartImpl(thr); -} - -void TraceSwitchPartImpl(ThreadState* thr) { - SlotLocker locker(thr, true); - Trace* trace = &thr->tctx->trace; - TracePart* part = TracePartAlloc(thr); + part = new (MmapOrDie(sizeof(TracePart), "TracePart")) TracePart(); part->trace = trace; thr->trace_prev_pc = 0; - TracePart* recycle = nullptr; - // Keep roughly half of parts local to the thread - // (not queued into the recycle queue). - uptr local_parts = (Trace::kMinParts + flags()->history_size + 1) / 2; { Lock lock(&trace->mtx); - if (trace->parts.Empty()) - trace->local_head = part; - if (trace->parts.Size() >= local_parts) { - recycle = trace->local_head; - trace->local_head = trace->parts.Next(recycle); - } trace->parts.PushBack(part); atomic_store_relaxed(&thr->trace_pos, reinterpret_cast(&part->events[0])); @@ -892,45 +632,60 @@ void TraceSwitchPartImpl(ThreadState* thr) { // Make this part self-sufficient by restoring the current stack // and mutex set in the beginning of the trace. TraceTime(thr); - { - // Pathologically large stacks may not fit into the part. - // In these cases we log only fixed number of top frames. - const uptr kMaxFrames = 1000; - // Sanity check that kMaxFrames won't consume the whole part. 
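TraceSwitchPart above both pads the old part with NopEvent and makes the new part self-sufficient by re-logging the current call stack (and mutex set). A simplified sketch of the self-sufficient-part idea; the flat uint64_t events and types below are illustrative, not the runtime's Event encoding:

    // Sketch: when the current part fills up, start a new one and immediately
    // replay the current call stack so the new part can be decoded on its own.
    #include <cstdint>
    #include <cstddef>
    #include <vector>

    struct TracePartSketch {
      std::vector<uint64_t> events;  // fixed capacity in the real runtime
    };

    struct TraceSketch {
      std::vector<TracePartSketch> parts;
      std::vector<uint64_t> shadow_stack;  // PCs of currently active functions
      static constexpr size_t kPartSize = 1024;

      void AddEvent(uint64_t ev) {
        if (parts.empty() || parts.back().events.size() >= kPartSize)
          SwitchPart();
        parts.back().events.push_back(ev);
      }

     private:
      void SwitchPart() {
        parts.emplace_back();
        // Make the new part self-sufficient: restore the current stack first.
        for (uint64_t pc : shadow_stack) parts.back().events.push_back(pc);
      }
    };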
- static_assert(kMaxFrames < TracePart::kSize / 2, "kMaxFrames is too big"); - uptr* pos = Max(&thr->shadow_stack[0], thr->shadow_stack_pos - kMaxFrames); - for (; pos < thr->shadow_stack_pos; pos++) { - if (TryTraceFunc(thr, *pos)) - continue; - CHECK(TraceSkipGap(thr)); - CHECK(TryTraceFunc(thr, *pos)); - } - } + for (uptr *pos = &thr->shadow_stack[0]; pos < thr->shadow_stack_pos; pos++) + CHECK(TryTraceFunc(thr, *pos)); for (uptr i = 0; i < thr->mset.Size(); i++) { MutexSet::Desc d = thr->mset.Get(i); - for (uptr i = 0; i < d.count; i++) - TraceMutexLock(thr, d.write ? EventType::kLock : EventType::kRLock, 0, - d.addr, d.stack_id); - } - { - Lock lock(&ctx->slot_mtx); - ctx->slot_queue.Remove(thr->slot); - ctx->slot_queue.PushBack(thr->slot); - if (recycle) - ctx->trace_part_recycle.PushBack(recycle); + TraceMutexLock(thr, d.write ? EventType::kLock : EventType::kRLock, 0, + d.addr, d.stack_id); } - DPrintf("#%d: TraceSwitchPart exit parts=%p-%p pos=0x%zx\n", thr->tid, - trace->parts.Front(), trace->parts.Back(), - atomic_load_relaxed(&thr->trace_pos)); +} + +} // namespace v3 + +void TraceSwitch(ThreadState *thr) { +#if !SANITIZER_GO + if (ctx->after_multithreaded_fork) + return; +#endif + thr->nomalloc++; + Trace *thr_trace = ThreadTrace(thr->tid); + Lock l(&thr_trace->mtx); + unsigned trace = (thr->fast_state.epoch() / kTracePartSize) % TraceParts(); + TraceHeader *hdr = &thr_trace->headers[trace]; + hdr->epoch0 = thr->fast_state.epoch(); + ObtainCurrentStack(thr, 0, &hdr->stack0); + hdr->mset0 = thr->mset; + thr->nomalloc--; +} + +Trace *ThreadTrace(Tid tid) { return (Trace *)GetThreadTraceHeader(tid); } + +uptr TraceTopPC(ThreadState *thr) { + Event *events = (Event*)GetThreadTrace(thr->tid); + uptr pc = events[thr->fast_state.GetTracePos()]; + return pc; +} + +uptr TraceSize() { + return (uptr)(1ull << (kTracePartSizeBits + flags()->history_size + 1)); +} + +uptr TraceParts() { + return TraceSize() / kTracePartSize; } #if !SANITIZER_GO -extern "C" void __tsan_trace_switch() {} +extern "C" void __tsan_trace_switch() { + TraceSwitch(cur_thread()); +} -extern "C" void __tsan_report_race() {} +extern "C" void __tsan_report_race() { + ReportRace(cur_thread()); +} #endif -void ThreadIgnoreBegin(ThreadState* thr, uptr pc) { +void ThreadIgnoreBegin(ThreadState *thr, uptr pc) { DPrintf("#%d: ThreadIgnoreBegin\n", thr->tid); thr->ignore_reads_and_writes++; CHECK_GT(thr->ignore_reads_and_writes, 0); @@ -990,6 +745,7 @@ void build_consistency_debug() {} #else void build_consistency_release() {} #endif + } // namespace __tsan #if SANITIZER_CHECK_DEADLOCKS @@ -997,27 +753,18 @@ namespace __sanitizer { using namespace __tsan; MutexMeta mutex_meta[] = { {MutexInvalid, "Invalid", {}}, - {MutexThreadRegistry, - "ThreadRegistry", - {MutexTypeSlots, MutexTypeTrace, MutexTypeReport}}, - {MutexTypeReport, "Report", {MutexTypeTrace}}, - {MutexTypeSyncVar, "SyncVar", {MutexTypeReport, MutexTypeTrace}}, - {MutexTypeAnnotations, "Annotations", {MutexLeaf}}, - {MutexTypeAtExit, "AtExit", {}}, + {MutexThreadRegistry, "ThreadRegistry", {}}, + {MutexTypeTrace, "Trace", {MutexLeaf}}, + {MutexTypeReport, "Report", {MutexTypeSyncVar}}, + {MutexTypeSyncVar, "SyncVar", {}}, + {MutexTypeAnnotations, "Annotations", {}}, + {MutexTypeAtExit, "AtExit", {MutexTypeSyncVar}}, {MutexTypeFired, "Fired", {MutexLeaf}}, {MutexTypeRacy, "Racy", {MutexLeaf}}, - {MutexTypeGlobalProc, "GlobalProc", {MutexTypeSlot, MutexTypeSlots}}, - {MutexTypeTrace, "Trace", {}}, - {MutexTypeSlot, - "Slot", - {MutexMulti, MutexTypeTrace, 
MutexTypeSyncVar, MutexThreadRegistry, - MutexTypeSlots}}, - {MutexTypeSlots, "Slots", {MutexTypeTrace, MutexTypeReport}}, - {MutexTypeMultiSlot, "MultiSlot", {MutexTypeSlot, MutexTypeSlots}}, + {MutexTypeGlobalProc, "GlobalProc", {}}, {}, }; void PrintMutexPC(uptr pc) { StackTrace(&pc, 1).Print(); } - } // namespace __sanitizer #endif diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.h b/compiler-rt/lib/tsan/rtl/tsan_rtl.h index 3175847a880ab..c71b27e1cbf58 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_rtl.h +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.h @@ -38,7 +38,6 @@ #include "tsan_defs.h" #include "tsan_flags.h" #include "tsan_ignoreset.h" -#include "tsan_ilist.h" #include "tsan_mman.h" #include "tsan_mutexset.h" #include "tsan_platform.h" @@ -47,7 +46,6 @@ #include "tsan_stack_trace.h" #include "tsan_sync.h" #include "tsan_trace.h" -#include "tsan_vector_clock.h" #if SANITIZER_WORDSIZE != 64 # error "ThreadSanitizer is supported only on 64-bit platforms" @@ -118,6 +116,7 @@ struct Processor { #endif DenseSlabAllocCache block_cache; DenseSlabAllocCache sync_cache; + DenseSlabAllocCache clock_cache; DDPhysicalThread *dd_pt; }; @@ -131,56 +130,30 @@ struct ScopedGlobalProcessor { }; #endif -struct TidEpoch { - Tid tid; - Epoch epoch; -}; - -struct TidSlot { - Mutex mtx; - Sid sid; - atomic_uint32_t raw_epoch; - ThreadState *thr; - Vector journal; - INode node; - - Epoch epoch() const { - return static_cast(atomic_load(&raw_epoch, memory_order_relaxed)); - } - - void SetEpoch(Epoch v) { - atomic_store(&raw_epoch, static_cast(v), memory_order_relaxed); - } - - TidSlot(); -} ALIGNED(SANITIZER_CACHE_LINE_SIZE); - // This struct is stored in TLS. struct ThreadState { FastState fast_state; - int ignore_sync; -#if !SANITIZER_GO - int ignore_interceptors; -#endif - uptr *shadow_stack_pos; - - // Current position in tctx->trace.Back()->events (Event*). - atomic_uintptr_t trace_pos; - // PC of the last memory access, used to compute PC deltas in the trace. - uptr trace_prev_pc; - + // Synch epoch represents the threads's epoch before the last synchronization + // action. It allows to reduce number of shadow state updates. + // For example, fast_synch_epoch=100, last write to addr X was at epoch=150, + // if we are processing write to X from the same thread at epoch=200, + // we do nothing, because both writes happen in the same 'synch epoch'. + // That is, if another memory access does not race with the former write, + // it does not race with the latter as well. + // QUESTION: can we can squeeze this into ThreadState::Fast? + // E.g. ThreadState::Fast is a 44-bit, 32 are taken by synch_epoch and 12 are + // taken by epoch between synchs. + // This way we can save one load from tls. + u64 fast_synch_epoch; // Technically `current` should be a separate THREADLOCAL variable; // but it is placed here in order to share cache line with previous fields. ThreadState* current; - - atomic_sint32_t pending_signals; - - VectorClock clock; - // This is a slow path flag. On fast path, fast_state.GetIgnoreBit() is read. // We do not distinguish beteween ignoring reads and writes // for better performance. int ignore_reads_and_writes; + atomic_sint32_t pending_signals; + int ignore_sync; int suppress_reports; // Go does not support ignores. 
#if !SANITIZER_GO @@ -189,27 +162,31 @@ struct ThreadState { #endif uptr *shadow_stack; uptr *shadow_stack_end; + uptr *shadow_stack_pos; + RawShadow *racy_shadow_addr; + RawShadow racy_state[2]; + MutexSet mset; + ThreadClock clock; #if !SANITIZER_GO Vector jmp_bufs; - int in_symbolizer; + int ignore_interceptors; +#endif + const Tid tid; + const int unique_id; + bool in_symbolizer; bool in_ignored_lib; bool is_inited; -#endif - MutexSet mset; bool is_dead; - const Tid tid; - uptr stk_addr; - uptr stk_size; - uptr tls_addr; - uptr tls_size; + bool is_freeing; + bool is_vptr_access; + const uptr stk_addr; + const uptr stk_size; + const uptr tls_addr; + const uptr tls_size; ThreadContext *tctx; DDLogicalThread *dd_lt; - TidSlot *slot; - uptr slot_epoch; - bool slot_locked; - // Current wired Processor, or nullptr. Required to handle any events. Processor *proc1; #if !SANITIZER_GO @@ -223,7 +200,7 @@ struct ThreadState { #if !SANITIZER_GO StackID last_sleep_stack_id; - VectorClock last_sleep_clock; + ThreadClock last_sleep_clock; #endif // Set in regions of runtime that must be signal-safe and fork-safe. @@ -232,7 +209,16 @@ struct ThreadState { const ReportDesc *current_report; - explicit ThreadState(Tid tid); + // Current position in tctx->trace.Back()->events (Event*). + atomic_uintptr_t trace_pos; + // PC of the last memory access, used to compute PC deltas in the trace. + uptr trace_prev_pc; + Sid sid; + Epoch epoch; + + explicit ThreadState(Context *ctx, Tid tid, int unique_id, u64 epoch, + unsigned reuse_count, uptr stk_addr, uptr stk_size, + uptr tls_addr, uptr tls_size); } ALIGNED(SANITIZER_CACHE_LINE_SIZE); #if !SANITIZER_GO @@ -266,9 +252,14 @@ class ThreadContext final : public ThreadContextBase { ~ThreadContext(); ThreadState *thr; StackID creation_stack_id; - VectorClock *sync; - uptr sync_epoch; - Trace trace; + SyncClock sync; + // Epoch at which the thread had started. + // If we see an event from the thread stamped by an older epoch, + // the event is from a dead thread that shared tid with this thread. + u64 epoch0; + u64 epoch1; + + v3::Trace trace; // Override superclass callbacks. void OnDead() override; @@ -328,21 +319,7 @@ struct Context { Flags flags; fd_t memprof_fd; - // The last slot index (kFreeSid) is used to denote freed memory. - TidSlot slots[kThreadSlotCount - 1]; - - // Protects global_epoch, slot_queue, trace_part_recycle. Mutex slot_mtx; - // Prevents lock order inversions when we lock more than 1 slot. - Mutex multi_slot_mtx; - uptr global_epoch; // guarded by slot_mtx and by all slot mutexes - bool resetting; // global reset is in progress - IList slot_queue GUARDED_BY(slot_mtx); - IList trace_part_recycle - GUARDED_BY(slot_mtx); - uptr trace_part_total_allocated GUARDED_BY(slot_mtx); - uptr trace_part_recycle_finished GUARDED_BY(slot_mtx); - uptr trace_part_finished_excess GUARDED_BY(slot_mtx); }; extern Context *ctx; // The one and the only global runtime context. 
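The fast_synch_epoch comment in the ThreadState hunk above describes when a repeated access by the same thread may skip the shadow update. Below is a sketch of that predicate under simplified, illustrative types; the real check (ContainsSameAccessSlow later in this patch) additionally requires the two accesses to cover the same address range within the cell.

    // Sketch of the "same synch epoch" filter; fields are illustrative.
    #include <cstdint>

    struct OldAccessSketch {
      uint32_t tid;
      uint64_t epoch;
      bool is_write;
      bool is_atomic;
    };

    // Returns true if the new access is redundant w.r.t. the stored one:
    // same thread, no synchronization since the stored access, same atomicity,
    // and a stored write may cover a later read but not vice versa.
    bool SameSynchEpoch(const OldAccessSketch &old_acc, uint32_t cur_tid,
                        uint64_t fast_synch_epoch, bool cur_is_write,
                        bool cur_is_atomic) {
      return old_acc.tid == cur_tid &&
             old_acc.epoch > fast_synch_epoch &&
             old_acc.is_atomic == cur_is_atomic &&
             (old_acc.is_write || !cur_is_write);
    }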
@@ -371,13 +348,14 @@ uptr TagFromShadowStackFrame(uptr pc); class ScopedReportBase { public: - void AddMemoryAccess(uptr addr, uptr external_tag, Shadow s, Tid tid, - StackTrace stack, const MutexSet *mset); + void AddMemoryAccess(uptr addr, uptr external_tag, Shadow s, StackTrace stack, + const MutexSet *mset); void AddStack(StackTrace stack, bool suppressable = false); void AddThread(const ThreadContext *tctx, bool suppressable = false); - void AddThread(Tid tid, bool suppressable = false); + void AddThread(Tid unique_tid, bool suppressable = false); void AddUniqueTid(Tid unique_tid); - int AddMutex(uptr addr, StackID creation_stack_id); + void AddMutex(const SyncVar *s); + u64 AddMutex(u64 id); void AddLocation(uptr addr, uptr size); void AddSleep(StackID stack_id); void SetCount(int count); @@ -394,6 +372,8 @@ class ScopedReportBase { // at best it will cause deadlocks on internal mutexes. ScopedIgnoreInterceptors ignore_interceptors_; + void AddDeadMutex(u64 id); + ScopedReportBase(const ScopedReportBase &) = delete; void operator=(const ScopedReportBase &) = delete; }; @@ -409,6 +389,8 @@ class ScopedReport : public ScopedReportBase { bool ShouldReport(ThreadState *thr, ReportType typ); ThreadContext *IsThreadStackOrTls(uptr addr, bool *is_stack); +void RestoreStack(Tid tid, const u64 epoch, VarSizeStackTrace *stk, + MutexSet *mset, uptr *tag = nullptr); // The stack could look like: // |
| | tag | @@ -456,8 +438,7 @@ void ForkBefore(ThreadState *thr, uptr pc); void ForkParentAfter(ThreadState *thr, uptr pc); void ForkChildAfter(ThreadState *thr, uptr pc, bool start_thread); -void ReportRace(ThreadState *thr, RawShadow *shadow_mem, Shadow cur, Shadow old, - AccessType typ); +void ReportRace(ThreadState *thr); bool OutputReport(ThreadState *thr, const ScopedReport &srep); bool IsFiredSuppression(Context *ctx, ReportType type, StackTrace trace); bool IsExpectedReport(uptr addr, uptr size); @@ -487,28 +468,55 @@ int Finalize(ThreadState *thr); void OnUserAlloc(ThreadState *thr, uptr pc, uptr p, uptr sz, bool write); void OnUserFree(ThreadState *thr, uptr pc, uptr p, bool write); -void MemoryAccess(ThreadState *thr, uptr pc, uptr addr, uptr size, - AccessType typ); +void MemoryAccess(ThreadState *thr, uptr pc, uptr addr, + int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic); +void MemoryAccessImpl(ThreadState *thr, uptr addr, + int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic, + u64 *shadow_mem, Shadow cur); +void MemoryAccessRange(ThreadState *thr, uptr pc, uptr addr, + uptr size, bool is_write); void UnalignedMemoryAccess(ThreadState *thr, uptr pc, uptr addr, uptr size, AccessType typ); -// This creates 2 non-inlined specialized versions of MemoryAccessRange. -template -void MemoryAccessRangeT(ThreadState *thr, uptr pc, uptr addr, uptr size); + +const int kSizeLog1 = 0; +const int kSizeLog2 = 1; +const int kSizeLog4 = 2; +const int kSizeLog8 = 3; ALWAYS_INLINE -void MemoryAccessRange(ThreadState *thr, uptr pc, uptr addr, uptr size, - bool is_write) { - if (size == 0) - return; - if (is_write) - MemoryAccessRangeT(thr, pc, addr, size); - else - MemoryAccessRangeT(thr, pc, addr, size); +void MemoryAccess(ThreadState *thr, uptr pc, uptr addr, uptr size, + AccessType typ) { + int size_log; + switch (size) { + case 1: + size_log = kSizeLog1; + break; + case 2: + size_log = kSizeLog2; + break; + case 4: + size_log = kSizeLog4; + break; + default: + DCHECK_EQ(size, 8); + size_log = kSizeLog8; + break; + } + bool is_write = !(typ & kAccessRead); + bool is_atomic = typ & kAccessAtomic; + if (typ & kAccessVptr) + thr->is_vptr_access = true; + if (typ & kAccessFree) + thr->is_freeing = true; + MemoryAccess(thr, pc, addr, size_log, is_write, is_atomic); + if (typ & kAccessVptr) + thr->is_vptr_access = false; + if (typ & kAccessFree) + thr->is_freeing = false; } -void ShadowSet(RawShadow *p, RawShadow *end, RawShadow v); -void MemoryRangeFreed(ThreadState *thr, uptr pc, uptr addr, uptr size); void MemoryResetRange(ThreadState *thr, uptr pc, uptr addr, uptr size); +void MemoryRangeFreed(ThreadState *thr, uptr pc, uptr addr, uptr size); void MemoryRangeImitateWrite(ThreadState *thr, uptr pc, uptr addr, uptr size); void MemoryRangeImitateWriteOrResetRange(ThreadState *thr, uptr pc, uptr addr, uptr size); @@ -518,6 +526,9 @@ void ThreadIgnoreEnd(ThreadState *thr); void ThreadIgnoreSyncBegin(ThreadState *thr, uptr pc); void ThreadIgnoreSyncEnd(ThreadState *thr); +void FuncEntry(ThreadState *thr, uptr pc); +void FuncExit(ThreadState *thr); + Tid ThreadCreate(ThreadState *thr, uptr pc, uptr uid, bool detached); void ThreadStart(ThreadState *thr, Tid tid, tid_t os_id, ThreadType thread_type); @@ -563,7 +574,11 @@ void Release(ThreadState *thr, uptr pc, uptr addr); void ReleaseStoreAcquire(ThreadState *thr, uptr pc, uptr addr); void ReleaseStore(ThreadState *thr, uptr pc, uptr addr); void AfterSleep(ThreadState *thr, uptr pc); -void IncrementEpoch(ThreadState *thr); +void 
AcquireImpl(ThreadState *thr, uptr pc, SyncClock *c); +void ReleaseImpl(ThreadState *thr, uptr pc, SyncClock *c); +void ReleaseStoreAcquireImpl(ThreadState *thr, uptr pc, SyncClock *c); +void ReleaseStoreImpl(ThreadState *thr, uptr pc, SyncClock *c); +void AcquireReleaseImpl(ThreadState *thr, uptr pc, SyncClock *c); // The hacky call uses custom calling convention and an assembly thunk. // It is considerably faster that a normal call for the caller @@ -586,19 +601,43 @@ void IncrementEpoch(ThreadState *thr); #define HACKY_CALL(f) f() #endif +void TraceSwitch(ThreadState *thr); +uptr TraceTopPC(ThreadState *thr); +uptr TraceSize(); +uptr TraceParts(); +Trace *ThreadTrace(Tid tid); + +extern "C" void __tsan_trace_switch(); +void ALWAYS_INLINE TraceAddEvent(ThreadState *thr, FastState fs, + EventType typ, u64 addr) { + if (!kCollectHistory) + return; + // TraceSwitch accesses shadow_stack, but it's called infrequently, + // so we check it here proactively. + DCHECK(thr->shadow_stack); + DCHECK_GE((int)typ, 0); + DCHECK_LE((int)typ, 7); + DCHECK_EQ(GetLsb(addr, kEventPCBits), addr); + u64 pos = fs.GetTracePos(); + if (UNLIKELY((pos % kTracePartSize) == 0)) { +#if !SANITIZER_GO + HACKY_CALL(__tsan_trace_switch); +#else + TraceSwitch(thr); +#endif + } + Event *trace = (Event*)GetThreadTrace(fs.tid()); + Event *evp = &trace[pos]; + Event ev = (u64)addr | ((u64)typ << kEventPCBits); + *evp = ev; +} + #if !SANITIZER_GO uptr ALWAYS_INLINE HeapEnd() { return HeapMemEnd() + PrimaryAllocator::AdditionalSize(); } #endif -void SlotAttachAndLock(ThreadState *thr) ACQUIRE(thr->slot->mtx); -void SlotDetach(ThreadState *thr); -void SlotLock(ThreadState *thr) ACQUIRE(thr->slot->mtx); -void SlotUnlock(ThreadState *thr) RELEASE(thr->slot->mtx); -void DoReset(ThreadState *thr, uptr epoch); -void FlushShadowMemory(); - ThreadState *FiberCreate(ThreadState *thr, uptr pc, unsigned flags); void FiberDestroy(ThreadState *thr, uptr pc, ThreadState *fiber); void FiberSwitch(ThreadState *thr, uptr pc, ThreadState *fiber, unsigned flags); @@ -609,53 +648,6 @@ enum FiberSwitchFlags { FiberSwitchFlagNoSync = 1 << 0, // __tsan_switch_to_fiber_no_sync }; -class SlotPairLocker { - public: - SlotPairLocker(ThreadState *thr, Sid sid); - ~SlotPairLocker(); - - private: - ThreadState *thr_; - TidSlot *slot_; -}; - -class SlotLocker { - public: - ALWAYS_INLINE - SlotLocker(ThreadState *thr, bool recursive = false) - : thr_(thr), locked_(recursive ? 
thr->slot_locked : false) { - if (!locked_) - SlotLock(thr_); - } - - ALWAYS_INLINE - ~SlotLocker() { - if (!locked_) - SlotUnlock(thr_); - } - - private: - ThreadState *thr_; - bool locked_; -}; - -class SlotUnlocker { - public: - SlotUnlocker(ThreadState *thr) : thr_(thr), locked_(thr->slot_locked) { - if (locked_) - SlotUnlock(thr_); - } - - ~SlotUnlocker() { - if (locked_) - SlotLock(thr_); - } - - private: - ThreadState *thr_; - bool locked_; -}; - ALWAYS_INLINE void ProcessPendingSignals(ThreadState *thr) { if (UNLIKELY(atomic_load_relaxed(&thr->pending_signals))) ProcessPendingSignalsImpl(thr); @@ -674,19 +666,16 @@ void LazyInitialize(ThreadState *thr) { #endif } -void TraceResetForTesting(); +namespace v3 { + void TraceSwitchPart(ThreadState *thr); -void TraceSwitchPartImpl(ThreadState *thr); -bool RestoreStack(EventType type, Sid sid, Epoch epoch, uptr addr, uptr size, - AccessType typ, Tid *ptid, VarSizeStackTrace *pstk, +bool RestoreStack(Tid tid, EventType type, Sid sid, Epoch epoch, uptr addr, + uptr size, AccessType typ, VarSizeStackTrace *pstk, MutexSet *pmset, uptr *ptag); template ALWAYS_INLINE WARN_UNUSED_RESULT bool TraceAcquire(ThreadState *thr, EventT **ev) { - // TraceSwitchPart accesses shadow_stack, but it's called infrequently, - // so we check it here proactively. - DCHECK(thr->shadow_stack); Event *pos = reinterpret_cast(atomic_load_relaxed(&thr->trace_pos)); #if SANITIZER_DEBUG // TraceSwitch acquires these mutexes, @@ -757,16 +746,20 @@ void TraceMutexLock(ThreadState *thr, EventType type, uptr pc, uptr addr, void TraceMutexUnlock(ThreadState *thr, uptr addr); void TraceTime(ThreadState *thr); -void TraceRestartFuncExit(ThreadState *thr); -void TraceRestartFuncEntry(ThreadState *thr, uptr pc); +} // namespace v3 void GrowShadowStack(ThreadState *thr); ALWAYS_INLINE void FuncEntry(ThreadState *thr, uptr pc) { - DPrintf2("#%d: FuncEntry %p\n", (int)thr->fast_state.sid(), (void *)pc); - if (UNLIKELY(!TryTraceFunc(thr, pc))) - return TraceRestartFuncEntry(thr, pc); + DPrintf2("#%d: FuncEntry %p\n", (int)thr->fast_state.tid(), (void *)pc); + if (kCollectHistory) { + thr->fast_state.IncrementEpoch(); + TraceAddEvent(thr, thr->fast_state, EventTypeFuncEnter, pc); + } + + // Shadow stack maintenance can be replaced with + // stack unwinding during trace switch (which presumably must be faster). 
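TraceAddEvent above packs each trace entry into a single u64 with the event type in the top bits. A sketch of that encoding, assuming the v2-style layout of 3 type bits above the PC/address bits; the constant below is illustrative rather than taken from the headers:

    // Sketch of a u64 trace-event encoding: low bits carry the PC or address,
    // the top 3 bits carry the event type (matching the DCHECK typ <= 7 above).
    #include <cstdint>
    #include <cassert>

    constexpr int kEventPCBitsSketch = 61;               // assumed layout
    constexpr uint64_t kPCMask = (1ull << kEventPCBitsSketch) - 1;

    inline uint64_t PackEvent(uint64_t typ, uint64_t pc) {
      assert(typ < 8 && (pc & ~kPCMask) == 0);
      return pc | (typ << kEventPCBitsSketch);
    }

    inline void UnpackEvent(uint64_t ev, uint64_t *typ, uint64_t *pc) {
      *typ = ev >> kEventPCBitsSketch;
      *pc = ev & kPCMask;
    }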
DCHECK_GE(thr->shadow_stack_pos, thr->shadow_stack); #if !SANITIZER_GO DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end); @@ -780,9 +773,12 @@ void FuncEntry(ThreadState *thr, uptr pc) { ALWAYS_INLINE void FuncExit(ThreadState *thr) { - DPrintf2("#%d: FuncExit\n", (int)thr->fast_state.sid()); - if (UNLIKELY(!TryTraceFunc(thr, 0))) - return TraceRestartFuncExit(thr); + DPrintf2("#%d: FuncExit\n", (int)thr->fast_state.tid()); + if (kCollectHistory) { + thr->fast_state.IncrementEpoch(); + TraceAddEvent(thr, thr->fast_state, EventTypeFuncExit, 0); + } + DCHECK_GT(thr->shadow_stack_pos, thr->shadow_stack); #if !SANITIZER_GO DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end); @@ -794,6 +790,7 @@ void FuncExit(ThreadState *thr) { extern void (*on_initialize)(void); extern int (*on_finalize)(int); #endif + } // namespace __tsan #endif // TSAN_RTL_H diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_access.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_access.cpp index 76e269e2ed2a2..7365fdaa30384 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_rtl_access.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_access.cpp @@ -15,13 +15,15 @@ namespace __tsan { -ALWAYS_INLINE USED bool TryTraceMemoryAccess(ThreadState* thr, uptr pc, +namespace v3 { + +ALWAYS_INLINE USED bool TryTraceMemoryAccess(ThreadState *thr, uptr pc, uptr addr, uptr size, AccessType typ) { DCHECK(size == 1 || size == 2 || size == 4 || size == 8); if (!kCollectHistory) return true; - EventAccess* ev; + EventAccess *ev; if (UNLIKELY(!TraceAcquire(thr, &ev))) return false; u64 size_log = size == 1 ? 0 : size == 2 ? 1 : size == 4 ? 2 : 3; @@ -38,27 +40,25 @@ ALWAYS_INLINE USED bool TryTraceMemoryAccess(ThreadState* thr, uptr pc, TraceRelease(thr, ev); return true; } - auto* evex = reinterpret_cast(ev); + auto *evex = reinterpret_cast(ev); evex->is_access = 0; evex->is_func = 0; evex->type = EventType::kAccessExt; evex->is_read = !!(typ & kAccessRead); evex->is_atomic = !!(typ & kAccessAtomic); evex->size_log = size_log; - // Note: this is important, see comment in EventAccessExt. 
- evex->_ = 0; evex->addr = CompressAddr(addr); evex->pc = pc; TraceRelease(thr, evex); return true; } -ALWAYS_INLINE -bool TryTraceMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, uptr size, - AccessType typ) { +ALWAYS_INLINE USED bool TryTraceMemoryAccessRange(ThreadState *thr, uptr pc, + uptr addr, uptr size, + AccessType typ) { if (!kCollectHistory) return true; - EventAccessRange* ev; + EventAccessRange *ev; if (UNLIKELY(!TraceAcquire(thr, &ev))) return false; thr->trace_prev_pc = pc; @@ -75,7 +75,7 @@ bool TryTraceMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, uptr size, return true; } -void TraceMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, uptr size, +void TraceMemoryAccessRange(ThreadState *thr, uptr pc, uptr addr, uptr size, AccessType typ) { if (LIKELY(TryTraceMemoryAccessRange(thr, pc, addr, size, typ))) return; @@ -84,7 +84,7 @@ void TraceMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, uptr size, DCHECK(res); } -void TraceFunc(ThreadState* thr, uptr pc) { +void TraceFunc(ThreadState *thr, uptr pc) { if (LIKELY(TryTraceFunc(thr, pc))) return; TraceSwitchPart(thr); @@ -92,17 +92,7 @@ void TraceFunc(ThreadState* thr, uptr pc) { DCHECK(res); } -NOINLINE void TraceRestartFuncEntry(ThreadState* thr, uptr pc) { - TraceSwitchPart(thr); - FuncEntry(thr, pc); -} - -NOINLINE void TraceRestartFuncExit(ThreadState* thr) { - TraceSwitchPart(thr); - FuncExit(thr); -} - -void TraceMutexLock(ThreadState* thr, EventType type, uptr pc, uptr addr, +void TraceMutexLock(ThreadState *thr, EventType type, uptr pc, uptr addr, StackID stk) { DCHECK(type == EventType::kLock || type == EventType::kRLock); if (!kCollectHistory) @@ -119,7 +109,7 @@ void TraceMutexLock(ThreadState* thr, EventType type, uptr pc, uptr addr, TraceEvent(thr, ev); } -void TraceMutexUnlock(ThreadState* thr, uptr addr) { +void TraceMutexUnlock(ThreadState *thr, uptr addr) { if (!kCollectHistory) return; EventUnlock ev; @@ -131,485 +121,396 @@ void TraceMutexUnlock(ThreadState* thr, uptr addr) { TraceEvent(thr, ev); } -void TraceTime(ThreadState* thr) { +void TraceTime(ThreadState *thr) { if (!kCollectHistory) return; - FastState fast_state = thr->fast_state; EventTime ev; ev.is_access = 0; ev.is_func = 0; ev.type = EventType::kTime; - ev.sid = static_cast(fast_state.sid()); - ev.epoch = static_cast(fast_state.epoch()); + ev.sid = static_cast(thr->sid); + ev.epoch = static_cast(thr->epoch); ev._ = 0; TraceEvent(thr, ev); } -ALWAYS_INLINE RawShadow LoadShadow(RawShadow* p) { - return static_cast( - atomic_load((atomic_uint32_t*)p, memory_order_relaxed)); -} +} // namespace v3 -ALWAYS_INLINE void StoreShadow(RawShadow* sp, RawShadow s) { - atomic_store((atomic_uint32_t*)sp, static_cast(s), memory_order_relaxed); +ALWAYS_INLINE +Shadow LoadShadow(u64 *p) { + u64 raw = atomic_load((atomic_uint64_t *)p, memory_order_relaxed); + return Shadow(raw); } -NOINLINE void DoReportRace(ThreadState* thr, RawShadow* shadow_mem, Shadow cur, - Shadow old, - AccessType typ) NO_THREAD_SAFETY_ANALYSIS { - // For the free shadow markers the first element (that contains kFreeSid) - // triggers the race, but the second element contains info about the freeing - // thread, take it. - if (old.sid() == kFreeSid) - old = Shadow(LoadShadow(&shadow_mem[1])); - // This prevents trapping on this address in future. - for (uptr i = 0; i < kShadowCnt; i++) - StoreShadow(&shadow_mem[i], i == 0 ? Shadow::kRodata : Shadow::kEmpty); - // See the comment in MemoryRangeFreed as to why the slot is locked - // for free memory accesses. 
ReportRace must not be called with - // the slot locked because of the fork. But MemoryRangeFreed is not - // called during fork because fork sets ignore_reads_and_writes, - // so simply unlocking the slot should be fine. - if (typ & kAccessFree) - SlotUnlock(thr); - ReportRace(thr, shadow_mem, cur, Shadow(old), typ); - if (typ & kAccessFree) - SlotLock(thr); +ALWAYS_INLINE +void StoreShadow(u64 *sp, u64 s) { + atomic_store((atomic_uint64_t *)sp, s, memory_order_relaxed); } -#if !TSAN_VECTORIZE ALWAYS_INLINE -bool ContainsSameAccess(RawShadow* s, Shadow cur, int unused0, int unused1, - AccessType typ) { - for (uptr i = 0; i < kShadowCnt; i++) { - auto old = LoadShadow(&s[i]); - if (!(typ & kAccessRead)) { - if (old == cur.raw()) - return true; - continue; - } - auto masked = static_cast(static_cast(old) | - static_cast(Shadow::kRodata)); - if (masked == cur.raw()) - return true; - if (!(typ & kAccessNoRodata) && !SANITIZER_GO) { - if (old == Shadow::kRodata) - return true; - } - } - return false; +void StoreIfNotYetStored(u64 *sp, u64 *s) { + StoreShadow(sp, *s); + *s = 0; } +extern "C" void __tsan_report_race(); + ALWAYS_INLINE -bool CheckRaces(ThreadState* thr, RawShadow* shadow_mem, Shadow cur, - int unused0, int unused1, AccessType typ) { - bool stored = false; - for (uptr idx = 0; idx < kShadowCnt; idx++) { - RawShadow* sp = &shadow_mem[idx]; - Shadow old(LoadShadow(sp)); - if (LIKELY(old.raw() == Shadow::kEmpty)) { - if (!(typ & kAccessCheckOnly) && !stored) - StoreShadow(sp, cur.raw()); - return false; - } - if (LIKELY(!(cur.access() & old.access()))) - continue; - if (LIKELY(cur.sid() == old.sid())) { - if (!(typ & kAccessCheckOnly) && - LIKELY(cur.access() == old.access() && old.IsRWWeakerOrEqual(typ))) { - StoreShadow(sp, cur.raw()); - stored = true; - } - continue; - } - if (LIKELY(old.IsBothReadsOrAtomic(typ))) - continue; - if (LIKELY(thr->clock.Get(old.sid()) >= old.epoch())) - continue; - DoReportRace(thr, shadow_mem, cur, old, typ); - return true; - } - // We did not find any races and had already stored - // the current access info, so we are done. - if (LIKELY(stored)) - return false; - // Choose a random candidate slot and replace it. - uptr index = - atomic_load_relaxed(&thr->trace_pos) / sizeof(Event) % kShadowCnt; - StoreShadow(&shadow_mem[index], cur.raw()); - return false; +void HandleRace(ThreadState *thr, u64 *shadow_mem, Shadow cur, Shadow old) { + thr->racy_state[0] = cur.raw(); + thr->racy_state[1] = old.raw(); + thr->racy_shadow_addr = shadow_mem; +#if !SANITIZER_GO + HACKY_CALL(__tsan_report_race); +#else + ReportRace(thr); +#endif } -# define LOAD_CURRENT_SHADOW(cur, shadow_mem) UNUSED int access = 0, shadow = 0 - -#else /* !TSAN_VECTORIZE */ +static inline bool HappensBefore(Shadow old, ThreadState *thr) { + return thr->clock.get(old.TidWithIgnore()) >= old.epoch(); +} ALWAYS_INLINE -bool ContainsSameAccess(RawShadow* unused0, Shadow unused1, m128 shadow, - m128 access, AccessType typ) { - // Note: we could check if there is a larger access of the same type, - // e.g. we just allocated/memset-ed a block (so it contains 8 byte writes) - // and now do smaller reads/writes, these can also be considered as "same - // access". However, it will make the check more expensive, so it's unclear - // if it's worth it. But this would conserve trace space, so it's useful - // besides potential speed up. 
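HappensBefore above reduces the race decision to a single vector-clock comparison: the old access is ordered before the current thread iff the current thread has already acquired the old thread's clock up to the old access's epoch. A standalone sketch of that test with a plain vector clock instead of the runtime's ThreadClock (illustrative types):

    #include <cstdint>
    #include <cstddef>
    #include <vector>

    struct VectorClockSketch {
      std::vector<uint64_t> clk;  // clk[tid] = last acquired epoch of tid
      uint64_t Get(uint32_t tid) const {
        return tid < clk.size() ? clk[tid] : 0;
      }
      // Merge another clock into this one, e.g. on mutex lock / atomic acquire.
      void Acquire(const VectorClockSketch &other) {
        if (other.clk.size() > clk.size()) clk.resize(other.clk.size(), 0);
        for (size_t i = 0; i < other.clk.size(); i++)
          if (other.clk[i] > clk[i]) clk[i] = other.clk[i];
      }
    };

    // True if the old access (by old_tid at old_epoch) happens-before the
    // current thread, i.e. the two accesses cannot race.
    bool HappensBeforeSketch(const VectorClockSketch &cur, uint32_t old_tid,
                             uint64_t old_epoch) {
      return cur.Get(old_tid) >= old_epoch;
    }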
- if (!(typ & kAccessRead)) { - const m128 same = _mm_cmpeq_epi32(shadow, access); - return _mm_movemask_epi8(same); +void MemoryAccessImpl1(ThreadState *thr, uptr addr, int kAccessSizeLog, + bool kAccessIsWrite, bool kIsAtomic, u64 *shadow_mem, + Shadow cur) { + // This potentially can live in an MMX/SSE scratch register. + // The required intrinsics are: + // __m128i _mm_move_epi64(__m128i*); + // _mm_storel_epi64(u64*, __m128i); + u64 store_word = cur.raw(); + bool stored = false; + + // scan all the shadow values and dispatch to 4 categories: + // same, replace, candidate and race (see comments below). + // we consider only 3 cases regarding access sizes: + // equal, intersect and not intersect. initially I considered + // larger and smaller as well, it allowed to replace some + // 'candidates' with 'same' or 'replace', but I think + // it's just not worth it (performance- and complexity-wise). + + Shadow old(0); + + // It release mode we manually unroll the loop, + // because empirically gcc generates better code this way. + // However, we can't afford unrolling in debug mode, because the function + // consumes almost 4K of stack. Gtest gives only 4K of stack to death test + // threads, which is not enough for the unrolled loop. +#if SANITIZER_DEBUG + for (int idx = 0; idx < 4; idx++) { +# include "tsan_update_shadow_word.inc" + } +#else + int idx = 0; +# include "tsan_update_shadow_word.inc" + idx = 1; + if (stored) { +# include "tsan_update_shadow_word.inc" + } else { +# include "tsan_update_shadow_word.inc" } - // For reads we need to reset read bit in the shadow, - // because we need to match read with both reads and writes. - // Shadow::kRodata has only read bit set, so it does what we want. - // We also abuse it for rodata check to save few cycles - // since we already loaded Shadow::kRodata into a register. - // Reads from rodata can't race. - // Measurements show that they can be 10-20% of all memory accesses. - // Shadow::kRodata has epoch 0 which cannot appear in shadow normally - // (thread epochs start from 1). So the same read bit mask - // serves as rodata indicator. - const m128 read_mask = _mm_set1_epi32(static_cast(Shadow::kRodata)); - const m128 masked_shadow = _mm_or_si128(shadow, read_mask); - m128 same = _mm_cmpeq_epi32(masked_shadow, access); - // Range memory accesses check Shadow::kRodata before calling this, - // Shadow::kRodatas is not possible for free memory access - // and Go does not use Shadow::kRodata. - if (!(typ & kAccessNoRodata) && !SANITIZER_GO) { - const m128 ro = _mm_cmpeq_epi32(shadow, read_mask); - same = _mm_or_si128(ro, same); + idx = 2; + if (stored) { +# include "tsan_update_shadow_word.inc" + } else { +# include "tsan_update_shadow_word.inc" } - return _mm_movemask_epi8(same); -} + idx = 3; + if (stored) { +# include "tsan_update_shadow_word.inc" + } else { +# include "tsan_update_shadow_word.inc" + } +#endif -NOINLINE void DoReportRaceV(ThreadState* thr, RawShadow* shadow_mem, Shadow cur, - u32 race_mask, m128 shadow, AccessType typ) { - // race_mask points which of the shadow elements raced with the current - // access. Extract that element. - CHECK_NE(race_mask, 0); - u32 old; - // Note: _mm_extract_epi32 index must be a constant value. 
- switch (__builtin_ffs(race_mask) / 4) { - case 0: - old = _mm_extract_epi32(shadow, 0); - break; - case 1: - old = _mm_extract_epi32(shadow, 1); - break; - case 2: - old = _mm_extract_epi32(shadow, 2); - break; - case 3: - old = _mm_extract_epi32(shadow, 3); - break; + // we did not find any races and had already stored + // the current access info, so we are done + if (LIKELY(stored)) + return; + // choose a random candidate slot and replace it + StoreShadow(shadow_mem + (cur.epoch() % kShadowCnt), store_word); + return; +RACE: + HandleRace(thr, shadow_mem, cur, old); + return; +} + +void UnalignedMemoryAccess(ThreadState *thr, uptr pc, uptr addr, uptr size, + AccessType typ) { + DCHECK(!(typ & kAccessAtomic)); + const bool kAccessIsWrite = !(typ & kAccessRead); + const bool kIsAtomic = false; + while (size) { + int size1 = 1; + int kAccessSizeLog = kSizeLog1; + if (size >= 8 && (addr & ~7) == ((addr + 7) & ~7)) { + size1 = 8; + kAccessSizeLog = kSizeLog8; + } else if (size >= 4 && (addr & ~7) == ((addr + 3) & ~7)) { + size1 = 4; + kAccessSizeLog = kSizeLog4; + } else if (size >= 2 && (addr & ~7) == ((addr + 1) & ~7)) { + size1 = 2; + kAccessSizeLog = kSizeLog2; + } + MemoryAccess(thr, pc, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic); + addr += size1; + size -= size1; } - Shadow prev(static_cast(old)); - // For the free shadow markers the first element (that contains kFreeSid) - // triggers the race, but the second element contains info about the freeing - // thread, take it. - if (prev.sid() == kFreeSid) - prev = Shadow(static_cast(_mm_extract_epi32(shadow, 1))); - DoReportRace(thr, shadow_mem, cur, prev, typ); } ALWAYS_INLINE -bool CheckRaces(ThreadState* thr, RawShadow* shadow_mem, Shadow cur, - m128 shadow, m128 access, AccessType typ) { - // Note: empty/zero slots don't intersect with any access. - const m128 zero = _mm_setzero_si128(); - const m128 mask_access = _mm_set1_epi32(0x000000ff); - const m128 mask_sid = _mm_set1_epi32(0x0000ff00); - const m128 mask_read_atomic = _mm_set1_epi32(0xc0000000); - const m128 access_and = _mm_and_si128(access, shadow); - const m128 access_xor = _mm_xor_si128(access, shadow); - const m128 intersect = _mm_and_si128(access_and, mask_access); - const m128 not_intersect = _mm_cmpeq_epi32(intersect, zero); - const m128 not_same_sid = _mm_and_si128(access_xor, mask_sid); - const m128 same_sid = _mm_cmpeq_epi32(not_same_sid, zero); - const m128 both_read_or_atomic = _mm_and_si128(access_and, mask_read_atomic); - const m128 no_race = - _mm_or_si128(_mm_or_si128(not_intersect, same_sid), both_read_or_atomic); - const int race_mask = _mm_movemask_epi8(_mm_cmpeq_epi32(no_race, zero)); - if (UNLIKELY(race_mask)) - goto SHARED; - -STORE : { - if (typ & kAccessCheckOnly) - return false; - // We could also replace different sid's if access is the same, - // rw weaker and happens before. However, just checking access below - // is not enough because we also need to check that !both_read_or_atomic - // (reads from different sids can be concurrent). - // Theoretically we could replace smaller accesses with larger accesses, - // but it's unclear if it's worth doing. 
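UnalignedMemoryAccess above walks an access that may straddle 8-byte shadow cells. A sketch of the splitting idea; unlike the real code, which also restricts each piece to a 1/2/4/8-byte power-of-two size, this simplified version only cuts at cell boundaries:

    #include <cstdint>
    #include <utility>
    #include <vector>

    constexpr uint64_t kCellSketch = 8;  // shadow state is kept per 8-byte cell

    // Returns the [addr, addr+len) pieces, none of which crosses a cell boundary.
    std::vector<std::pair<uint64_t, uint64_t>> SplitByCell(uint64_t addr,
                                                           uint64_t size) {
      std::vector<std::pair<uint64_t, uint64_t>> pieces;
      while (size) {
        uint64_t in_cell = kCellSketch - (addr % kCellSketch);  // room in cell
        uint64_t piece = size < in_cell ? size : in_cell;
        pieces.push_back({addr, piece});
        addr += piece;
        size -= piece;
      }
      return pieces;
    }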
- const m128 mask_access_sid = _mm_set1_epi32(0x0000ffff); - const m128 not_same_sid_access = _mm_and_si128(access_xor, mask_access_sid); - const m128 same_sid_access = _mm_cmpeq_epi32(not_same_sid_access, zero); - const m128 access_read_atomic = - _mm_set1_epi32((typ & (kAccessRead | kAccessAtomic)) << 30); - const m128 rw_weaker = - _mm_cmpeq_epi32(_mm_max_epu32(shadow, access_read_atomic), shadow); - const m128 rewrite = _mm_and_si128(same_sid_access, rw_weaker); - const int rewrite_mask = _mm_movemask_epi8(rewrite); - int index = __builtin_ffs(rewrite_mask); - if (UNLIKELY(index == 0)) { - const m128 empty = _mm_cmpeq_epi32(shadow, zero); - const int empty_mask = _mm_movemask_epi8(empty); - index = __builtin_ffs(empty_mask); - if (UNLIKELY(index == 0)) - index = (atomic_load_relaxed(&thr->trace_pos) / 2) % 16; +bool ContainsSameAccessSlow(u64 *s, u64 a, u64 sync_epoch, bool is_write) { + Shadow cur(a); + for (uptr i = 0; i < kShadowCnt; i++) { + Shadow old(LoadShadow(&s[i])); + if (Shadow::Addr0AndSizeAreEqual(cur, old) && + old.TidWithIgnore() == cur.TidWithIgnore() && + old.epoch() > sync_epoch && old.IsAtomic() == cur.IsAtomic() && + old.IsRead() <= cur.IsRead()) + return true; } - StoreShadow(&shadow_mem[index / 4], cur.raw()); - // We could zero other slots determined by rewrite_mask. - // That would help other threads to evict better slots, - // but it's unclear if it's worth it. return false; } -SHARED: - m128 thread_epochs = _mm_set1_epi32(0x7fffffff); - // Need to unwind this because _mm_extract_epi8/_mm_insert_epi32 - // indexes must be constants. -# define LOAD_EPOCH(idx) \ - if (LIKELY(race_mask & (1 << (idx * 4)))) { \ - u8 sid = _mm_extract_epi8(shadow, idx * 4 + 1); \ - u16 epoch = static_cast(thr->clock.Get(static_cast(sid))); \ - thread_epochs = _mm_insert_epi32(thread_epochs, u32(epoch) << 16, idx); \ - } - LOAD_EPOCH(0); - LOAD_EPOCH(1); - LOAD_EPOCH(2); - LOAD_EPOCH(3); -# undef LOAD_EPOCH - const m128 mask_epoch = _mm_set1_epi32(0x3fff0000); - const m128 shadow_epochs = _mm_and_si128(shadow, mask_epoch); - const m128 concurrent = _mm_cmplt_epi32(thread_epochs, shadow_epochs); - const int concurrent_mask = _mm_movemask_epi8(concurrent); - if (LIKELY(concurrent_mask == 0)) - goto STORE; - - DoReportRaceV(thr, shadow_mem, cur, concurrent_mask, shadow, typ); - return true; +#if TSAN_VECTORIZE +# define SHUF(v0, v1, i0, i1, i2, i3) \ + _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(v0), \ + _mm_castsi128_ps(v1), \ + (i0)*1 + (i1)*4 + (i2)*16 + (i3)*64)) +ALWAYS_INLINE +bool ContainsSameAccessFast(u64 *s, u64 a, u64 sync_epoch, bool is_write) { + // This is an optimized version of ContainsSameAccessSlow. 
+ // load current access into access[0:63] + const m128 access = _mm_cvtsi64_si128(a); + // duplicate high part of access in addr0: + // addr0[0:31] = access[32:63] + // addr0[32:63] = access[32:63] + // addr0[64:95] = access[32:63] + // addr0[96:127] = access[32:63] + const m128 addr0 = SHUF(access, access, 1, 1, 1, 1); + // load 4 shadow slots + const m128 shadow0 = _mm_load_si128((__m128i *)s); + const m128 shadow1 = _mm_load_si128((__m128i *)s + 1); + // load high parts of 4 shadow slots into addr_vect: + // addr_vect[0:31] = shadow0[32:63] + // addr_vect[32:63] = shadow0[96:127] + // addr_vect[64:95] = shadow1[32:63] + // addr_vect[96:127] = shadow1[96:127] + m128 addr_vect = SHUF(shadow0, shadow1, 1, 3, 1, 3); + if (!is_write) { + // set IsRead bit in addr_vect + const m128 rw_mask1 = _mm_cvtsi64_si128(1 << 15); + const m128 rw_mask = SHUF(rw_mask1, rw_mask1, 0, 0, 0, 0); + addr_vect = _mm_or_si128(addr_vect, rw_mask); + } + // addr0 == addr_vect? + const m128 addr_res = _mm_cmpeq_epi32(addr0, addr_vect); + // epoch1[0:63] = sync_epoch + const m128 epoch1 = _mm_cvtsi64_si128(sync_epoch); + // epoch[0:31] = sync_epoch[0:31] + // epoch[32:63] = sync_epoch[0:31] + // epoch[64:95] = sync_epoch[0:31] + // epoch[96:127] = sync_epoch[0:31] + const m128 epoch = SHUF(epoch1, epoch1, 0, 0, 0, 0); + // load low parts of shadow cell epochs into epoch_vect: + // epoch_vect[0:31] = shadow0[0:31] + // epoch_vect[32:63] = shadow0[64:95] + // epoch_vect[64:95] = shadow1[0:31] + // epoch_vect[96:127] = shadow1[64:95] + const m128 epoch_vect = SHUF(shadow0, shadow1, 0, 2, 0, 2); + // epoch_vect >= sync_epoch? + const m128 epoch_res = _mm_cmpgt_epi32(epoch_vect, epoch); + // addr_res & epoch_res + const m128 res = _mm_and_si128(addr_res, epoch_res); + // mask[0] = res[7] + // mask[1] = res[15] + // ... + // mask[15] = res[127] + const int mask = _mm_movemask_epi8(res); + return mask != 0; } - -# define LOAD_CURRENT_SHADOW(cur, shadow_mem) \ - const m128 access = _mm_set1_epi32(static_cast((cur).raw())); \ - const m128 shadow = _mm_load_si128(reinterpret_cast(shadow_mem)) #endif -char* DumpShadow(char* buf, RawShadow raw) { - if (raw == Shadow::kEmpty) { - internal_snprintf(buf, 64, "0"); - return buf; - } - Shadow s(raw); - AccessType typ; - s.GetAccess(nullptr, nullptr, &typ); - internal_snprintf(buf, 64, "{tid=%u@%u access=0x%x typ=%x}", - static_cast(s.sid()), static_cast(s.epoch()), - s.access(), static_cast(typ)); - return buf; +ALWAYS_INLINE +bool ContainsSameAccess(u64 *s, u64 a, u64 sync_epoch, bool is_write) { +#if TSAN_VECTORIZE + bool res = ContainsSameAccessFast(s, a, sync_epoch, is_write); + // NOTE: this check can fail if the shadow is concurrently mutated + // by other threads. But it still can be useful if you modify + // ContainsSameAccessFast and want to ensure that it's not completely broken. + // DCHECK_EQ(res, ContainsSameAccessSlow(s, a, sync_epoch, is_write)); + return res; +#else + return ContainsSameAccessSlow(s, a, sync_epoch, is_write); +#endif } -// TryTrace* and TraceRestart* functions allow to turn memory access and func -// entry/exit callbacks into leaf functions with all associated performance -// benefits. These hottest callbacks do only 2 slow path calls: report a race -// and trace part switching. Race reporting is easy to turn into a tail call, we -// just always return from the runtime after reporting a race. But trace part -// switching is harder because it needs to be in the middle of callbacks. 
To -// turn it into a tail call we immidiately return after TraceRestart* functions, -// but TraceRestart* functions themselves recurse into the callback after -// switching trace part. As the result the hottest callbacks contain only tail -// calls, which effectively makes them leaf functions (can use all registers, -// no frame setup, etc). -NOINLINE void TraceRestartMemoryAccess(ThreadState* thr, uptr pc, uptr addr, - uptr size, AccessType typ) { - TraceSwitchPart(thr); - MemoryAccess(thr, pc, addr, size, typ); -} +ALWAYS_INLINE USED void MemoryAccess(ThreadState *thr, uptr pc, uptr addr, + int kAccessSizeLog, bool kAccessIsWrite, + bool kIsAtomic) { + RawShadow *shadow_mem = MemToShadow(addr); + DPrintf2( + "#%d: MemoryAccess: @%p %p size=%d" + " is_write=%d shadow_mem=%p {%zx, %zx, %zx, %zx}\n", + (int)thr->fast_state.tid(), (void *)pc, (void *)addr, + (int)(1 << kAccessSizeLog), kAccessIsWrite, shadow_mem, + (uptr)shadow_mem[0], (uptr)shadow_mem[1], (uptr)shadow_mem[2], + (uptr)shadow_mem[3]); +#if SANITIZER_DEBUG + if (!IsAppMem(addr)) { + Printf("Access to non app mem %zx\n", addr); + DCHECK(IsAppMem(addr)); + } + if (!IsShadowMem(shadow_mem)) { + Printf("Bad shadow addr %p (%zx)\n", shadow_mem, addr); + DCHECK(IsShadowMem(shadow_mem)); + } +#endif -ALWAYS_INLINE USED void MemoryAccess(ThreadState* thr, uptr pc, uptr addr, - uptr size, AccessType typ) { - RawShadow* shadow_mem = MemToShadow(addr); - UNUSED char memBuf[4][64]; - DPrintf2("#%d: Access: %d@%d %p/%zd typ=0x%x {%s, %s, %s, %s}\n", thr->tid, - static_cast(thr->fast_state.sid()), - static_cast(thr->fast_state.epoch()), (void*)addr, size, - static_cast(typ), DumpShadow(memBuf[0], shadow_mem[0]), - DumpShadow(memBuf[1], shadow_mem[1]), - DumpShadow(memBuf[2], shadow_mem[2]), - DumpShadow(memBuf[3], shadow_mem[3])); + if (!SANITIZER_GO && !kAccessIsWrite && *shadow_mem == kShadowRodata) { + // Access to .rodata section, no races here. + // Measurements show that it can be 10-20% of all memory accesses. 
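The removed comment above describes the restart pattern that keeps the hot callbacks leaf functions. A generic sketch of that pattern follows, with hypothetical names: TryFastPath and SlowReplenish merely stand in for the roles of TraceAcquire and TraceSwitchPart.

    // Sketch: the hot path either succeeds or tail-calls a cold NOINLINE helper
    // that does the slow work and then re-enters the hot path.
    #include <cstdint>

    static uint64_t budget = 0;

    static bool TryFastPath(uint64_t) {          // cheap check, may fail
      if (budget == 0) return false;
      --budget;
      return true;
    }

    static __attribute__((noinline)) void SlowReplenish() { budget = 1024; }

    static void HandleRestart(uint64_t x);

    static inline void Handle(uint64_t x) {
      if (__builtin_expect(!TryFastPath(x), 0))
        return HandleRestart(x);                 // tail call; Handle stays a leaf
    }

    static __attribute__((noinline)) void HandleRestart(uint64_t x) {
      SlowReplenish();                           // e.g. start a new trace part
      Handle(x);                                 // retry; fast path now succeeds
    }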
+ return; + } FastState fast_state = thr->fast_state; - Shadow cur(fast_state, addr, size, typ); - - LOAD_CURRENT_SHADOW(cur, shadow_mem); - if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ))) + if (UNLIKELY(fast_state.GetIgnoreBit())) { return; - if (UNLIKELY(fast_state.GetIgnoreBit())) - return; - if (!TryTraceMemoryAccess(thr, pc, addr, size, typ)) - return TraceRestartMemoryAccess(thr, pc, addr, size, typ); - CheckRaces(thr, shadow_mem, cur, shadow, access, typ); -} + } -NOINLINE -void RestartUnalignedMemoryAccess(ThreadState* thr, uptr pc, uptr addr, - uptr size, AccessType typ) { - TraceSwitchPart(thr); - UnalignedMemoryAccess(thr, pc, addr, size, typ); -} + Shadow cur(fast_state); + cur.SetAddr0AndSizeLog(addr & 7, kAccessSizeLog); + cur.SetWrite(kAccessIsWrite); + cur.SetAtomic(kIsAtomic); -ALWAYS_INLINE USED void UnalignedMemoryAccess(ThreadState* thr, uptr pc, - uptr addr, uptr size, - AccessType typ) { - DCHECK_LE(size, 8); - FastState fast_state = thr->fast_state; - if (UNLIKELY(fast_state.GetIgnoreBit())) + if (LIKELY(ContainsSameAccess(shadow_mem, cur.raw(), thr->fast_synch_epoch, + kAccessIsWrite))) { return; - RawShadow* shadow_mem = MemToShadow(addr); - bool traced = false; - uptr size1 = Min(size, RoundUp(addr + 1, kShadowCell) - addr); - { - Shadow cur(fast_state, addr, size1, typ); - LOAD_CURRENT_SHADOW(cur, shadow_mem); - if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ))) - goto SECOND; - if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ)) - return RestartUnalignedMemoryAccess(thr, pc, addr, size, typ); - traced = true; - if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ))) - return; } -SECOND: - uptr size2 = size - size1; - if (LIKELY(size2 == 0)) - return; - shadow_mem += kShadowCnt; - Shadow cur(fast_state, 0, size2, typ); - LOAD_CURRENT_SHADOW(cur, shadow_mem); - if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ))) - return; - if (!traced && !TryTraceMemoryAccessRange(thr, pc, addr, size, typ)) - return RestartUnalignedMemoryAccess(thr, pc, addr, size, typ); - CheckRaces(thr, shadow_mem, cur, shadow, access, typ); + + if (kCollectHistory) { + fast_state.IncrementEpoch(); + thr->fast_state = fast_state; + TraceAddEvent(thr, fast_state, EventTypeMop, pc); + cur.IncrementEpoch(); + } + + MemoryAccessImpl1(thr, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic, + shadow_mem, cur); } -void ShadowSet(RawShadow* p, RawShadow* end, RawShadow v) { - DCHECK_LE(p, end); - DCHECK(IsShadowMem(p)); - DCHECK(IsShadowMem(end)); - UNUSED const uptr kAlign = kShadowCnt * kShadowSize; - DCHECK_EQ(reinterpret_cast(p) % kAlign, 0); - DCHECK_EQ(reinterpret_cast(end) % kAlign, 0); -#if !TSAN_VECTORIZE - for (; p < end; p += kShadowCnt) { - p[0] = v; - for (uptr i = 1; i < kShadowCnt; i++) p[i] = Shadow::kEmpty; +// Called by MemoryAccessRange in tsan_rtl_thread.cpp +ALWAYS_INLINE USED void MemoryAccessImpl(ThreadState *thr, uptr addr, + int kAccessSizeLog, + bool kAccessIsWrite, bool kIsAtomic, + u64 *shadow_mem, Shadow cur) { + if (LIKELY(ContainsSameAccess(shadow_mem, cur.raw(), thr->fast_synch_epoch, + kAccessIsWrite))) { + return; } -#else - m128 vv = _mm_setr_epi32( - static_cast(v), static_cast(Shadow::kEmpty), - static_cast(Shadow::kEmpty), static_cast(Shadow::kEmpty)); - m128* vp = reinterpret_cast(p); - m128* vend = reinterpret_cast(end); - for (; vp < vend; vp++) _mm_store_si128(vp, vv); -#endif + + MemoryAccessImpl1(thr, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic, + shadow_mem, cur); } -static void 
MemoryRangeSet(uptr addr, uptr size, RawShadow val) { +static void MemoryRangeSet(ThreadState *thr, uptr pc, uptr addr, uptr size, + u64 val) { + (void)thr; + (void)pc; if (size == 0) return; - DCHECK_EQ(addr % kShadowCell, 0); - DCHECK_EQ(size % kShadowCell, 0); + // FIXME: fix me. + uptr offset = addr % kShadowCell; + if (offset) { + offset = kShadowCell - offset; + if (size <= offset) + return; + addr += offset; + size -= offset; + } + DCHECK_EQ(addr % 8, 0); // If a user passes some insane arguments (memset(0)), // let it just crash as usual. if (!IsAppMem(addr) || !IsAppMem(addr + size - 1)) return; - RawShadow* begin = MemToShadow(addr); - RawShadow* end = begin + size / kShadowCell * kShadowCnt; // Don't want to touch lots of shadow memory. // If a program maps 10MB stack, there is no need reset the whole range. + size = (size + (kShadowCell - 1)) & ~(kShadowCell - 1); // UnmapOrDie/MmapFixedNoReserve does not work on Windows. - if (SANITIZER_WINDOWS || - size <= common_flags()->clear_shadow_mmap_threshold) { - ShadowSet(begin, end, val); - return; - } - // The region is big, reset only beginning and end. - const uptr kPageSize = GetPageSizeCached(); - // Set at least first kPageSize/2 to page boundary. - RawShadow* mid1 = - Min(end, reinterpret_cast(RoundUp( - reinterpret_cast(begin) + kPageSize / 2, kPageSize))); - ShadowSet(begin, mid1, val); - // Reset middle part. - RawShadow* mid2 = RoundDown(end, kPageSize); - if (mid2 > mid1) { - if (!MmapFixedSuperNoReserve((uptr)mid1, (uptr)mid2 - (uptr)mid1)) + if (SANITIZER_WINDOWS || size < common_flags()->clear_shadow_mmap_threshold) { + RawShadow *p = MemToShadow(addr); + CHECK(IsShadowMem(p)); + CHECK(IsShadowMem(p + size * kShadowCnt / kShadowCell - 1)); + // FIXME: may overwrite a part outside the region + for (uptr i = 0; i < size / kShadowCell * kShadowCnt;) { + p[i++] = val; + for (uptr j = 1; j < kShadowCnt; j++) p[i++] = 0; + } + } else { + // The region is big, reset only beginning and end. + const uptr kPageSize = GetPageSizeCached(); + RawShadow *begin = MemToShadow(addr); + RawShadow *end = begin + size / kShadowCell * kShadowCnt; + RawShadow *p = begin; + // Set at least first kPageSize/2 to page boundary. + while ((p < begin + kPageSize / kShadowSize / 2) || ((uptr)p % kPageSize)) { + *p++ = val; + for (uptr j = 1; j < kShadowCnt; j++) *p++ = 0; + } + // Reset middle part. + RawShadow *p1 = p; + p = RoundDown(end, kPageSize); + if (!MmapFixedSuperNoReserve((uptr)p1, (uptr)p - (uptr)p1)) Die(); + // Set the ending. + while (p < end) { + *p++ = val; + for (uptr j = 1; j < kShadowCnt; j++) *p++ = 0; + } } - // Set the ending. - ShadowSet(mid2, end, val); } -void MemoryResetRange(ThreadState* thr, uptr pc, uptr addr, uptr size) { - uptr addr1 = RoundDown(addr, kShadowCell); - uptr size1 = RoundUp(size + addr - addr1, kShadowCell); - MemoryRangeSet(addr1, size1, Shadow::kEmpty); +void MemoryResetRange(ThreadState *thr, uptr pc, uptr addr, uptr size) { + MemoryRangeSet(thr, pc, addr, size, 0); } -void MemoryRangeFreed(ThreadState* thr, uptr pc, uptr addr, uptr size) { - // Callers must lock the slot to ensure synchronization with the reset. - // The problem with "freed" memory is that it's not "monotonic" - // with respect to bug detection: freed memory is bad to access, - // but then if the heap block is reallocated later, it's good to access. 
- // As the result a garbage "freed" shadow can lead to a false positive - // if it happens to match a real free in the thread trace, - // but the heap block was reallocated before the current memory access, - // so it's still good to access. It's not the case with data races. - DCHECK(thr->slot_locked); - DCHECK_EQ(addr % kShadowCell, 0); - size = RoundUp(size, kShadowCell); - // Processing more than 1k (2k of shadow) is expensive, +void MemoryRangeFreed(ThreadState *thr, uptr pc, uptr addr, uptr size) { + // Processing more than 1k (4k of shadow) is expensive, // can cause excessive memory consumption (user does not necessary touch // the whole range) and most likely unnecessary. - size = Min(size, 1024); - const AccessType typ = - kAccessWrite | kAccessFree | kAccessCheckOnly | kAccessNoRodata; - TraceMemoryAccessRange(thr, pc, addr, size, typ); - RawShadow* shadow_mem = MemToShadow(addr); - Shadow cur(thr->fast_state, 0, kShadowCell, typ); -#if TSAN_VECTORIZE - const m128 access = _mm_set1_epi32(static_cast(cur.raw())); - const m128 freed = _mm_setr_epi32( - static_cast(Shadow::FreedMarker()), - static_cast(Shadow::FreedInfo(cur.sid(), cur.epoch())), 0, 0); - for (; size; size -= kShadowCell, shadow_mem += kShadowCnt) { - const m128 shadow = _mm_load_si128((m128*)shadow_mem); - if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ))) - return; - _mm_store_si128((m128*)shadow_mem, freed); + if (size > 1024) + size = 1024; + CHECK_EQ(thr->is_freeing, false); + thr->is_freeing = true; + MemoryAccessRange(thr, pc, addr, size, true); + thr->is_freeing = false; + if (kCollectHistory) { + thr->fast_state.IncrementEpoch(); + TraceAddEvent(thr, thr->fast_state, EventTypeMop, pc); } -#else - for (; size; size -= kShadowCell, shadow_mem += kShadowCnt) { - if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, 0, 0, typ))) - return; - StoreShadow(&shadow_mem[0], Shadow::FreedMarker()); - StoreShadow(&shadow_mem[1], Shadow::FreedInfo(cur.sid(), cur.epoch())); - StoreShadow(&shadow_mem[2], Shadow::kEmpty); - StoreShadow(&shadow_mem[3], Shadow::kEmpty); + Shadow s(thr->fast_state); + s.ClearIgnoreBit(); + s.MarkAsFreed(); + s.SetWrite(true); + s.SetAddr0AndSizeLog(0, 3); + MemoryRangeSet(thr, pc, addr, size, s.raw()); +} + +void MemoryRangeImitateWrite(ThreadState *thr, uptr pc, uptr addr, uptr size) { + if (kCollectHistory) { + thr->fast_state.IncrementEpoch(); + TraceAddEvent(thr, thr->fast_state, EventTypeMop, pc); } -#endif -} - -void MemoryRangeImitateWrite(ThreadState* thr, uptr pc, uptr addr, uptr size) { - DCHECK_EQ(addr % kShadowCell, 0); - size = RoundUp(size, kShadowCell); - TraceMemoryAccessRange(thr, pc, addr, size, kAccessWrite); - Shadow cur(thr->fast_state, 0, 8, kAccessWrite); - MemoryRangeSet(addr, size, cur.raw()); + Shadow s(thr->fast_state); + s.ClearIgnoreBit(); + s.SetWrite(true); + s.SetAddr0AndSizeLog(0, 3); + MemoryRangeSet(thr, pc, addr, size, s.raw()); } -void MemoryRangeImitateWriteOrResetRange(ThreadState* thr, uptr pc, uptr addr, +void MemoryRangeImitateWriteOrResetRange(ThreadState *thr, uptr pc, uptr addr, uptr size) { if (thr->ignore_reads_and_writes == 0) MemoryRangeImitateWrite(thr, pc, addr, size); @@ -617,29 +518,14 @@ void MemoryRangeImitateWriteOrResetRange(ThreadState* thr, uptr pc, uptr addr, MemoryResetRange(thr, pc, addr, size); } -ALWAYS_INLINE -bool MemoryAccessRangeOne(ThreadState* thr, RawShadow* shadow_mem, Shadow cur, - AccessType typ) { - LOAD_CURRENT_SHADOW(cur, shadow_mem); - if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ))) 
- return false; - return CheckRaces(thr, shadow_mem, cur, shadow, access, typ); -} - -template -NOINLINE void RestartMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, - uptr size) { - TraceSwitchPart(thr); - MemoryAccessRangeT(thr, pc, addr, size); -} +void MemoryAccessRange(ThreadState *thr, uptr pc, uptr addr, uptr size, + bool is_write) { + if (size == 0) + return; -template -void MemoryAccessRangeT(ThreadState* thr, uptr pc, uptr addr, uptr size) { - const AccessType typ = - (is_read ? kAccessRead : kAccessWrite) | kAccessNoRodata; - RawShadow* shadow_mem = MemToShadow(addr); - DPrintf2("#%d: MemoryAccessRange: @%p %p size=%d is_read=%d\n", thr->tid, - (void*)pc, (void*)addr, (int)size, is_read); + RawShadow *shadow_mem = MemToShadow(addr); + DPrintf2("#%d: MemoryAccessRange: @%p %p size=%d is_write=%d\n", thr->tid, + (void *)pc, (void *)addr, (int)size, is_write); #if SANITIZER_DEBUG if (!IsAppMem(addr)) { @@ -651,57 +537,65 @@ void MemoryAccessRangeT(ThreadState* thr, uptr pc, uptr addr, uptr size) { DCHECK(IsAppMem(addr + size - 1)); } if (!IsShadowMem(shadow_mem)) { - Printf("Bad shadow addr %p (%zx)\n", static_cast(shadow_mem), addr); + Printf("Bad shadow addr %p (%zx)\n", shadow_mem, addr); DCHECK(IsShadowMem(shadow_mem)); } - if (!IsShadowMem(shadow_mem + size * kShadowCnt - 1)) { - Printf("Bad shadow addr %p (%zx)\n", - static_cast(shadow_mem + size * kShadowCnt - 1), + if (!IsShadowMem(shadow_mem + size * kShadowCnt / 8 - 1)) { + Printf("Bad shadow addr %p (%zx)\n", shadow_mem + size * kShadowCnt / 8 - 1, addr + size - 1); - DCHECK(IsShadowMem(shadow_mem + size * kShadowCnt - 1)); + DCHECK(IsShadowMem(shadow_mem + size * kShadowCnt / 8 - 1)); } #endif - // Access to .rodata section, no races here. - // Measurements show that it can be 10-20% of all memory accesses. - if (is_read && *shadow_mem == Shadow::kRodata) + if (*shadow_mem == kShadowRodata) { + DCHECK(!is_write); + // Access to .rodata section, no races here. + // Measurements show that it can be 10-20% of all memory accesses. return; + } FastState fast_state = thr->fast_state; - if (UNLIKELY(fast_state.GetIgnoreBit())) + if (fast_state.GetIgnoreBit()) return; - if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ)) - return RestartMemoryAccessRange(thr, pc, addr, size); + fast_state.IncrementEpoch(); + thr->fast_state = fast_state; + TraceAddEvent(thr, fast_state, EventTypeMop, pc); - if (UNLIKELY(addr % kShadowCell)) { - // Handle unaligned beginning, if any. - uptr size1 = Min(size, RoundUp(addr, kShadowCell) - addr); - size -= size1; - Shadow cur(fast_state, addr, size1, typ); - if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ))) - return; - shadow_mem += kShadowCnt; + bool unaligned = (addr % kShadowCell) != 0; + + // Handle unaligned beginning, if any. + for (; addr % kShadowCell && size; addr++, size--) { + int const kAccessSizeLog = 0; + Shadow cur(fast_state); + cur.SetWrite(is_write); + cur.SetAddr0AndSizeLog(addr & (kShadowCell - 1), kAccessSizeLog); + MemoryAccessImpl(thr, addr, kAccessSizeLog, is_write, false, shadow_mem, + cur); } + if (unaligned) + shadow_mem += kShadowCnt; // Handle middle part, if any. 
- Shadow cur(fast_state, 0, kShadowCell, typ); - for (; size >= kShadowCell; size -= kShadowCell, shadow_mem += kShadowCnt) { - if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ))) - return; + for (; size >= kShadowCell; addr += kShadowCell, size -= kShadowCell) { + int const kAccessSizeLog = 3; + Shadow cur(fast_state); + cur.SetWrite(is_write); + cur.SetAddr0AndSizeLog(0, kAccessSizeLog); + MemoryAccessImpl(thr, addr, kAccessSizeLog, is_write, false, shadow_mem, + cur); + shadow_mem += kShadowCnt; } // Handle ending, if any. - if (UNLIKELY(size)) { - Shadow cur(fast_state, 0, size, typ); - if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ))) - return; + for (; size; addr++, size--) { + int const kAccessSizeLog = 0; + Shadow cur(fast_state); + cur.SetWrite(is_write); + cur.SetAddr0AndSizeLog(addr & (kShadowCell - 1), kAccessSizeLog); + MemoryAccessImpl(thr, addr, kAccessSizeLog, is_write, false, shadow_mem, + cur); } } -template void MemoryAccessRangeT(ThreadState* thr, uptr pc, uptr addr, - uptr size); -template void MemoryAccessRangeT(ThreadState* thr, uptr pc, uptr addr, - uptr size); - } // namespace __tsan #if !SANITIZER_GO diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp index 5ca2e4fca827d..7d6b41116aa6f 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp @@ -23,8 +23,6 @@ namespace __tsan { void ReportDeadlock(ThreadState *thr, uptr pc, DDReport *r); -void ReportDestroyLocked(ThreadState *thr, uptr pc, uptr addr, - FastState last_lock, StackID creation_stack_id); struct Callback final : public DDCallback { ThreadState *thr; @@ -38,17 +36,17 @@ struct Callback final : public DDCallback { } StackID Unwind() override { return CurrentStackId(thr, pc); } - int UniqueTid() override { return thr->tid; } + int UniqueTid() override { return thr->unique_id; } }; void DDMutexInit(ThreadState *thr, uptr pc, SyncVar *s) { Callback cb(thr, pc); ctx->dd->MutexInit(&cb, &s->dd); - s->dd.ctx = s->addr; + s->dd.ctx = s->GetId(); } static void ReportMutexMisuse(ThreadState *thr, uptr pc, ReportType typ, - uptr addr, StackID creation_stack_id) { + uptr addr, u64 mid) { // In Go, these misuses are either impossible, or detected by std lib, // or false positives (e.g. unlock in a different thread). if (SANITIZER_GO) @@ -57,7 +55,7 @@ static void ReportMutexMisuse(ThreadState *thr, uptr pc, ReportType typ, return; ThreadRegistryLock l(&ctx->thread_registry); ScopedReport rep(typ); - rep.AddMutex(addr, creation_stack_id); + rep.AddMutex(mid); VarSizeStackTrace trace; ObtainCurrentStack(thr, pc, &trace); rep.AddStack(trace, true); @@ -65,93 +63,95 @@ static void ReportMutexMisuse(ThreadState *thr, uptr pc, ReportType typ, OutputReport(thr, rep); } -static void RecordMutexLock(ThreadState *thr, uptr pc, uptr addr, - StackID stack_id, bool write) { - auto typ = write ? EventType::kLock : EventType::kRLock; - // Note: it's important to trace before modifying mutex set - // because tracing can switch trace part and we write the current - // mutex set in the beginning of each part. - // If we do it in the opposite order, we will write already reduced - // mutex set in the beginning of the part and then trace unlock again. - TraceMutexLock(thr, typ, pc, addr, stack_id); - thr->mset.AddAddr(addr, stack_id, write); -} - -static void RecordMutexUnlock(ThreadState *thr, uptr addr) { - // See the comment in RecordMutexLock re order of operations. 
- TraceMutexUnlock(thr, addr); - thr->mset.DelAddr(addr); -} - void MutexCreate(ThreadState *thr, uptr pc, uptr addr, u32 flagz) { DPrintf("#%d: MutexCreate %zx flagz=0x%x\n", thr->tid, addr, flagz); - if (!(flagz & MutexFlagLinkerInit) && pc && IsAppMem(addr)) + if (!(flagz & MutexFlagLinkerInit) && IsAppMem(addr)) { + CHECK(!thr->is_freeing); + thr->is_freeing = true; MemoryAccess(thr, pc, addr, 1, kAccessWrite); - SlotLocker locker(thr); - auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); + thr->is_freeing = false; + } + SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); + Lock l(&s->mtx); s->SetFlags(flagz & MutexCreationFlagMask); // Save stack in the case the sync object was created before as atomic. - if (!SANITIZER_GO && s->creation_stack_id == kInvalidStackID) + if (!SANITIZER_GO && s->creation_stack_id == 0) s->creation_stack_id = CurrentStackId(thr, pc); } void MutexDestroy(ThreadState *thr, uptr pc, uptr addr, u32 flagz) { DPrintf("#%d: MutexDestroy %zx\n", thr->tid, addr); bool unlock_locked = false; - StackID creation_stack_id; - FastState last_lock; + u64 mid = 0; + u64 last_lock = 0; { - auto s = ctx->metamap.GetSyncIfExists(addr); - if (!s) + SyncVar *s = ctx->metamap.GetSyncIfExists(addr); + if (s == 0) return; - SlotLocker locker(thr); - { - Lock lock(&s->mtx); - creation_stack_id = s->creation_stack_id; - last_lock = s->last_lock; - if ((flagz & MutexFlagLinkerInit) || s->IsFlagSet(MutexFlagLinkerInit) || - ((flagz & MutexFlagNotStatic) && !s->IsFlagSet(MutexFlagNotStatic))) { - // Destroy is no-op for linker-initialized mutexes. - return; - } - if (common_flags()->detect_deadlocks) { - Callback cb(thr, pc); - ctx->dd->MutexDestroy(&cb, &s->dd); - ctx->dd->MutexInit(&cb, &s->dd); - } - if (flags()->report_destroy_locked && s->owner_tid != kInvalidTid && - !s->IsFlagSet(MutexFlagBroken)) { - s->SetFlags(MutexFlagBroken); - unlock_locked = true; - } - s->Reset(); + Lock l(&s->mtx); + if ((flagz & MutexFlagLinkerInit) || s->IsFlagSet(MutexFlagLinkerInit) || + ((flagz & MutexFlagNotStatic) && !s->IsFlagSet(MutexFlagNotStatic))) { + // Destroy is no-op for linker-initialized mutexes. + return; + } + if (common_flags()->detect_deadlocks) { + Callback cb(thr, pc); + ctx->dd->MutexDestroy(&cb, &s->dd); + ctx->dd->MutexInit(&cb, &s->dd); + } + if (flags()->report_destroy_locked && s->owner_tid != kInvalidTid && + !s->IsFlagSet(MutexFlagBroken)) { + s->SetFlags(MutexFlagBroken); + unlock_locked = true; + } + mid = s->GetId(); + last_lock = s->last_lock; + if (!unlock_locked) + s->Reset(thr->proc()); // must not reset it before the report is printed + } + if (unlock_locked && ShouldReport(thr, ReportTypeMutexDestroyLocked)) { + ThreadRegistryLock l(&ctx->thread_registry); + ScopedReport rep(ReportTypeMutexDestroyLocked); + rep.AddMutex(mid); + VarSizeStackTrace trace; + ObtainCurrentStack(thr, pc, &trace); + rep.AddStack(trace, true); + FastState last(last_lock); + RestoreStack(last.tid(), last.epoch(), &trace, 0); + rep.AddStack(trace, true); + rep.AddLocation(addr, 1); + OutputReport(thr, rep); + + SyncVar *s = ctx->metamap.GetSyncIfExists(addr); + if (s != 0) { + Lock l(&s->mtx); + s->Reset(thr->proc()); } - // Imitate a memory write to catch unlock-destroy races. 
- if (pc && IsAppMem(addr)) - MemoryAccess(thr, pc, addr, 1, kAccessWrite | kAccessFree); } - if (unlock_locked && ShouldReport(thr, ReportTypeMutexDestroyLocked)) - ReportDestroyLocked(thr, pc, addr, last_lock, creation_stack_id); - thr->mset.DelAddr(addr, true); + thr->mset.Remove(mid); + // Imitate a memory write to catch unlock-destroy races. + // Do this outside of sync mutex, because it can report a race which locks + // sync mutexes. + if (IsAppMem(addr)) + MemoryAccess(thr, pc, addr, 1, kAccessWrite | kAccessFree); // s will be destroyed and freed in MetaMap::FreeBlock. } void MutexPreLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz) { DPrintf("#%d: MutexPreLock %zx flagz=0x%x\n", thr->tid, addr, flagz); - if (flagz & MutexFlagTryLock) - return; - if (!common_flags()->detect_deadlocks) - return; - Callback cb(thr, pc); - { - SlotLocker locker(thr); - auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); - ReadLock lock(&s->mtx); - s->UpdateFlags(flagz); - if (s->owner_tid != thr->tid) - ctx->dd->MutexBeforeLock(&cb, &s->dd, true); + if (!(flagz & MutexFlagTryLock) && common_flags()->detect_deadlocks) { + SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); + { + ReadLock l(&s->mtx); + s->UpdateFlags(flagz); + if (s->owner_tid != thr->tid) { + Callback cb(thr, pc); + ctx->dd->MutexBeforeLock(&cb, &s->dd, true); + } + } + Callback cb(thr, pc); + ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb)); } - ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb)); } void MutexPostLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz, int rec) { @@ -161,51 +161,48 @@ void MutexPostLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz, int rec) { CHECK_GT(rec, 0); else rec = 1; - if (pc && IsAppMem(addr)) + if (IsAppMem(addr)) MemoryAccess(thr, pc, addr, 1, kAccessRead | kAccessAtomic); - bool report_double_lock = false; + u64 mid = 0; bool pre_lock = false; bool first = false; - StackID creation_stack_id = kInvalidStackID; + bool report_double_lock = false; { - SlotLocker locker(thr); - auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); - creation_stack_id = s->creation_stack_id; - RecordMutexLock(thr, pc, addr, creation_stack_id, true); - { - Lock lock(&s->mtx); - first = s->recursion == 0; - s->UpdateFlags(flagz); - if (s->owner_tid == kInvalidTid) { - CHECK_EQ(s->recursion, 0); - s->owner_tid = thr->tid; - s->last_lock = thr->fast_state; - } else if (s->owner_tid == thr->tid) { - CHECK_GT(s->recursion, 0); - } else if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) { - s->SetFlags(MutexFlagBroken); - report_double_lock = true; - } - s->recursion += rec; - if (first) { - if (!thr->ignore_sync) { - thr->clock.Acquire(s->clock); - thr->clock.Acquire(s->read_clock); - } - } - if (first && common_flags()->detect_deadlocks) { - pre_lock = (flagz & MutexFlagDoPreLockOnPostLock) && - !(flagz & MutexFlagTryLock); - Callback cb(thr, pc); - if (pre_lock) - ctx->dd->MutexBeforeLock(&cb, &s->dd, true); - ctx->dd->MutexAfterLock(&cb, &s->dd, true, flagz & MutexFlagTryLock); - } + SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); + Lock l(&s->mtx); + s->UpdateFlags(flagz); + thr->fast_state.IncrementEpoch(); + TraceAddEvent(thr, thr->fast_state, EventTypeLock, s->GetId()); + if (s->owner_tid == kInvalidTid) { + CHECK_EQ(s->recursion, 0); + s->owner_tid = thr->tid; + s->last_lock = thr->fast_state.raw(); + } else if (s->owner_tid == thr->tid) { + CHECK_GT(s->recursion, 0); + } else if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) { + 
s->SetFlags(MutexFlagBroken); + report_double_lock = true; + } + first = s->recursion == 0; + s->recursion += rec; + if (first) { + AcquireImpl(thr, pc, &s->clock); + AcquireImpl(thr, pc, &s->read_clock); + } else if (!s->IsFlagSet(MutexFlagWriteReentrant)) { } + thr->mset.Add(s->GetId(), true, thr->fast_state.epoch()); + if (first && common_flags()->detect_deadlocks) { + pre_lock = + (flagz & MutexFlagDoPreLockOnPostLock) && !(flagz & MutexFlagTryLock); + Callback cb(thr, pc); + if (pre_lock) + ctx->dd->MutexBeforeLock(&cb, &s->dd, true); + ctx->dd->MutexAfterLock(&cb, &s->dd, true, flagz & MutexFlagTryLock); + } + mid = s->GetId(); } if (report_double_lock) - ReportMutexMisuse(thr, pc, ReportTypeMutexDoubleLock, addr, - creation_stack_id); + ReportMutexMisuse(thr, pc, ReportTypeMutexDoubleLock, addr, mid); if (first && pre_lock && common_flags()->detect_deadlocks) { Callback cb(thr, pc); ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb)); @@ -214,47 +211,40 @@ void MutexPostLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz, int rec) { int MutexUnlock(ThreadState *thr, uptr pc, uptr addr, u32 flagz) { DPrintf("#%d: MutexUnlock %zx flagz=0x%x\n", thr->tid, addr, flagz); - if (pc && IsAppMem(addr)) + if (IsAppMem(addr)) MemoryAccess(thr, pc, addr, 1, kAccessRead | kAccessAtomic); - StackID creation_stack_id; - RecordMutexUnlock(thr, addr); + u64 mid = 0; bool report_bad_unlock = false; int rec = 0; { - SlotLocker locker(thr); - auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); - bool released = false; - { - Lock lock(&s->mtx); - creation_stack_id = s->creation_stack_id; - if (!SANITIZER_GO && (s->recursion == 0 || s->owner_tid != thr->tid)) { - if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) { - s->SetFlags(MutexFlagBroken); - report_bad_unlock = true; - } - } else { - rec = (flagz & MutexFlagRecursiveUnlock) ? s->recursion : 1; - s->recursion -= rec; - if (s->recursion == 0) { - s->owner_tid = kInvalidTid; - if (!thr->ignore_sync) { - thr->clock.ReleaseStore(&s->clock); - released = true; - } - } + SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); + Lock l(&s->mtx); + thr->fast_state.IncrementEpoch(); + TraceAddEvent(thr, thr->fast_state, EventTypeUnlock, s->GetId()); + if (!SANITIZER_GO && (s->recursion == 0 || s->owner_tid != thr->tid)) { + if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) { + s->SetFlags(MutexFlagBroken); + report_bad_unlock = true; } - if (common_flags()->detect_deadlocks && s->recursion == 0 && - !report_bad_unlock) { - Callback cb(thr, pc); - ctx->dd->MutexBeforeUnlock(&cb, &s->dd, true); + } else { + rec = (flagz & MutexFlagRecursiveUnlock) ? 
s->recursion : 1; + s->recursion -= rec; + if (s->recursion == 0) { + s->owner_tid = kInvalidTid; + ReleaseStoreImpl(thr, pc, &s->clock); + } else { } } - if (released) - IncrementEpoch(thr); + thr->mset.Del(s->GetId(), true); + if (common_flags()->detect_deadlocks && s->recursion == 0 && + !report_bad_unlock) { + Callback cb(thr, pc); + ctx->dd->MutexBeforeUnlock(&cb, &s->dd, true); + } + mid = s->GetId(); } if (report_bad_unlock) - ReportMutexMisuse(thr, pc, ReportTypeMutexBadUnlock, addr, - creation_stack_id); + ReportMutexMisuse(thr, pc, ReportTypeMutexBadUnlock, addr, mid); if (common_flags()->detect_deadlocks && !report_bad_unlock) { Callback cb(thr, pc); ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb)); @@ -264,56 +254,53 @@ int MutexUnlock(ThreadState *thr, uptr pc, uptr addr, u32 flagz) { void MutexPreReadLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz) { DPrintf("#%d: MutexPreReadLock %zx flagz=0x%x\n", thr->tid, addr, flagz); - if ((flagz & MutexFlagTryLock) || !common_flags()->detect_deadlocks) - return; - Callback cb(thr, pc); - { - SlotLocker locker(thr); - auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); - ReadLock lock(&s->mtx); - s->UpdateFlags(flagz); - ctx->dd->MutexBeforeLock(&cb, &s->dd, false); + if (!(flagz & MutexFlagTryLock) && common_flags()->detect_deadlocks) { + { + SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); + ReadLock l(&s->mtx); + s->UpdateFlags(flagz); + Callback cb(thr, pc); + ctx->dd->MutexBeforeLock(&cb, &s->dd, false); + } + Callback cb(thr, pc); + ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb)); } - ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb)); } void MutexPostReadLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz) { DPrintf("#%d: MutexPostReadLock %zx flagz=0x%x\n", thr->tid, addr, flagz); - if (pc && IsAppMem(addr)) + if (IsAppMem(addr)) MemoryAccess(thr, pc, addr, 1, kAccessRead | kAccessAtomic); + u64 mid = 0; bool report_bad_lock = false; bool pre_lock = false; - StackID creation_stack_id = kInvalidStackID; { - SlotLocker locker(thr); - auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); - creation_stack_id = s->creation_stack_id; - RecordMutexLock(thr, pc, addr, creation_stack_id, false); - { - ReadLock lock(&s->mtx); - s->UpdateFlags(flagz); - if (s->owner_tid != kInvalidTid) { - if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) { - s->SetFlags(MutexFlagBroken); - report_bad_lock = true; - } - } - if (!thr->ignore_sync) - thr->clock.Acquire(s->clock); - s->last_lock = thr->fast_state; - if (common_flags()->detect_deadlocks) { - pre_lock = (flagz & MutexFlagDoPreLockOnPostLock) && - !(flagz & MutexFlagTryLock); - Callback cb(thr, pc); - if (pre_lock) - ctx->dd->MutexBeforeLock(&cb, &s->dd, false); - ctx->dd->MutexAfterLock(&cb, &s->dd, false, flagz & MutexFlagTryLock); + SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); + ReadLock l(&s->mtx); + s->UpdateFlags(flagz); + thr->fast_state.IncrementEpoch(); + TraceAddEvent(thr, thr->fast_state, EventTypeRLock, s->GetId()); + if (s->owner_tid != kInvalidTid) { + if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) { + s->SetFlags(MutexFlagBroken); + report_bad_lock = true; } } + AcquireImpl(thr, pc, &s->clock); + s->last_lock = thr->fast_state.raw(); + thr->mset.Add(s->GetId(), false, thr->fast_state.epoch()); + if (common_flags()->detect_deadlocks) { + pre_lock = + (flagz & MutexFlagDoPreLockOnPostLock) && !(flagz & MutexFlagTryLock); + Callback cb(thr, pc); + if (pre_lock) + ctx->dd->MutexBeforeLock(&cb, 
&s->dd, false); + ctx->dd->MutexAfterLock(&cb, &s->dd, false, flagz & MutexFlagTryLock); + } + mid = s->GetId(); } if (report_bad_lock) - ReportMutexMisuse(thr, pc, ReportTypeMutexBadReadLock, addr, - creation_stack_id); + ReportMutexMisuse(thr, pc, ReportTypeMutexBadReadLock, addr, mid); if (pre_lock && common_flags()->detect_deadlocks) { Callback cb(thr, pc); ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb)); @@ -322,39 +309,31 @@ void MutexPostReadLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz) { void MutexReadUnlock(ThreadState *thr, uptr pc, uptr addr) { DPrintf("#%d: MutexReadUnlock %zx\n", thr->tid, addr); - if (pc && IsAppMem(addr)) + if (IsAppMem(addr)) MemoryAccess(thr, pc, addr, 1, kAccessRead | kAccessAtomic); - RecordMutexUnlock(thr, addr); - StackID creation_stack_id; + u64 mid = 0; bool report_bad_unlock = false; { - SlotLocker locker(thr); - auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); - bool released = false; - { - Lock lock(&s->mtx); - creation_stack_id = s->creation_stack_id; - if (s->owner_tid != kInvalidTid) { - if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) { - s->SetFlags(MutexFlagBroken); - report_bad_unlock = true; - } - } - if (!thr->ignore_sync) { - thr->clock.Release(&s->read_clock); - released = true; - } - if (common_flags()->detect_deadlocks && s->recursion == 0) { - Callback cb(thr, pc); - ctx->dd->MutexBeforeUnlock(&cb, &s->dd, false); + SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); + Lock l(&s->mtx); + thr->fast_state.IncrementEpoch(); + TraceAddEvent(thr, thr->fast_state, EventTypeRUnlock, s->GetId()); + if (s->owner_tid != kInvalidTid) { + if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) { + s->SetFlags(MutexFlagBroken); + report_bad_unlock = true; } } - if (released) - IncrementEpoch(thr); + ReleaseImpl(thr, pc, &s->read_clock); + if (common_flags()->detect_deadlocks && s->recursion == 0) { + Callback cb(thr, pc); + ctx->dd->MutexBeforeUnlock(&cb, &s->dd, false); + } + mid = s->GetId(); } + thr->mset.Del(mid, false); if (report_bad_unlock) - ReportMutexMisuse(thr, pc, ReportTypeMutexBadReadUnlock, addr, - creation_stack_id); + ReportMutexMisuse(thr, pc, ReportTypeMutexBadReadUnlock, addr, mid); if (common_flags()->detect_deadlocks) { Callback cb(thr, pc); ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb)); @@ -363,52 +342,44 @@ void MutexReadUnlock(ThreadState *thr, uptr pc, uptr addr) { void MutexReadOrWriteUnlock(ThreadState *thr, uptr pc, uptr addr) { DPrintf("#%d: MutexReadOrWriteUnlock %zx\n", thr->tid, addr); - if (pc && IsAppMem(addr)) + if (IsAppMem(addr)) MemoryAccess(thr, pc, addr, 1, kAccessRead | kAccessAtomic); - RecordMutexUnlock(thr, addr); - StackID creation_stack_id; + u64 mid = 0; bool report_bad_unlock = false; - bool write = true; { - SlotLocker locker(thr); - auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); - bool released = false; - { - Lock lock(&s->mtx); - creation_stack_id = s->creation_stack_id; - if (s->owner_tid == kInvalidTid) { - // Seems to be read unlock. - write = false; - if (!thr->ignore_sync) { - thr->clock.Release(&s->read_clock); - released = true; - } - } else if (s->owner_tid == thr->tid) { - // Seems to be write unlock. 
- CHECK_GT(s->recursion, 0); - s->recursion--; - if (s->recursion == 0) { - s->owner_tid = kInvalidTid; - if (!thr->ignore_sync) { - thr->clock.ReleaseStore(&s->clock); - released = true; - } - } - } else if (!s->IsFlagSet(MutexFlagBroken)) { - s->SetFlags(MutexFlagBroken); - report_bad_unlock = true; - } - if (common_flags()->detect_deadlocks && s->recursion == 0) { - Callback cb(thr, pc); - ctx->dd->MutexBeforeUnlock(&cb, &s->dd, write); + SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); + Lock l(&s->mtx); + bool write = true; + if (s->owner_tid == kInvalidTid) { + // Seems to be read unlock. + write = false; + thr->fast_state.IncrementEpoch(); + TraceAddEvent(thr, thr->fast_state, EventTypeRUnlock, s->GetId()); + ReleaseImpl(thr, pc, &s->read_clock); + } else if (s->owner_tid == thr->tid) { + // Seems to be write unlock. + thr->fast_state.IncrementEpoch(); + TraceAddEvent(thr, thr->fast_state, EventTypeUnlock, s->GetId()); + CHECK_GT(s->recursion, 0); + s->recursion--; + if (s->recursion == 0) { + s->owner_tid = kInvalidTid; + ReleaseStoreImpl(thr, pc, &s->clock); + } else { } + } else if (!s->IsFlagSet(MutexFlagBroken)) { + s->SetFlags(MutexFlagBroken); + report_bad_unlock = true; + } + thr->mset.Del(s->GetId(), write); + if (common_flags()->detect_deadlocks && s->recursion == 0) { + Callback cb(thr, pc); + ctx->dd->MutexBeforeUnlock(&cb, &s->dd, write); } - if (released) - IncrementEpoch(thr); + mid = s->GetId(); } if (report_bad_unlock) - ReportMutexMisuse(thr, pc, ReportTypeMutexBadUnlock, addr, - creation_stack_id); + ReportMutexMisuse(thr, pc, ReportTypeMutexBadUnlock, addr, mid); if (common_flags()->detect_deadlocks) { Callback cb(thr, pc); ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb)); @@ -417,120 +388,151 @@ void MutexReadOrWriteUnlock(ThreadState *thr, uptr pc, uptr addr) { void MutexRepair(ThreadState *thr, uptr pc, uptr addr) { DPrintf("#%d: MutexRepair %zx\n", thr->tid, addr); - SlotLocker locker(thr); - auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); - Lock lock(&s->mtx); + SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); + Lock l(&s->mtx); s->owner_tid = kInvalidTid; s->recursion = 0; } void MutexInvalidAccess(ThreadState *thr, uptr pc, uptr addr) { DPrintf("#%d: MutexInvalidAccess %zx\n", thr->tid, addr); - StackID creation_stack_id = kInvalidStackID; - { - SlotLocker locker(thr); - auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); - if (s) - creation_stack_id = s->creation_stack_id; - } - ReportMutexMisuse(thr, pc, ReportTypeMutexInvalidAccess, addr, - creation_stack_id); + SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true); + ReportMutexMisuse(thr, pc, ReportTypeMutexInvalidAccess, addr, s->GetId()); } void Acquire(ThreadState *thr, uptr pc, uptr addr) { DPrintf("#%d: Acquire %zx\n", thr->tid, addr); if (thr->ignore_sync) return; - auto s = ctx->metamap.GetSyncIfExists(addr); + SyncVar *s = ctx->metamap.GetSyncIfExists(addr); if (!s) return; - SlotLocker locker(thr); - if (!s->clock) - return; - ReadLock lock(&s->mtx); - thr->clock.Acquire(s->clock); + ReadLock l(&s->mtx); + AcquireImpl(thr, pc, &s->clock); +} + +static void UpdateClockCallback(ThreadContextBase *tctx_base, void *arg) { + ThreadState *thr = reinterpret_cast<ThreadState *>(arg); + ThreadContext *tctx = static_cast<ThreadContext *>(tctx_base); + u64 epoch = tctx->epoch1; + if (tctx->status == ThreadStatusRunning) { + epoch = tctx->thr->fast_state.epoch(); + tctx->thr->clock.NoteGlobalAcquire(epoch); + } + thr->clock.set(&thr->proc()->clock_cache, tctx->tid, epoch); }
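Background note (not part of the patch): UpdateClockCallback above and the AcquireGlobal function that follows restore the old "global acquire" path, where the caller folds every registered thread's current epoch (epoch1 once the thread has finished, the live fast_state epoch otherwise) into its own vector clock. A minimal stand-alone sketch of that merge, with simplified stand-in types (VC and kMaxThreads are illustrative, not the real TSan classes):

    #include <algorithm>
    #include <cstdint>

    constexpr int kMaxThreads = 1024;  // illustrative bound only

    struct VC {
      uint64_t epoch[kMaxThreads] = {};
      // Acquire: pull another clock's knowledge into this one by taking the
      // element-wise maximum (happens-before edges flow into the caller).
      void Acquire(const VC &other) {
        for (int i = 0; i < kMaxThreads; i++)
          epoch[i] = std::max(epoch[i], other.epoch[i]);
      }
      // Global acquire as in AcquireGlobal: observe one thread's latest epoch
      // directly; the registry callback invokes this once per thread.
      void AcquireThreadEpoch(int tid, uint64_t e) {
        epoch[tid] = std::max(epoch[tid], e);
      }
    };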
void AcquireGlobal(ThreadState *thr) { DPrintf("#%d: AcquireGlobal\n", thr->tid); if (thr->ignore_sync) return; - SlotLocker locker(thr); - for (auto &slot : ctx->slots) thr->clock.Set(slot.sid, slot.epoch()); + ThreadRegistryLock l(&ctx->thread_registry); + ctx->thread_registry.RunCallbackForEachThreadLocked(UpdateClockCallback, thr); } -void Release(ThreadState *thr, uptr pc, uptr addr) { - DPrintf("#%d: Release %zx\n", thr->tid, addr); +void ReleaseStoreAcquire(ThreadState *thr, uptr pc, uptr addr) { + DPrintf("#%d: ReleaseStoreAcquire %zx\n", thr->tid, addr); if (thr->ignore_sync) return; - SlotLocker locker(thr); - { - auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, false); - Lock lock(&s->mtx); - thr->clock.Release(&s->clock); - } - IncrementEpoch(thr); + SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, false); + Lock l(&s->mtx); + thr->fast_state.IncrementEpoch(); + // Can't increment epoch w/o writing to the trace as well. + TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0); + ReleaseStoreAcquireImpl(thr, pc, &s->clock); } -void ReleaseStore(ThreadState *thr, uptr pc, uptr addr) { - DPrintf("#%d: ReleaseStore %zx\n", thr->tid, addr); +void Release(ThreadState *thr, uptr pc, uptr addr) { + DPrintf("#%d: Release %zx\n", thr->tid, addr); if (thr->ignore_sync) return; - SlotLocker locker(thr); - { - auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, false); - Lock lock(&s->mtx); - thr->clock.ReleaseStore(&s->clock); - } - IncrementEpoch(thr); + SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, false); + Lock l(&s->mtx); + thr->fast_state.IncrementEpoch(); + // Can't increment epoch w/o writing to the trace as well. + TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0); + ReleaseImpl(thr, pc, &s->clock); } -void ReleaseStoreAcquire(ThreadState *thr, uptr pc, uptr addr) { - DPrintf("#%d: ReleaseStoreAcquire %zx\n", thr->tid, addr); +void ReleaseStore(ThreadState *thr, uptr pc, uptr addr) { + DPrintf("#%d: ReleaseStore %zx\n", thr->tid, addr); if (thr->ignore_sync) return; - SlotLocker locker(thr); - { - auto s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, false); - Lock lock(&s->mtx); - thr->clock.ReleaseStoreAcquire(&s->clock); - } - IncrementEpoch(thr); + SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, false); + Lock l(&s->mtx); + thr->fast_state.IncrementEpoch(); + // Can't increment epoch w/o writing to the trace as well. 
+ TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0); + ReleaseStoreImpl(thr, pc, &s->clock); } -void IncrementEpoch(ThreadState *thr) { - DCHECK(!thr->ignore_sync); - DCHECK(thr->slot_locked); - Epoch epoch = EpochInc(thr->fast_state.epoch()); - if (!EpochOverflow(epoch)) { - Sid sid = thr->fast_state.sid(); - thr->clock.Set(sid, epoch); - thr->fast_state.SetEpoch(epoch); - thr->slot->SetEpoch(epoch); - TraceTime(thr); - } +#if !SANITIZER_GO +static void UpdateSleepClockCallback(ThreadContextBase *tctx_base, void *arg) { + ThreadState *thr = reinterpret_cast<ThreadState *>(arg); + ThreadContext *tctx = static_cast<ThreadContext *>(tctx_base); + u64 epoch = tctx->epoch1; + if (tctx->status == ThreadStatusRunning) + epoch = tctx->thr->fast_state.epoch(); + thr->last_sleep_clock.set(&thr->proc()->clock_cache, tctx->tid, epoch); } -#if !SANITIZER_GO void AfterSleep(ThreadState *thr, uptr pc) { DPrintf("#%d: AfterSleep\n", thr->tid); if (thr->ignore_sync) return; thr->last_sleep_stack_id = CurrentStackId(thr, pc); - thr->last_sleep_clock.Reset(); - SlotLocker locker(thr); - for (auto &slot : ctx->slots) - thr->last_sleep_clock.Set(slot.sid, slot.epoch()); + ThreadRegistryLock l(&ctx->thread_registry); + ctx->thread_registry.RunCallbackForEachThreadLocked(UpdateSleepClockCallback, + thr); } #endif +void AcquireImpl(ThreadState *thr, uptr pc, SyncClock *c) { + if (thr->ignore_sync) + return; + thr->clock.set(thr->fast_state.epoch()); + thr->clock.acquire(&thr->proc()->clock_cache, c); +} + +void ReleaseStoreAcquireImpl(ThreadState *thr, uptr pc, SyncClock *c) { + if (thr->ignore_sync) + return; + thr->clock.set(thr->fast_state.epoch()); + thr->fast_synch_epoch = thr->fast_state.epoch(); + thr->clock.releaseStoreAcquire(&thr->proc()->clock_cache, c); +} + +void ReleaseImpl(ThreadState *thr, uptr pc, SyncClock *c) { + if (thr->ignore_sync) + return; + thr->clock.set(thr->fast_state.epoch()); + thr->fast_synch_epoch = thr->fast_state.epoch(); + thr->clock.release(&thr->proc()->clock_cache, c); +} + +void ReleaseStoreImpl(ThreadState *thr, uptr pc, SyncClock *c) { + if (thr->ignore_sync) + return; + thr->clock.set(thr->fast_state.epoch()); + thr->fast_synch_epoch = thr->fast_state.epoch(); + thr->clock.ReleaseStore(&thr->proc()->clock_cache, c); +} + +void AcquireReleaseImpl(ThreadState *thr, uptr pc, SyncClock *c) { + if (thr->ignore_sync) + return; + thr->clock.set(thr->fast_state.epoch()); + thr->fast_synch_epoch = thr->fast_state.epoch(); + thr->clock.acq_rel(&thr->proc()->clock_cache, c); +} + void ReportDeadlock(ThreadState *thr, uptr pc, DDReport *r) { if (r == 0 || !ShouldReport(thr, ReportTypeDeadlock)) return; ThreadRegistryLock l(&ctx->thread_registry); ScopedReport rep(ReportTypeDeadlock); for (int i = 0; i < r->n; i++) { - rep.AddMutex(r->loop[i].mtx_ctx0, r->loop[i].stk[0]); + rep.AddMutex(r->loop[i].mtx_ctx0); rep.AddUniqueTid((int)r->loop[i].thr_ctx); rep.AddThread((int)r->loop[i].thr_ctx); } @@ -538,7 +540,7 @@ void ReportDeadlock(ThreadState *thr, uptr pc, DDReport *r) { for (int i = 0; i < r->n; i++) { for (int j = 0; j < (flags()->second_deadlock_stack ?
2 : 1); j++) { u32 stk = r->loop[i].stk[j]; - if (stk && stk != kInvalidStackID) { + if (stk && stk != 0xffffffff) { rep.AddStack(StackDepotGet(stk), true); } else { // Sometimes we fail to extract the stack trace (FIXME: investigate), @@ -550,26 +552,4 @@ void ReportDeadlock(ThreadState *thr, uptr pc, DDReport *r) { OutputReport(thr, rep); } -void ReportDestroyLocked(ThreadState *thr, uptr pc, uptr addr, - FastState last_lock, StackID creation_stack_id) { - SlotPairLocker locker(thr, last_lock.sid()); - ThreadRegistryLock l0(&ctx->thread_registry); - Lock slots_lock(&ctx->slot_mtx); - ScopedReport rep(ReportTypeMutexDestroyLocked); - rep.AddMutex(addr, creation_stack_id); - VarSizeStackTrace trace; - ObtainCurrentStack(thr, pc, &trace); - rep.AddStack(trace, true); - - Tid tid; - DynamicMutexSet mset; - uptr tag; - if (!RestoreStack(EventType::kLock, last_lock.sid(), last_lock.epoch(), addr, - 0, kAccessWrite, &tid, &trace, mset, &tag)) - return; - rep.AddStack(trace, true); - rep.AddLocation(addr, 1); - OutputReport(thr, rep); -} - } // namespace __tsan diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_proc.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_proc.cpp index 5acc3967208e3..def61cca14d57 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_rtl_proc.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_proc.cpp @@ -35,6 +35,7 @@ void ProcDestroy(Processor *proc) { #if !SANITIZER_GO AllocatorProcFinish(proc); #endif + ctx->clock_alloc.FlushCache(&proc->clock_cache); ctx->metamap.OnProcIdle(proc); if (common_flags()->detect_deadlocks) ctx->dd->DestroyPhysicalThread(proc->dd_pt); diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp index e331539bcdb11..811695d144c56 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp @@ -175,26 +175,22 @@ void ScopedReportBase::AddStack(StackTrace stack, bool suppressable) { } void ScopedReportBase::AddMemoryAccess(uptr addr, uptr external_tag, Shadow s, - Tid tid, StackTrace stack, - const MutexSet *mset) { - uptr addr0, size; - AccessType typ; - s.GetAccess(&addr0, &size, &typ); + StackTrace stack, const MutexSet *mset) { auto *mop = New(); rep_->mops.PushBack(mop); - mop->tid = tid; - mop->addr = addr + addr0; - mop->size = size; - mop->write = !(typ & kAccessRead); - mop->atomic = typ & kAccessAtomic; + mop->tid = s.tid(); + mop->addr = addr + s.addr0(); + mop->size = s.size(); + mop->write = s.IsWrite(); + mop->atomic = s.IsAtomic(); mop->stack = SymbolizeStack(stack); mop->external_tag = external_tag; if (mop->stack) mop->stack->suppressable = true; for (uptr i = 0; i < mset->Size(); i++) { MutexSet::Desc d = mset->Get(i); - u64 id = this->AddMutex(d.addr, d.stack_id); - ReportMopMutex mtx = {id, d.write}; + u64 mid = this->AddMutex(d.id); + ReportMopMutex mtx = {mid, d.write}; mop->mset.PushBack(mtx); } } @@ -223,6 +219,18 @@ void ScopedReportBase::AddThread(const ThreadContext *tctx, bool suppressable) { } #if !SANITIZER_GO +static bool FindThreadByUidLockedCallback(ThreadContextBase *tctx, void *arg) { + int unique_id = *(int *)arg; + return tctx->unique_id == (u32)unique_id; +} + +static ThreadContext *FindThreadByUidLocked(Tid unique_id) { + ctx->thread_registry.CheckLocked(); + return static_cast( + ctx->thread_registry.FindThreadContextLocked( + FindThreadByUidLockedCallback, &unique_id)); +} + static ThreadContext *FindThreadByTidLocked(Tid tid) { ctx->thread_registry.CheckLocked(); return static_cast( @@ -254,25 +262,55 @@ ThreadContext 
*IsThreadStackOrTls(uptr addr, bool *is_stack) { } #endif -void ScopedReportBase::AddThread(Tid tid, bool suppressable) { +void ScopedReportBase::AddThread(Tid unique_tid, bool suppressable) { #if !SANITIZER_GO - if (const ThreadContext *tctx = FindThreadByTidLocked(tid)) + if (const ThreadContext *tctx = FindThreadByUidLocked(unique_tid)) AddThread(tctx, suppressable); #endif } -int ScopedReportBase::AddMutex(uptr addr, StackID creation_stack_id) { +void ScopedReportBase::AddMutex(const SyncVar *s) { for (uptr i = 0; i < rep_->mutexes.Size(); i++) { - if (rep_->mutexes[i]->addr == addr) - return rep_->mutexes[i]->id; + if (rep_->mutexes[i]->id == s->uid) + return; } auto *rm = New(); rep_->mutexes.PushBack(rm); - rm->id = rep_->mutexes.Size() - 1; - rm->addr = addr; + rm->id = s->uid; + rm->addr = s->addr; rm->destroyed = false; - rm->stack = SymbolizeStackId(creation_stack_id); - return rm->id; + rm->stack = SymbolizeStackId(s->creation_stack_id); +} + +u64 ScopedReportBase::AddMutex(u64 id) { + u64 uid = 0; + u64 mid = id; + uptr addr = SyncVar::SplitId(id, &uid); + SyncVar *s = ctx->metamap.GetSyncIfExists(addr); + // Check that the mutex is still alive. + // Another mutex can be created at the same address, + // so check uid as well. + if (s && s->CheckId(uid)) { + Lock l(&s->mtx); + mid = s->uid; + AddMutex(s); + } else { + AddDeadMutex(id); + } + return mid; +} + +void ScopedReportBase::AddDeadMutex(u64 id) { + for (uptr i = 0; i < rep_->mutexes.Size(); i++) { + if (rep_->mutexes[i]->id == id) + return; + } + auto *rm = New(); + rep_->mutexes.PushBack(rm); + rm->id = id; + rm->addr = 0; + rm->destroyed = true; + rm->stack = 0; } void ScopedReportBase::AddLocation(uptr addr, uptr size) { @@ -289,7 +327,7 @@ void ScopedReportBase::AddLocation(uptr addr, uptr size) { loc->tid = creat_tid; loc->stack = SymbolizeStackId(creat_stack); rep_->locs.PushBack(loc); - ThreadContext *tctx = FindThreadByTidLocked(creat_tid); + ThreadContext *tctx = FindThreadByUidLocked(creat_tid); if (tctx) AddThread(tctx); return; @@ -305,15 +343,16 @@ void ScopedReportBase::AddLocation(uptr addr, uptr size) { if (!b) b = JavaHeapBlock(addr, &block_begin); if (b != 0) { + ThreadContext *tctx = FindThreadByTidLocked(b->tid); auto *loc = New(); loc->type = ReportLocationHeap; loc->heap_chunk_start = (uptr)allocator()->GetBlockBegin((void *)addr); loc->heap_chunk_size = b->siz; loc->external_tag = b->tag; - loc->tid = b->tid; + loc->tid = tctx ? tctx->tid : b->tid; loc->stack = SymbolizeStackId(b->stk); rep_->locs.PushBack(loc); - if (ThreadContext *tctx = FindThreadByTidLocked(b->tid)) + if (tctx) AddThread(tctx); return; } @@ -348,6 +387,71 @@ ScopedReport::ScopedReport(ReportType typ, uptr tag) ScopedReport::~ScopedReport() {} +void RestoreStack(Tid tid, const u64 epoch, VarSizeStackTrace *stk, + MutexSet *mset, uptr *tag) { + // This function restores stack trace and mutex set for the thread/epoch. + // It does so by getting stack trace and mutex set at the beginning of + // trace part, and then replaying the trace till the given epoch. 
+ Trace* trace = ThreadTrace(tid); + ReadLock l(&trace->mtx); + const int partidx = (epoch / kTracePartSize) % TraceParts(); + TraceHeader* hdr = &trace->headers[partidx]; + if (epoch < hdr->epoch0 || epoch >= hdr->epoch0 + kTracePartSize) + return; + CHECK_EQ(RoundDown(epoch, kTracePartSize), hdr->epoch0); + const u64 epoch0 = RoundDown(epoch, TraceSize()); + const u64 eend = epoch % TraceSize(); + const u64 ebegin = RoundDown(eend, kTracePartSize); + DPrintf("#%d: RestoreStack epoch=%zu ebegin=%zu eend=%zu partidx=%d\n", + tid, (uptr)epoch, (uptr)ebegin, (uptr)eend, partidx); + Vector stack; + stack.Resize(hdr->stack0.size + 64); + for (uptr i = 0; i < hdr->stack0.size; i++) { + stack[i] = hdr->stack0.trace[i]; + DPrintf2(" #%02zu: pc=%zx\n", i, stack[i]); + } + if (mset) + *mset = hdr->mset0; + uptr pos = hdr->stack0.size; + Event *events = (Event*)GetThreadTrace(tid); + for (uptr i = ebegin; i <= eend; i++) { + Event ev = events[i]; + EventType typ = (EventType)(ev >> kEventPCBits); + uptr pc = (uptr)(ev & ((1ull << kEventPCBits) - 1)); + DPrintf2(" %zu typ=%d pc=%zx\n", i, typ, pc); + if (typ == EventTypeMop) { + stack[pos] = pc; + } else if (typ == EventTypeFuncEnter) { + if (stack.Size() < pos + 2) + stack.Resize(pos + 2); + stack[pos++] = pc; + } else if (typ == EventTypeFuncExit) { + if (pos > 0) + pos--; + } + if (mset) { + if (typ == EventTypeLock) { + mset->Add(pc, true, epoch0 + i); + } else if (typ == EventTypeUnlock) { + mset->Del(pc, true); + } else if (typ == EventTypeRLock) { + mset->Add(pc, false, epoch0 + i); + } else if (typ == EventTypeRUnlock) { + mset->Del(pc, false); + } + } + for (uptr j = 0; j <= pos; j++) + DPrintf2(" #%zu: %zx\n", j, stack[j]); + } + if (pos == 0 && stack[0] == 0) + return; + pos++; + stk->Init(&stack[0], pos); + ExtractTagFromStack(stk, tag); +} + +namespace v3 { + // Replays the trace up to last_pos position in the last part // or up to the provided epoch/sid (whichever is earlier) // and calls the provided function f for each event. @@ -365,7 +469,6 @@ void TraceReplay(Trace *trace, TracePart *last, Event *last_pos, Sid sid, Event *end = &part->events[TracePart::kSize - 1]; if (part == last) end = last_pos; - f(kFreeSid, kEpochOver, nullptr); // notify about part start for (Event *evp = &part->events[0]; evp < end; evp++) { Event *evp0 = evp; if (!evp->is_access && !evp->is_func) { @@ -425,36 +528,21 @@ static constexpr bool IsWithinAccess(uptr addr1, uptr size1, uptr addr2, return addr1 >= addr2 && addr1 + size1 <= addr2 + size2; } -// Replays the trace of slot sid up to the target event identified -// by epoch/addr/size/typ and restores and returns tid, stack, mutex set +// Replays the trace of thread tid up to the target event identified +// by sid/epoch/addr/size/typ and restores and returns stack, mutex set // and tag for that event. If there are multiple such events, it returns // the last one. Returns false if the event is not present in the trace. -bool RestoreStack(EventType type, Sid sid, Epoch epoch, uptr addr, uptr size, - AccessType typ, Tid *ptid, VarSizeStackTrace *pstk, +bool RestoreStack(Tid tid, EventType type, Sid sid, Epoch epoch, uptr addr, + uptr size, AccessType typ, VarSizeStackTrace *pstk, MutexSet *pmset, uptr *ptag) { // This function restores stack trace and mutex set for the thread/epoch. // It does so by getting stack trace and mutex set at the beginning of // trace part, and then replaying the trace till the given epoch. 
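Background note (not part of the patch): the comments above describe how RestoreStack rebuilds the call stack and mutex set for a past access by replaying the per-thread trace from the start of a trace part up to the requested epoch. A minimal stand-alone sketch of the stack-rebuilding half of that replay, with a simplified event encoding (Ev and EvType are illustrative, not the real Event layout):

    #include <cstdint>
    #include <vector>

    enum class EvType { Mop, FuncEnter, FuncExit };
    struct Ev { EvType typ; uint64_t pc; };

    // Replay events [0, pos] and return the call stack at that point,
    // with the PC of the last memory access placed on top.
    std::vector<uint64_t> ReplayStack(const std::vector<Ev> &events, size_t pos) {
      std::vector<uint64_t> stack;
      uint64_t last_mop_pc = 0;
      for (size_t i = 0; i <= pos && i < events.size(); i++) {
        const Ev &ev = events[i];
        if (ev.typ == EvType::FuncEnter) {
          stack.push_back(ev.pc);
        } else if (ev.typ == EvType::FuncExit) {
          if (!stack.empty()) stack.pop_back();
        } else {
          last_mop_pc = ev.pc;  // memory access at this trace position
        }
      }
      if (last_mop_pc) stack.push_back(last_mop_pc);
      return stack;
    }

Lock and unlock events are replayed the same way in the real code, adding to or removing from the MutexSet as they are encountered.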
- DPrintf2("RestoreStack: sid=%u@%u addr=0x%zx/%zu typ=%x\n", + DPrintf2("RestoreStack: tid=%u sid=%u@%u addr=0x%zx/%zu typ=%x\n", tid, static_cast(sid), static_cast(epoch), addr, size, static_cast(typ)); ctx->slot_mtx.CheckLocked(); // needed to prevent trace part recycling ctx->thread_registry.CheckLocked(); - TidSlot *slot = &ctx->slots[static_cast(sid)]; - Tid tid = kInvalidTid; - // Need to lock the slot mutex as it protects slot->journal. - slot->mtx.CheckLocked(); - for (uptr i = 0; i < slot->journal.Size(); i++) { - DPrintf2(" journal: epoch=%d tid=%d\n", - static_cast(slot->journal[i].epoch), slot->journal[i].tid); - if (i == slot->journal.Size() - 1 || slot->journal[i + 1].epoch > epoch) { - tid = slot->journal[i].tid; - break; - } - } - if (tid == kInvalidTid) - return false; - *ptid = tid; ThreadContext *tctx = static_cast(ctx->thread_registry.GetThreadLocked(tid)); Trace *trace = &tctx->trace; @@ -465,10 +553,8 @@ bool RestoreStack(EventType type, Sid sid, Epoch epoch, uptr addr, uptr size, { Lock lock(&trace->mtx); first_part = trace->parts.Front(); - if (!first_part) { - DPrintf2("RestoreStack: tid=%d trace=%p no trace parts\n", tid, trace); + if (!first_part) return false; - } last_part = trace->parts.Back(); last_pos = trace->final_pos; if (tctx->thr) @@ -481,18 +567,9 @@ bool RestoreStack(EventType type, Sid sid, Epoch epoch, uptr addr, uptr size, bool is_read = typ & kAccessRead; bool is_atomic = typ & kAccessAtomic; bool is_free = typ & kAccessFree; - DPrintf2("RestoreStack: tid=%d parts=[%p-%p] last_pos=%p\n", tid, - trace->parts.Front(), last_part, last_pos); TraceReplay( trace, last_part, last_pos, sid, epoch, [&](Sid ev_sid, Epoch ev_epoch, Event *evp) { - if (evp == nullptr) { - // Each trace part is self-consistent, so we reset state. - stack.Resize(0); - mset->Reset(); - prev_pc = 0; - return; - } bool match = ev_sid == sid && ev_epoch == epoch; if (evp->is_access) { if (evp->is_func == 0 && evp->type == EventType::kAccessExt && @@ -515,15 +592,12 @@ bool RestoreStack(EventType type, Sid sid, Epoch epoch, uptr addr, uptr size, if (evp->is_func) { auto *ev = reinterpret_cast(evp); if (ev->pc) { - DPrintf2(" FuncEnter: pc=0x%llx\n", ev->pc); + DPrintf2(" FuncEnter: pc=0x%llx\n", ev->pc); stack.PushBack(ev->pc); } else { - DPrintf2(" FuncExit\n"); - // We don't log pathologically large stacks in each part, - // if the stack was truncated we can have more func exits than - // entries. - if (stack.Size()) - stack.PopBack(); + DPrintf2(" FuncExit\n"); + CHECK(stack.Size()); + stack.PopBack(); } return; } @@ -592,6 +666,8 @@ bool RestoreStack(EventType type, Sid sid, Epoch epoch, uptr addr, uptr size, return found; } +} // namespace v3 + bool RacyStacks::operator==(const RacyStacks &other) const { if (hash[0] == other.hash[0] && hash[1] == other.hash[1]) return true; @@ -682,7 +758,10 @@ bool OutputReport(ThreadState *thr, const ScopedReport &srep) { ctx->fired_suppressions.push_back(s); } { + bool old_is_freeing = thr->is_freeing; + thr->is_freeing = false; bool suppressed = OnReport(rep, pc_or_addr != 0); + thr->is_freeing = old_is_freeing; if (suppressed) { thr->current_report = nullptr; return false; @@ -729,91 +808,97 @@ static bool IsFiredSuppression(Context *ctx, ReportType type, uptr addr) { return false; } -// We need to lock the target slot during RestoreStack because it protects -// the slot journal. However, the target slot can be the slot of the current -// thread or a different slot. 
-SlotPairLocker::SlotPairLocker(ThreadState *thr, - Sid sid) NO_THREAD_SAFETY_ANALYSIS : thr_(thr), - slot_() { - CHECK_NE(sid, kFreeSid); - Lock l(&ctx->multi_slot_mtx); - SlotLock(thr); - if (sid == thr->slot->sid) - return; - slot_ = &ctx->slots[static_cast(sid)]; - slot_->mtx.Lock(); -} - -SlotPairLocker::~SlotPairLocker() NO_THREAD_SAFETY_ANALYSIS { - SlotUnlock(thr_); - if (slot_) - slot_->mtx.Unlock(); +static bool RaceBetweenAtomicAndFree(ThreadState *thr) { + Shadow s0(thr->racy_state[0]); + Shadow s1(thr->racy_state[1]); + CHECK(!(s0.IsAtomic() && s1.IsAtomic())); + if (!s0.IsAtomic() && !s1.IsAtomic()) + return true; + if (s0.IsAtomic() && s1.IsFreed()) + return true; + if (s1.IsAtomic() && thr->is_freeing) + return true; + return false; } -void ReportRace(ThreadState *thr, RawShadow *shadow_mem, Shadow cur, Shadow old, - AccessType typ0) { +void ReportRace(ThreadState *thr) { CheckedMutex::CheckNoLocks(); // Symbolizer makes lots of intercepted calls. If we try to process them, // at best it will cause deadlocks on internal mutexes. ScopedIgnoreInterceptors ignore; - uptr addr = ShadowToMem(shadow_mem); - DPrintf("#%d: ReportRace %p\n", thr->tid, (void *)addr); if (!ShouldReport(thr, ReportTypeRace)) return; - uptr addr_off0, size0; - cur.GetAccess(&addr_off0, &size0, nullptr); - uptr addr_off1, size1, typ1; - old.GetAccess(&addr_off1, &size1, &typ1); - if (!flags()->report_atomic_races && - ((typ0 & kAccessAtomic) || (typ1 & kAccessAtomic)) && - !(typ0 & kAccessFree) && !(typ1 & kAccessFree)) + if (!flags()->report_atomic_races && !RaceBetweenAtomicAndFree(thr)) return; - const uptr kMop = 2; - Shadow s[kMop] = {cur, old}; - uptr addr0 = addr + addr_off0; - uptr addr1 = addr + addr_off1; - uptr end0 = addr0 + size0; - uptr end1 = addr1 + size1; - uptr addr_min = min(addr0, addr1); - uptr addr_max = max(end0, end1); - if (IsExpectedReport(addr_min, addr_max - addr_min)) - return; + bool freed = false; + { + Shadow s(thr->racy_state[1]); + freed = s.GetFreedAndReset(); + thr->racy_state[1] = s.raw(); + } + + uptr addr = ShadowToMem(thr->racy_shadow_addr); + uptr addr_min = 0; + uptr addr_max = 0; + { + uptr a0 = addr + Shadow(thr->racy_state[0]).addr0(); + uptr a1 = addr + Shadow(thr->racy_state[1]).addr0(); + uptr e0 = a0 + Shadow(thr->racy_state[0]).size(); + uptr e1 = a1 + Shadow(thr->racy_state[1]).size(); + addr_min = min(a0, a1); + addr_max = max(e0, e1); + if (IsExpectedReport(addr_min, addr_max - addr_min)) + return; + } if (HandleRacyAddress(thr, addr_min, addr_max)) return; - ReportType rep_typ = ReportTypeRace; - if ((typ0 & kAccessVptr) && (typ1 & kAccessFree)) - rep_typ = ReportTypeVptrUseAfterFree; - else if (typ0 & kAccessVptr) - rep_typ = ReportTypeVptrRace; - else if (typ1 & kAccessFree) - rep_typ = ReportTypeUseAfterFree; + ReportType typ = ReportTypeRace; + if (thr->is_vptr_access && freed) + typ = ReportTypeVptrUseAfterFree; + else if (thr->is_vptr_access) + typ = ReportTypeVptrRace; + else if (freed) + typ = ReportTypeUseAfterFree; - if (IsFiredSuppression(ctx, rep_typ, addr)) + if (IsFiredSuppression(ctx, typ, addr)) return; + const uptr kMop = 2; VarSizeStackTrace traces[kMop]; - Tid tids[kMop] = {thr->tid, kInvalidTid}; - uptr tags[kMop] = {kExternalTagNone, kExternalTagNone}; - - ObtainCurrentStack(thr, thr->trace_prev_pc, &traces[0], &tags[0]); - if (IsFiredSuppression(ctx, rep_typ, traces[0])) - return; - - DynamicMutexSet mset1; - MutexSet *mset[kMop] = {&thr->mset, mset1}; - - SlotPairLocker locker(thr, s[1].sid()); - ThreadRegistryLock 
l0(&ctx->thread_registry); - Lock slots_lock(&ctx->slot_mtx); - if (!RestoreStack(EventType::kAccessExt, s[1].sid(), s[1].epoch(), addr1, - size1, typ1, &tids[1], &traces[1], mset[1], &tags[1])) + uptr tags[kMop] = {kExternalTagNone}; + uptr toppc = TraceTopPC(thr); + if (toppc >> kEventPCBits) { + // This is a work-around for a known issue. + // The scenario where this happens is rather elaborate and requires + // an instrumented __sanitizer_report_error_summary callback and + // a __tsan_symbolize_external callback and a race during a range memory + // access larger than 8 bytes. MemoryAccessRange adds the current PC to + // the trace and starts processing memory accesses. A first memory access + // triggers a race, we report it and call the instrumented + // __sanitizer_report_error_summary, which adds more stuff to the trace + // since it is intrumented. Then a second memory access in MemoryAccessRange + // also triggers a race and we get here and call TraceTopPC to get the + // current PC, however now it contains some unrelated events from the + // callback. Most likely, TraceTopPC will now return a EventTypeFuncExit + // event. Later we subtract -1 from it (in GetPreviousInstructionPc) + // and the resulting PC has kExternalPCBit set, so we pass it to + // __tsan_symbolize_external_ex. __tsan_symbolize_external_ex is within its + // rights to crash since the PC is completely bogus. + // test/tsan/double_race.cpp contains a test case for this. + toppc = 0; + } + ObtainCurrentStack(thr, toppc, &traces[0], &tags[0]); + if (IsFiredSuppression(ctx, typ, traces[0])) return; - if (IsFiredSuppression(ctx, rep_typ, traces[1])) + DynamicMutexSet mset2; + Shadow s2(thr->racy_state[1]); + RestoreStack(s2.tid(), s2.epoch(), &traces[1], mset2, &tags[1]); + if (IsFiredSuppression(ctx, typ, traces[1])) return; if (HandleRacyStacks(thr, traces)) @@ -823,29 +908,39 @@ void ReportRace(ThreadState *thr, RawShadow *shadow_mem, Shadow cur, Shadow old, uptr tag = kExternalTagNone; for (uptr i = 0; i < kMop; i++) { if (tags[i] != kExternalTagNone) { - rep_typ = ReportTypeExternalRace; + typ = ReportTypeExternalRace; tag = tags[i]; break; } } - ScopedReport rep(rep_typ, tag); - for (uptr i = 0; i < kMop; i++) - rep.AddMemoryAccess(addr, tags[i], s[i], tids[i], traces[i], mset[i]); + ThreadRegistryLock l0(&ctx->thread_registry); + ScopedReport rep(typ, tag); + for (uptr i = 0; i < kMop; i++) { + Shadow s(thr->racy_state[i]); + rep.AddMemoryAccess(addr, tags[i], s, traces[i], + i == 0 ? &thr->mset : mset2); + } for (uptr i = 0; i < kMop; i++) { + FastState s(thr->racy_state[i]); ThreadContext *tctx = static_cast( - ctx->thread_registry.GetThreadLocked(tids[i])); + ctx->thread_registry.GetThreadLocked(s.tid())); + if (s.epoch() < tctx->epoch0 || s.epoch() > tctx->epoch1) + continue; rep.AddThread(tctx); } rep.AddLocation(addr_min, addr_max - addr_min); #if !SANITIZER_GO - if (!((typ0 | typ1) & kAccessFree) && - s[1].epoch() <= thr->last_sleep_clock.Get(s[1].sid())) - rep.AddSleep(thr->last_sleep_stack_id); + { + Shadow s(thr->racy_state[1]); + if (s.epoch() <= thr->last_sleep_clock.get(s.tid())) + rep.AddSleep(thr->last_sleep_stack_id); + } #endif + OutputReport(thr, rep); } diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp index fc5088c336cd7..c8f7124c009d6 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp @@ -21,14 +21,20 @@ namespace __tsan { // ThreadContext implementation. 
-ThreadContext::ThreadContext(Tid tid) : ThreadContextBase(tid), thr(), sync() {} +ThreadContext::ThreadContext(Tid tid) + : ThreadContextBase(tid), thr(), sync(), epoch0(), epoch1() {} #if !SANITIZER_GO ThreadContext::~ThreadContext() { } #endif -void ThreadContext::OnReset() { CHECK(!sync); } +void ThreadContext::OnReset() { + CHECK_EQ(sync.size(), 0); + uptr trace_p = GetThreadTrace(tid); + ReleaseMemoryPagesToOS(trace_p, trace_p + TraceSize() * sizeof(Event)); + //!!! ReleaseMemoryToOS(GetThreadTraceHeader(tid), sizeof(Trace)); +} #if !SANITIZER_GO struct ThreadLeak { @@ -106,35 +112,30 @@ int ThreadCount(ThreadState *thr) { } struct OnCreatedArgs { - VectorClock *sync; - uptr sync_epoch; - StackID stack; + ThreadState *thr; + uptr pc; }; Tid ThreadCreate(ThreadState *thr, uptr pc, uptr uid, bool detached) { - // The main thread and GCD workers don't have a parent thread. - Tid parent = kInvalidTid; - OnCreatedArgs arg = {nullptr, 0, kInvalidStackID}; - if (thr) { - parent = thr->tid; - arg.stack = CurrentStackId(thr, pc); - if (!thr->ignore_sync) { - SlotLocker locker(thr); - thr->clock.ReleaseStore(&arg.sync); - arg.sync_epoch = ctx->global_epoch; - IncrementEpoch(thr); - } - } - Tid tid = ctx->thread_registry.CreateThread(uid, detached, parent, &arg); - DPrintf("#%d: ThreadCreate tid=%d uid=%zu\n", parent, tid, uid); + OnCreatedArgs args = { thr, pc }; + u32 parent_tid = thr ? thr->tid : kInvalidTid; // No parent for GCD workers. + Tid tid = ctx->thread_registry.CreateThread(uid, detached, parent_tid, &args); + DPrintf("#%d: ThreadCreate tid=%d uid=%zu\n", parent_tid, tid, uid); return tid; } void ThreadContext::OnCreated(void *arg) { + thr = 0; + if (tid == kMainTid) + return; OnCreatedArgs *args = static_cast<OnCreatedArgs *>(arg); - sync = args->sync; - sync_epoch = args->sync_epoch; - creation_stack_id = args->stack; + if (!args->thr) // GCD workers don't have a parent thread. + return; + args->thr->fast_state.IncrementEpoch(); + // Can't increment epoch w/o writing to the trace as well.
+ TraceAddEvent(args->thr, args->thr->fast_state, EventTypeMop, 0); + ReleaseImpl(args->thr, 0, &sync); + creation_stack_id = CurrentStackId(args->thr, args->pc); } extern "C" void __tsan_stack_initialization() {} @@ -149,15 +150,6 @@ struct OnStartedArgs { void ThreadStart(ThreadState *thr, Tid tid, tid_t os_id, ThreadType thread_type) { - ctx->thread_registry.StartThread(tid, os_id, thread_type, thr); - if (!thr->ignore_sync) { - SlotAttachAndLock(thr); - if (thr->tctx->sync_epoch == ctx->global_epoch) - thr->clock.Acquire(thr->tctx->sync); - SlotUnlock(thr); - } - Free(thr->tctx->sync); - uptr stk_addr = 0; uptr stk_size = 0; uptr tls_addr = 0; @@ -167,10 +159,12 @@ void ThreadStart(ThreadState *thr, Tid tid, tid_t os_id, GetThreadStackAndTls(tid == kMainTid, &stk_addr, &stk_size, &tls_addr, &tls_size); #endif - thr->stk_addr = stk_addr; - thr->stk_size = stk_size; - thr->tls_addr = tls_addr; - thr->tls_size = tls_size; + + ThreadRegistry *tr = &ctx->thread_registry; + OnStartedArgs args = { thr, stk_addr, stk_size, tls_addr, tls_size }; + tr->StartThread(tid, os_id, thread_type, &args); + + while (!thr->tctx->trace.parts.Empty()) thr->tctx->trace.parts.PopBack(); #if !SANITIZER_GO if (ctx->after_multithreaded_fork) { @@ -198,41 +192,57 @@ void ThreadStart(ThreadState *thr, Tid tid, tid_t os_id, } void ThreadContext::OnStarted(void *arg) { - thr = static_cast(arg); - DPrintf("#%d: ThreadStart\n", tid); - new (thr) ThreadState(tid); + OnStartedArgs *args = static_cast(arg); + thr = args->thr; + // RoundUp so that one trace part does not contain events + // from different threads. + epoch0 = RoundUp(epoch1 + 1, kTracePartSize); + epoch1 = (u64)-1; + new (thr) + ThreadState(ctx, tid, unique_id, epoch0, reuse_count, args->stk_addr, + args->stk_size, args->tls_addr, args->tls_size); if (common_flags()->detect_deadlocks) - thr->dd_lt = ctx->dd->CreateLogicalThread(tid); + thr->dd_lt = ctx->dd->CreateLogicalThread(unique_id); + thr->fast_state.SetHistorySize(flags()->history_size); + // Commit switch to the new part of the trace. + // TraceAddEvent will reset stack0/mset0 in the new part for us. + TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0); + + thr->fast_synch_epoch = epoch0; + AcquireImpl(thr, 0, &sync); + sync.Reset(&thr->proc()->clock_cache); thr->tctx = this; -#if !SANITIZER_GO thr->is_inited = true; -#endif + DPrintf( + "#%d: ThreadStart epoch=%zu stk_addr=%zx stk_size=%zx " + "tls_addr=%zx tls_size=%zx\n", + tid, (uptr)epoch0, args->stk_addr, args->stk_size, args->tls_addr, + args->tls_size); } void ThreadFinish(ThreadState *thr) { - DPrintf("#%d: ThreadFinish\n", thr->tid); ThreadCheckIgnore(thr); if (thr->stk_addr && thr->stk_size) DontNeedShadowFor(thr->stk_addr, thr->stk_size); if (thr->tls_addr && thr->tls_size) DontNeedShadowFor(thr->tls_addr, thr->tls_size); thr->is_dead = true; -#if !SANITIZER_GO thr->is_inited = false; +#if !SANITIZER_GO thr->ignore_interceptors++; - PlatformCleanUpThreadState(thr); #endif - if (!thr->ignore_sync) { - SlotLocker locker(thr); - ThreadRegistryLock lock(&ctx->thread_registry); - // Note: detached is protected by the thread registry mutex, - // the thread may be detaching concurrently in another thread. 
- if (!thr->tctx->detached) { - thr->clock.ReleaseStore(&thr->tctx->sync); - thr->tctx->sync_epoch = ctx->global_epoch; - IncrementEpoch(thr); - } + ctx->thread_registry.FinishThread(thr->tid); +} + +void ThreadContext::OnFinished() { + if (!detached) { + thr->fast_state.IncrementEpoch(); + // Can't increment epoch w/o writing to the trace as well. + TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0); + ReleaseImpl(thr, 0, &sync); } + epoch1 = thr->fast_state.epoch(); + #if !SANITIZER_GO UnmapOrDie(thr->shadow_stack, kShadowStackSize * sizeof(uptr)); #else @@ -241,37 +251,18 @@ void ThreadFinish(ThreadState *thr) { thr->shadow_stack = nullptr; thr->shadow_stack_pos = nullptr; thr->shadow_stack_end = nullptr; + if (common_flags()->detect_deadlocks) ctx->dd->DestroyLogicalThread(thr->dd_lt); - SlotDetach(thr); - ctx->thread_registry.FinishThread(thr->tid); + thr->clock.ResetCached(&thr->proc()->clock_cache); +#if !SANITIZER_GO + thr->last_sleep_clock.ResetCached(&thr->proc()->clock_cache); +#endif +#if !SANITIZER_GO + PlatformCleanUpThreadState(thr); +#endif thr->~ThreadState(); -} - -void ThreadContext::OnFinished() { - Lock lock(&ctx->slot_mtx); - Lock lock1(&trace.mtx); - // Queue all trace parts into the global recycle queue. - auto parts = &trace.parts; - while (trace.local_head) { - CHECK(parts->Queued(trace.local_head)); - ctx->trace_part_recycle.PushBack(trace.local_head); - trace.local_head = parts->Next(trace.local_head); - } - ctx->trace_part_recycle_finished += parts->Size(); - if (ctx->trace_part_recycle_finished > Trace::kFinishedThreadHi) { - ctx->trace_part_finished_excess += parts->Size(); - trace.parts_allocated = 0; - } else if (ctx->trace_part_recycle_finished > Trace::kFinishedThreadLo && - parts->Size() > 1) { - ctx->trace_part_finished_excess += parts->Size() - 1; - trace.parts_allocated = 1; - } - // From now on replay will use trace->final_pos. 
- trace.final_pos = (Event *)atomic_load_relaxed(&thr->trace_pos); - atomic_store_relaxed(&thr->trace_pos, 0); - thr->tctx = nullptr; - thr = nullptr; + thr = 0; } struct ConsumeThreadContext { @@ -283,43 +274,35 @@ Tid ThreadConsumeTid(ThreadState *thr, uptr pc, uptr uid) { return ctx->thread_registry.ConsumeThreadUserId(uid); } -struct JoinArg { - VectorClock *sync; - uptr sync_epoch; -}; - void ThreadJoin(ThreadState *thr, uptr pc, Tid tid) { CHECK_GT(tid, 0); + CHECK_LT(tid, kMaxTid); DPrintf("#%d: ThreadJoin tid=%d\n", thr->tid, tid); - JoinArg arg = {}; - ctx->thread_registry.JoinThread(tid, &arg); - if (!thr->ignore_sync) { - SlotLocker locker(thr); - if (arg.sync_epoch == ctx->global_epoch) - thr->clock.Acquire(arg.sync); - } - Free(arg.sync); + ctx->thread_registry.JoinThread(tid, thr); } -void ThreadContext::OnJoined(void *ptr) { - auto arg = static_cast(ptr); - arg->sync = sync; - arg->sync_epoch = sync_epoch; - sync = nullptr; - sync_epoch = 0; +void ThreadContext::OnJoined(void *arg) { + ThreadState *caller_thr = static_cast(arg); + AcquireImpl(caller_thr, 0, &sync); + sync.Reset(&caller_thr->proc()->clock_cache); } -void ThreadContext::OnDead() { CHECK_EQ(sync, nullptr); } +void ThreadContext::OnDead() { CHECK_EQ(sync.size(), 0); } void ThreadDetach(ThreadState *thr, uptr pc, Tid tid) { CHECK_GT(tid, 0); + CHECK_LT(tid, kMaxTid); ctx->thread_registry.DetachThread(tid, thr); } -void ThreadContext::OnDetached(void *arg) { Free(sync); } +void ThreadContext::OnDetached(void *arg) { + ThreadState *thr1 = static_cast(arg); + sync.Reset(&thr1->proc()->clock_cache); +} void ThreadNotJoined(ThreadState *thr, uptr pc, Tid tid, uptr uid) { CHECK_GT(tid, 0); + CHECK_LT(tid, kMaxTid); ctx->thread_registry.SetThreadUserId(tid, uid); } diff --git a/compiler-rt/lib/tsan/rtl/tsan_shadow.h b/compiler-rt/lib/tsan/rtl/tsan_shadow.h index 843573ecf5d30..8b7bc341713e8 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_shadow.h +++ b/compiler-rt/lib/tsan/rtl/tsan_shadow.h @@ -10,170 +10,223 @@ #define TSAN_SHADOW_H #include "tsan_defs.h" +#include "tsan_trace.h" namespace __tsan { +// FastState (from most significant bit): +// ignore : 1 +// tid : kTidBits +// unused : - +// history_size : 3 +// epoch : kClkBits class FastState { public: - FastState() { Reset(); } + FastState(u64 tid, u64 epoch) { + x_ = tid << kTidShift; + x_ |= epoch; + DCHECK_EQ(tid, this->tid()); + DCHECK_EQ(epoch, this->epoch()); + DCHECK_EQ(GetIgnoreBit(), false); + } + + explicit FastState(u64 x) : x_(x) {} + + u64 raw() const { return x_; } + + u64 tid() const { + u64 res = (x_ & ~kIgnoreBit) >> kTidShift; + return res; + } + + u64 TidWithIgnore() const { + u64 res = x_ >> kTidShift; + return res; + } + + u64 epoch() const { + u64 res = x_ & ((1ull << kClkBits) - 1); + return res; + } - void Reset() { - part_.unused0_ = 0; - part_.sid_ = static_cast(kFreeSid); - part_.epoch_ = static_cast(kEpochLast); - part_.unused1_ = 0; - part_.ignore_accesses_ = false; + void IncrementEpoch() { + u64 old_epoch = epoch(); + x_ += 1; + DCHECK_EQ(old_epoch + 1, epoch()); + (void)old_epoch; } - void SetSid(Sid sid) { part_.sid_ = static_cast(sid); } + void SetIgnoreBit() { x_ |= kIgnoreBit; } + void ClearIgnoreBit() { x_ &= ~kIgnoreBit; } + bool GetIgnoreBit() const { return (s64)x_ < 0; } - Sid sid() const { return static_cast(part_.sid_); } + void SetHistorySize(int hs) { + CHECK_GE(hs, 0); + CHECK_LE(hs, 7); + x_ = (x_ & ~(kHistoryMask << kHistoryShift)) | (u64(hs) << kHistoryShift); + } - Epoch epoch() const { return static_cast(part_.epoch_); 
} + ALWAYS_INLINE + int GetHistorySize() const { + return (int)((x_ >> kHistoryShift) & kHistoryMask); + } - void SetEpoch(Epoch epoch) { part_.epoch_ = static_cast(epoch); } + void ClearHistorySize() { SetHistorySize(0); } - void SetIgnoreBit() { part_.ignore_accesses_ = 1; } - void ClearIgnoreBit() { part_.ignore_accesses_ = 0; } - bool GetIgnoreBit() const { return part_.ignore_accesses_; } + ALWAYS_INLINE + u64 GetTracePos() const { + const int hs = GetHistorySize(); + // When hs == 0, the trace consists of 2 parts. + const u64 mask = (1ull << (kTracePartSizeBits + hs + 1)) - 1; + return epoch() & mask; + } private: friend class Shadow; - struct Parts { - u32 unused0_ : 8; - u32 sid_ : 8; - u32 epoch_ : kEpochBits; - u32 unused1_ : 1; - u32 ignore_accesses_ : 1; - }; - union { - Parts part_; - u32 raw_; - }; + static const int kTidShift = 64 - kTidBits - 1; + static const u64 kIgnoreBit = 1ull << 63; + static const u64 kFreedBit = 1ull << 63; + static const u64 kHistoryShift = kClkBits; + static const u64 kHistoryMask = 7; + u64 x_; }; -static_assert(sizeof(FastState) == kShadowSize, "bad FastState size"); - -class Shadow { +// Shadow (from most significant bit): +// freed : 1 +// tid : kTidBits +// is_atomic : 1 +// is_read : 1 +// size_log : 2 +// addr0 : 3 +// epoch : kClkBits +class Shadow : public FastState { public: - static constexpr RawShadow kEmpty = static_cast(0); - - Shadow(FastState state, u32 addr, u32 size, AccessType typ) { - raw_ = state.raw_; - DCHECK_GT(size, 0); - DCHECK_LE(size, 8); - UNUSED Sid sid0 = part_.sid_; - UNUSED u16 epoch0 = part_.epoch_; - raw_ |= (!!(typ & kAccessAtomic) << kIsAtomicShift) | - (!!(typ & kAccessRead) << kIsReadShift) | - (((((1u << size) - 1) << (addr & 0x7)) & 0xff) << kAccessShift); - // Note: we don't check kAccessAtomic because it overlaps with - // FastState::ignore_accesses_ and it may be set spuriously. - DCHECK_EQ(part_.is_read_, !!(typ & kAccessRead)); - DCHECK_EQ(sid(), sid0); - DCHECK_EQ(epoch(), epoch0); - } - - explicit Shadow(RawShadow x = Shadow::kEmpty) { raw_ = static_cast(x); } - - RawShadow raw() const { return static_cast(raw_); } - Sid sid() const { return part_.sid_; } - Epoch epoch() const { return static_cast(part_.epoch_); } - u8 access() const { return part_.access_; } - - void GetAccess(uptr *addr, uptr *size, AccessType *typ) const { - DCHECK(part_.access_ != 0 || raw_ == static_cast(Shadow::kRodata)); - if (addr) - *addr = part_.access_ ? __builtin_ffs(part_.access_) - 1 : 0; - if (size) - *size = part_.access_ == kFreeAccess ? kShadowCell - : __builtin_popcount(part_.access_); - if (typ) - *typ = (part_.is_read_ ? kAccessRead : kAccessWrite) | - (part_.is_atomic_ ? kAccessAtomic : 0) | - (part_.access_ == kFreeAccess ? 
kAccessFree : 0); + explicit Shadow(u64 x) : FastState(x) {} + + explicit Shadow(const FastState &s) : FastState(s.x_) { ClearHistorySize(); } + + void SetAddr0AndSizeLog(u64 addr0, unsigned kAccessSizeLog) { + DCHECK_EQ((x_ >> kClkBits) & 31, 0); + DCHECK_LE(addr0, 7); + DCHECK_LE(kAccessSizeLog, 3); + x_ |= ((kAccessSizeLog << 3) | addr0) << kClkBits; + DCHECK_EQ(kAccessSizeLog, size_log()); + DCHECK_EQ(addr0, this->addr0()); } - ALWAYS_INLINE - bool IsBothReadsOrAtomic(AccessType typ) const { - u32 is_read = !!(typ & kAccessRead); - u32 is_atomic = !!(typ & kAccessAtomic); - bool res = - raw_ & ((is_atomic << kIsAtomicShift) | (is_read << kIsReadShift)); - DCHECK_EQ(res, - (part_.is_read_ && is_read) || (part_.is_atomic_ && is_atomic)); + void SetWrite(unsigned kAccessIsWrite) { + DCHECK_EQ(x_ & kReadBit, 0); + if (!kAccessIsWrite) + x_ |= kReadBit; + DCHECK_EQ(kAccessIsWrite, IsWrite()); + } + + void SetAtomic(bool kIsAtomic) { + DCHECK(!IsAtomic()); + if (kIsAtomic) + x_ |= kAtomicBit; + DCHECK_EQ(IsAtomic(), kIsAtomic); + } + + bool IsAtomic() const { return x_ & kAtomicBit; } + + bool IsZero() const { return x_ == 0; } + + static inline bool TidsAreEqual(const Shadow s1, const Shadow s2) { + u64 shifted_xor = (s1.x_ ^ s2.x_) >> kTidShift; + DCHECK_EQ(shifted_xor == 0, s1.TidWithIgnore() == s2.TidWithIgnore()); + return shifted_xor == 0; + } + + static ALWAYS_INLINE bool Addr0AndSizeAreEqual(const Shadow s1, + const Shadow s2) { + u64 masked_xor = ((s1.x_ ^ s2.x_) >> kClkBits) & 31; + return masked_xor == 0; + } + + static ALWAYS_INLINE bool TwoRangesIntersect(Shadow s1, Shadow s2, + unsigned kS2AccessSize) { + bool res = false; + u64 diff = s1.addr0() - s2.addr0(); + if ((s64)diff < 0) { // s1.addr0 < s2.addr0 + // if (s1.addr0() + size1) > s2.addr0()) return true; + if (s1.size() > -diff) + res = true; + } else { + // if (s2.addr0() + kS2AccessSize > s1.addr0()) return true; + if (kS2AccessSize > diff) + res = true; + } + DCHECK_EQ(res, TwoRangesIntersectSlow(s1, s2)); + DCHECK_EQ(res, TwoRangesIntersectSlow(s2, s1)); return res; } - ALWAYS_INLINE - bool IsRWWeakerOrEqual(AccessType typ) const { - u32 is_read = !!(typ & kAccessRead); - u32 is_atomic = !!(typ & kAccessAtomic); - UNUSED u32 res0 = - (part_.is_atomic_ > is_atomic) || - (part_.is_atomic_ == is_atomic && part_.is_read_ >= is_read); -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - const u32 kAtomicReadMask = (1 << kIsAtomicShift) | (1 << kIsReadShift); - bool res = (raw_ & kAtomicReadMask) >= - ((is_atomic << kIsAtomicShift) | (is_read << kIsReadShift)); - - DCHECK_EQ(res, res0); + u64 ALWAYS_INLINE addr0() const { return (x_ >> kClkBits) & 7; } + u64 ALWAYS_INLINE size() const { return 1ull << size_log(); } + bool ALWAYS_INLINE IsWrite() const { return !IsRead(); } + bool ALWAYS_INLINE IsRead() const { return x_ & kReadBit; } + + // The idea behind the freed bit is as follows. + // When the memory is freed (or otherwise unaccessible) we write to the shadow + // values with tid/epoch related to the free and the freed bit set. + // During memory accesses processing the freed bit is considered + // as msb of tid. So any access races with shadow with freed bit set + // (it is as if write from a thread with which we never synchronized before). + // This allows us to detect accesses to freed memory w/o additional + // overheads in memory access processing and at the same time restore + // tid/epoch of free. 
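  // A worked example of the above (illustrative, ignoring the other bit
  // fields): take a shadow value for a live access by some thread,
  //   live  = (tid << kTidShift) | epoch;
  // the value written on free is
  //   freed = live | kFreedBit;                    // MarkAsFreed()
  // Since kFreedBit is bit 63 and kTidShift == 64 - kTidBits - 1,
  //   (live ^ freed) >> kTidShift == (1 << kTidBits) != 0,
  // so Shadow::TidsAreEqual() is false even for an access by the very same
  // thread, and the access is treated as a race with a thread we never
  // synchronized with.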
+ void MarkAsFreed() { x_ |= kFreedBit; } + + bool IsFreed() const { return x_ & kFreedBit; } + + bool GetFreedAndReset() { + bool res = x_ & kFreedBit; + x_ &= ~kFreedBit; return res; -#else - return res0; -#endif } - // The FreedMarker must not pass "the same access check" so that we don't - // return from the race detection algorithm early. - static RawShadow FreedMarker() { - FastState fs; - fs.SetSid(kFreeSid); - fs.SetEpoch(kEpochLast); - Shadow s(fs, 0, 8, kAccessWrite); - return s.raw(); + bool ALWAYS_INLINE IsBothReadsOrAtomic(bool kIsWrite, bool kIsAtomic) const { + bool v = x_ & ((u64(kIsWrite ^ 1) << kReadShift) | + (u64(kIsAtomic) << kAtomicShift)); + DCHECK_EQ(v, (!IsWrite() && !kIsWrite) || (IsAtomic() && kIsAtomic)); + return v; } - static RawShadow FreedInfo(Sid sid, Epoch epoch) { - Shadow s; - s.part_.sid_ = sid; - s.part_.epoch_ = static_cast(epoch); - s.part_.access_ = kFreeAccess; - return s.raw(); + bool ALWAYS_INLINE IsRWNotWeaker(bool kIsWrite, bool kIsAtomic) const { + bool v = ((x_ >> kReadShift) & 3) <= u64((kIsWrite ^ 1) | (kIsAtomic << 1)); + DCHECK_EQ(v, (IsAtomic() < kIsAtomic) || + (IsAtomic() == kIsAtomic && !IsWrite() <= !kIsWrite)); + return v; } - private: - struct Parts { - u8 access_; - Sid sid_; - u16 epoch_ : kEpochBits; - u16 is_read_ : 1; - u16 is_atomic_ : 1; - }; - union { - Parts part_; - u32 raw_; - }; - - static constexpr u8 kFreeAccess = 0x81; - -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - static constexpr uptr kAccessShift = 0; - static constexpr uptr kIsReadShift = 30; - static constexpr uptr kIsAtomicShift = 31; -#else - static constexpr uptr kAccessShift = 24; - static constexpr uptr kIsReadShift = 1; - static constexpr uptr kIsAtomicShift = 0; -#endif + bool ALWAYS_INLINE IsRWWeakerOrEqual(bool kIsWrite, bool kIsAtomic) const { + bool v = ((x_ >> kReadShift) & 3) >= u64((kIsWrite ^ 1) | (kIsAtomic << 1)); + DCHECK_EQ(v, (IsAtomic() > kIsAtomic) || + (IsAtomic() == kIsAtomic && !IsWrite() >= !kIsWrite)); + return v; + } - public: - // .rodata shadow marker, see MapRodata and ContainsSameAccessFast. 
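  // (On the IsRWNotWeaker/IsRWWeakerOrEqual comparisons above: the two-bit
  //  field (x_ >> kReadShift) & 3 is (is_atomic << 1) | is_read, so accesses
  //  are ordered 0 = plain write, 1 = plain read, 2 = atomic write,
  //  3 = atomic read, and a single integer comparison implements the
  //  "atomicity first, then read/write" rule spelled out in the DCHECKs.)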
- static constexpr RawShadow kRodata = - static_cast(1 << kIsReadShift); + private: + static const u64 kReadShift = 5 + kClkBits; + static const u64 kReadBit = 1ull << kReadShift; + static const u64 kAtomicShift = 6 + kClkBits; + static const u64 kAtomicBit = 1ull << kAtomicShift; + + u64 size_log() const { return (x_ >> (3 + kClkBits)) & 3; } + + static bool TwoRangesIntersectSlow(const Shadow s1, const Shadow s2) { + if (s1.addr0() == s2.addr0()) + return true; + if (s1.addr0() < s2.addr0() && s1.addr0() + s1.size() > s2.addr0()) + return true; + if (s2.addr0() < s1.addr0() && s2.addr0() + s2.size() > s1.addr0()) + return true; + return false; + } }; -static_assert(sizeof(Shadow) == kShadowSize, "bad Shadow size"); +const RawShadow kShadowRodata = (RawShadow)-1; // .rodata shadow marker } // namespace __tsan diff --git a/compiler-rt/lib/tsan/rtl/tsan_sync.cpp b/compiler-rt/lib/tsan/rtl/tsan_sync.cpp index 09d41780d188a..f042abab74e5e 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_sync.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_sync.cpp @@ -18,31 +18,43 @@ namespace __tsan { void DDMutexInit(ThreadState *thr, uptr pc, SyncVar *s); -SyncVar::SyncVar() : mtx(MutexTypeSyncVar) { Reset(); } +SyncVar::SyncVar() : mtx(MutexTypeSyncVar) { Reset(0); } -void SyncVar::Init(ThreadState *thr, uptr pc, uptr addr, bool save_stack) { - Reset(); +void SyncVar::Init(ThreadState *thr, uptr pc, uptr addr, u64 uid, + bool save_stack) { this->addr = addr; - next = 0; + this->uid = uid; + this->next = 0; + + creation_stack_id = kInvalidStackID; if (save_stack && !SANITIZER_GO) // Go does not use them creation_stack_id = CurrentStackId(thr, pc); if (common_flags()->detect_deadlocks) DDMutexInit(thr, pc, this); } -void SyncVar::Reset() { - CHECK(!ctx->resetting); +void SyncVar::Reset(Processor *proc) { + uid = 0; creation_stack_id = kInvalidStackID; owner_tid = kInvalidTid; - last_lock.Reset(); + last_lock = 0; recursion = 0; atomic_store_relaxed(&flags, 0); - Free(clock); - Free(read_clock); + + if (proc == 0) { + CHECK_EQ(clock.size(), 0); + CHECK_EQ(read_clock.size(), 0); + } else { + clock.Reset(&proc->clock_cache); + read_clock.Reset(&proc->clock_cache); + } } MetaMap::MetaMap() - : block_alloc_("heap block allocator"), sync_alloc_("sync allocator") {} + : block_alloc_(LINKER_INITIALIZED, "heap block allocator"), + sync_alloc_(LINKER_INITIALIZED, "sync allocator") { + atomic_store(&uid_gen_, 0, memory_order_relaxed); +} void MetaMap::AllocBlock(ThreadState *thr, uptr pc, uptr p, uptr sz) { u32 idx = block_alloc_.Alloc(&thr->proc()->block_cache); @@ -56,16 +68,16 @@ void MetaMap::AllocBlock(ThreadState *thr, uptr pc, uptr p, uptr sz) { *meta = idx | kFlagBlock; } -uptr MetaMap::FreeBlock(Processor *proc, uptr p, bool reset) { +uptr MetaMap::FreeBlock(Processor *proc, uptr p) { MBlock* b = GetBlock(p); if (b == 0) return 0; uptr sz = RoundUpTo(b->siz, kMetaShadowCell); - FreeRange(proc, p, sz, reset); + FreeRange(proc, p, sz); return sz; } -bool MetaMap::FreeRange(Processor *proc, uptr p, uptr sz, bool reset) { +bool MetaMap::FreeRange(Processor *proc, uptr p, uptr sz) { bool has_something = false; u32 *meta = MemToMeta(p); u32 *end = MemToMeta(p + sz); @@ -87,8 +99,7 @@ bool MetaMap::FreeRange(Processor *proc, uptr p, uptr sz, bool reset) { DCHECK(idx & kFlagSync); SyncVar *s = sync_alloc_.Map(idx & ~kFlagMask); u32 next = s->next; - if (reset) - s->Reset(); + s->Reset(proc); sync_alloc_.Free(&proc->sync_cache, idx & ~kFlagMask); idx = next; } else { @@ -105,30 +116,30 @@ bool MetaMap::FreeRange(Processor *proc, 
uptr p, uptr sz, bool reset) { // which can be huge. The function probes pages one-by-one until it finds a page // without meta objects, at this point it stops freeing meta objects. Because // thread stacks grow top-down, we do the same starting from end as well. -void MetaMap::ResetRange(Processor *proc, uptr p, uptr sz, bool reset) { +void MetaMap::ResetRange(Processor *proc, uptr p, uptr sz) { if (SANITIZER_GO) { // UnmapOrDie/MmapFixedNoReserve does not work on Windows, // so we do the optimization only for C/C++. - FreeRange(proc, p, sz, reset); + FreeRange(proc, p, sz); return; } const uptr kMetaRatio = kMetaShadowCell / kMetaShadowSize; const uptr kPageSize = GetPageSizeCached() * kMetaRatio; if (sz <= 4 * kPageSize) { // If the range is small, just do the normal free procedure. - FreeRange(proc, p, sz, reset); + FreeRange(proc, p, sz); return; } // First, round both ends of the range to page size. uptr diff = RoundUp(p, kPageSize) - p; if (diff != 0) { - FreeRange(proc, p, diff, reset); + FreeRange(proc, p, diff); p += diff; sz -= diff; } diff = p + sz - RoundDown(p + sz, kPageSize); if (diff != 0) { - FreeRange(proc, p + sz - diff, diff, reset); + FreeRange(proc, p + sz - diff, diff); sz -= diff; } // Now we must have a non-empty page-aligned range. @@ -139,7 +150,7 @@ void MetaMap::ResetRange(Processor *proc, uptr p, uptr sz, bool reset) { const uptr sz0 = sz; // Probe start of the range. for (uptr checked = 0; sz > 0; checked += kPageSize) { - bool has_something = FreeRange(proc, p, kPageSize, reset); + bool has_something = FreeRange(proc, p, kPageSize); p += kPageSize; sz -= kPageSize; if (!has_something && checked > (128 << 10)) @@ -147,7 +158,7 @@ void MetaMap::ResetRange(Processor *proc, uptr p, uptr sz, bool reset) { } // Probe end of the range. for (uptr checked = 0; sz > 0; checked += kPageSize) { - bool has_something = FreeRange(proc, p + sz - kPageSize, kPageSize, reset); + bool has_something = FreeRange(proc, p + sz - kPageSize, kPageSize); sz -= kPageSize; // Stacks grow down, so sync object are most likely at the end of the region // (if it is a stack). The very end of the stack is TLS and tsan increases @@ -166,27 +177,6 @@ void MetaMap::ResetRange(Processor *proc, uptr p, uptr sz, bool reset) { Die(); } -void MetaMap::ResetClocks() { - // This can be called from the background thread - // which does not have proc/cache. - // The cache is too large for stack. 
- static InternalAllocatorCache cache; - internal_memset(&cache, 0, sizeof(cache)); - internal_allocator()->InitCache(&cache); - sync_alloc_.ForEach([&](SyncVar *s) { - if (s->clock) { - InternalFree(s->clock, &cache); - s->clock = nullptr; - } - if (s->read_clock) { - InternalFree(s->read_clock, &cache); - s->read_clock = nullptr; - } - s->last_lock.Reset(); - }); - internal_allocator()->DestroyCache(&cache); -} - MBlock* MetaMap::GetBlock(uptr p) { u32 *meta = MemToMeta(p); u32 idx = *meta; @@ -203,7 +193,6 @@ MBlock* MetaMap::GetBlock(uptr p) { SyncVar *MetaMap::GetSync(ThreadState *thr, uptr pc, uptr addr, bool create, bool save_stack) { - DCHECK(!create || thr->slot_locked); u32 *meta = MemToMeta(addr); u32 idx0 = *meta; u32 myidx = 0; @@ -214,7 +203,7 @@ SyncVar *MetaMap::GetSync(ThreadState *thr, uptr pc, uptr addr, bool create, SyncVar * s = sync_alloc_.Map(idx & ~kFlagMask); if (LIKELY(s->addr == addr)) { if (UNLIKELY(myidx != 0)) { - mys->Reset(); + mys->Reset(thr->proc()); sync_alloc_.Free(&thr->proc()->sync_cache, myidx); } return s; @@ -229,9 +218,10 @@ SyncVar *MetaMap::GetSync(ThreadState *thr, uptr pc, uptr addr, bool create, } if (LIKELY(myidx == 0)) { + const u64 uid = atomic_fetch_add(&uid_gen_, 1, memory_order_relaxed); myidx = sync_alloc_.Alloc(&thr->proc()->sync_cache); mys = sync_alloc_.Map(myidx); - mys->Init(thr, pc, addr, save_stack); + mys->Init(thr, pc, addr, uid, save_stack); } mys->next = idx0; if (atomic_compare_exchange_strong((atomic_uint32_t*)meta, &idx0, diff --git a/compiler-rt/lib/tsan/rtl/tsan_sync.h b/compiler-rt/lib/tsan/rtl/tsan_sync.h index 1e5f828349c69..fc8fa288a8418 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_sync.h +++ b/compiler-rt/lib/tsan/rtl/tsan_sync.h @@ -15,11 +15,9 @@ #include "sanitizer_common/sanitizer_atomic.h" #include "sanitizer_common/sanitizer_common.h" #include "sanitizer_common/sanitizer_deadlock_detector_interface.h" -#include "tsan_clock.h" #include "tsan_defs.h" +#include "tsan_clock.h" #include "tsan_dense_alloc.h" -#include "tsan_shadow.h" -#include "tsan_vector_clock.h" namespace __tsan { @@ -55,18 +53,34 @@ struct SyncVar { uptr addr; // overwritten by DenseSlabAlloc freelist Mutex mtx; + u64 uid; // Globally unique id. StackID creation_stack_id; Tid owner_tid; // Set only by exclusive owners. - FastState last_lock; + u64 last_lock; int recursion; atomic_uint32_t flags; u32 next; // in MetaMap DDMutex dd; - VectorClock *read_clock; // Used for rw mutexes only. - VectorClock *clock; + SyncClock read_clock; // Used for rw mutexes only. + // The clock is placed last, so that it is situated on a different cache line + // with the mtx. This reduces contention for hot sync objects. + SyncClock clock; - void Init(ThreadState *thr, uptr pc, uptr addr, bool save_stack); - void Reset(); + void Init(ThreadState *thr, uptr pc, uptr addr, u64 uid, bool save_stack); + void Reset(Processor *proc); + + u64 GetId() const { + // 48 lsb is addr, then 14 bits is low part of uid, then 2 zero bits. 
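  // (For example, assuming the address fits in 48 bits as on typical 64-bit
  //  targets: addr = 0x7f0012345678 and uid = 0x2a give
  //  id = (0x2a << 48) | 0x7f0012345678 = 0x002a7f0012345678;
  //  SplitId() then recovers addr = GetLsb(id, 48) and uid = id >> 48.)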
+ return GetLsb((u64)addr | (uid << 48), 60); + } + bool CheckId(u64 uid) const { + CHECK_EQ(uid, GetLsb(uid, 14)); + return GetLsb(this->uid, 14) == uid; + } + static uptr SplitId(u64 id, u64 *uid) { + *uid = id >> 48; + return (uptr)GetLsb(id, 48); + } bool IsFlagSet(u32 f) const { return atomic_load_relaxed(&flags) & f; @@ -96,20 +110,9 @@ class MetaMap { MetaMap(); void AllocBlock(ThreadState *thr, uptr pc, uptr p, uptr sz); - - // FreeBlock resets all sync objects in the range if reset=true and must not - // run concurrently with ResetClocks which resets all sync objects - // w/o any synchronization (as part of DoReset). - // If we don't have a thread slot (very early/late in thread lifetime or - // Go/Java callbacks) or the slot is not locked, then reset must be set to - // false. In such case sync object clocks will be reset later (when it's - // reused or during the next ResetClocks). - uptr FreeBlock(Processor *proc, uptr p, bool reset); - bool FreeRange(Processor *proc, uptr p, uptr sz, bool reset); - void ResetRange(Processor *proc, uptr p, uptr sz, bool reset); - // Reset vector clocks of all sync objects. - // Must be called when no other threads access sync objects. - void ResetClocks(); + uptr FreeBlock(Processor *proc, uptr p); + bool FreeRange(Processor *proc, uptr p, uptr sz); + void ResetRange(Processor *proc, uptr p, uptr sz); MBlock* GetBlock(uptr p); SyncVar *GetSyncOrCreate(ThreadState *thr, uptr pc, uptr addr, @@ -139,6 +142,7 @@ class MetaMap { typedef DenseSlabAlloc SyncAlloc; BlockAlloc block_alloc_; SyncAlloc sync_alloc_; + atomic_uint64_t uid_gen_; SyncVar *GetSync(ThreadState *thr, uptr pc, uptr addr, bool create, bool save_stack); diff --git a/compiler-rt/lib/tsan/rtl/tsan_trace.h b/compiler-rt/lib/tsan/rtl/tsan_trace.h index 01bb7b34f43a2..ffc8c991ece0b 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_trace.h +++ b/compiler-rt/lib/tsan/rtl/tsan_trace.h @@ -19,6 +19,57 @@ namespace __tsan { +const int kTracePartSizeBits = 13; +const int kTracePartSize = 1 << kTracePartSizeBits; +const int kTraceParts = 2 * 1024 * 1024 / kTracePartSize; +const int kTraceSize = kTracePartSize * kTraceParts; + +// Must fit into 3 bits. +enum EventType { + EventTypeMop, + EventTypeFuncEnter, + EventTypeFuncExit, + EventTypeLock, + EventTypeUnlock, + EventTypeRLock, + EventTypeRUnlock +}; + +// Represents a thread event (from most significant bit): +// u64 typ : 3; // EventType. +// u64 addr : 61; // Associated pc. +typedef u64 Event; + +const uptr kEventPCBits = 61; + +struct TraceHeader { +#if !SANITIZER_GO + BufferedStackTrace stack0; // Start stack for the trace. +#else + VarSizeStackTrace stack0; +#endif + u64 epoch0; // Start epoch for the trace. + MutexSet mset0; + + TraceHeader() : stack0(), epoch0() {} +}; + +struct Trace { + Mutex mtx; +#if !SANITIZER_GO + // Must be last to catch overflow as paging fault. + // Go shadow stack is dynamically allocated. + uptr shadow_stack[kShadowStackSize]; +#endif + // Must be the last field, because we unmap the unused part in + // CreateThreadContext. 
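  // (Sizing, for reference: kTracePartSize = 1 << 13 = 8192 events per part,
  //  kTraceParts = 2M / 8192 = 256, so kTraceSize = 2M events per thread.
  //  FastState::GetTracePos() masks the epoch with
  //  (1 << (kTracePartSizeBits + history_size + 1)) - 1, i.e.
  //  2^(history_size+1) parts are in use: 2 parts for history_size = 0 up to
  //  all 256 for history_size = 7, which is presumably why the unused tail of
  //  this array can be unmapped.)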
+ TraceHeader headers[kTraceParts]; + + Trace() : mtx(MutexTypeTrace) {} +}; + +namespace v3 { + enum class EventType : u64 { kAccessExt, kAccessRange, @@ -166,7 +217,6 @@ struct Trace; struct TraceHeader { Trace* trace = nullptr; // back-pointer to Trace containing this part INode trace_parts; // in Trace::parts - INode global; // in Contex::trace_part_recycle }; struct TracePart : TraceHeader { @@ -189,27 +239,14 @@ static_assert(sizeof(TracePart) == TracePart::kByteSize, "bad TracePart size"); struct Trace { Mutex mtx; IList parts; - // First node non-queued into ctx->trace_part_recycle. - TracePart* local_head; - // Final position in the last part for finished threads. - Event* final_pos = nullptr; - // Number of trace parts allocated on behalf of this trace specifically. - // Total number of parts in this trace can be larger if we retake some - // parts from other traces. - uptr parts_allocated = 0; + Event* final_pos = + nullptr; // final position in the last part for finished threads Trace() : mtx(MutexTypeTrace) {} - - // We need at least 3 parts per thread, because we want to keep at last - // 2 parts per thread that are not queued into ctx->trace_part_recycle - // (the current one being filled and one full part that ensures that - // we always have at least one part worth of previous memory accesses). - static constexpr uptr kMinParts = 3; - - static constexpr uptr kFinishedThreadLo = 16; - static constexpr uptr kFinishedThreadHi = 64; }; +} // namespace v3 + } // namespace __tsan #endif // TSAN_TRACE_H diff --git a/compiler-rt/lib/tsan/rtl/tsan_update_shadow_word.inc b/compiler-rt/lib/tsan/rtl/tsan_update_shadow_word.inc new file mode 100644 index 0000000000000..a58ef0f17efa1 --- /dev/null +++ b/compiler-rt/lib/tsan/rtl/tsan_update_shadow_word.inc @@ -0,0 +1,59 @@ +//===-- tsan_update_shadow_word.inc -----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of ThreadSanitizer (TSan), a race detector. +// +// Body of the hottest inner loop. +// If we wrap this body into a function, compilers (both gcc and clang) +// produce sligtly less efficient code. +//===----------------------------------------------------------------------===// +do { + const unsigned kAccessSize = 1 << kAccessSizeLog; + u64 *sp = &shadow_mem[idx]; + old = LoadShadow(sp); + if (LIKELY(old.IsZero())) { + if (!stored) { + StoreIfNotYetStored(sp, &store_word); + stored = true; + } + break; + } + // is the memory access equal to the previous? + if (LIKELY(Shadow::Addr0AndSizeAreEqual(cur, old))) { + // same thread? + if (LIKELY(Shadow::TidsAreEqual(old, cur))) { + if (LIKELY(old.IsRWWeakerOrEqual(kAccessIsWrite, kIsAtomic))) { + StoreIfNotYetStored(sp, &store_word); + stored = true; + } + break; + } + if (HappensBefore(old, thr)) { + if (old.IsRWWeakerOrEqual(kAccessIsWrite, kIsAtomic)) { + StoreIfNotYetStored(sp, &store_word); + stored = true; + } + break; + } + if (LIKELY(old.IsBothReadsOrAtomic(kAccessIsWrite, kIsAtomic))) + break; + goto RACE; + } + // Do the memory access intersect? 
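  // (Worked example: if old was a 4-byte access at offset 0 of the cell
  //  (addr0 = 0, size = 4) and cur is a 2-byte access at offset 2, then in
  //  TwoRangesIntersect diff = 0 - 2 < 0 and old.size() = 4 > 2, so the
  //  ranges [0,4) and [2,4) intersect. A 2-byte access at offset 4 would
  //  not: old.size() = 4 > 4 is false.)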
+ if (Shadow::TwoRangesIntersect(old, cur, kAccessSize)) { + if (Shadow::TidsAreEqual(old, cur)) + break; + if (old.IsBothReadsOrAtomic(kAccessIsWrite, kIsAtomic)) + break; + if (LIKELY(HappensBefore(old, thr))) + break; + goto RACE; + } + // The accesses do not intersect. + break; +} while (0); diff --git a/compiler-rt/lib/tsan/tests/unit/tsan_flags_test.cpp b/compiler-rt/lib/tsan/tests/unit/tsan_flags_test.cpp index 46c7fee82d6c8..cb8ce91e9743e 100644 --- a/compiler-rt/lib/tsan/tests/unit/tsan_flags_test.cpp +++ b/compiler-rt/lib/tsan/tests/unit/tsan_flags_test.cpp @@ -98,7 +98,7 @@ void VerifyOptions1(Flags *f) { EXPECT_EQ(f->memory_limit_mb, 666); EXPECT_EQ(f->stop_on_start, 0); EXPECT_EQ(f->running_on_valgrind, 0); - EXPECT_EQ(f->history_size, (uptr)5); + EXPECT_EQ(f->history_size, 5); EXPECT_EQ(f->io_sync, 1); EXPECT_EQ(f->die_after_fork, true); } diff --git a/compiler-rt/lib/tsan/tests/unit/tsan_shadow_test.cpp b/compiler-rt/lib/tsan/tests/unit/tsan_shadow_test.cpp index ba49df7deda3c..890a12213bf3e 100644 --- a/compiler-rt/lib/tsan/tests/unit/tsan_shadow_test.cpp +++ b/compiler-rt/lib/tsan/tests/unit/tsan_shadow_test.cpp @@ -15,70 +15,34 @@ namespace __tsan { -void CheckShadow(const Shadow *s, Sid sid, Epoch epoch, uptr addr, uptr size, - AccessType typ) { - uptr addr1 = 0; - uptr size1 = 0; - AccessType typ1 = 0; - s->GetAccess(&addr1, &size1, &typ1); - CHECK_EQ(s->sid(), sid); - CHECK_EQ(s->epoch(), epoch); - CHECK_EQ(addr1, addr); - CHECK_EQ(size1, size); - CHECK_EQ(typ1, typ); -} - -TEST(Shadow, Shadow) { - Sid sid = static_cast(11); - Epoch epoch = static_cast(22); - FastState fs; - fs.SetSid(sid); - fs.SetEpoch(epoch); - CHECK_EQ(fs.sid(), sid); - CHECK_EQ(fs.epoch(), epoch); - CHECK_EQ(fs.GetIgnoreBit(), false); - fs.SetIgnoreBit(); - CHECK_EQ(fs.GetIgnoreBit(), true); - fs.ClearIgnoreBit(); - CHECK_EQ(fs.GetIgnoreBit(), false); - - Shadow s0(fs, 1, 2, kAccessWrite); - CheckShadow(&s0, sid, epoch, 1, 2, kAccessWrite); - Shadow s1(fs, 2, 3, kAccessRead); - CheckShadow(&s1, sid, epoch, 2, 3, kAccessRead); - Shadow s2(fs, 0xfffff8 + 4, 1, kAccessWrite | kAccessAtomic); - CheckShadow(&s2, sid, epoch, 4, 1, kAccessWrite | kAccessAtomic); - Shadow s3(fs, 0xfffff8 + 0, 8, kAccessRead | kAccessAtomic); - CheckShadow(&s3, sid, epoch, 0, 8, kAccessRead | kAccessAtomic); - - CHECK(!s0.IsBothReadsOrAtomic(kAccessRead | kAccessAtomic)); - CHECK(!s1.IsBothReadsOrAtomic(kAccessAtomic)); - CHECK(!s1.IsBothReadsOrAtomic(kAccessWrite)); - CHECK(s1.IsBothReadsOrAtomic(kAccessRead)); - CHECK(s2.IsBothReadsOrAtomic(kAccessAtomic)); - CHECK(!s2.IsBothReadsOrAtomic(kAccessWrite)); - CHECK(!s2.IsBothReadsOrAtomic(kAccessRead)); - CHECK(s3.IsBothReadsOrAtomic(kAccessAtomic)); - CHECK(!s3.IsBothReadsOrAtomic(kAccessWrite)); - CHECK(s3.IsBothReadsOrAtomic(kAccessRead)); - - CHECK(!s0.IsRWWeakerOrEqual(kAccessRead | kAccessAtomic)); - CHECK(s1.IsRWWeakerOrEqual(kAccessWrite)); - CHECK(s1.IsRWWeakerOrEqual(kAccessRead)); - CHECK(!s1.IsRWWeakerOrEqual(kAccessWrite | kAccessAtomic)); - - CHECK(!s2.IsRWWeakerOrEqual(kAccessRead | kAccessAtomic)); - CHECK(s2.IsRWWeakerOrEqual(kAccessWrite | kAccessAtomic)); - CHECK(s2.IsRWWeakerOrEqual(kAccessRead)); - CHECK(s2.IsRWWeakerOrEqual(kAccessWrite)); - - CHECK(s3.IsRWWeakerOrEqual(kAccessRead | kAccessAtomic)); - CHECK(s3.IsRWWeakerOrEqual(kAccessWrite | kAccessAtomic)); - CHECK(s3.IsRWWeakerOrEqual(kAccessRead)); - CHECK(s3.IsRWWeakerOrEqual(kAccessWrite)); - - Shadow sro(Shadow::kRodata); - CheckShadow(&sro, static_cast(0), kEpochZero, 0, 0, kAccessRead); 
+TEST(Shadow, FastState) { + Shadow s(FastState(11, 22)); + EXPECT_EQ(s.tid(), (u64)11); + EXPECT_EQ(s.epoch(), (u64)22); + EXPECT_EQ(s.GetIgnoreBit(), false); + EXPECT_EQ(s.GetFreedAndReset(), false); + EXPECT_EQ(s.GetHistorySize(), 0); + EXPECT_EQ(s.addr0(), (u64)0); + EXPECT_EQ(s.size(), (u64)1); + EXPECT_EQ(s.IsWrite(), true); + + s.IncrementEpoch(); + EXPECT_EQ(s.epoch(), (u64)23); + s.IncrementEpoch(); + EXPECT_EQ(s.epoch(), (u64)24); + + s.SetIgnoreBit(); + EXPECT_EQ(s.GetIgnoreBit(), true); + s.ClearIgnoreBit(); + EXPECT_EQ(s.GetIgnoreBit(), false); + + for (int i = 0; i < 8; i++) { + s.SetHistorySize(i); + EXPECT_EQ(s.GetHistorySize(), i); + } + s.SetHistorySize(2); + s.ClearHistorySize(); + EXPECT_EQ(s.GetHistorySize(), 0); } TEST(Shadow, Mapping) { diff --git a/compiler-rt/lib/tsan/tests/unit/tsan_stack_test.cpp b/compiler-rt/lib/tsan/tests/unit/tsan_stack_test.cpp index ba3fbb35999f7..23590caa3019f 100644 --- a/compiler-rt/lib/tsan/tests/unit/tsan_stack_test.cpp +++ b/compiler-rt/lib/tsan/tests/unit/tsan_stack_test.cpp @@ -18,7 +18,7 @@ namespace __tsan { template static void TestStackTrace(StackTraceTy *trace) { - ThreadState thr(kMainTid); + ThreadState thr(0, 0, 0, 0, 0, 0, 0, 0, 0); ObtainCurrentStack(&thr, 0, trace); EXPECT_EQ(0U, trace->size); @@ -43,7 +43,7 @@ static void TestStackTrace(StackTraceTy *trace) { template static void TestTrim(StackTraceTy *trace) { - ThreadState thr(kMainTid); + ThreadState thr(0, 0, 0, 0, 0, 0, 0, 0, 0); for (uptr i = 0; i < 2 * kStackTraceMax; ++i) *thr.shadow_stack_pos++ = 100 + i; diff --git a/compiler-rt/lib/tsan/tests/unit/tsan_sync_test.cpp b/compiler-rt/lib/tsan/tests/unit/tsan_sync_test.cpp index 87a28f2bc2b10..8e6c985905304 100644 --- a/compiler-rt/lib/tsan/tests/unit/tsan_sync_test.cpp +++ b/compiler-rt/lib/tsan/tests/unit/tsan_sync_test.cpp @@ -17,7 +17,6 @@ namespace __tsan { TEST(MetaMap, Basic) { ThreadState *thr = cur_thread(); - SlotLocker locker(thr); MetaMap *m = &ctx->metamap; u64 block[1] = {}; // fake malloc block m->AllocBlock(thr, 0, (uptr)&block[0], 1 * sizeof(u64)); @@ -25,7 +24,7 @@ TEST(MetaMap, Basic) { CHECK_NE(mb, (MBlock *)0); CHECK_EQ(mb->siz, 1 * sizeof(u64)); CHECK_EQ(mb->tid, thr->tid); - uptr sz = m->FreeBlock(thr->proc(), (uptr)&block[0], true); + uptr sz = m->FreeBlock(thr->proc(), (uptr)&block[0]); CHECK_EQ(sz, 1 * sizeof(u64)); mb = m->GetBlock((uptr)&block[0]); CHECK_EQ(mb, (MBlock *)0); @@ -33,7 +32,6 @@ TEST(MetaMap, Basic) { TEST(MetaMap, FreeRange) { ThreadState *thr = cur_thread(); - SlotLocker locker(thr); MetaMap *m = &ctx->metamap; u64 block[4] = {}; // fake malloc block m->AllocBlock(thr, 0, (uptr)&block[0], 1 * sizeof(u64)); @@ -42,7 +40,7 @@ TEST(MetaMap, FreeRange) { CHECK_EQ(mb1->siz, 1 * sizeof(u64)); MBlock *mb2 = m->GetBlock((uptr)&block[1]); CHECK_EQ(mb2->siz, 3 * sizeof(u64)); - m->FreeRange(thr->proc(), (uptr)&block[0], 4 * sizeof(u64), true); + m->FreeRange(thr->proc(), (uptr)&block[0], 4 * sizeof(u64)); mb1 = m->GetBlock((uptr)&block[0]); CHECK_EQ(mb1, (MBlock *)0); mb2 = m->GetBlock((uptr)&block[1]); @@ -54,7 +52,6 @@ TEST(MetaMap, Sync) { // them from detecting that we exit runtime with mutexes held. 
ScopedIgnoreInterceptors ignore; ThreadState *thr = cur_thread(); - SlotLocker locker(thr); MetaMap *m = &ctx->metamap; u64 block[4] = {}; // fake malloc block m->AllocBlock(thr, 0, (uptr)&block[0], 4 * sizeof(u64)); @@ -66,7 +63,7 @@ TEST(MetaMap, Sync) { SyncVar *s2 = m->GetSyncOrCreate(thr, 0, (uptr)&block[1], false); CHECK_NE(s2, (SyncVar *)0); CHECK_EQ(s2->addr, (uptr)&block[1]); - m->FreeBlock(thr->proc(), (uptr)&block[0], true); + m->FreeBlock(thr->proc(), (uptr)&block[0]); s1 = m->GetSyncIfExists((uptr)&block[0]); CHECK_EQ(s1, (SyncVar *)0); s2 = m->GetSyncIfExists((uptr)&block[1]); @@ -77,7 +74,6 @@ TEST(MetaMap, Sync) { TEST(MetaMap, MoveMemory) { ScopedIgnoreInterceptors ignore; ThreadState *thr = cur_thread(); - SlotLocker locker(thr); MetaMap *m = &ctx->metamap; u64 block1[4] = {}; // fake malloc block u64 block2[4] = {}; // fake malloc block @@ -106,19 +102,18 @@ TEST(MetaMap, MoveMemory) { s2 = m->GetSyncIfExists((uptr)&block2[1]); CHECK_NE(s2, (SyncVar *)0); CHECK_EQ(s2->addr, (uptr)&block2[1]); - m->FreeRange(thr->proc(), (uptr)&block2[0], 4 * sizeof(u64), true); + m->FreeRange(thr->proc(), (uptr)&block2[0], 4 * sizeof(u64)); } TEST(MetaMap, ResetSync) { ScopedIgnoreInterceptors ignore; ThreadState *thr = cur_thread(); - SlotLocker locker(thr); MetaMap *m = &ctx->metamap; u64 block[1] = {}; // fake malloc block m->AllocBlock(thr, 0, (uptr)&block[0], 1 * sizeof(u64)); SyncVar *s = m->GetSyncOrCreate(thr, 0, (uptr)&block[0], false); - s->Reset(); - uptr sz = m->FreeBlock(thr->proc(), (uptr)&block[0], true); + s->Reset(thr->proc()); + uptr sz = m->FreeBlock(thr->proc(), (uptr)&block[0]); CHECK_EQ(sz, 1 * sizeof(u64)); } diff --git a/compiler-rt/lib/tsan/tests/unit/tsan_trace_test.cpp b/compiler-rt/lib/tsan/tests/unit/tsan_trace_test.cpp index 13c03353e70e1..c2e852d941c04 100644 --- a/compiler-rt/lib/tsan/tests/unit/tsan_trace_test.cpp +++ b/compiler-rt/lib/tsan/tests/unit/tsan_trace_test.cpp @@ -31,6 +31,8 @@ namespace __tsan { +using namespace v3; + // We need to run all trace tests in a new thread, // so that the thread trace is empty initially. template @@ -76,30 +78,27 @@ TRACE_TEST(Trace, RestoreAccess) { ThreadArray<1> thr; TraceFunc(thr, 0x1000); TraceFunc(thr, 0x1001); - TraceMutexLock(thr, EventType::kLock, 0x4000, 0x5000, 0x6000); - TraceMutexLock(thr, EventType::kLock, 0x4001, 0x5001, 0x6001); + TraceMutexLock(thr, v3::EventType::kLock, 0x4000, 0x5000, 0x6000); + TraceMutexLock(thr, v3::EventType::kLock, 0x4001, 0x5001, 0x6001); TraceMutexUnlock(thr, 0x5000); TraceFunc(thr); CHECK(TryTraceMemoryAccess(thr, 0x2001, 0x3001, 8, kAccessRead)); - TraceMutexLock(thr, EventType::kRLock, 0x4002, 0x5002, 0x6002); + TraceMutexLock(thr, v3::EventType::kRLock, 0x4002, 0x5002, 0x6002); TraceFunc(thr, 0x1002); CHECK(TryTraceMemoryAccess(thr, 0x2000, 0x3000, 8, kAccessRead)); // This is the access we want to find. // The previous one is equivalent, but RestoreStack must prefer // the last of the matchig accesses. 
CHECK(TryTraceMemoryAccess(thr, 0x2002, 0x3000, 8, kAccessRead)); - SlotPairLocker locker(thr, thr->fast_state.sid()); - ThreadRegistryLock lock1(&ctx->thread_registry); - Lock lock2(&ctx->slot_mtx); - Tid tid = kInvalidTid; + Lock lock1(&ctx->slot_mtx); + ThreadRegistryLock lock2(&ctx->thread_registry); VarSizeStackTrace stk; MutexSet mset; uptr tag = kExternalTagNone; - bool res = RestoreStack(EventType::kAccessExt, thr->fast_state.sid(), - thr->fast_state.epoch(), 0x3000, 8, kAccessRead, &tid, - &stk, &mset, &tag); + bool res = + RestoreStack(thr->tid, v3::EventType::kAccessExt, thr->sid, thr->epoch, + 0x3000, 8, kAccessRead, &stk, &mset, &tag); CHECK(res); - CHECK_EQ(tid, thr->tid); CHECK_EQ(stk.size, 3); CHECK_EQ(stk.trace[0], 0x1000); CHECK_EQ(stk.trace[1], 0x1002); @@ -148,17 +147,14 @@ TRACE_TEST(Trace, MemoryAccessSize) { kAccessRead); break; } - SlotPairLocker locker(thr, thr->fast_state.sid()); - ThreadRegistryLock lock1(&ctx->thread_registry); - Lock lock2(&ctx->slot_mtx); - Tid tid = kInvalidTid; + Lock lock1(&ctx->slot_mtx); + ThreadRegistryLock lock2(&ctx->thread_registry); VarSizeStackTrace stk; MutexSet mset; uptr tag = kExternalTagNone; - bool res = - RestoreStack(EventType::kAccessExt, thr->fast_state.sid(), - thr->fast_state.epoch(), 0x3000 + params.offset, - params.size, kAccessRead, &tid, &stk, &mset, &tag); + bool res = RestoreStack(thr->tid, v3::EventType::kAccessExt, thr->sid, + thr->epoch, 0x3000 + params.offset, params.size, + kAccessRead, &stk, &mset, &tag); CHECK_EQ(res, params.res); if (params.res) { CHECK_EQ(stk.size, 2); @@ -173,19 +169,16 @@ TRACE_TEST(Trace, RestoreMutexLock) { // Check of restoration of a mutex lock event. ThreadArray<1> thr; TraceFunc(thr, 0x1000); - TraceMutexLock(thr, EventType::kLock, 0x4000, 0x5000, 0x6000); - TraceMutexLock(thr, EventType::kRLock, 0x4001, 0x5001, 0x6001); - TraceMutexLock(thr, EventType::kRLock, 0x4002, 0x5001, 0x6002); - SlotPairLocker locker(thr, thr->fast_state.sid()); - ThreadRegistryLock lock1(&ctx->thread_registry); - Lock lock2(&ctx->slot_mtx); - Tid tid = kInvalidTid; + TraceMutexLock(thr, v3::EventType::kLock, 0x4000, 0x5000, 0x6000); + TraceMutexLock(thr, v3::EventType::kRLock, 0x4001, 0x5001, 0x6001); + TraceMutexLock(thr, v3::EventType::kRLock, 0x4002, 0x5001, 0x6002); + Lock lock1(&ctx->slot_mtx); + ThreadRegistryLock lock2(&ctx->thread_registry); VarSizeStackTrace stk; MutexSet mset; uptr tag = kExternalTagNone; - bool res = RestoreStack(EventType::kLock, thr->fast_state.sid(), - thr->fast_state.epoch(), 0x5001, 0, 0, &tid, &stk, - &mset, &tag); + bool res = RestoreStack(thr->tid, v3::EventType::kLock, thr->sid, thr->epoch, + 0x5001, 0, 0, &stk, &mset, &tag); CHECK(res); CHECK_EQ(stk.size, 2); CHECK_EQ(stk.trace[0], 0x1000); @@ -202,35 +195,28 @@ TRACE_TEST(Trace, RestoreMutexLock) { TRACE_TEST(Trace, MultiPart) { // Check replay of a trace with multiple parts. 
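  // (The loop below emits roughly three TraceParts' worth of events, so the
  //  trace is guaranteed to spill across part boundaries and RestoreStack has
  //  to stitch the stack together from more than one part.)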
ThreadArray<1> thr; - FuncEntry(thr, 0x1000); - FuncEntry(thr, 0x2000); - MutexPreLock(thr, 0x4000, 0x5000, 0); - MutexPostLock(thr, 0x4000, 0x5000, 0); - MutexPreLock(thr, 0x4000, 0x5000, 0); - MutexPostLock(thr, 0x4000, 0x5000, 0); - const uptr kEvents = 3 * sizeof(TracePart) / sizeof(Event); + TraceFunc(thr, 0x1000); + TraceFunc(thr, 0x2000); + TraceMutexLock(thr, v3::EventType::kLock, 0x4000, 0x5000, 0x6000); + const uptr kEvents = 3 * sizeof(TracePart) / sizeof(v3::Event); for (uptr i = 0; i < kEvents; i++) { - FuncEntry(thr, 0x3000); - MutexPreLock(thr, 0x4002, 0x5002, 0); - MutexPostLock(thr, 0x4002, 0x5002, 0); - MutexUnlock(thr, 0x4003, 0x5002, 0); - FuncExit(thr); + TraceFunc(thr, 0x3000); + TraceMutexLock(thr, v3::EventType::kLock, 0x4002, 0x5002, 0x6002); + TraceMutexUnlock(thr, 0x5002); + TraceFunc(thr); } - FuncEntry(thr, 0x4000); - TraceMutexLock(thr, EventType::kRLock, 0x4001, 0x5001, 0x6001); + TraceFunc(thr, 0x4000); + TraceMutexLock(thr, v3::EventType::kRLock, 0x4001, 0x5001, 0x6001); CHECK(TryTraceMemoryAccess(thr, 0x2002, 0x3000, 8, kAccessRead)); - SlotPairLocker locker(thr, thr->fast_state.sid()); - ThreadRegistryLock lock1(&ctx->thread_registry); - Lock lock2(&ctx->slot_mtx); - Tid tid = kInvalidTid; + Lock lock1(&ctx->slot_mtx); + ThreadRegistryLock lock2(&ctx->thread_registry); VarSizeStackTrace stk; MutexSet mset; uptr tag = kExternalTagNone; - bool res = RestoreStack(EventType::kAccessExt, thr->fast_state.sid(), - thr->fast_state.epoch(), 0x3000, 8, kAccessRead, &tid, - &stk, &mset, &tag); + bool res = + RestoreStack(thr->tid, v3::EventType::kAccessExt, thr->sid, thr->epoch, + 0x3000, 8, kAccessRead, &stk, &mset, &tag); CHECK(res); - CHECK_EQ(tid, thr->tid); CHECK_EQ(stk.size, 4); CHECK_EQ(stk.trace[0], 0x1000); CHECK_EQ(stk.trace[1], 0x2000); @@ -238,94 +224,11 @@ TRACE_TEST(Trace, MultiPart) { CHECK_EQ(stk.trace[3], 0x2002); CHECK_EQ(mset.Size(), 2); CHECK_EQ(mset.Get(0).addr, 0x5000); + CHECK_EQ(mset.Get(0).stack_id, 0x6000); CHECK_EQ(mset.Get(0).write, true); - CHECK_EQ(mset.Get(0).count, 2); CHECK_EQ(mset.Get(1).addr, 0x5001); + CHECK_EQ(mset.Get(1).stack_id, 0x6001); CHECK_EQ(mset.Get(1).write, false); - CHECK_EQ(mset.Get(1).count, 1); -} - -void CheckTraceState(uptr count, uptr finished, uptr excess, uptr recycle) { - Lock l(&ctx->slot_mtx); - Printf("CheckTraceState(%zu/%zu, %zu/%zu, %zu/%zu, %zu/%zu)\n", - ctx->trace_part_total_allocated, count, - ctx->trace_part_recycle_finished, finished, - ctx->trace_part_finished_excess, excess, - ctx->trace_part_recycle.Size(), recycle); - CHECK_EQ(ctx->trace_part_total_allocated, count); - CHECK_EQ(ctx->trace_part_recycle_finished, finished); - CHECK_EQ(ctx->trace_part_finished_excess, excess); - CHECK_EQ(ctx->trace_part_recycle.Size(), recycle); -} - -TRACE_TEST(TraceAlloc, SingleThread) { - TraceResetForTesting(); - auto check_thread = [&](ThreadState *thr, uptr size, uptr count, - uptr finished, uptr excess, uptr recycle) { - CHECK_EQ(thr->tctx->trace.parts.Size(), size); - CheckTraceState(count, finished, excess, recycle); - }; - ThreadArray<2> threads; - check_thread(threads[0], 0, 0, 0, 0, 0); - TraceSwitchPartImpl(threads[0]); - check_thread(threads[0], 1, 1, 0, 0, 0); - TraceSwitchPartImpl(threads[0]); - check_thread(threads[0], 2, 2, 0, 0, 0); - TraceSwitchPartImpl(threads[0]); - check_thread(threads[0], 3, 3, 0, 0, 1); - TraceSwitchPartImpl(threads[0]); - check_thread(threads[0], 3, 3, 0, 0, 1); - threads.Finish(0); - CheckTraceState(3, 3, 0, 3); - threads.Finish(1); - CheckTraceState(3, 3, 0, 3); -} - 
-TRACE_TEST(TraceAlloc, FinishedThreadReuse) { - TraceResetForTesting(); - constexpr uptr Hi = Trace::kFinishedThreadHi; - constexpr uptr kThreads = 4 * Hi; - ThreadArray threads; - for (uptr i = 0; i < kThreads; i++) { - Printf("thread %zu\n", i); - TraceSwitchPartImpl(threads[i]); - if (i <= Hi) - CheckTraceState(i + 1, i, 0, i); - else if (i <= 2 * Hi) - CheckTraceState(Hi + 1, Hi, i - Hi, Hi); - else - CheckTraceState(Hi + 1, Hi, Hi, Hi); - threads.Finish(i); - if (i < Hi) - CheckTraceState(i + 1, i + 1, 0, i + 1); - else if (i < 2 * Hi) - CheckTraceState(Hi + 1, Hi + 1, i - Hi + 1, Hi + 1); - else - CheckTraceState(Hi + 1, Hi + 1, Hi + 1, Hi + 1); - } -} - -TRACE_TEST(TraceAlloc, FinishedThreadReuse2) { - TraceResetForTesting(); - // constexpr uptr Lo = Trace::kFinishedThreadLo; - // constexpr uptr Hi = Trace::kFinishedThreadHi; - constexpr uptr Min = Trace::kMinParts; - constexpr uptr kThreads = 10; - constexpr uptr kParts = 2 * Min; - ThreadArray threads; - for (uptr i = 0; i < kThreads; i++) { - Printf("thread %zu\n", i); - for (uptr j = 0; j < kParts; j++) TraceSwitchPartImpl(threads[i]); - if (i == 0) - CheckTraceState(Min, 0, 0, 1); - else - CheckTraceState(2 * Min, 0, Min, Min + 1); - threads.Finish(i); - if (i == 0) - CheckTraceState(Min, Min, 0, Min); - else - CheckTraceState(2 * Min, 2 * Min, Min, 2 * Min); - } } } // namespace __tsan diff --git a/compiler-rt/test/tsan/free_race2.c b/compiler-rt/test/tsan/free_race2.c index ddba22c63701f..a2137a7cdc709 100644 --- a/compiler-rt/test/tsan/free_race2.c +++ b/compiler-rt/test/tsan/free_race2.c @@ -28,7 +28,7 @@ int main() { } // CHECK: WARNING: ThreadSanitizer: heap-use-after-free -// CHECK: Write of size {{.*}} at {{.*}} by main thread: +// CHECK: Write of size 8 at {{.*}} by main thread: // CHECK: #0 bar // CHECK: #1 main // CHECK: Previous write of size 8 at {{.*}} by main thread: diff --git a/compiler-rt/test/tsan/memcmp_race.cpp b/compiler-rt/test/tsan/memcmp_race.cpp index 911c33524dd62..40b11a77b364c 100644 --- a/compiler-rt/test/tsan/memcmp_race.cpp +++ b/compiler-rt/test/tsan/memcmp_race.cpp @@ -34,7 +34,7 @@ int main() { // CHECK: addr=[[ADDR:0x[0-9,a-f]+]] // CHECK: WARNING: ThreadSanitizer: data race -// CHECK: Write of size 3 at [[ADDR]] by thread T2: +// CHECK: Write of size 1 at [[ADDR]] by thread T2: // CHECK: #0 {{(memcpy|memmove)}} // CHECK: #{{[12]}} Thread2 // CHECK: Previous read of size 1 at [[ADDR]] by thread T1: diff --git a/compiler-rt/test/tsan/memcpy_race.cpp b/compiler-rt/test/tsan/memcpy_race.cpp index cfdec7cd642f6..09b2a319e2056 100644 --- a/compiler-rt/test/tsan/memcpy_race.cpp +++ b/compiler-rt/test/tsan/memcpy_race.cpp @@ -22,8 +22,7 @@ void *Thread2(void *x) { int main() { barrier_init(&barrier, 2); - print_address("addr1=", 1, &data[3]); - print_address("addr2=", 1, &data[5]); + print_address("addr=", 1, &data[5]); pthread_t t[2]; pthread_create(&t[0], NULL, Thread1, NULL); pthread_create(&t[1], NULL, Thread2, NULL); @@ -32,12 +31,11 @@ int main() { return 0; } -// CHECK: addr1=[[ADDR1:0x[0-9,a-f]+]] -// CHECK: addr2=[[ADDR2:0x[0-9,a-f]+]] +// CHECK: addr=[[ADDR:0x[0-9,a-f]+]] // CHECK: WARNING: ThreadSanitizer: data race -// CHECK: Write of size 4 at [[ADDR1]] by thread T2: +// CHECK: Write of size 1 at [[ADDR]] by thread T2: // CHECK: #0 {{(memcpy|memmove)}} // CHECK: #{{[12]}} Thread2 -// CHECK: Previous write of size 1 at [[ADDR2]] by thread T1: +// CHECK: Previous write of size 1 at [[ADDR]] by thread T1: // CHECK: #0 {{(memcpy|memmove)}} // CHECK: #{{[12]}} Thread1 diff --git 
a/compiler-rt/test/tsan/mutexset7.cpp b/compiler-rt/test/tsan/mutexset7.cpp
index 5b4c7b9bb38cb..d3729659717e7 100644
--- a/compiler-rt/test/tsan/mutexset7.cpp
+++ b/compiler-rt/test/tsan/mutexset7.cpp
@@ -36,6 +36,6 @@ int main() {
 // CHECK: Write of size 4 at {{.*}} by thread T1:
 // CHECK: Previous write of size 4 at {{.*}} by thread T2
 // CHECK: (mutexes: write [[M1:M[0-9]+]]):
-// CHECK: Mutex [[M1]] (0x{{.*}}) created at:
-// CHECK: #0 pthread_mutex_init
-// CHECK: #1 Thread2
+// CHECK: Mutex [[M1]] is already destroyed
+// CHECK-NOT: Mutex {{.*}} created at
+