54 changes: 53 additions & 1 deletion compiler-rt/lib/tsan/rtl/tsan_mutexset.cpp
@@ -19,7 +19,57 @@ namespace __tsan {
MutexSet::MutexSet() {
}

void MutexSet::Reset() { internal_memset(this, 0, sizeof(*this)); }
void MutexSet::Add(u64 id, bool write, u64 epoch) {
// Look up existing mutex with the same id.
for (uptr i = 0; i < size_; i++) {
if (descs_[i].id == id) {
descs_[i].count++;
descs_[i].epoch = epoch;
return;
}
}
// On overflow, find the oldest mutex and drop it.
if (size_ == kMaxSize) {
u64 minepoch = (u64)-1;
u64 mini = (u64)-1;
for (uptr i = 0; i < size_; i++) {
if (descs_[i].epoch < minepoch) {
minepoch = descs_[i].epoch;
mini = i;
}
}
RemovePos(mini);
CHECK_EQ(size_, kMaxSize - 1);
}
// Add new mutex descriptor.
descs_[size_].addr = 0;
descs_[size_].stack_id = kInvalidStackID;
descs_[size_].id = id;
descs_[size_].write = write;
descs_[size_].epoch = epoch;
descs_[size_].seq = seq_++;
descs_[size_].count = 1;
size_++;
}

void MutexSet::Del(u64 id, bool write) {
for (uptr i = 0; i < size_; i++) {
if (descs_[i].id == id) {
if (--descs_[i].count == 0)
RemovePos(i);
return;
}
}
}

void MutexSet::Remove(u64 id) {
for (uptr i = 0; i < size_; i++) {
if (descs_[i].id == id) {
RemovePos(i);
return;
}
}
}

void MutexSet::AddAddr(uptr addr, StackID stack_id, bool write) {
// Look up existing mutex with the same id.
@@ -43,7 +93,9 @@ void MutexSet::AddAddr(uptr addr, StackID stack_id, bool write) {
// Add new mutex descriptor.
descs_[size_].addr = addr;
descs_[size_].stack_id = stack_id;
descs_[size_].id = 0;
descs_[size_].write = write;
descs_[size_].epoch = 0;
descs_[size_].seq = seq_++;
descs_[size_].count = 1;
size_++;
11 changes: 9 additions & 2 deletions compiler-rt/lib/tsan/rtl/tsan_mutexset.h
@@ -25,6 +25,8 @@ class MutexSet {
struct Desc {
uptr addr;
StackID stack_id;
u64 id;
u64 epoch;
u32 seq;
u32 count;
bool write;
@@ -38,7 +40,10 @@
};

MutexSet();
void Reset();
// The 'id' is obtained from SyncVar::GetId().
void Add(u64 id, bool write, u64 epoch);
void Del(u64 id, bool write);
void Remove(u64 id); // Removes the mutex completely (if it's destroyed).
void AddAddr(uptr addr, StackID stack_id, bool write);
void DelAddr(uptr addr, bool destroy = false);
uptr Size() const;
@@ -77,7 +82,9 @@ class DynamicMutexSet {
// in different goroutine).
#if SANITIZER_GO
MutexSet::MutexSet() {}
void MutexSet::Reset() {}
void MutexSet::Add(u64 id, bool write, u64 epoch) {}
void MutexSet::Del(u64 id, bool write) {}
void MutexSet::Remove(u64 id) {}
void MutexSet::AddAddr(uptr addr, StackID stack_id, bool write) {}
void MutexSet::DelAddr(uptr addr, bool destroy) {}
uptr MutexSet::Size() const { return 0; }
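Editor's note on the MutexSet hunks above: the sketch below is a minimal standalone model, not the tsan implementation, of the id-keyed bookkeeping that Add()/Del() reinstate. The kMaxSize value, the Desc fields and the swap-with-last removal are assumptions made for brevity; what it demonstrates is the restored behaviour that re-locking a tracked mutex only bumps its count and that, on overflow, the descriptor with the smallest epoch is evicted.

#include <cstdint>
#include <cstdio>

// Toy model (not the tsan types): kMaxSize, Desc and the removal strategy
// are simplified stand-ins for the real MutexSet in tsan_mutexset.h.
struct Desc {
  uint64_t id, epoch, count;
};

struct ToyMutexSet {
  static const int kMaxSize = 4;
  Desc descs[kMaxSize];
  int size = 0;

  void Add(uint64_t id, uint64_t epoch) {
    for (int i = 0; i < size; i++) {
      if (descs[i].id == id) {  // Re-lock of an already tracked mutex.
        descs[i].count++;
        descs[i].epoch = epoch;
        return;
      }
    }
    if (size == kMaxSize) {  // Overflow: drop the oldest descriptor.
      int mini = 0;
      for (int i = 1; i < size; i++)
        if (descs[i].epoch < descs[mini].epoch) mini = i;
      descs[mini] = descs[--size];
    }
    descs[size++] = Desc{id, epoch, 1};
  }

  void Del(uint64_t id) {
    for (int i = 0; i < size; i++) {
      if (descs[i].id != id) continue;
      if (--descs[i].count == 0) descs[i] = descs[--size];
      return;
    }
  }
};

int main() {
  ToyMutexSet ms;
  for (uint64_t m = 1; m <= 5; m++) ms.Add(/*id=*/m, /*epoch=*/m);
  // Mutex 1 (smallest epoch) was evicted to make room for mutex 5.
  std::printf("tracked mutexes: %d\n", ms.size);  // prints 4
  ms.Del(5);
  std::printf("tracked mutexes: %d\n", ms.size);  // prints 3
}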
173 changes: 140 additions & 33 deletions compiler-rt/lib/tsan/rtl/tsan_platform.h

Large diffs are not rendered by default.

42 changes: 27 additions & 15 deletions compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp
@@ -94,6 +94,7 @@ enum {
MemMeta,
MemFile,
MemMmap,
MemTrace,
MemHeap,
MemOther,
MemCount,
@@ -111,6 +112,8 @@ void FillProfileCallback(uptr p, uptr rss, bool file, uptr *mem) {
mem[file ? MemFile : MemMmap] += rss;
else if (p >= HeapMemBeg() && p < HeapMemEnd())
mem[MemHeap] += rss;
else if (p >= TraceMemBeg() && p < TraceMemEnd())
mem[MemTrace] += rss;
else
mem[MemOther] += rss;
}
@@ -123,29 +126,38 @@ void WriteMemoryProfile(char *buf, uptr buf_size, u64 uptime_ns) {
StackDepotStats stacks = StackDepotGetStats();
uptr nthread, nlive;
ctx->thread_registry.GetNumberOfThreads(&nthread, &nlive);
uptr trace_mem;
{
Lock l(&ctx->slot_mtx);
trace_mem = ctx->trace_part_total_allocated * sizeof(TracePart);
}
uptr internal_stats[AllocatorStatCount];
internal_allocator()->GetStats(internal_stats);
// All these are allocated from the common mmap region.
mem[MemMmap] -= meta.mem_block + meta.sync_obj + trace_mem +
stacks.allocated + internal_stats[AllocatorStatMapped];
mem[MemMmap] -= meta.mem_block + meta.sync_obj + stacks.allocated +
internal_stats[AllocatorStatMapped];
if (s64(mem[MemMmap]) < 0)
mem[MemMmap] = 0;
internal_snprintf(
buf, buf_size,
"==%zu== %llus [%zu]: RSS %zd MB: shadow:%zd meta:%zd file:%zd"
" mmap:%zd heap:%zd other:%zd intalloc:%zd memblocks:%zd syncobj:%zu"
" trace:%zu stacks=%zd threads=%zu/%zu\n",
internal_getpid(), uptime_ns / (1000 * 1000 * 1000), ctx->global_epoch,
mem[MemTotal] >> 20, mem[MemShadow] >> 20, mem[MemMeta] >> 20,
mem[MemFile] >> 20, mem[MemMmap] >> 20, mem[MemHeap] >> 20,
"%llus: RSS %zd MB: shadow:%zd meta:%zd file:%zd mmap:%zd"
" trace:%zd heap:%zd other:%zd intalloc:%zd memblocks:%zd syncobj:%zu"
" stacks=%zd[%zd] nthr=%zd/%zd\n",
uptime_ns / (1000 * 1000 * 1000), mem[MemTotal] >> 20,
mem[MemShadow] >> 20, mem[MemMeta] >> 20, mem[MemFile] >> 20,
mem[MemMmap] >> 20, mem[MemTrace] >> 20, mem[MemHeap] >> 20,
mem[MemOther] >> 20, internal_stats[AllocatorStatMapped] >> 20,
meta.mem_block >> 20, meta.sync_obj >> 20, trace_mem >> 20,
stacks.allocated >> 20, nlive, nthread);
meta.mem_block >> 20, meta.sync_obj >> 20, stacks.allocated >> 20,
stacks.n_uniq_ids, nlive, nthread);
}

# if SANITIZER_LINUX
void FlushShadowMemoryCallback(
const SuspendedThreadsList &suspended_threads_list,
void *argument) {
ReleaseMemoryPagesToOS(ShadowBeg(), ShadowEnd());
}
#endif

void FlushShadowMemory() {
#if SANITIZER_LINUX
StopTheWorld(FlushShadowMemoryCallback, 0);
#endif
}

#if !SANITIZER_GO
3 changes: 3 additions & 0 deletions compiler-rt/lib/tsan/rtl/tsan_platform_mac.cpp
@@ -112,6 +112,9 @@ void cur_thread_finalize() {
}
#endif

void FlushShadowMemory() {
}

static void RegionMemUsage(uptr start, uptr end, uptr *res, uptr *dirty) {
vm_address_t address = start;
vm_address_t end_address = end;
8 changes: 6 additions & 2 deletions compiler-rt/lib/tsan/rtl/tsan_platform_posix.cpp
@@ -119,10 +119,14 @@ void CheckAndProtect() {
ProtectRange(ShadowEnd(), MetaShadowBeg());
if (MidAppMemBeg()) {
ProtectRange(MetaShadowEnd(), MidAppMemBeg());
ProtectRange(MidAppMemEnd(), HeapMemBeg());
ProtectRange(MidAppMemEnd(), TraceMemBeg());
} else {
ProtectRange(MetaShadowEnd(), HeapMemBeg());
ProtectRange(MetaShadowEnd(), TraceMemBeg());
}
// Memory for traces is mapped lazily in MapThreadTrace.
// Protect the whole range for now, so that user does not map something here.
ProtectRange(TraceMemBeg(), TraceMemEnd());
ProtectRange(TraceMemEnd(), HeapMemBeg());
ProtectRange(HeapEnd(), HiAppMemBeg());
#endif

3 changes: 3 additions & 0 deletions compiler-rt/lib/tsan/rtl/tsan_platform_windows.cpp
@@ -20,6 +20,9 @@

namespace __tsan {

void FlushShadowMemory() {
}

void WriteMemoryProfile(char *buf, uptr buf_size, u64 uptime_ns) {}

void InitializePlatformEarly() {
26 changes: 16 additions & 10 deletions compiler-rt/lib/tsan/rtl/tsan_report.cpp
@@ -126,7 +126,7 @@ static void PrintMutexSet(Vector<ReportMopMutex> const& mset) {
if (i == 0)
Printf(" (mutexes:");
const ReportMopMutex m = mset[i];
Printf(" %s M%u", m.write ? "write" : "read", m.id);
Printf(" %s M%llu", m.write ? "write" : "read", m.id);
Printf(i == mset.Size() - 1 ? ")" : ",");
}
}
@@ -211,23 +211,29 @@ static void PrintLocation(const ReportLocation *loc) {

static void PrintMutexShort(const ReportMutex *rm, const char *after) {
Decorator d;
Printf("%sM%d%s%s", d.Mutex(), rm->id, d.Default(), after);
Printf("%sM%lld%s%s", d.Mutex(), rm->id, d.Default(), after);
}

static void PrintMutexShortWithAddress(const ReportMutex *rm,
const char *after) {
Decorator d;
Printf("%sM%d (%p)%s%s", d.Mutex(), rm->id,
Printf("%sM%lld (%p)%s%s", d.Mutex(), rm->id,
reinterpret_cast<void *>(rm->addr), d.Default(), after);
}

static void PrintMutex(const ReportMutex *rm) {
Decorator d;
Printf("%s", d.Mutex());
Printf(" Mutex M%u (%p) created at:\n", rm->id,
reinterpret_cast<void *>(rm->addr));
Printf("%s", d.Default());
PrintStack(rm->stack);
if (rm->destroyed) {
Printf("%s", d.Mutex());
Printf(" Mutex M%llu is already destroyed.\n\n", rm->id);
Printf("%s", d.Default());
} else {
Printf("%s", d.Mutex());
Printf(" Mutex M%llu (%p) created at:\n", rm->id,
reinterpret_cast<void *>(rm->addr));
Printf("%s", d.Default());
PrintStack(rm->stack);
}
}

static void PrintThread(const ReportThread *rt) {
@@ -454,12 +460,12 @@ void PrintReport(const ReportDesc *rep) {
} else if (rep->typ == ReportTypeDeadlock) {
Printf("WARNING: DEADLOCK\n");
for (uptr i = 0; i < rep->mutexes.Size(); i++) {
Printf("Goroutine %d lock mutex %u while holding mutex %u:\n", 999,
Printf("Goroutine %d lock mutex %llu while holding mutex %llu:\n", 999,
rep->mutexes[i]->id,
rep->mutexes[(i + 1) % rep->mutexes.Size()]->id);
PrintStack(rep->stacks[2*i]);
Printf("\n");
Printf("Mutex %u was previously locked here:\n",
Printf("Mutex %llu was previously locked here:\n",
rep->mutexes[(i + 1) % rep->mutexes.Size()]->id);
PrintStack(rep->stacks[2*i + 1]);
Printf("\n");
5 changes: 3 additions & 2 deletions compiler-rt/lib/tsan/rtl/tsan_report.h
@@ -43,7 +43,7 @@ struct ReportStack {
};

struct ReportMopMutex {
int id;
u64 id;
bool write;
};

@@ -91,8 +91,9 @@ struct ReportThread {
};

struct ReportMutex {
int id;
u64 id;
uptr addr;
bool destroyed;
ReportStack *stack;
};

650 changes: 200 additions & 450 deletions compiler-rt/lib/tsan/rtl/tsan_rtl.cpp

Large diffs are not rendered by default.

317 changes: 156 additions & 161 deletions compiler-rt/lib/tsan/rtl/tsan_rtl.h

Large diffs are not rendered by default.

859 changes: 377 additions & 482 deletions compiler-rt/lib/tsan/rtl/tsan_rtl_access.cpp

Large diffs are not rendered by default.

642 changes: 311 additions & 331 deletions compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions compiler-rt/lib/tsan/rtl/tsan_rtl_proc.cpp
@@ -35,6 +35,7 @@ void ProcDestroy(Processor *proc) {
#if !SANITIZER_GO
AllocatorProcFinish(proc);
#endif
ctx->clock_alloc.FlushCache(&proc->clock_cache);
ctx->metamap.OnProcIdle(proc);
if (common_flags()->detect_deadlocks)
ctx->dd->DestroyPhysicalThread(proc->dd_pt);
368 changes: 232 additions & 136 deletions compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp

Large diffs are not rendered by default.

191 changes: 85 additions & 106 deletions compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp
@@ -21,14 +21,20 @@ namespace __tsan {

// ThreadContext implementation.

ThreadContext::ThreadContext(Tid tid) : ThreadContextBase(tid), thr(), sync() {}
ThreadContext::ThreadContext(Tid tid)
: ThreadContextBase(tid), thr(), sync(), epoch0(), epoch1() {}

#if !SANITIZER_GO
ThreadContext::~ThreadContext() {
}
#endif

void ThreadContext::OnReset() { CHECK(!sync); }
void ThreadContext::OnReset() {
CHECK_EQ(sync.size(), 0);
uptr trace_p = GetThreadTrace(tid);
ReleaseMemoryPagesToOS(trace_p, trace_p + TraceSize() * sizeof(Event));
//!!! ReleaseMemoryToOS(GetThreadTraceHeader(tid), sizeof(Trace));
}

#if !SANITIZER_GO
struct ThreadLeak {
@@ -106,35 +112,30 @@ int ThreadCount(ThreadState *thr) {
}

struct OnCreatedArgs {
VectorClock *sync;
uptr sync_epoch;
StackID stack;
ThreadState *thr;
uptr pc;
};

Tid ThreadCreate(ThreadState *thr, uptr pc, uptr uid, bool detached) {
// The main thread and GCD workers don't have a parent thread.
Tid parent = kInvalidTid;
OnCreatedArgs arg = {nullptr, 0, kInvalidStackID};
if (thr) {
parent = thr->tid;
arg.stack = CurrentStackId(thr, pc);
if (!thr->ignore_sync) {
SlotLocker locker(thr);
thr->clock.ReleaseStore(&arg.sync);
arg.sync_epoch = ctx->global_epoch;
IncrementEpoch(thr);
}
}
Tid tid = ctx->thread_registry.CreateThread(uid, detached, parent, &arg);
DPrintf("#%d: ThreadCreate tid=%d uid=%zu\n", parent, tid, uid);
OnCreatedArgs args = { thr, pc };
u32 parent_tid = thr ? thr->tid : kInvalidTid; // No parent for GCD workers.
Tid tid = ctx->thread_registry.CreateThread(uid, detached, parent_tid, &args);
DPrintf("#%d: ThreadCreate tid=%d uid=%zu\n", parent_tid, tid, uid);
return tid;
}

void ThreadContext::OnCreated(void *arg) {
thr = 0;
if (tid == kMainTid)
return;
OnCreatedArgs *args = static_cast<OnCreatedArgs *>(arg);
sync = args->sync;
sync_epoch = args->sync_epoch;
creation_stack_id = args->stack;
if (!args->thr) // GCD workers don't have a parent thread.
return;
args->thr->fast_state.IncrementEpoch();
// Can't increment epoch w/o writing to the trace as well.
TraceAddEvent(args->thr, args->thr->fast_state, EventTypeMop, 0);
ReleaseImpl(args->thr, 0, &sync);
creation_stack_id = CurrentStackId(args->thr, args->pc);
}

extern "C" void __tsan_stack_initialization() {}
@@ -149,15 +150,6 @@ struct OnStartedArgs {

void ThreadStart(ThreadState *thr, Tid tid, tid_t os_id,
ThreadType thread_type) {
ctx->thread_registry.StartThread(tid, os_id, thread_type, thr);
if (!thr->ignore_sync) {
SlotAttachAndLock(thr);
if (thr->tctx->sync_epoch == ctx->global_epoch)
thr->clock.Acquire(thr->tctx->sync);
SlotUnlock(thr);
}
Free(thr->tctx->sync);

uptr stk_addr = 0;
uptr stk_size = 0;
uptr tls_addr = 0;
@@ -167,10 +159,12 @@ void ThreadStart(ThreadState *thr, Tid tid, tid_t os_id,
GetThreadStackAndTls(tid == kMainTid, &stk_addr, &stk_size, &tls_addr,
&tls_size);
#endif
thr->stk_addr = stk_addr;
thr->stk_size = stk_size;
thr->tls_addr = tls_addr;
thr->tls_size = tls_size;

ThreadRegistry *tr = &ctx->thread_registry;
OnStartedArgs args = { thr, stk_addr, stk_size, tls_addr, tls_size };
tr->StartThread(tid, os_id, thread_type, &args);

while (!thr->tctx->trace.parts.Empty()) thr->tctx->trace.parts.PopBack();

#if !SANITIZER_GO
if (ctx->after_multithreaded_fork) {
@@ -198,76 +192,69 @@ void ThreadStart(ThreadState *thr, Tid tid, tid_t os_id,
}

void ThreadContext::OnStarted(void *arg) {
thr = static_cast<ThreadState *>(arg);
DPrintf("#%d: ThreadStart\n", tid);
new (thr) ThreadState(tid);
OnStartedArgs *args = static_cast<OnStartedArgs *>(arg);
thr = args->thr;
// RoundUp so that one trace part does not contain events
// from different threads.
epoch0 = RoundUp(epoch1 + 1, kTracePartSize);
epoch1 = (u64)-1;
new (thr)
ThreadState(ctx, tid, unique_id, epoch0, reuse_count, args->stk_addr,
args->stk_size, args->tls_addr, args->tls_size);
if (common_flags()->detect_deadlocks)
thr->dd_lt = ctx->dd->CreateLogicalThread(tid);
thr->dd_lt = ctx->dd->CreateLogicalThread(unique_id);
thr->fast_state.SetHistorySize(flags()->history_size);
// Commit switch to the new part of the trace.
// TraceAddEvent will reset stack0/mset0 in the new part for us.
TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);

thr->fast_synch_epoch = epoch0;
AcquireImpl(thr, 0, &sync);
sync.Reset(&thr->proc()->clock_cache);
thr->tctx = this;
#if !SANITIZER_GO
thr->is_inited = true;
#endif
DPrintf(
"#%d: ThreadStart epoch=%zu stk_addr=%zx stk_size=%zx "
"tls_addr=%zx tls_size=%zx\n",
tid, (uptr)epoch0, args->stk_addr, args->stk_size, args->tls_addr,
args->tls_size);
}

void ThreadFinish(ThreadState *thr) {
DPrintf("#%d: ThreadFinish\n", thr->tid);
ThreadCheckIgnore(thr);
if (thr->stk_addr && thr->stk_size)
DontNeedShadowFor(thr->stk_addr, thr->stk_size);
if (thr->tls_addr && thr->tls_size)
DontNeedShadowFor(thr->tls_addr, thr->tls_size);
thr->is_dead = true;
#if !SANITIZER_GO
thr->ignore_interceptors = true;
PlatformCleanUpThreadState(thr);
#endif
ctx->thread_registry.FinishThread(thr->tid);
}

void ThreadContext::OnFinished() {
#if SANITIZER_GO
Free(thr->shadow_stack);
thr->shadow_stack_pos = nullptr;
thr->shadow_stack_end = nullptr;
#endif
if (!thr->ignore_sync) {
SlotLocker locker(thr);
ThreadRegistryLock lock(&ctx->thread_registry);
// Note: detached is protected by the thread registry mutex,
// the thread may be detaching concurrently in another thread.
if (!thr->tctx->detached) {
thr->clock.ReleaseStore(&thr->tctx->sync);
thr->tctx->sync_epoch = ctx->global_epoch;
IncrementEpoch(thr);
}
if (!detached) {
thr->fast_state.IncrementEpoch();
// Can't increment epoch w/o writing to the trace as well.
TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
ReleaseImpl(thr, 0, &sync);
}
epoch1 = thr->fast_state.epoch();

if (common_flags()->detect_deadlocks)
ctx->dd->DestroyLogicalThread(thr->dd_lt);
SlotDetach(thr);
ctx->thread_registry.FinishThread(thr->tid);
thr->clock.ResetCached(&thr->proc()->clock_cache);
#if !SANITIZER_GO
thr->last_sleep_clock.ResetCached(&thr->proc()->clock_cache);
#endif
#if !SANITIZER_GO
PlatformCleanUpThreadState(thr);
#endif
thr->~ThreadState();
}

void ThreadContext::OnFinished() {
Lock lock(&ctx->slot_mtx);
Lock lock1(&trace.mtx);
// Queue all trace parts into the global recycle queue.
auto parts = &trace.parts;
while (trace.local_head) {
CHECK(parts->Queued(trace.local_head));
ctx->trace_part_recycle.PushBack(trace.local_head);
trace.local_head = parts->Next(trace.local_head);
}
ctx->trace_part_recycle_finished += parts->Size();
if (ctx->trace_part_recycle_finished > Trace::kFinishedThreadHi) {
ctx->trace_part_finished_excess += parts->Size();
trace.parts_allocated = 0;
} else if (ctx->trace_part_recycle_finished > Trace::kFinishedThreadLo &&
parts->Size() > 1) {
ctx->trace_part_finished_excess += parts->Size() - 1;
trace.parts_allocated = 1;
}
// From now on replay will use trace->final_pos.
trace.final_pos = (Event *)atomic_load_relaxed(&thr->trace_pos);
atomic_store_relaxed(&thr->trace_pos, 0);
thr->tctx = nullptr;
thr = nullptr;
thr = 0;
}

struct ConsumeThreadContext {
@@ -300,43 +287,35 @@ Tid ThreadConsumeTid(ThreadState *thr, uptr pc, uptr uid) {
return tid;
}

struct JoinArg {
VectorClock *sync;
uptr sync_epoch;
};

void ThreadJoin(ThreadState *thr, uptr pc, Tid tid) {
CHECK_GT(tid, 0);
CHECK_LT(tid, kMaxTid);
DPrintf("#%d: ThreadJoin tid=%d\n", thr->tid, tid);
JoinArg arg = {};
ctx->thread_registry.JoinThread(tid, &arg);
if (!thr->ignore_sync) {
SlotLocker locker(thr);
if (arg.sync_epoch == ctx->global_epoch)
thr->clock.Acquire(arg.sync);
}
Free(arg.sync);
ctx->thread_registry.JoinThread(tid, thr);
}

void ThreadContext::OnJoined(void *ptr) {
auto arg = static_cast<JoinArg *>(ptr);
arg->sync = sync;
arg->sync_epoch = sync_epoch;
sync = nullptr;
sync_epoch = 0;
void ThreadContext::OnJoined(void *arg) {
ThreadState *caller_thr = static_cast<ThreadState *>(arg);
AcquireImpl(caller_thr, 0, &sync);
sync.Reset(&caller_thr->proc()->clock_cache);
}

void ThreadContext::OnDead() { CHECK_EQ(sync, nullptr); }
void ThreadContext::OnDead() { CHECK_EQ(sync.size(), 0); }

void ThreadDetach(ThreadState *thr, uptr pc, Tid tid) {
CHECK_GT(tid, 0);
CHECK_LT(tid, kMaxTid);
ctx->thread_registry.DetachThread(tid, thr);
}

void ThreadContext::OnDetached(void *arg) { Free(sync); }
void ThreadContext::OnDetached(void *arg) {
ThreadState *thr1 = static_cast<ThreadState *>(arg);
sync.Reset(&thr1->proc()->clock_cache);
}

void ThreadNotJoined(ThreadState *thr, uptr pc, Tid tid, uptr uid) {
CHECK_GT(tid, 0);
CHECK_LT(tid, kMaxTid);
ctx->thread_registry.SetThreadUserId(tid, uid);
}

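A note on ThreadContext::OnStarted in the hunk above: epoch0 = RoundUp(epoch1 + 1, kTracePartSize) restarts a reused thread context at the next trace-part boundary, so a single trace part never mixes events from two threads. The arithmetic is illustrated below with an invented RoundUp helper and sample epochs; only the 1 << 13 part size is taken from the restored tsan_trace.h.

#include <cassert>
#include <cstdint>

// Illustration only: RoundUp and the sample epochs are assumptions, while
// kTracePartSize mirrors the 1 << 13 constant restored in tsan_trace.h.
constexpr uint64_t kTracePartSize = 1 << 13;

constexpr uint64_t RoundUp(uint64_t x, uint64_t align) {
  return (x + align - 1) / align * align;
}

int main() {
  uint64_t epoch1 = 20000;  // final epoch of the context's previous owner
  uint64_t epoch0 = RoundUp(epoch1 + 1, kTracePartSize);
  assert(epoch0 == 3 * kTracePartSize);  // 24576, the next part boundary
  // The old and the new owner now land in different trace parts.
  assert(epoch0 / kTracePartSize != epoch1 / kTracePartSize);
  return 0;
}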
294 changes: 183 additions & 111 deletions compiler-rt/lib/tsan/rtl/tsan_shadow.h
@@ -10,151 +10,223 @@
#define TSAN_SHADOW_H

#include "tsan_defs.h"
#include "tsan_trace.h"

namespace __tsan {

// FastState (from most significant bit):
// ignore : 1
// tid : kTidBits
// unused : -
// history_size : 3
// epoch : kClkBits
class FastState {
public:
FastState() { Reset(); }
FastState(u64 tid, u64 epoch) {
x_ = tid << kTidShift;
x_ |= epoch;
DCHECK_EQ(tid, this->tid());
DCHECK_EQ(epoch, this->epoch());
DCHECK_EQ(GetIgnoreBit(), false);
}

explicit FastState(u64 x) : x_(x) {}

u64 raw() const { return x_; }

void Reset() {
part_.unused0_ = 0;
part_.sid_ = kFreeSid;
part_.epoch_ = static_cast<u16>(kEpochLast);
part_.unused1_ = 0;
part_.ignore_accesses_ = false;
u64 tid() const {
u64 res = (x_ & ~kIgnoreBit) >> kTidShift;
return res;
}

void SetSid(Sid sid) { part_.sid_ = sid; }
u64 TidWithIgnore() const {
u64 res = x_ >> kTidShift;
return res;
}

Sid sid() const { return part_.sid_; }
u64 epoch() const {
u64 res = x_ & ((1ull << kClkBits) - 1);
return res;
}

Epoch epoch() const { return static_cast<Epoch>(part_.epoch_); }
void IncrementEpoch() {
u64 old_epoch = epoch();
x_ += 1;
DCHECK_EQ(old_epoch + 1, epoch());
(void)old_epoch;
}

void SetEpoch(Epoch epoch) { part_.epoch_ = static_cast<u16>(epoch); }
void SetIgnoreBit() { x_ |= kIgnoreBit; }
void ClearIgnoreBit() { x_ &= ~kIgnoreBit; }
bool GetIgnoreBit() const { return (s64)x_ < 0; }

void SetIgnoreBit() { part_.ignore_accesses_ = 1; }
void ClearIgnoreBit() { part_.ignore_accesses_ = 0; }
bool GetIgnoreBit() const { return (s32)raw_ < 0; }
void SetHistorySize(int hs) {
CHECK_GE(hs, 0);
CHECK_LE(hs, 7);
x_ = (x_ & ~(kHistoryMask << kHistoryShift)) | (u64(hs) << kHistoryShift);
}

ALWAYS_INLINE
int GetHistorySize() const {
return (int)((x_ >> kHistoryShift) & kHistoryMask);
}

void ClearHistorySize() { SetHistorySize(0); }

ALWAYS_INLINE
u64 GetTracePos() const {
const int hs = GetHistorySize();
// When hs == 0, the trace consists of 2 parts.
const u64 mask = (1ull << (kTracePartSizeBits + hs + 1)) - 1;
return epoch() & mask;
}

private:
friend class Shadow;
struct Parts {
u8 unused0_;
Sid sid_;
u16 epoch_ : kEpochBits;
u16 unused1_ : 1;
u16 ignore_accesses_ : 1;
};
union {
Parts part_;
u32 raw_;
};
static const int kTidShift = 64 - kTidBits - 1;
static const u64 kIgnoreBit = 1ull << 63;
static const u64 kFreedBit = 1ull << 63;
static const u64 kHistoryShift = kClkBits;
static const u64 kHistoryMask = 7;
u64 x_;
};

static_assert(sizeof(FastState) == kShadowSize, "bad FastState size");
// Shadow (from most significant bit):
// freed : 1
// tid : kTidBits
// is_atomic : 1
// is_read : 1
// size_log : 2
// addr0 : 3
// epoch : kClkBits
class Shadow : public FastState {
public:
explicit Shadow(u64 x) : FastState(x) {}

constexpr RawShadow kShadowEmpty = static_cast<RawShadow>(0);
// .rodata shadow marker, see MapRodata and ContainsSameAccessFast.
constexpr RawShadow kShadowRodata = static_cast<RawShadow>(0x40000000);
explicit Shadow(const FastState &s) : FastState(s.x_) { ClearHistorySize(); }

class Shadow {
public:
Shadow(FastState state, u32 addr, u32 size, AccessType typ) {
raw_ = state.raw_;
SetAccess(addr, size, typ);
}

explicit Shadow(RawShadow x = kShadowEmpty) { raw_ = static_cast<u32>(x); }

RawShadow raw() const { return static_cast<RawShadow>(raw_); }
Sid sid() const { return part_.sid_; }
Epoch epoch() const { return static_cast<Epoch>(part_.epoch_); }
u8 access() const { return part_.access_; }

void SetAccess(u32 addr, u32 size, AccessType typ) {
DCHECK_GT(size, 0);
DCHECK_LE(size, 8);
UNUSED Sid sid0 = part_.sid_;
UNUSED u16 epoch0 = part_.epoch_;
raw_ |= (!!(typ & kAccessAtomic) << 31) | (!!(typ & kAccessRead) << 30) |
((((1u << size) - 1) << (addr & 0x7)) & 0xff);
// Note: we don't check kAccessAtomic because it overlaps with
// FastState::ignore_accesses_ and it may be set spuriously.
DCHECK_EQ(part_.is_read_, !!(typ & kAccessRead));
DCHECK_EQ(sid(), sid0);
DCHECK_EQ(epoch(), epoch0);
}

void GetAccess(uptr *addr, uptr *size, AccessType *typ) const {
DCHECK(part_.access_);
if (addr)
*addr = __builtin_ffs(part_.access_) - 1;
if (size)
*size = part_.access_ == kFreeAccess ? kShadowCell
: __builtin_popcount(part_.access_);
if (typ)
*typ = (part_.is_read_ ? kAccessRead : kAccessWrite) |
(part_.is_atomic_ ? kAccessAtomic : 0) |
(part_.access_ == kFreeAccess ? kAccessFree : 0);
void SetAddr0AndSizeLog(u64 addr0, unsigned kAccessSizeLog) {
DCHECK_EQ((x_ >> kClkBits) & 31, 0);
DCHECK_LE(addr0, 7);
DCHECK_LE(kAccessSizeLog, 3);
x_ |= ((kAccessSizeLog << 3) | addr0) << kClkBits;
DCHECK_EQ(kAccessSizeLog, size_log());
DCHECK_EQ(addr0, this->addr0());
}

ALWAYS_INLINE
bool IsBothReadsOrAtomic(AccessType typ) const {
u32 is_read = !!(typ & kAccessRead);
u32 is_atomic = !!(typ & kAccessAtomic);
bool res = raw_ & ((is_atomic << 31) | (is_read << 30));
DCHECK_EQ(res,
(part_.is_read_ && is_read) || (part_.is_atomic_ && is_atomic));
void SetWrite(unsigned kAccessIsWrite) {
DCHECK_EQ(x_ & kReadBit, 0);
if (!kAccessIsWrite)
x_ |= kReadBit;
DCHECK_EQ(kAccessIsWrite, IsWrite());
}

void SetAtomic(bool kIsAtomic) {
DCHECK(!IsAtomic());
if (kIsAtomic)
x_ |= kAtomicBit;
DCHECK_EQ(IsAtomic(), kIsAtomic);
}

bool IsAtomic() const { return x_ & kAtomicBit; }

bool IsZero() const { return x_ == 0; }

static inline bool TidsAreEqual(const Shadow s1, const Shadow s2) {
u64 shifted_xor = (s1.x_ ^ s2.x_) >> kTidShift;
DCHECK_EQ(shifted_xor == 0, s1.TidWithIgnore() == s2.TidWithIgnore());
return shifted_xor == 0;
}

static ALWAYS_INLINE bool Addr0AndSizeAreEqual(const Shadow s1,
const Shadow s2) {
u64 masked_xor = ((s1.x_ ^ s2.x_) >> kClkBits) & 31;
return masked_xor == 0;
}

static ALWAYS_INLINE bool TwoRangesIntersect(Shadow s1, Shadow s2,
unsigned kS2AccessSize) {
bool res = false;
u64 diff = s1.addr0() - s2.addr0();
if ((s64)diff < 0) { // s1.addr0 < s2.addr0
// if (s1.addr0() + size1) > s2.addr0()) return true;
if (s1.size() > -diff)
res = true;
} else {
// if (s2.addr0() + kS2AccessSize > s1.addr0()) return true;
if (kS2AccessSize > diff)
res = true;
}
DCHECK_EQ(res, TwoRangesIntersectSlow(s1, s2));
DCHECK_EQ(res, TwoRangesIntersectSlow(s2, s1));
return res;
}

ALWAYS_INLINE
bool IsRWWeakerOrEqual(AccessType typ) const {
u32 is_read = !!(typ & kAccessRead);
u32 is_atomic = !!(typ & kAccessAtomic);
bool res = (raw_ & 0xc0000000) >= ((is_atomic << 31) | (is_read << 30));
DCHECK_EQ(res,
(part_.is_atomic_ > is_atomic) ||
(part_.is_atomic_ == is_atomic && part_.is_read_ >= is_read));
u64 ALWAYS_INLINE addr0() const { return (x_ >> kClkBits) & 7; }
u64 ALWAYS_INLINE size() const { return 1ull << size_log(); }
bool ALWAYS_INLINE IsWrite() const { return !IsRead(); }
bool ALWAYS_INLINE IsRead() const { return x_ & kReadBit; }

// The idea behind the freed bit is as follows.
// When the memory is freed (or otherwise unaccessible) we write to the shadow
// values with tid/epoch related to the free and the freed bit set.
// During memory accesses processing the freed bit is considered
// as msb of tid. So any access races with shadow with freed bit set
// (it is as if write from a thread with which we never synchronized before).
// This allows us to detect accesses to freed memory w/o additional
// overheads in memory access processing and at the same time restore
// tid/epoch of free.
void MarkAsFreed() { x_ |= kFreedBit; }

bool IsFreed() const { return x_ & kFreedBit; }

bool GetFreedAndReset() {
bool res = x_ & kFreedBit;
x_ &= ~kFreedBit;
return res;
}

// The FreedMarker must not pass "the same access check" so that we don't
// return from the race detection algorithm early.
static RawShadow FreedMarker() {
Shadow s;
s.part_.sid_ = kFreeSid;
s.part_.epoch_ = static_cast<u16>(kEpochLast);
s.SetAccess(0, 8, kAccessWrite);
return s.raw();
bool ALWAYS_INLINE IsBothReadsOrAtomic(bool kIsWrite, bool kIsAtomic) const {
bool v = x_ & ((u64(kIsWrite ^ 1) << kReadShift) |
(u64(kIsAtomic) << kAtomicShift));
DCHECK_EQ(v, (!IsWrite() && !kIsWrite) || (IsAtomic() && kIsAtomic));
return v;
}

bool ALWAYS_INLINE IsRWNotWeaker(bool kIsWrite, bool kIsAtomic) const {
bool v = ((x_ >> kReadShift) & 3) <= u64((kIsWrite ^ 1) | (kIsAtomic << 1));
DCHECK_EQ(v, (IsAtomic() < kIsAtomic) ||
(IsAtomic() == kIsAtomic && !IsWrite() <= !kIsWrite));
return v;
}

static RawShadow FreedInfo(Sid sid, Epoch epoch) {
Shadow s;
s.part_.sid_ = sid;
s.part_.epoch_ = static_cast<u16>(epoch);
s.part_.access_ = kFreeAccess;
return s.raw();
bool ALWAYS_INLINE IsRWWeakerOrEqual(bool kIsWrite, bool kIsAtomic) const {
bool v = ((x_ >> kReadShift) & 3) >= u64((kIsWrite ^ 1) | (kIsAtomic << 1));
DCHECK_EQ(v, (IsAtomic() > kIsAtomic) ||
(IsAtomic() == kIsAtomic && !IsWrite() >= !kIsWrite));
return v;
}

private:
struct Parts {
u8 access_;
Sid sid_;
u16 epoch_ : kEpochBits;
u16 is_read_ : 1;
u16 is_atomic_ : 1;
};
union {
Parts part_;
u32 raw_;
};

static constexpr u8 kFreeAccess = 0x81;
static const u64 kReadShift = 5 + kClkBits;
static const u64 kReadBit = 1ull << kReadShift;
static const u64 kAtomicShift = 6 + kClkBits;
static const u64 kAtomicBit = 1ull << kAtomicShift;

u64 size_log() const { return (x_ >> (3 + kClkBits)) & 3; }

static bool TwoRangesIntersectSlow(const Shadow s1, const Shadow s2) {
if (s1.addr0() == s2.addr0())
return true;
if (s1.addr0() < s2.addr0() && s1.addr0() + s1.size() > s2.addr0())
return true;
if (s2.addr0() < s1.addr0() && s2.addr0() + s2.size() > s1.addr0())
return true;
return false;
}
};

static_assert(sizeof(Shadow) == kShadowSize, "bad Shadow size");
const RawShadow kShadowRodata = (RawShadow)-1; // .rodata shadow marker

} // namespace __tsan

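For the tsan_shadow.h hunk above, the comment on MarkAsFreed carries the key reasoning: the freed bit is treated as an extra tid bit, so the shadow value written at free time can never be tid-equal with a later access, which makes a use-after-free show up as a race while the tid/epoch of the free stay recoverable. Below is a small self-contained model of that layout; the constants copy the shape of the diff but should be read as assumptions, not as the runtime itself.

#include <cassert>
#include <cstdint>

// Toy model of the restored FastState/Shadow word:
//   bit 63              : ignore bit (FastState) / freed bit (Shadow)
//   bits kTidShift..62  : tid
//   low kClkBits bits   : epoch
constexpr int kTidBits = 13;
constexpr int kClkBits = 42;
constexpr int kTidShift = 64 - kTidBits - 1;
constexpr uint64_t kFreedBit = 1ull << 63;

constexpr uint64_t Pack(uint64_t tid, uint64_t epoch) {
  return (tid << kTidShift) | epoch;
}
constexpr uint64_t Tid(uint64_t x) { return (x & ~kFreedBit) >> kTidShift; }
constexpr uint64_t Epoch(uint64_t x) { return x & ((1ull << kClkBits) - 1); }

int main() {
  uint64_t s = Pack(/*tid=*/11, /*epoch=*/22);
  assert(Tid(s) == 11 && Epoch(s) == 22);

  // MarkAsFreed: the freed bit behaves like the most significant tid bit, so
  // the freed shadow value never compares tid-equal with a live access (the
  // race is reported), yet tid/epoch of the free are still recoverable.
  uint64_t freed = s | kFreedBit;
  assert((freed >> kTidShift) != (s >> kTidShift));
  assert(Tid(freed) == 11 && Epoch(freed) == 22);
  return 0;
}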
82 changes: 36 additions & 46 deletions compiler-rt/lib/tsan/rtl/tsan_sync.cpp
@@ -18,31 +18,43 @@ namespace __tsan {

void DDMutexInit(ThreadState *thr, uptr pc, SyncVar *s);

SyncVar::SyncVar() : mtx(MutexTypeSyncVar) { Reset(); }
SyncVar::SyncVar() : mtx(MutexTypeSyncVar) { Reset(0); }

void SyncVar::Init(ThreadState *thr, uptr pc, uptr addr, bool save_stack) {
Reset();
void SyncVar::Init(ThreadState *thr, uptr pc, uptr addr, u64 uid,
bool save_stack) {
this->addr = addr;
next = 0;
this->uid = uid;
this->next = 0;

creation_stack_id = kInvalidStackID;
if (save_stack && !SANITIZER_GO) // Go does not use them
creation_stack_id = CurrentStackId(thr, pc);
if (common_flags()->detect_deadlocks)
DDMutexInit(thr, pc, this);
}

void SyncVar::Reset() {
CHECK(!ctx->resetting);
void SyncVar::Reset(Processor *proc) {
uid = 0;
creation_stack_id = kInvalidStackID;
owner_tid = kInvalidTid;
last_lock.Reset();
last_lock = 0;
recursion = 0;
atomic_store_relaxed(&flags, 0);
Free(clock);
Free(read_clock);

if (proc == 0) {
CHECK_EQ(clock.size(), 0);
CHECK_EQ(read_clock.size(), 0);
} else {
clock.Reset(&proc->clock_cache);
read_clock.Reset(&proc->clock_cache);
}
}

MetaMap::MetaMap()
: block_alloc_("heap block allocator"), sync_alloc_("sync allocator") {}
: block_alloc_(LINKER_INITIALIZED, "heap block allocator"),
sync_alloc_(LINKER_INITIALIZED, "sync allocator") {
atomic_store(&uid_gen_, 0, memory_order_relaxed);
}

void MetaMap::AllocBlock(ThreadState *thr, uptr pc, uptr p, uptr sz) {
u32 idx = block_alloc_.Alloc(&thr->proc()->block_cache);
@@ -56,16 +68,16 @@ void MetaMap::AllocBlock(ThreadState *thr, uptr pc, uptr p, uptr sz) {
*meta = idx | kFlagBlock;
}

uptr MetaMap::FreeBlock(Processor *proc, uptr p, bool reset) {
uptr MetaMap::FreeBlock(Processor *proc, uptr p) {
MBlock* b = GetBlock(p);
if (b == 0)
return 0;
uptr sz = RoundUpTo(b->siz, kMetaShadowCell);
FreeRange(proc, p, sz, reset);
FreeRange(proc, p, sz);
return sz;
}

bool MetaMap::FreeRange(Processor *proc, uptr p, uptr sz, bool reset) {
bool MetaMap::FreeRange(Processor *proc, uptr p, uptr sz) {
bool has_something = false;
u32 *meta = MemToMeta(p);
u32 *end = MemToMeta(p + sz);
@@ -87,8 +99,7 @@ bool MetaMap::FreeRange(Processor *proc, uptr p, uptr sz, bool reset) {
DCHECK(idx & kFlagSync);
SyncVar *s = sync_alloc_.Map(idx & ~kFlagMask);
u32 next = s->next;
if (reset)
s->Reset();
s->Reset(proc);
sync_alloc_.Free(&proc->sync_cache, idx & ~kFlagMask);
idx = next;
} else {
@@ -105,30 +116,30 @@ bool MetaMap::FreeRange(Processor *proc, uptr p, uptr sz, bool reset) {
// which can be huge. The function probes pages one-by-one until it finds a page
// without meta objects, at this point it stops freeing meta objects. Because
// thread stacks grow top-down, we do the same starting from end as well.
void MetaMap::ResetRange(Processor *proc, uptr p, uptr sz, bool reset) {
void MetaMap::ResetRange(Processor *proc, uptr p, uptr sz) {
if (SANITIZER_GO) {
// UnmapOrDie/MmapFixedNoReserve does not work on Windows,
// so we do the optimization only for C/C++.
FreeRange(proc, p, sz, reset);
FreeRange(proc, p, sz);
return;
}
const uptr kMetaRatio = kMetaShadowCell / kMetaShadowSize;
const uptr kPageSize = GetPageSizeCached() * kMetaRatio;
if (sz <= 4 * kPageSize) {
// If the range is small, just do the normal free procedure.
FreeRange(proc, p, sz, reset);
FreeRange(proc, p, sz);
return;
}
// First, round both ends of the range to page size.
uptr diff = RoundUp(p, kPageSize) - p;
if (diff != 0) {
FreeRange(proc, p, diff, reset);
FreeRange(proc, p, diff);
p += diff;
sz -= diff;
}
diff = p + sz - RoundDown(p + sz, kPageSize);
if (diff != 0) {
FreeRange(proc, p + sz - diff, diff, reset);
FreeRange(proc, p + sz - diff, diff);
sz -= diff;
}
// Now we must have a non-empty page-aligned range.
@@ -139,15 +150,15 @@ void MetaMap::ResetRange(Processor *proc, uptr p, uptr sz, bool reset) {
const uptr sz0 = sz;
// Probe start of the range.
for (uptr checked = 0; sz > 0; checked += kPageSize) {
bool has_something = FreeRange(proc, p, kPageSize, reset);
bool has_something = FreeRange(proc, p, kPageSize);
p += kPageSize;
sz -= kPageSize;
if (!has_something && checked > (128 << 10))
break;
}
// Probe end of the range.
for (uptr checked = 0; sz > 0; checked += kPageSize) {
bool has_something = FreeRange(proc, p + sz - kPageSize, kPageSize, reset);
bool has_something = FreeRange(proc, p + sz - kPageSize, kPageSize);
sz -= kPageSize;
// Stacks grow down, so sync object are most likely at the end of the region
// (if it is a stack). The very end of the stack is TLS and tsan increases
@@ -166,27 +177,6 @@ void MetaMap::ResetRange(Processor *proc, uptr p, uptr sz, bool reset) {
Die();
}

void MetaMap::ResetClocks() {
// This can be called from the background thread
// which does not have proc/cache.
// The cache is too large for stack.
static InternalAllocatorCache cache;
internal_memset(&cache, 0, sizeof(cache));
internal_allocator()->InitCache(&cache);
sync_alloc_.ForEach([&](SyncVar *s) {
if (s->clock) {
InternalFree(s->clock, &cache);
s->clock = nullptr;
}
if (s->read_clock) {
InternalFree(s->read_clock, &cache);
s->read_clock = nullptr;
}
s->last_lock.Reset();
});
internal_allocator()->DestroyCache(&cache);
}

MBlock* MetaMap::GetBlock(uptr p) {
u32 *meta = MemToMeta(p);
u32 idx = *meta;
@@ -203,7 +193,6 @@ MBlock* MetaMap::GetBlock(uptr p) {

SyncVar *MetaMap::GetSync(ThreadState *thr, uptr pc, uptr addr, bool create,
bool save_stack) {
DCHECK(!create || thr->slot_locked);
u32 *meta = MemToMeta(addr);
u32 idx0 = *meta;
u32 myidx = 0;
@@ -214,7 +203,7 @@ SyncVar *MetaMap::GetSync(ThreadState *thr, uptr pc, uptr addr, bool create,
SyncVar * s = sync_alloc_.Map(idx & ~kFlagMask);
if (LIKELY(s->addr == addr)) {
if (UNLIKELY(myidx != 0)) {
mys->Reset();
mys->Reset(thr->proc());
sync_alloc_.Free(&thr->proc()->sync_cache, myidx);
}
return s;
@@ -229,9 +218,10 @@
}

if (LIKELY(myidx == 0)) {
const u64 uid = atomic_fetch_add(&uid_gen_, 1, memory_order_relaxed);
myidx = sync_alloc_.Alloc(&thr->proc()->sync_cache);
mys = sync_alloc_.Map(myidx);
mys->Init(thr, pc, addr, save_stack);
mys->Init(thr, pc, addr, uid, save_stack);
}
mys->next = idx0;
if (atomic_compare_exchange_strong((atomic_uint32_t*)meta, &idx0,
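The comment above MetaMap::ResetRange describes the strategy for huge ranges (typically thread stacks): free metadata page by page from both ends and stop once enough consecutive empty pages are seen, since stacks are populated only near their ends. The sketch below models only that probing loop; the page count, the cutoff and the FreePage() helper are invented for illustration.

#include <cstdio>
#include <vector>

// Sketch of the two-ended probing strategy: FreePage() stands in for freeing
// one page worth of metadata and reports whether anything was there.
static bool FreePage(std::vector<bool> &has_meta, size_t page) {
  bool had = has_meta[page];
  has_meta[page] = false;
  return had;
}

int main() {
  const size_t kPages = 1 << 12, kCutoffPages = 32;  // stand-in for 128 KB
  std::vector<bool> has_meta(kPages, false);
  // Only the ends of the "stack" carry metadata.
  for (size_t i = 0; i < 4; i++) has_meta[i] = has_meta[kPages - 1 - i] = true;

  size_t freed = 0, checked = 0;
  for (size_t p = 0; p < kPages; p++, checked++) {  // probe from the start
    bool had = FreePage(has_meta, p);
    freed += had;
    if (!had && checked > kCutoffPages) break;
  }
  checked = 0;
  for (size_t p = kPages; p-- > 0; checked++) {     // probe from the end
    bool had = FreePage(has_meta, p);
    freed += had;
    if (!had && checked > kCutoffPages) break;
  }
  std::printf("freed %zu populated pages without touching all %zu pages\n",
              freed, kPages);
}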
48 changes: 26 additions & 22 deletions compiler-rt/lib/tsan/rtl/tsan_sync.h
@@ -15,11 +15,9 @@
#include "sanitizer_common/sanitizer_atomic.h"
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_deadlock_detector_interface.h"
#include "tsan_clock.h"
#include "tsan_defs.h"
#include "tsan_clock.h"
#include "tsan_dense_alloc.h"
#include "tsan_shadow.h"
#include "tsan_vector_clock.h"

namespace __tsan {

@@ -55,18 +53,34 @@ struct SyncVar {

uptr addr; // overwritten by DenseSlabAlloc freelist
Mutex mtx;
u64 uid; // Globally unique id.
StackID creation_stack_id;
Tid owner_tid; // Set only by exclusive owners.
FastState last_lock;
u64 last_lock;
int recursion;
atomic_uint32_t flags;
u32 next; // in MetaMap
DDMutex dd;
VectorClock *read_clock; // Used for rw mutexes only.
VectorClock *clock;
SyncClock read_clock; // Used for rw mutexes only.
// The clock is placed last, so that it is situated on a different cache line
// with the mtx. This reduces contention for hot sync objects.
SyncClock clock;

void Init(ThreadState *thr, uptr pc, uptr addr, bool save_stack);
void Reset();
void Init(ThreadState *thr, uptr pc, uptr addr, u64 uid, bool save_stack);
void Reset(Processor *proc);

u64 GetId() const {
// 48 lsb is addr, then 14 bits is low part of uid, then 2 zero bits.
return GetLsb((u64)addr | (uid << 48), 60);
}
bool CheckId(u64 uid) const {
CHECK_EQ(uid, GetLsb(uid, 14));
return GetLsb(this->uid, 14) == uid;
}
static uptr SplitId(u64 id, u64 *uid) {
*uid = id >> 48;
return (uptr)GetLsb(id, 48);
}

bool IsFlagSet(u32 f) const {
return atomic_load_relaxed(&flags) & f;
@@ -96,20 +110,9 @@ class MetaMap {
MetaMap();

void AllocBlock(ThreadState *thr, uptr pc, uptr p, uptr sz);

// FreeBlock resets all sync objects in the range if reset=true and must not
// run concurrently with ResetClocks which resets all sync objects
// w/o any synchronization (as part of DoReset).
// If we don't have a thread slot (very early/late in thread lifetime or
// Go/Java callbacks) or the slot is not locked, then reset must be set to
// false. In such case sync object clocks will be reset later (when it's
// reused or during the next ResetClocks).
uptr FreeBlock(Processor *proc, uptr p, bool reset);
bool FreeRange(Processor *proc, uptr p, uptr sz, bool reset);
void ResetRange(Processor *proc, uptr p, uptr sz, bool reset);
// Reset vector clocks of all sync objects.
// Must be called when no other threads access sync objects.
void ResetClocks();
uptr FreeBlock(Processor *proc, uptr p);
bool FreeRange(Processor *proc, uptr p, uptr sz);
void ResetRange(Processor *proc, uptr p, uptr sz);
MBlock* GetBlock(uptr p);

SyncVar *GetSyncOrCreate(ThreadState *thr, uptr pc, uptr addr,
@@ -139,6 +142,7 @@ class MetaMap {
typedef DenseSlabAlloc<SyncVar, 1 << 20, 1 << 10, kFlagMask> SyncAlloc;
BlockAlloc block_alloc_;
SyncAlloc sync_alloc_;
atomic_uint64_t uid_gen_;

SyncVar *GetSync(ThreadState *thr, uptr pc, uptr addr, bool create,
bool save_stack);
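The SyncVar::GetId()/SplitId() pair restored in tsan_sync.h packs the mutex address and part of its globally unique uid into a single 64-bit id (per the comment in the diff: the low 48 bits are the address, the bits above hold low uid bits, and 60 bits are kept in total). The sketch below reproduces only that packing arithmetic; Lsb/GetId/SplitId here are stand-in helpers, not the tsan API.

#include <cassert>
#include <cstdint>

// Illustrative helpers mirroring the packing rule described above.
constexpr uint64_t Lsb(uint64_t v, int bits) {
  return v & ((1ull << bits) - 1);
}
constexpr uint64_t GetId(uint64_t addr, uint64_t uid) {
  // Low 48 bits carry the address, the bits above carry low uid bits;
  // only 60 bits are kept in total, as in the diff.
  return Lsb(addr | (uid << 48), 60);
}
uint64_t SplitId(uint64_t id, uint64_t *uid) {
  *uid = id >> 48;
  return Lsb(id, 48);
}

int main() {
  uint64_t addr = 0x00007f1234567890ull;  // a typical 48-bit user-space address
  uint64_t uid = 0x1abc;
  uint64_t id = GetId(addr, uid);
  uint64_t uid_part = 0;
  assert(SplitId(id, &uid_part) == addr);  // the address round-trips
  assert(uid_part == Lsb(uid, 12));        // 60 - 48 = 12 uid bits survive here
  return 0;
}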
115 changes: 59 additions & 56 deletions compiler-rt/lib/tsan/rtl/tsan_trace.h
@@ -19,6 +19,57 @@

namespace __tsan {

const int kTracePartSizeBits = 13;
const int kTracePartSize = 1 << kTracePartSizeBits;
const int kTraceParts = 2 * 1024 * 1024 / kTracePartSize;
const int kTraceSize = kTracePartSize * kTraceParts;

// Must fit into 3 bits.
enum EventType {
EventTypeMop,
EventTypeFuncEnter,
EventTypeFuncExit,
EventTypeLock,
EventTypeUnlock,
EventTypeRLock,
EventTypeRUnlock
};

// Represents a thread event (from most significant bit):
// u64 typ : 3; // EventType.
// u64 addr : 61; // Associated pc.
typedef u64 Event;

const uptr kEventPCBits = 61;

struct TraceHeader {
#if !SANITIZER_GO
BufferedStackTrace stack0; // Start stack for the trace.
#else
VarSizeStackTrace stack0;
#endif
u64 epoch0; // Start epoch for the trace.
MutexSet mset0;

TraceHeader() : stack0(), epoch0() {}
};

struct Trace {
Mutex mtx;
#if !SANITIZER_GO
// Must be last to catch overflow as paging fault.
// Go shadow stack is dynamically allocated.
uptr shadow_stack[kShadowStackSize];
#endif
// Must be the last field, because we unmap the unused part in
// CreateThreadContext.
TraceHeader headers[kTraceParts];

Trace() : mtx(MutexTypeTrace) {}
};

namespace v3 {

enum class EventType : u64 {
kAccessExt,
kAccessRange,
Expand Down Expand Up @@ -48,8 +99,6 @@ static constexpr Event NopEvent = {1, 0, EventType::kAccessExt, 0};
// close enough to each other. Otherwise we fall back to EventAccessExt.
struct EventAccess {
static constexpr uptr kPCBits = 15;
static_assert(kPCBits + kCompressedAddrBits + 5 == 64,
"unused bits in EventAccess");

u64 is_access : 1; // = 1
u64 is_read : 1;
@@ -70,23 +119,13 @@ static_assert(sizeof(EventFunc) == 8, "bad EventFunc size");

// Extended memory access with full PC.
struct EventAccessExt {
// Note: precisely specifying the unused parts of the bitfield is critical for
// performance. If we don't specify them, compiler will generate code to load
// the old value and shuffle it to extract the unused bits to apply to the new
// value. If we specify the unused part and store 0 in there, all that
// unnecessary code goes away (store of the 0 const is combined with other
// constant parts).
static constexpr uptr kUnusedBits = 11;
static_assert(kCompressedAddrBits + kUnusedBits + 9 == 64,
"unused bits in EventAccessExt");

u64 is_access : 1; // = 0
u64 is_func : 1; // = 0
EventType type : 3; // = EventType::kAccessExt
u64 is_read : 1;
u64 is_atomic : 1;
u64 size_log : 2;
u64 _ : kUnusedBits;
u64 _ : 11;
u64 addr : kCompressedAddrBits;
u64 pc;
};
@@ -95,8 +134,6 @@ static_assert(sizeof(EventAccessExt) == 16, "bad EventAccessExt size");
// Access to a memory range.
struct EventAccessRange {
static constexpr uptr kSizeLoBits = 13;
static_assert(kCompressedAddrBits + kSizeLoBits + 7 == 64,
"unused bits in EventAccessRange");

u64 is_access : 1; // = 0
u64 is_func : 1; // = 0
@@ -113,51 +150,36 @@ static_assert(sizeof(EventAccessRange) == 16, "bad EventAccessRange size");
// Mutex lock.
struct EventLock {
static constexpr uptr kStackIDLoBits = 15;
static constexpr uptr kStackIDHiBits =
sizeof(StackID) * kByteBits - kStackIDLoBits;
static constexpr uptr kUnusedBits = 3;
static_assert(kCompressedAddrBits + kStackIDLoBits + 5 == 64,
"unused bits in EventLock");
static_assert(kCompressedAddrBits + kStackIDHiBits + kUnusedBits == 64,
"unused bits in EventLock");

u64 is_access : 1; // = 0
u64 is_func : 1; // = 0
EventType type : 3; // = EventType::kLock or EventType::kRLock
u64 pc : kCompressedAddrBits;
u64 stack_lo : kStackIDLoBits;
u64 stack_hi : sizeof(StackID) * kByteBits - kStackIDLoBits;
u64 _ : kUnusedBits;
u64 _ : 3;
u64 addr : kCompressedAddrBits;
};
static_assert(sizeof(EventLock) == 16, "bad EventLock size");

// Mutex unlock.
struct EventUnlock {
static constexpr uptr kUnusedBits = 15;
static_assert(kCompressedAddrBits + kUnusedBits + 5 == 64,
"unused bits in EventUnlock");

u64 is_access : 1; // = 0
u64 is_func : 1; // = 0
EventType type : 3; // = EventType::kUnlock
u64 _ : kUnusedBits;
u64 _ : 15;
u64 addr : kCompressedAddrBits;
};
static_assert(sizeof(EventUnlock) == 8, "bad EventUnlock size");

// Time change event.
struct EventTime {
static constexpr uptr kUnusedBits = 37;
static_assert(kUnusedBits + sizeof(Sid) * kByteBits + kEpochBits + 5 == 64,
"unused bits in EventTime");

u64 is_access : 1; // = 0
u64 is_func : 1; // = 0
EventType type : 3; // = EventType::kTime
u64 sid : sizeof(Sid) * kByteBits;
u64 epoch : kEpochBits;
u64 _ : kUnusedBits;
u64 _ : 64 - 5 - sizeof(Sid) * kByteBits - kEpochBits;
};
static_assert(sizeof(EventTime) == 8, "bad EventTime size");

@@ -166,16 +188,10 @@
struct TraceHeader {
Trace* trace = nullptr; // back-pointer to Trace containing this part
INode trace_parts; // in Trace::parts
INode global; // in Contex::trace_part_recycle
};

struct TracePart : TraceHeader {
// There are a lot of goroutines in Go, so we use smaller parts.
#if SANITIZER_GO
static constexpr uptr kByteSize = 128 << 10;
#else
static constexpr uptr kByteSize = 256 << 10;
#endif
static constexpr uptr kSize =
(kByteSize - sizeof(TraceHeader)) / sizeof(Event);
// TraceAcquire does a fast event pointer overflow check by comparing
Expand All @@ -193,27 +209,14 @@ static_assert(sizeof(TracePart) == TracePart::kByteSize, "bad TracePart size");
struct Trace {
Mutex mtx;
IList<TraceHeader, &TraceHeader::trace_parts, TracePart> parts;
// First node non-queued into ctx->trace_part_recycle.
TracePart* local_head;
// Final position in the last part for finished threads.
Event* final_pos = nullptr;
// Number of trace parts allocated on behalf of this trace specifically.
// Total number of parts in this trace can be larger if we retake some
// parts from other traces.
uptr parts_allocated = 0;
Event* final_pos =
nullptr; // final position in the last part for finished threads

Trace() : mtx(MutexTypeTrace) {}

// We need at least 3 parts per thread, because we want to keep at last
// 2 parts per thread that are not queued into ctx->trace_part_recycle
// (the current one being filled and one full part that ensures that
// we always have at least one part worth of previous memory accesses).
static constexpr uptr kMinParts = 3;

static constexpr uptr kFinishedThreadLo = 16;
static constexpr uptr kFinishedThreadHi = 64;
};

} // namespace v3

} // namespace __tsan

#endif // TSAN_TRACE_H
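The v2 trace types restored above encode one event per 64-bit word: the top 3 bits hold the EventType and the low kEventPCBits = 61 bits hold the pc or address, with kTracePartSize = 1 << 13 events per part. A hedged sketch of that encoding follows; only the bit widths come from the diff, the helper functions are assumptions.

#include <cassert>
#include <cstdint>

constexpr int kEventPCBits = 61;       // from the diff
constexpr int kTracePartSizeBits = 13;

enum ToyEventType : uint64_t {  // illustrative subset of EventType
  kMop = 0, kFuncEnter, kFuncExit, kLock
};

constexpr uint64_t MakeEvent(ToyEventType typ, uint64_t pc) {
  return (uint64_t(typ) << kEventPCBits) | (pc & ((1ull << kEventPCBits) - 1));
}
constexpr ToyEventType TypeOf(uint64_t ev) {
  return ToyEventType(ev >> kEventPCBits);
}
constexpr uint64_t PCOf(uint64_t ev) {
  return ev & ((1ull << kEventPCBits) - 1);
}

int main() {
  uint64_t ev = MakeEvent(kFuncEnter, 0x1234abcd);
  assert(TypeOf(ev) == kFuncEnter);
  assert(PCOf(ev) == 0x1234abcd);
  assert((1 << kTracePartSizeBits) == 8192);  // events per trace part
  return 0;
}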
59 changes: 59 additions & 0 deletions compiler-rt/lib/tsan/rtl/tsan_update_shadow_word.inc
@@ -0,0 +1,59 @@
//===-- tsan_update_shadow_word.inc -----------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
// Body of the hottest inner loop.
// If we wrap this body into a function, compilers (both gcc and clang)
// produce slightly less efficient code.
//===----------------------------------------------------------------------===//
do {
const unsigned kAccessSize = 1 << kAccessSizeLog;
u64 *sp = &shadow_mem[idx];
old = LoadShadow(sp);
if (LIKELY(old.IsZero())) {
if (!stored) {
StoreIfNotYetStored(sp, &store_word);
stored = true;
}
break;
}
// is the memory access equal to the previous?
if (LIKELY(Shadow::Addr0AndSizeAreEqual(cur, old))) {
// same thread?
if (LIKELY(Shadow::TidsAreEqual(old, cur))) {
if (LIKELY(old.IsRWWeakerOrEqual(kAccessIsWrite, kIsAtomic))) {
StoreIfNotYetStored(sp, &store_word);
stored = true;
}
break;
}
if (HappensBefore(old, thr)) {
if (old.IsRWWeakerOrEqual(kAccessIsWrite, kIsAtomic)) {
StoreIfNotYetStored(sp, &store_word);
stored = true;
}
break;
}
if (LIKELY(old.IsBothReadsOrAtomic(kAccessIsWrite, kIsAtomic)))
break;
goto RACE;
}
// Do the memory accesses intersect?
if (Shadow::TwoRangesIntersect(old, cur, kAccessSize)) {
if (Shadow::TidsAreEqual(old, cur))
break;
if (old.IsBothReadsOrAtomic(kAccessIsWrite, kIsAtomic))
break;
if (LIKELY(HappensBefore(old, thr)))
break;
goto RACE;
}
// The accesses do not intersect.
break;
} while (0);
30 changes: 30 additions & 0 deletions compiler-rt/lib/tsan/tests/unit/tsan_shadow_test.cpp
@@ -15,6 +15,36 @@

namespace __tsan {

TEST(Shadow, FastState) {
Shadow s(FastState(11, 22));
EXPECT_EQ(s.tid(), (u64)11);
EXPECT_EQ(s.epoch(), (u64)22);
EXPECT_EQ(s.GetIgnoreBit(), false);
EXPECT_EQ(s.GetFreedAndReset(), false);
EXPECT_EQ(s.GetHistorySize(), 0);
EXPECT_EQ(s.addr0(), (u64)0);
EXPECT_EQ(s.size(), (u64)1);
EXPECT_EQ(s.IsWrite(), true);

s.IncrementEpoch();
EXPECT_EQ(s.epoch(), (u64)23);
s.IncrementEpoch();
EXPECT_EQ(s.epoch(), (u64)24);

s.SetIgnoreBit();
EXPECT_EQ(s.GetIgnoreBit(), true);
s.ClearIgnoreBit();
EXPECT_EQ(s.GetIgnoreBit(), false);

for (int i = 0; i < 8; i++) {
s.SetHistorySize(i);
EXPECT_EQ(s.GetHistorySize(), i);
}
s.SetHistorySize(2);
s.ClearHistorySize();
EXPECT_EQ(s.GetHistorySize(), 0);
}

TEST(Shadow, Mapping) {
static int global;
int stack;
4 changes: 2 additions & 2 deletions compiler-rt/lib/tsan/tests/unit/tsan_stack_test.cpp
@@ -18,7 +18,7 @@ namespace __tsan {

template <typename StackTraceTy>
static void TestStackTrace(StackTraceTy *trace) {
ThreadState thr(kMainTid);
ThreadState thr(0, 0, 0, 0, 0, 0, 0, 0, 0);

ObtainCurrentStack(&thr, 0, trace);
EXPECT_EQ(0U, trace->size);
@@ -43,7 +43,7 @@

template<typename StackTraceTy>
static void TestTrim(StackTraceTy *trace) {
ThreadState thr(kMainTid);
ThreadState thr(0, 0, 0, 0, 0, 0, 0, 0, 0);

for (uptr i = 0; i < 2 * kStackTraceMax; ++i)
*thr.shadow_stack_pos++ = 100 + i;
17 changes: 6 additions & 11 deletions compiler-rt/lib/tsan/tests/unit/tsan_sync_test.cpp
@@ -17,23 +17,21 @@ namespace __tsan {

TEST(MetaMap, Basic) {
ThreadState *thr = cur_thread();
SlotLocker locker(thr);
MetaMap *m = &ctx->metamap;
u64 block[1] = {}; // fake malloc block
m->AllocBlock(thr, 0, (uptr)&block[0], 1 * sizeof(u64));
MBlock *mb = m->GetBlock((uptr)&block[0]);
CHECK_NE(mb, (MBlock *)0);
CHECK_EQ(mb->siz, 1 * sizeof(u64));
CHECK_EQ(mb->tid, thr->tid);
uptr sz = m->FreeBlock(thr->proc(), (uptr)&block[0], true);
uptr sz = m->FreeBlock(thr->proc(), (uptr)&block[0]);
CHECK_EQ(sz, 1 * sizeof(u64));
mb = m->GetBlock((uptr)&block[0]);
CHECK_EQ(mb, (MBlock *)0);
}

TEST(MetaMap, FreeRange) {
ThreadState *thr = cur_thread();
SlotLocker locker(thr);
MetaMap *m = &ctx->metamap;
u64 block[4] = {}; // fake malloc block
m->AllocBlock(thr, 0, (uptr)&block[0], 1 * sizeof(u64));
@@ -42,7 +40,7 @@ TEST(MetaMap, FreeRange) {
CHECK_EQ(mb1->siz, 1 * sizeof(u64));
MBlock *mb2 = m->GetBlock((uptr)&block[1]);
CHECK_EQ(mb2->siz, 3 * sizeof(u64));
m->FreeRange(thr->proc(), (uptr)&block[0], 4 * sizeof(u64), true);
m->FreeRange(thr->proc(), (uptr)&block[0], 4 * sizeof(u64));
mb1 = m->GetBlock((uptr)&block[0]);
CHECK_EQ(mb1, (MBlock *)0);
mb2 = m->GetBlock((uptr)&block[1]);
@@ -54,7 +52,6 @@ TEST(MetaMap, Sync) {
// them from detecting that we exit runtime with mutexes held.
ScopedIgnoreInterceptors ignore;
ThreadState *thr = cur_thread();
SlotLocker locker(thr);
MetaMap *m = &ctx->metamap;
u64 block[4] = {}; // fake malloc block
m->AllocBlock(thr, 0, (uptr)&block[0], 4 * sizeof(u64));
@@ -66,7 +63,7 @@
SyncVar *s2 = m->GetSyncOrCreate(thr, 0, (uptr)&block[1], false);
CHECK_NE(s2, (SyncVar *)0);
CHECK_EQ(s2->addr, (uptr)&block[1]);
m->FreeBlock(thr->proc(), (uptr)&block[0], true);
m->FreeBlock(thr->proc(), (uptr)&block[0]);
s1 = m->GetSyncIfExists((uptr)&block[0]);
CHECK_EQ(s1, (SyncVar *)0);
s2 = m->GetSyncIfExists((uptr)&block[1]);
@@ -77,7 +74,6 @@ TEST(MetaMap, MoveMemory) {
TEST(MetaMap, MoveMemory) {
ScopedIgnoreInterceptors ignore;
ThreadState *thr = cur_thread();
SlotLocker locker(thr);
MetaMap *m = &ctx->metamap;
u64 block1[4] = {}; // fake malloc block
u64 block2[4] = {}; // fake malloc block
@@ -106,19 +102,18 @@ TEST(MetaMap, MoveMemory) {
s2 = m->GetSyncIfExists((uptr)&block2[1]);
CHECK_NE(s2, (SyncVar *)0);
CHECK_EQ(s2->addr, (uptr)&block2[1]);
m->FreeRange(thr->proc(), (uptr)&block2[0], 4 * sizeof(u64), true);
m->FreeRange(thr->proc(), (uptr)&block2[0], 4 * sizeof(u64));
}

TEST(MetaMap, ResetSync) {
ScopedIgnoreInterceptors ignore;
ThreadState *thr = cur_thread();
SlotLocker locker(thr);
MetaMap *m = &ctx->metamap;
u64 block[1] = {}; // fake malloc block
m->AllocBlock(thr, 0, (uptr)&block[0], 1 * sizeof(u64));
SyncVar *s = m->GetSyncOrCreate(thr, 0, (uptr)&block[0], false);
s->Reset();
uptr sz = m->FreeBlock(thr->proc(), (uptr)&block[0], true);
s->Reset(thr->proc());
uptr sz = m->FreeBlock(thr->proc(), (uptr)&block[0]);
CHECK_EQ(sz, 1 * sizeof(u64));
}

176 changes: 39 additions & 137 deletions compiler-rt/lib/tsan/tests/unit/tsan_trace_test.cpp
@@ -31,6 +31,8 @@

namespace __tsan {

using namespace v3;

// We need to run all trace tests in a new thread,
// so that the thread trace is empty initially.
template <uptr N>
@@ -76,30 +78,27 @@ TRACE_TEST(Trace, RestoreAccess) {
ThreadArray<1> thr;
TraceFunc(thr, 0x1000);
TraceFunc(thr, 0x1001);
TraceMutexLock(thr, EventType::kLock, 0x4000, 0x5000, 0x6000);
TraceMutexLock(thr, EventType::kLock, 0x4001, 0x5001, 0x6001);
TraceMutexLock(thr, v3::EventType::kLock, 0x4000, 0x5000, 0x6000);
TraceMutexLock(thr, v3::EventType::kLock, 0x4001, 0x5001, 0x6001);
TraceMutexUnlock(thr, 0x5000);
TraceFunc(thr);
CHECK(TryTraceMemoryAccess(thr, 0x2001, 0x3001, 8, kAccessRead));
TraceMutexLock(thr, EventType::kRLock, 0x4002, 0x5002, 0x6002);
TraceMutexLock(thr, v3::EventType::kRLock, 0x4002, 0x5002, 0x6002);
TraceFunc(thr, 0x1002);
CHECK(TryTraceMemoryAccess(thr, 0x2000, 0x3000, 8, kAccessRead));
// This is the access we want to find.
// The previous one is equivalent, but RestoreStack must prefer
// the last of the matchig accesses.
CHECK(TryTraceMemoryAccess(thr, 0x2002, 0x3000, 8, kAccessRead));
SlotPairLocker locker(thr, thr->fast_state.sid());
ThreadRegistryLock lock1(&ctx->thread_registry);
Lock lock2(&ctx->slot_mtx);
Tid tid = kInvalidTid;
Lock lock1(&ctx->slot_mtx);
ThreadRegistryLock lock2(&ctx->thread_registry);
VarSizeStackTrace stk;
MutexSet mset;
uptr tag = kExternalTagNone;
bool res = RestoreStack(EventType::kAccessExt, thr->fast_state.sid(),
thr->fast_state.epoch(), 0x3000, 8, kAccessRead, &tid,
&stk, &mset, &tag);
bool res =
RestoreStack(thr->tid, v3::EventType::kAccessExt, thr->sid, thr->epoch,
0x3000, 8, kAccessRead, &stk, &mset, &tag);
CHECK(res);
CHECK_EQ(tid, thr->tid);
CHECK_EQ(stk.size, 3);
CHECK_EQ(stk.trace[0], 0x1000);
CHECK_EQ(stk.trace[1], 0x1002);
Expand Down Expand Up @@ -148,17 +147,14 @@ TRACE_TEST(Trace, MemoryAccessSize) {
kAccessRead);
break;
}
SlotPairLocker locker(thr, thr->fast_state.sid());
ThreadRegistryLock lock1(&ctx->thread_registry);
Lock lock2(&ctx->slot_mtx);
Tid tid = kInvalidTid;
Lock lock1(&ctx->slot_mtx);
ThreadRegistryLock lock2(&ctx->thread_registry);
VarSizeStackTrace stk;
MutexSet mset;
uptr tag = kExternalTagNone;
bool res =
RestoreStack(EventType::kAccessExt, thr->fast_state.sid(),
thr->fast_state.epoch(), 0x3000 + params.offset,
params.size, kAccessRead, &tid, &stk, &mset, &tag);
bool res = RestoreStack(thr->tid, v3::EventType::kAccessExt, thr->sid,
thr->epoch, 0x3000 + params.offset, params.size,
kAccessRead, &stk, &mset, &tag);
CHECK_EQ(res, params.res);
if (params.res) {
CHECK_EQ(stk.size, 2);
@@ -173,19 +169,16 @@ TRACE_TEST(Trace, RestoreMutexLock) {
// Check of restoration of a mutex lock event.
ThreadArray<1> thr;
TraceFunc(thr, 0x1000);
TraceMutexLock(thr, EventType::kLock, 0x4000, 0x5000, 0x6000);
TraceMutexLock(thr, EventType::kRLock, 0x4001, 0x5001, 0x6001);
TraceMutexLock(thr, EventType::kRLock, 0x4002, 0x5001, 0x6002);
SlotPairLocker locker(thr, thr->fast_state.sid());
ThreadRegistryLock lock1(&ctx->thread_registry);
Lock lock2(&ctx->slot_mtx);
Tid tid = kInvalidTid;
TraceMutexLock(thr, v3::EventType::kLock, 0x4000, 0x5000, 0x6000);
TraceMutexLock(thr, v3::EventType::kRLock, 0x4001, 0x5001, 0x6001);
TraceMutexLock(thr, v3::EventType::kRLock, 0x4002, 0x5001, 0x6002);
Lock lock1(&ctx->slot_mtx);
ThreadRegistryLock lock2(&ctx->thread_registry);
VarSizeStackTrace stk;
MutexSet mset;
uptr tag = kExternalTagNone;
bool res = RestoreStack(EventType::kLock, thr->fast_state.sid(),
thr->fast_state.epoch(), 0x5001, 0, 0, &tid, &stk,
&mset, &tag);
bool res = RestoreStack(thr->tid, v3::EventType::kLock, thr->sid, thr->epoch,
0x5001, 0, 0, &stk, &mset, &tag);
CHECK(res);
CHECK_EQ(stk.size, 2);
CHECK_EQ(stk.trace[0], 0x1000);
@@ -202,131 +195,40 @@ TRACE_TEST(Trace, RestoreMutexLock) {
TRACE_TEST(Trace, MultiPart) {
// Check replay of a trace with multiple parts.
ThreadArray<1> thr;
FuncEntry(thr, 0x1000);
FuncEntry(thr, 0x2000);
MutexPreLock(thr, 0x4000, 0x5000, 0);
MutexPostLock(thr, 0x4000, 0x5000, 0);
MutexPreLock(thr, 0x4000, 0x5000, 0);
MutexPostLock(thr, 0x4000, 0x5000, 0);
const uptr kEvents = 3 * sizeof(TracePart) / sizeof(Event);
TraceFunc(thr, 0x1000);
TraceFunc(thr, 0x2000);
TraceMutexLock(thr, v3::EventType::kLock, 0x4000, 0x5000, 0x6000);
const uptr kEvents = 3 * sizeof(TracePart) / sizeof(v3::Event);
for (uptr i = 0; i < kEvents; i++) {
FuncEntry(thr, 0x3000);
MutexPreLock(thr, 0x4002, 0x5002, 0);
MutexPostLock(thr, 0x4002, 0x5002, 0);
MutexUnlock(thr, 0x4003, 0x5002, 0);
FuncExit(thr);
TraceFunc(thr, 0x3000);
TraceMutexLock(thr, v3::EventType::kLock, 0x4002, 0x5002, 0x6002);
TraceMutexUnlock(thr, 0x5002);
TraceFunc(thr);
}
FuncEntry(thr, 0x4000);
TraceMutexLock(thr, EventType::kRLock, 0x4001, 0x5001, 0x6001);
TraceFunc(thr, 0x4000);
TraceMutexLock(thr, v3::EventType::kRLock, 0x4001, 0x5001, 0x6001);
CHECK(TryTraceMemoryAccess(thr, 0x2002, 0x3000, 8, kAccessRead));
SlotPairLocker locker(thr, thr->fast_state.sid());
ThreadRegistryLock lock1(&ctx->thread_registry);
Lock lock2(&ctx->slot_mtx);
Tid tid = kInvalidTid;
Lock lock1(&ctx->slot_mtx);
ThreadRegistryLock lock2(&ctx->thread_registry);
VarSizeStackTrace stk;
MutexSet mset;
uptr tag = kExternalTagNone;
bool res = RestoreStack(EventType::kAccessExt, thr->fast_state.sid(),
thr->fast_state.epoch(), 0x3000, 8, kAccessRead, &tid,
&stk, &mset, &tag);
bool res =
RestoreStack(thr->tid, v3::EventType::kAccessExt, thr->sid, thr->epoch,
0x3000, 8, kAccessRead, &stk, &mset, &tag);
CHECK(res);
CHECK_EQ(tid, thr->tid);
CHECK_EQ(stk.size, 4);
CHECK_EQ(stk.trace[0], 0x1000);
CHECK_EQ(stk.trace[1], 0x2000);
CHECK_EQ(stk.trace[2], 0x4000);
CHECK_EQ(stk.trace[3], 0x2002);
CHECK_EQ(mset.Size(), 2);
CHECK_EQ(mset.Get(0).addr, 0x5000);
CHECK_EQ(mset.Get(0).stack_id, 0x6000);
CHECK_EQ(mset.Get(0).write, true);
CHECK_EQ(mset.Get(0).count, 2);
CHECK_EQ(mset.Get(1).addr, 0x5001);
CHECK_EQ(mset.Get(1).stack_id, 0x6001);
CHECK_EQ(mset.Get(1).write, false);
CHECK_EQ(mset.Get(1).count, 1);
}

void CheckTraceState(uptr count, uptr finished, uptr excess, uptr recycle) {
Lock l(&ctx->slot_mtx);
Printf("CheckTraceState(%zu/%zu, %zu/%zu, %zu/%zu, %zu/%zu)\n",
ctx->trace_part_total_allocated, count,
ctx->trace_part_recycle_finished, finished,
ctx->trace_part_finished_excess, excess,
ctx->trace_part_recycle.Size(), recycle);
CHECK_EQ(ctx->trace_part_total_allocated, count);
CHECK_EQ(ctx->trace_part_recycle_finished, finished);
CHECK_EQ(ctx->trace_part_finished_excess, excess);
CHECK_EQ(ctx->trace_part_recycle.Size(), recycle);
}

TRACE_TEST(TraceAlloc, SingleThread) {
TraceResetForTesting();
auto check_thread = [&](ThreadState *thr, uptr size, uptr count,
uptr finished, uptr excess, uptr recycle) {
CHECK_EQ(thr->tctx->trace.parts.Size(), size);
CheckTraceState(count, finished, excess, recycle);
};
ThreadArray<2> threads;
check_thread(threads[0], 0, 0, 0, 0, 0);
TraceSwitchPartImpl(threads[0]);
check_thread(threads[0], 1, 1, 0, 0, 0);
TraceSwitchPartImpl(threads[0]);
check_thread(threads[0], 2, 2, 0, 0, 0);
TraceSwitchPartImpl(threads[0]);
check_thread(threads[0], 3, 3, 0, 0, 1);
TraceSwitchPartImpl(threads[0]);
check_thread(threads[0], 3, 3, 0, 0, 1);
threads.Finish(0);
CheckTraceState(3, 3, 0, 3);
threads.Finish(1);
CheckTraceState(3, 3, 0, 3);
}

TRACE_TEST(TraceAlloc, FinishedThreadReuse) {
TraceResetForTesting();
constexpr uptr Lo = Trace::kFinishedThreadLo;
constexpr uptr Hi = Trace::kFinishedThreadHi;
constexpr uptr kThreads = 4 * Hi;
ThreadArray<kThreads> threads;
for (uptr i = 0; i < kThreads; i++) {
Printf("thread %zu\n", i);
TraceSwitchPartImpl(threads[i]);
if (i <= Hi)
CheckTraceState(i + 1, i, 0, i);
else if (i <= 2 * Hi)
CheckTraceState(Hi + 1, Hi, i - Hi, Hi);
else
CheckTraceState(Hi + 1, Hi, Hi, Hi);
threads.Finish(i);
if (i < Hi)
CheckTraceState(i + 1, i + 1, 0, i + 1);
else if (i < 2 * Hi)
CheckTraceState(Hi + 1, Hi + 1, i - Hi + 1, Hi + 1);
else
CheckTraceState(Hi + 1, Hi + 1, Hi + 1, Hi + 1);
}
}

TRACE_TEST(TraceAlloc, FinishedThreadReuse2) {
TraceResetForTesting();
// constexpr uptr Lo = Trace::kFinishedThreadLo;
// constexpr uptr Hi = Trace::kFinishedThreadHi;
constexpr uptr Min = Trace::kMinParts;
constexpr uptr kThreads = 10;
constexpr uptr kParts = 2 * Min;
ThreadArray<kThreads> threads;
for (uptr i = 0; i < kThreads; i++) {
Printf("thread %zu\n", i);
for (uptr j = 0; j < kParts; j++) TraceSwitchPartImpl(threads[i]);
if (i == 0)
CheckTraceState(Min, 0, 0, 1);
else
CheckTraceState(2 * Min, 0, Min, Min + 1);
threads.Finish(i);
if (i == 0)
CheckTraceState(Min, Min, 0, Min);
else
CheckTraceState(2 * Min, 2 * Min, Min, 2 * Min);
}
}

} // namespace __tsan
2 changes: 1 addition & 1 deletion compiler-rt/test/tsan/free_race2.c
@@ -28,7 +28,7 @@ int main() {
}

// CHECK: WARNING: ThreadSanitizer: heap-use-after-free
// CHECK: Write of size {{.*}} at {{.*}} by main thread:
// CHECK: Write of size 8 at {{.*}} by main thread:
// CHECK: #0 bar
// CHECK: #1 main
// CHECK: Previous write of size 8 at {{.*}} by main thread:
2 changes: 1 addition & 1 deletion compiler-rt/test/tsan/memcmp_race.cpp
@@ -34,7 +34,7 @@ int main() {

// CHECK: addr=[[ADDR:0x[0-9,a-f]+]]
// CHECK: WARNING: ThreadSanitizer: data race
// CHECK: Write of size 3 at [[ADDR]] by thread T2:
// CHECK: Write of size 1 at [[ADDR]] by thread T2:
// CHECK: #0 {{(memcpy|memmove)}}
// CHECK: #{{[12]}} Thread2
// CHECK: Previous read of size 1 at [[ADDR]] by thread T1:
10 changes: 4 additions & 6 deletions compiler-rt/test/tsan/memcpy_race.cpp
@@ -22,8 +22,7 @@ void *Thread2(void *x) {

int main() {
barrier_init(&barrier, 2);
print_address("addr1=", 1, &data[3]);
print_address("addr2=", 1, &data[5]);
print_address("addr=", 1, &data[5]);
pthread_t t[2];
pthread_create(&t[0], NULL, Thread1, NULL);
pthread_create(&t[1], NULL, Thread2, NULL);
@@ -32,12 +31,11 @@ int main() {
return 0;
}

// CHECK: addr1=[[ADDR1:0x[0-9,a-f]+]]
// CHECK: addr2=[[ADDR2:0x[0-9,a-f]+]]
// CHECK: addr=[[ADDR:0x[0-9,a-f]+]]
// CHECK: WARNING: ThreadSanitizer: data race
// CHECK: Write of size 4 at [[ADDR1]] by thread T2:
// CHECK: Write of size 1 at [[ADDR]] by thread T2:
// CHECK: #0 {{(memcpy|memmove)}}
// CHECK: #{{[12]}} Thread2
// CHECK: Previous write of size 1 at [[ADDR2]] by thread T1:
// CHECK: Previous write of size 1 at [[ADDR]] by thread T1:
// CHECK: #0 {{(memcpy|memmove)}}
// CHECK: #{{[12]}} Thread1
6 changes: 3 additions & 3 deletions compiler-rt/test/tsan/mutexset7.cpp
@@ -36,6 +36,6 @@ int main() {
// CHECK: Write of size 4 at {{.*}} by thread T1:
// CHECK: Previous write of size 4 at {{.*}} by thread T2
// CHECK: (mutexes: write [[M1:M[0-9]+]]):
// CHECK: Mutex [[M1]] (0x{{.*}}) created at:
// CHECK: #0 pthread_mutex_init
// CHECK: #1 Thread2
// CHECK: Mutex [[M1]] is already destroyed
// CHECK-NOT: Mutex {{.*}} created at