diff --git a/libkineto/include/ActivityProfilerInterface.h b/libkineto/include/ActivityProfilerInterface.h index 92cfc14d7..9098c09bf 100644 --- a/libkineto/include/ActivityProfilerInterface.h +++ b/libkineto/include/ActivityProfilerInterface.h @@ -9,6 +9,7 @@ #include #include +#include #include #include "ActivityType.h" @@ -76,6 +77,10 @@ class ActivityProfilerInterface { virtual bool enableForRegion(const std::string& match) { return true; } + + // Maps kernel thread id -> pthread id for CPU ops. + // Client must record any new kernel thread where the activity has occurred. + virtual void recordThreadInfo(pid_t tid, pthread_t pthreadId) {} }; } // namespace libkineto diff --git a/libkineto/include/ClientTraceActivity.h b/libkineto/include/ClientTraceActivity.h index 3c4e54094..d98156f35 100644 --- a/libkineto/include/ClientTraceActivity.h +++ b/libkineto/include/ClientTraceActivity.h @@ -73,7 +73,6 @@ struct ClientTraceActivity : TraceActivity { int64_t correlation{0}; int device{-1}; // TODO: Add OS abstraction - pthread_t pthreadId{}; int32_t sysThreadId{0}; std::string opType; diff --git a/libkineto/src/ActivityProfiler.cpp b/libkineto/src/ActivityProfiler.cpp index d4e343946..96e39a43d 100644 --- a/libkineto/src/ActivityProfiler.cpp +++ b/libkineto/src/ActivityProfiler.cpp @@ -183,11 +183,9 @@ void ActivityProfiler::processCpuTrace( CpuGpuSpanPair& span_pair = recordTraceSpan(cpuTrace.span, cpuTrace.gpuOpCount); TraceSpan& cpu_span = span_pair.first; for (auto const& act : cpuTrace.activities) { - VLOG(2) << act.correlationId() << ": OP " << act.opType - << " tid: " << act.pthreadId; + VLOG(2) << act.correlationId() << ": OP " << act.opType; if (logTrace) { logger.handleCpuActivity(act, cpu_span); - recordThreadInfo(act.sysThreadId, act.pthreadId); } // Stash event so we can look it up later when processing GPU trace externalEvents_.insertEvent(&act); diff --git a/libkineto/src/ActivityProfiler.h b/libkineto/src/ActivityProfiler.h index 9d9958ee6..094faef4c 
100644 --- a/libkineto/src/ActivityProfiler.h +++ b/libkineto/src/ActivityProfiler.h @@ -102,6 +102,15 @@ class ActivityProfiler { return *config_; } + inline void recordThreadInfo(pid_t tid, pthread_t pthreadId) { + std::lock_guard guard(mutex_); + if (threadInfo_.find((int32_t)pthreadId) == threadInfo_.end()) { + threadInfo_.emplace( + (int32_t)pthreadId, + ThreadInfo((int32_t) tid, getThreadName(tid))); + } + } + private: class ExternalEventMap { public: @@ -256,14 +265,6 @@ class ActivityProfiler { disabledTraceSpans_.end(); } - inline void recordThreadInfo(pid_t tid, pthread_t pthreadId) { - if (threadInfo_.find((int32_t)pthreadId) == threadInfo_.end()) { - threadInfo_.emplace( - (int32_t)pthreadId, - ThreadInfo((int32_t) tid, getThreadName(tid))); - } - } - void resetTraceData(); void addOverheadSample(profilerOverhead& counter, int64_t overhead) { diff --git a/libkineto/src/ActivityProfilerController.h b/libkineto/src/ActivityProfilerController.h index 33540109f..c14e61466 100644 --- a/libkineto/src/ActivityProfilerController.h +++ b/libkineto/src/ActivityProfilerController.h @@ -57,6 +57,10 @@ class ActivityProfilerController { return profiler_->transferCpuTrace(std::move(cpuTrace)); } + void recordThreadInfo(pid_t tid, pthread_t pthreadId) { + profiler_->recordThreadInfo(tid, pthreadId); + } + private: void profilerLoop(); diff --git a/libkineto/src/ActivityProfilerProxy.cpp b/libkineto/src/ActivityProfilerProxy.cpp index 3eddbe41c..304b225fd 100644 --- a/libkineto/src/ActivityProfilerProxy.cpp +++ b/libkineto/src/ActivityProfilerProxy.cpp @@ -84,4 +84,8 @@ bool ActivityProfilerProxy::enableForRegion(const std::string& match) { return controller_->traceInclusionFilter(match); } +void ActivityProfilerProxy::recordThreadInfo(pid_t tid, pthread_t pthreadId) { + controller_->recordThreadInfo(tid, pthreadId); +} + } // namespace libkineto diff --git a/libkineto/src/ActivityProfilerProxy.h b/libkineto/src/ActivityProfilerProxy.h index 1eebfd63c..c6a6d18f6 100644 
--- a/libkineto/src/ActivityProfilerProxy.h +++ b/libkineto/src/ActivityProfilerProxy.h @@ -40,6 +40,8 @@ class ActivityProfilerProxy : public ActivityProfilerInterface { bool isActive() override; + void recordThreadInfo(pid_t tid, pthread_t pthreadId) override; + void scheduleTrace(const std::string& configStr) override; void scheduleTrace(const Config& config); diff --git a/libkineto/test/ActivityProfilerTest.cpp b/libkineto/test/ActivityProfilerTest.cpp index b99d4ef0f..5cab255c0 100644 --- a/libkineto/test/ActivityProfilerTest.cpp +++ b/libkineto/test/ActivityProfilerTest.cpp @@ -45,7 +45,6 @@ struct MockCpuActivityBuffer : public CpuTraceBuffer { op.startTime = startTime; op.endTime = endTime; op.device = 0; - op.pthreadId = pthread_self(); op.sysThreadId = 123; op.correlation = correlation; activities.push_back(std::move(op)); @@ -253,6 +252,8 @@ TEST_F(ActivityProfilerTest, SyncTrace) { profiler.startTrace(start_time); profiler.stopTrace(start_time + microseconds(duration_us)); + profiler.recordThreadInfo(123, pthread_self()); + // Log some cpu ops auto cpuOps = std::make_unique( start_time_us, start_time_us + duration_us); @@ -338,6 +339,8 @@ TEST_F(ActivityProfilerTest, CorrelatedTimestampTest) { // When launching kernel, the CPU event should always precede the GPU event. int64_t kernelLaunchTime = 120; + profiler.recordThreadInfo(123, pthread_self()); + // set up CPU event auto cpuOps = std::make_unique( start_time_us, start_time_us + duration_us);