Report the allocating thread's ID with every memory sample.

* src/include/sampleheap.hpp: append a thread ID as a tenth, trailing field
  of each sample record. The ID is widened to unsigned long long and printed
  with %llu so the format specifier matches the argument on every ABI.
  pthread_threadid_np() is only used on Apple platforms; Linux uses gettid();
  everything else falls back to casting pthread_self().
* scalene/scalene_profiler.py: alloc_sigqueue_processor unpacks the new
  trailing field, converts it to int like the other numeric fields, and
  carries it through the per-sample tuples.
* src/source/pywhere.cpp: compile out (#if 0) the fallback that substituted
  the main thread's frame when the current thread state had no frame.

NOTE(review): recovered from a whitespace-collapsed copy. Indentation of
context lines is reconstructed, and `PyPtr frame` / `static_cast(frame)`
appear to have lost their template arguments in extraction and are left as
found; apply with whitespace tolerance or regenerate against the tree. The
post-image blob hashes on the `index` lines predate the fixes above.
NOTE(review): gettid() needs a libc that exports it; confirm the minimum
supported glibc or switch to syscall(SYS_gettid).

diff --git a/scalene/scalene_profiler.py b/scalene/scalene_profiler.py
index 230b6c994..c697c8886 100644
--- a/scalene/scalene_profiler.py
+++ b/scalene/scalene_profiler.py
@@ -1393,6 +1393,7 @@ def alloc_sigqueue_processor(x: Optional[List[int]]) -> None:
                     reported_fname,
                     reported_lineno,
                     bytei_str,
+                    thread_id,
                 ) = count_str.split(",")
                 if int(curr_pid) != int(pid):
                     continue
@@ -1406,6 +1407,7 @@ def alloc_sigqueue_processor(x: Optional[List[int]]) -> None:
                         Filename(reported_fname),
                         LineNumber(int(reported_lineno)),
                         ByteCodeIndex(int(bytei_str)),
+                        int(thread_id),
                     )
                 )
 
@@ -1427,6 +1429,7 @@ def alloc_sigqueue_processor(x: Optional[List[int]]) -> None:
                 fname,
                 lineno,
                 bytei,
+                thread_id,
             ) = item
             is_malloc = action == Scalene.MALLOC_ACTION
             count /= Scalene.BYTES_PER_MB
@@ -1491,6 +1494,7 @@ def alloc_sigqueue_processor(x: Optional[List[int]]) -> None:
                 fname,
                 lineno,
                 bytei,
+                thread_id,
             ) = item
             is_malloc = action == Scalene.MALLOC_ACTION
 
diff --git a/src/include/sampleheap.hpp b/src/include/sampleheap.hpp
index 3102a04b4..e97d655b5 100644
--- a/src/include/sampleheap.hpp
+++ b/src/include/sampleheap.hpp
@@ -291,18 +291,33 @@ class SampleHeap : public SuperHeap {
     if (_pythonCount == 0) {
       _pythonCount = 1; // prevent 0/0
     }
+    // Get the thread ID, which must match the logic used by Python.
+    uint64_t thread_id;
+#if defined(__APPLE__)
+    // pthread_threadid_np() is Darwin-specific; it yields the "actual" thread ID.
+    pthread_threadid_np(pthread_self(), &thread_id);
+#elif defined(__linux__)
+    // On Linux, use gettid().
+    thread_id = (uint64_t) gettid();
+#else
+    // On other systems, cast pthread_self and hope for the best.
+    thread_id = (uint64_t) pthread_self();
+#endif
+
     snprintf_(
         buf, sizeof(buf),
 #if defined(__APPLE__)
-        "%c,%llu,%llu,%f,%d,%p,%s,%d,%d\n\n",
+        "%c,%llu,%llu,%f,%d,%p,%s,%d,%d,%llu\n\n",
 #else
-        "%c,%lu,%lu,%f,%d,%p,%s,%d,%d\n\n",
+        "%c,%lu,%lu,%f,%d,%p,%s,%d,%d,%llu\n\n",
 #endif
         ((sig == MallocSignal) ? 'M' : ((_freedLastMallocTrigger) ? 'f' : 'F')),
         mallocTriggered() + freeTriggered(), count,
         (float)_pythonCount / (_pythonCount + _cCount), getpid(),
         _freedLastMallocTrigger ? _lastMallocTrigger : ptr, filename.c_str(),
-        lineno, bytei);
+        lineno,
+        bytei,
+        (unsigned long long) thread_id);
     // Ensure we don't report last-malloc-freed multiple times.
     _freedLastMallocTrigger = false;
     getSampleFile().writeToFile(buf);
diff --git a/src/source/pywhere.cpp b/src/source/pywhere.cpp
index 16ab27d59..50fe4a1e7 100644
--- a/src/source/pywhere.cpp
+++ b/src/source/pywhere.cpp
@@ -251,12 +251,16 @@ int whereInPython(std::string& filename, int& lineno, int& bytei) {
   PyPtr frame =
       threadState ? PyThreadState_GetFrame(threadState) : nullptr;
 
+  // EDB: Below commented out to allow correct attribution to individual threads.
+  // As long as the frames contain file and line number information, we are good.
+#if 0
   if (static_cast(frame) == nullptr) {
     // Various packages may create native threads; attribute what they do
     // to what the main thread is doing, as it's likely to have requested it.
     frame = findMainPythonThread_frame(); // note this may be nullptr
   }
-
+#endif
+
   auto traceConfig = TraceConfig::getInstance();
   if (!traceConfig) {
     return 0;