Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Collect more info on cudaEventRecord for stream wait sync events #808

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
9 changes: 7 additions & 2 deletions libkineto/src/CuptiActivity.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,16 @@ inline bool isEventSync(CUpti_ActivitySynchronizationType type) {
}

inline std::string eventSyncInfo(
const CUpti_ActivitySynchronization& act, int32_t srcStream) {
const CUpti_ActivitySynchronization& act,
int32_t srcStream,
int32_t srcCorrId
) {
return fmt::format(R"JSON(
"wait_on_stream": {},
"wait_on_cuda_event_record_corr_id": {},
"wait_on_cuda_event_id": {},)JSON",
srcStream,
srcCorrId,
act.cudaEventId
);
}
Expand Down Expand Up @@ -75,7 +80,7 @@ inline const std::string CudaSyncActivity::metadataJson() const {
"stream": {}, "correlation": {},
"device": {}, "context": {})JSON",
syncTypeString(sync.type),
isEventSync(raw().type) ? eventSyncInfo(raw(), srcStream_) : "",
isEventSync(raw().type) ? eventSyncInfo(raw(), srcStream_, srcCorrId_) : "",
sync.streamId, sync.correlationId,
deviceId(), sync.contextId);
// clang-format on
Expand Down
8 changes: 6 additions & 2 deletions libkineto/src/CuptiActivity.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,11 @@ struct CudaSyncActivity : public CuptiActivity<CUpti_ActivitySynchronization> {
explicit CudaSyncActivity(
const CUpti_ActivitySynchronization* activity,
const ITraceActivity* linked,
int32_t srcStream)
: CuptiActivity(activity, linked), srcStream_(srcStream) {}
int32_t srcStream,
int32_t srcCorrId)
: CuptiActivity(activity, linked),
srcStream_(srcStream),
srcCorrId_(srcCorrId) {}
int64_t correlationId() const override {return raw().correlationId;}
int64_t deviceId() const override;
int64_t resourceId() const override;
Expand All @@ -143,6 +146,7 @@ struct CudaSyncActivity : public CuptiActivity<CUpti_ActivitySynchronization> {

private:
const int32_t srcStream_;
const int32_t srcCorrId_;
};


Expand Down
44 changes: 30 additions & 14 deletions libkineto/src/CuptiActivityProfiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <atomic>
#include <functional>
#include <iomanip>
#include <optional>
#include <string>
#include <thread>
#include <type_traits>
Expand Down Expand Up @@ -66,11 +67,18 @@ struct std::hash<CtxEventPair> {
}
};

// Information about the most recent record of a CUDA event, stored per
// (context, event id) key so that a later stream-wait sync activity can be
// linked back to where the event was recorded.
//
// Both members carry default initializers so that a default-constructed
// instance has well-defined contents. The type stays an aggregate (C++14 and
// later), so `WaitEventInfo{stream, correlationId}` keeps working.
struct WaitEventInfo {
  // CUDA stream that the CUDA event was recorded on
  uint32_t stream{0};
  // Correlation ID of the cudaEventRecord event
  uint32_t correlationId{0};
};

namespace {

// Map (ctx, eventId) -> stream that recorded the cudaEvent
std::unordered_map<CtxEventPair, uint32_t>& waitEventMap() {
static std::unordered_map<CtxEventPair, uint32_t> waitEventMap_;
// Map (ctx, eventId) -> (stream, correlation ID) that recorded the CUDA event
std::unordered_map<CtxEventPair, WaitEventInfo>& waitEventMap() {
static std::unordered_map<CtxEventPair, WaitEventInfo> waitEventMap_;
return waitEventMap_;
}

Expand Down Expand Up @@ -415,12 +423,10 @@ inline static bool isBlockListedRuntimeCbid(CUpti_CallbackId cbid) {
if (cbid == CUPTI_RUNTIME_TRACE_CBID_cudaGetDevice_v3020 ||
cbid == CUPTI_RUNTIME_TRACE_CBID_cudaSetDevice_v3020 ||
cbid == CUPTI_RUNTIME_TRACE_CBID_cudaGetLastError_v3020 ||
// Don't care about cudaEvents
// Support cudaEventRecord and cudaEventSynchronize, revisit if others are needed
cbid == CUPTI_RUNTIME_TRACE_CBID_cudaEventCreate_v3020 ||
cbid == CUPTI_RUNTIME_TRACE_CBID_cudaEventCreateWithFlags_v3020 ||
cbid == CUPTI_RUNTIME_TRACE_CBID_cudaEventRecord_v3020 ||
cbid == CUPTI_RUNTIME_TRACE_CBID_cudaEventDestroy_v3020 ||
cbid == CUPTI_RUNTIME_TRACE_CBID_cudaEventSynchronize_v3020) {
cbid == CUPTI_RUNTIME_TRACE_CBID_cudaEventDestroy_v3020) {
return true;
}

Expand Down Expand Up @@ -499,13 +505,14 @@ void CuptiActivityProfiler::handleOverheadActivity(
}


int32_t getStreamForWaitEvent(uint32_t ctx, uint32_t eventId) {
std::optional<WaitEventInfo> getWaitEventInfo(
uint32_t ctx, uint32_t eventId) {
auto key = CtxEventPair{ctx, eventId};
auto it = waitEventMap().find(key);
if (it != waitEventMap().end()) {
return it->second;
}
return -1;
return std::nullopt;
}

void CuptiActivityProfiler::handleCudaEventActivity(
Expand All @@ -516,9 +523,9 @@ void CuptiActivityProfiler::handleCudaEventActivity(
<< " streamId=" << activity->streamId
<< " contextId=" << activity->contextId;

// Update the stream the cudaEvent was last recorded on
// Update the stream and correlation ID the cudaEvent was last recorded with
auto key = CtxEventPair{activity->contextId, activity->eventId};
waitEventMap()[key] = activity->streamId;
waitEventMap()[key] = WaitEventInfo{activity->streamId, activity->correlationId};
}

void CuptiActivityProfiler::handleCudaSyncActivity(
Expand Down Expand Up @@ -555,10 +562,19 @@ void CuptiActivityProfiler::handleCudaSyncActivity(

const ITraceActivity* linked =
linkedActivity(activity->correlationId, cpuCorrelationMap_);
int32_t src_stream = getStreamForWaitEvent(
activity->contextId, activity->cudaEventId);
int32_t src_stream = -1, src_corrid = -1;

if (isEventSync(activity->type)) {
auto maybe_wait_event_info = getWaitEventInfo(
activity->contextId, activity->cudaEventId);
if (maybe_wait_event_info) {
src_stream = maybe_wait_event_info->stream;
src_corrid = maybe_wait_event_info->correlationId;
}
}

const auto& cuda_sync_activity = traceBuffers_->addActivityWrapper(
CudaSyncActivity(activity, linked, src_stream));
CudaSyncActivity(activity, linked, src_stream, src_corrid));

if (outOfRange(cuda_sync_activity)) {
return;
Expand Down