Skip to content

Commit

Permalink
Fix xprofilez integration_tests:xprofilez_handler_gpu_test fail
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 642298315
  • Loading branch information
tensorflower-gardener committed Jun 11, 2024
1 parent 0f05e18 commit 17cedab
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 11 deletions.
12 changes: 9 additions & 3 deletions third_party/xla/xla/backends/profiler/gpu/cupti_collector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -508,7 +508,8 @@ class EventInQueue {
} // namespace

void CuptiTraceCollector::OnTracerCollectedCallbackData(
std::vector<CallbackAnnotationsAndEvents> callback_annotations_and_events) {
std::vector<CallbackAnnotationsAndEvents> callback_annotations_and_events,
bool need_callback_events) {
// Build merged annotation first.
std::priority_queue<EventInQueue> min_heap;
for (auto& annotations_and_events : callback_annotations_and_events) {
Expand Down Expand Up @@ -545,6 +546,9 @@ void CuptiTraceCollector::OnTracerCollectedCallbackData(
}
}

// If we are not collecting CPU events from Callback API, we can return now.
if (!need_callback_events) return;

size_t total_dropped_callback_event_count = 0;
for (auto& annotations_and_events : callback_annotations_and_events) {
for (auto& event : annotations_and_events.event_queue()) {
Expand Down Expand Up @@ -604,8 +608,10 @@ class CuptiTraceCollectorImpl : public CuptiTraceCollector {
}

void OnTracerCollectedCallbackData(
std::vector<CallbackAnnotationsAndEvents> callback_events) override {
std::vector<CallbackAnnotationsAndEvents> callback_events,
bool need_callback_events) override {
callback_events_ = std::move(callback_events);
need_callback_events_ = need_callback_events;
}

void OnTracerCachedActivityBuffers(
Expand All @@ -620,7 +626,7 @@ class CuptiTraceCollectorImpl : public CuptiTraceCollector {
// because the AnnotationMap is populated from the callback API events and
// queried by the activity API events.
CuptiTraceCollector::OnTracerCollectedCallbackData(
std::move(callback_events_));
std::move(callback_events_), need_callback_events_);
CuptiTraceCollector::OnTracerCachedActivityBuffers(
std::move(activity_buffers_));

Expand Down
7 changes: 5 additions & 2 deletions third_party/xla/xla/backends/profiler/gpu/cupti_collector.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,11 @@ class CuptiTraceCollector {
// (for later activity event usage), and b) direct add all event by calling
// AddEvent(). Yet collector could just save those callback events without
// processing now, but merge annotation and AddEvent() later when needed, such
// as during export().
// as during export(). If need_callback_events is false, only the annotation
// map is merged and all callback events are dropped.
virtual void OnTracerCollectedCallbackData(
std::vector<CallbackAnnotationsAndEvents> callback_events);
std::vector<CallbackAnnotationsAndEvents> callback_events,
bool need_callback_events);

// CuptiTracer now caches all activity buffers during tracing.
// After tracing stops, the cached activity buffers are sent here.
Expand All @@ -86,6 +88,7 @@ class CuptiTraceCollector {

protected:
CuptiTracerCollectorOptions options_;
bool need_callback_events_ = false;

private:
AnnotationMap annotation_map_;
Expand Down
8 changes: 2 additions & 6 deletions third_party/xla/xla/backends/profiler/gpu/cupti_tracer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -822,11 +822,6 @@ class CuptiDriverApiHookWithActivityApi : public CuptiDriverApiHook {
absl::Status OnDriverApiExit(int device_id, CUpti_CallbackDomain domain,
CUpti_CallbackId cbid,
const CUpti_CallbackData *cbdata) override {
// If we are not collecting CPU events from Callback API, we can return now.
if (!option_.required_callback_api_events) {
return absl::OkStatus();
}

// Grab timestamp for API exit. API entry timestamp saved in cbdata.
uint64_t end_tsc = CuptiTracer::GetTimestamp();
uint64_t start_tsc = *cbdata->correlationData;
Expand Down Expand Up @@ -968,7 +963,8 @@ void CuptiTracer::Disable() {
cupti_driver_api_hook_->SyncAndFlush().IgnoreError();

collector_->OnTracerCollectedCallbackData(
GatherCallbackAnnotationsAndEvents());
GatherCallbackAnnotationsAndEvents(),
option_.has_value() ? option_->required_callback_api_events : false);
collector_->OnTracerCachedActivityBuffers(std::move(activity_buffers_));
if (cupti_dropped_activity_event_count_ > 0) {
collector_->OnEventsDropped("Activity Event dropped by Cupti Lib:",
Expand Down

0 comments on commit 17cedab

Please sign in to comment.