Skip to content
This repository was archived by the owner on Jul 1, 2025. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion lib/Backends/NNPI/NNPIDeviceManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,8 @@ Error NNPIDeviceManager::startDeviceTrace(TraceContext *traceContext) {
traceContext, device_, true /* Software traces are always enabled. */,
deviceOptions_->hardwareTraces,
deviceOptions_->softwareTracesMaxBuffer,
deviceOptions_->hardwareTracesMaxBuffer)) {
deviceOptions_->hardwareTracesMaxBuffer,
deviceOptions_->rawTracesDumpPath)) {
return MAKE_ERR("Failed to start NNPI device trace.");
}
return Error::success();
Expand Down
45 changes: 41 additions & 4 deletions lib/Backends/NNPI/NNPIMLTraceWrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,16 @@
#include <unordered_map>
#include <vector>

/// Dump event entry to stream.
static void dumpTraceEntry(const NNPITraceEntry &entry,
std::ostream &outstream) {
outstream << "HTime:" << entry.hostTime << " ETime:" << entry.engineTime;
for (const auto &param : entry.params) {
outstream << " " << param.first << ":" << param.second;
}
outstream << std::endl;
}

/// Set default Hardware buffer size to 3GB.
#define MAX_HW_TRACE_BUFFER_SIZE_MB (1024 * 3)
/// Set default Software buffer size to 400MB.
Expand Down Expand Up @@ -58,20 +68,21 @@ static eIceCapsSwTraceEvent swEventTypes[] = {
eIceCapsSwTraceEvent::ICE_CAPS_SW_EVENT_USER_DATA};

NNPITraceContext::NNPITraceContext(unsigned devID)
: capsSession_(0), devID_(devID), devIDSet_(false) {}
: capsSession_(0), devID_(devID), devIDSet_(false), dumpRawEventsPath_() {}

NNPITraceContext::~NNPITraceContext() { destroyInternalContext(); }

bool NNPITraceContext::startCapture(NNPIDeviceContext deviceContext,
bool swTraces, bool hwTraces,
uint32_t softwareBufferSizeMB,
uint32_t hardwareBufferSizeMB) {
uint32_t hardwareBufferSizeMB,
const std::string &dumpRawEventsPath) {
if (!createInternalContext(swTraces, hwTraces, softwareBufferSizeMB,
hardwareBufferSizeMB)) {
LOG(WARNING) << "nnpi_trace: Failed to create trace device context.";
return false;
}

dumpRawEventsPath_ = dumpRawEventsPath;
nnpimlStatus mlStatus = nnpiIceCapsStart(capsSession_);
if (mlStatus != NNPIML_SUCCESS) {
LOG(WARNING) << "nnpi_trace: Failed to start trace, err=" << mlStatus;
Expand Down Expand Up @@ -127,6 +138,14 @@ bool NNPITraceContext::readTraceOutput() {
return false;
}

std::ofstream outputFile;
if (!dumpRawEventsPath_.empty()) {
outputFile.open(dumpRawEventsPath_);
if (!outputFile.is_open()) {
LOG(WARNING) << "Failed to open fail for raw events dump";
}
}

bool started = false;
uint64_t glowStart = 0;
uint64_t glowEnd = 0;
Expand All @@ -149,6 +168,10 @@ bool NNPITraceContext::readTraceOutput() {
// Set parameters.
if (entry.event_name) {
traceEntry.params["name"] = std::string(entry.event_name);
if (traceEntry.params["name"] == "MASTER_FIRMWARE") {
// Skip MASTER_FIRMWARE events.
continue;
}
} else {
traceEntry.params["name"] = "NA";
}
Expand Down Expand Up @@ -198,16 +221,30 @@ bool NNPITraceContext::readTraceOutput() {
params << param.name << ":" << param.value << ", ";
}

traceEntry.params["org_state"] = state;
if (state == "created" || state == "queued" || state == "req" ||
state == "add") {
state == "cbnwc" || state == "po" || state == "a" ||
state == "destroyed") {
state = "q";
} else if (traceEntry.params["name"] == "icedrvEventGeneration") {
// Exception for Event Generation.
if (state == "s") {
state = "q";
} else if (state == "add" || state == "start") {
state = "s";
} else if (state == "c") {
state = "c";
}
} else if (state == "executed" || state == "cbs" || state == "start") {
state = "s";
} else if (state == "completed" || state == "cbc") {
state = "c";
}
traceEntry.params["state"] = state;
entries_.push_back(traceEntry);
if (outputFile.is_open()) {
dumpTraceEntry(traceEntry, outputFile);
}
if (traceEntry.params["name"] == "user_data") {
if (!started && traceEntry.params["key"] == "BG") {
glowStart = std::stol(traceEntry.params["user_data"]);
Expand Down
4 changes: 3 additions & 1 deletion lib/Backends/NNPI/NNPIMLTraceWrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ class NNPITraceContext {
/// Start capturing traces from the HW device.
bool startCapture(NNPIDeviceContext deviceContext, bool swTraces,
bool hwTraces, uint32_t softwareBufferSizeMB,
uint32_t hardwareBufferSizeMB);
uint32_t hardwareBufferSizeMB,
const std::string &dumpRawEventsPath);
/// Start capturing.
bool stopCapture(NNPIDeviceContext deviceContext) const;
/// Load traces (valid only after stopCapture()).
Expand Down Expand Up @@ -64,6 +65,7 @@ class NNPITraceContext {
unsigned devID_{0};
bool devIDSet_{false};
std::vector<NNPITraceEntry> entries_;
std::string dumpRawEventsPath_;
};

#endif // NNPI_NNPITRACING_ML_WRAPPER_H
7 changes: 7 additions & 0 deletions lib/Backends/NNPI/NNPIOptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,12 @@ class NNPIDeviceOptions : public NNPIOptions {
"Set the max internal buffer size for device hardware traces."
"Enabled only when hardwareTraces=1 (use 0 for hard coded default).",
"NNPI_HW_TRACES_BUFFER_SIZE", "0");
/// Path to dump raw trace events from NNP-I.
DECLARE_NNPI_OPTION(
rawTracesDumpPath, std::string, "RawTracesDumpPath",
"Set a path (including a file name) to dump raw device events into a "
"file. If empty, raw events are not dumped.",
"NNPI_DEVICE_TRACES_DUMP_PATH", "");
/// Override the max NNPI device memory.
DECLARE_NNPI_OPTION(
deviceMemory, unsigned, "DeviceMemory",
Expand Down Expand Up @@ -451,6 +457,7 @@ class NNPIDeviceOptions : public NNPIOptions {
INIT_NNPI_OPTIONS(hardwareTraces, parameters);
INIT_NNPI_OPTIONS(softwareTracesMaxBuffer, parameters);
INIT_NNPI_OPTIONS(hardwareTracesMaxBuffer, parameters);
INIT_NNPI_OPTIONS(rawTracesDumpPath, parameters);
INIT_NNPI_OPTIONS(deviceMemory, parameters);
INIT_NNPI_OPTIONS(dumpIOtoFiles, parameters);
INIT_NNPI_OPTIONS(avxType, parameters);
Expand Down
63 changes: 48 additions & 15 deletions lib/Backends/NNPI/NNPITracing.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ NNPIDeviceTracing::NNPIDeviceTracing(unsigned deviceId) : deviceId_(deviceId) {
bool NNPIDeviceTracing::start(TraceContext *traceContext,
NNPIDeviceContext deviceContext, bool swTraces,
bool hwTraces, uint32_t softwareBufferSizeMB,
uint32_t hardwareBufferSizeMB) {
uint32_t hardwareBufferSizeMB,
const std::string &dumpRawEventsPath) {
if (!traceContext ||
!traceContext->shouldLog(TraceEvent::TraceLevel::OPERATOR)) {
return false;
Expand All @@ -43,7 +44,8 @@ bool NNPIDeviceTracing::start(TraceContext *traceContext,
}
bool isFirstToStart = NNPIDeviceTracing::isFirstToChangeCaptureStart(true);
if (!traceCtx_->startCapture(deviceContext, swTraces, hwTraces,
softwareBufferSizeMB, hardwareBufferSizeMB)) {
softwareBufferSizeMB, hardwareBufferSizeMB,
dumpRawEventsPath)) {
LOG(WARNING) << "Failed to start trace capture for device " << deviceId_
<< " is first = " << (isFirstToStart);
return false;
Expand Down Expand Up @@ -73,19 +75,22 @@ std::string NNPIDeviceTracing::getEntryName(NNPITraceEntry &entry) {
}
}
auto params = entry.params;
if (entry.params.count("context_id") > 0) {
name << " CTX 0x" << std::hex << std::stol(entry.params["context_id"]);
}
if (entry.params.count("ice_id") > 0) {
name << " ICE_" << entry.params["ice_id"];
}
if (entry.params.count("core_id") > 0) {
name << " CORE_" << entry.params["core_id"];
}
if (entry.params.count("network_id") > 0) {
name << " Net " << entry.params["network_id"];
}
if (entry.params.count("network_name") > 0 &&
entry.params["network_name"] != "NA") {
name << " NetName " << entry.params["network_name"];
}
if (entry.params.count("context_id") > 0) {
name << " CTX 0x" << std::hex << std::stol(entry.params["context_id"]);
}
if (entry.params.count("subNetId") > 0) {
name << " Subnet " << entry.params["subNetId"];
}
Expand Down Expand Up @@ -134,7 +139,9 @@ int NNPIDeviceTracing::getAffinityID(NNPITraceEntry &entry, std::string name,
}
// Use the op name.
affinityNameStuct << " " << name.substr(0, name.find(' '));
if (entry.params.count("state") && entry.params["state"] == "q") {
if (entry.params.count("org_state") &&
(entry.params["org_state"] == "q" ||
entry.params["org_state"] == "queued")) {
affinityNameStuct << " Queue";
}

Expand Down Expand Up @@ -165,6 +172,11 @@ bool NNPIDeviceTracing::addTrace(
if (entry.params.count("state") <= 0) {
return false;
}

std::string eventKey = name;
if (entry.params.count("event_key") > 0) {
eventKey += (std::string(" : ") + entry.params["event_key"]);
}
std::string state = entry.params["state"];

// Calculate affinity - use the trace thread id to make sections in the
Expand All @@ -177,30 +189,38 @@ bool NNPIDeviceTracing::addTrace(
}
// Add events.
if (state == "q") {
name += "-Queue";
traceContext->logTraceEvent(name, TraceLevel::OPERATOR,
TraceEvent::InstantType, entry.hostTime,
entry.params, affinId);
} else if (state == "s" && inflight.count(name) <= 0) {
inflight[name] = entry;
} else if (state == "c" && inflight.count(name) > 0) {
} else if (state == "s" && inflight.count(eventKey) <= 0) {
inflight[eventKey] = entry;
} else if (state == "c" && inflight.count(eventKey) > 0) {
// Add only complate events.
if (entry.hostTime >= inflight[name].hostTime) {
if (entry.hostTime >= inflight[eventKey].hostTime) {
traceContext->logTraceEvent(
name, TraceLevel::OPERATOR, TraceEvent::BeginType,
inflight[name].hostTime, inflight[name].params, affinId);
inflight[eventKey].hostTime, inflight[eventKey].params, affinId);
traceContext->logTraceEvent(name, TraceLevel::OPERATOR,
TraceEvent::EndType, entry.hostTime,
entry.params, affinId);
} else {
LOG(WARNING) << "Found incomplete trace event " << name << ": start time "
<< inflight[name].hostTime << " end time " << entry.hostTime;
LOG(WARNING) << "[INCOMPLETE EVENT] Found incomplete trace event "
<< eventKey << ": start time " << inflight[eventKey].hostTime
<< " end time " << entry.hostTime;
}
inflight.erase(name);
inflight.erase(eventKey);
} else if (state == "po") {
traceContext->logTraceEvent(name, TraceLevel::OPERATOR,
TraceEvent::InstantType, entry.hostTime,
entry.params, affinId);
} else if (entry.params.count("engine") > 0 &&
entry.params["engine"] != "HW") {
// Notifies only software events that are incomplete since HW events are
// much more likely to be lost.
LOG(WARNING) << "[INCOMPLETE EVENT] "
<< " event key:" << eventKey << " state:" << state
<< " inflight: " << (inflight.count(eventKey) > 0)
<< " time: " << entry.hostTime;
}

return true;
Expand Down Expand Up @@ -228,6 +248,19 @@ bool NNPIDeviceTracing::stopAndUpdate(TraceContext *traceContext,
for (auto entry : traceCtx_->getEntries()) {
addTrace(entry, inflight, traceContext);
}
if (inflight.size() > 0) {
LOG(WARNING) << "[INCOMPLETE EVENT] " << inflight.size()
<< " events not logged (still in flight/incomplate)";
for (const auto &event : inflight) {
if (event.second.params.at("engine") != "HW") {
// Notifies only software events that are incomplete since HW events are
// much more likely to be lost.
LOG(WARNING) << "[INCOMPLETE EVENT] " << event.first << " "
<< event.second.params.at("name")
<< " state:" << event.second.params.at("state");
}
}
}
started_.clear();
return true;
}
3 changes: 2 additions & 1 deletion lib/Backends/NNPI/NNPITracing.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ class NNPIDeviceTracing {
/// Start recording events.
bool start(TraceContext *traceContext, NNPIDeviceContext deviceContext,
bool swTraces, bool hwTraces, uint32_t softwareBufferSizeMB,
uint32_t hardwareBufferSizeMB);
uint32_t hardwareBufferSizeMB,
const std::string &dumpRawEventsPath);
/// Stop recording, read and update trace context.
bool stopAndUpdate(TraceContext *traceContext,
NNPIDeviceContext deviceContext);
Expand Down
9 changes: 0 additions & 9 deletions lib/Backends/NNPI/tests/NNPIOperatorTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,6 @@ struct BlacklistInitializer {
{"LayerNorm_BFloat16/0", TestBlacklist::AnyDeviceHWEngine},
{"LengthsSum/0", TestBlacklist::AnyDeviceAnyEngine},
{"LengthsToRanges/0", TestBlacklist::AnyDeviceAnyEngine},
{"ModuloInt32NoSignFollow/0", TestBlacklist::AnyDeviceAnyEngine},
{"ModuloInt32SignFollow/0", TestBlacklist::AnyDeviceAnyEngine},
{"ModuloInt64NoSignFollow/0", TestBlacklist::AnyDeviceAnyEngine},
{"ModuloInt64SignFollow/0", TestBlacklist::AnyDeviceAnyEngine},
{"mul_int32/0", TestBlacklist::AnyDeviceAnyEngine},
Expand Down Expand Up @@ -191,7 +189,6 @@ struct BlacklistInitializer {
{"ArithMul_int64_t/0", TestBlacklist::AnyDeviceAnyEngine},
{"ArithSub_int32_t/0", TestBlacklist::AnyDeviceHWEngine},
{"ArithSub_int64_t/0", TestBlacklist::AnyDeviceHWEngine},
{"BatchedGather/0", TestBlacklist::AnyDeviceHWEngine},
{"BatchNorm_Float/0", TestBlacklist::AnyDeviceHWEngine},
{"batchedReduceMin_Int32/0", TestBlacklist::AnyDeviceHWEngine},
{"batchedReduceMin_Int64/0", TestBlacklist::AnyDeviceHWEngine},
Expand Down Expand Up @@ -278,8 +275,6 @@ struct BlacklistInitializer {
{"convTransposeConvolutionCompareSimpleK5S1P2I3/0",
TestBlacklist::AnyDeviceAnyEngine},
{"ChannelwiseQuantizedConv3D/0", TestBlacklist::AnyDeviceAnyEngine},
{"ChannelwiseQuantizedConv2D_NonZero_FloatBias/0",
TestBlacklist::AnyDeviceSWEngine},
{"ChannelwiseQuantizedConv2D_NonZero_QuantizedBias/0",
TestBlacklist::AnyDeviceAnyEngine},
{"ChannelwiseQuantizedConv2D_Int8_BiasInt8_TFT/0",
Expand Down Expand Up @@ -321,14 +316,10 @@ struct BlacklistInitializer {
{"And/0", TestBlacklist::AnyDeviceAnyEngine},
{"Ceil_FloatTy/0", TestBlacklist::AnyDeviceAnyEngine},
{"Ceil_Int8QTy/0", TestBlacklist::AnyDeviceAnyEngine},
{"CmpGTE_FloatTy/0", TestBlacklist::AnyDeviceAnyEngine},
{"CmpGTE_Int32ITy/0", TestBlacklist::AnyDeviceAnyEngine},
{"CmpGTE_Int64ITy/0", TestBlacklist::AnyDeviceAnyEngine},
{"CmpGTE_Int8QTy/0", TestBlacklist::AnyDeviceAnyEngine},
{"CmpGT_FloatTy/0", TestBlacklist::AnyDeviceAnyEngine},
{"CmpGT_Int32ITy/0", TestBlacklist::AnyDeviceAnyEngine},
{"CmpGT_Int64ITy/0", TestBlacklist::AnyDeviceAnyEngine},
{"CmpGT_Int8QTy/0", TestBlacklist::AnyDeviceAnyEngine},
{"CmpNEQ_FloatTy/0", TestBlacklist::AnyDeviceAnyEngine},
{"CmpNEQ_Int32ITy/0", TestBlacklist::AnyDeviceAnyEngine},
{"CmpNEQ_Int64ITy/0", TestBlacklist::AnyDeviceAnyEngine},
Expand Down