Skip to content
Permalink
Browse files

Add timeouts and statuses to onnxifi events (#3400)

Summary:
see #2702
Add a timeout mechanism and a way to pass onnxStatuses from onnxifi event signallers to waiters. This enables waiting for Glow to return results for only a finite period of time and also enables Glow to pass statuses back to the caller in case a failure has occurred.

Documentation:
doxygen
Pull Request resolved: #3400

Test Plan:
CI
These changes are backwards compatible, so I will make the corresponding changes in c2 in a follow-up and test the functionality there.

Differential Revision: D16714891

Pulled By: jackm321

fbshipit-source-id: 848336c35ba4e0df2baf03eede074325793ef374
  • Loading branch information...
jackm321 authored and facebook-github-bot committed Aug 9, 2019
1 parent a9dd244 commit 7a9886f9c24303787c18eb379aad6c6bc40301d4
Showing with 80 additions and 13 deletions.
  1. +23 −4 lib/Onnxifi/Base.cpp
  2. +11 −2 lib/Onnxifi/Base.h
  3. +2 −2 lib/Onnxifi/HostManagerOnnxifi.cpp
  4. +1 −1 lib/Onnxifi/InlineOnnxifi.cpp
  5. +42 −3 lib/Onnxifi/onnxifiGlow.cpp
  6. +1 −1 thirdparty/foxi
@@ -74,30 +74,49 @@ onnxStatus Backend::checkGraphCompatibility(const void *onnxModel,
for (const auto &node : nodes) {
if (!glowBackend_->isOpSupported(node)) {
LOG(ERROR) << "ONNXIFI: Not supported op: " << node.getDebugDesc();
// TODO: Use a more specific ONNXIFI error code here to denote what about
// this operator is not supported (shape, type, etc).
// TODO: Use a more specific ONNXIFI error code here to denote what
// about this operator is not supported (shape, type, etc).
return ONNXIFI_STATUS_UNSUPPORTED_OPERATOR;
}
}

return ONNXIFI_STATUS_SUCCESS;
}

bool Event::signal() {
/// Fire the event with the given \p status and wake every waiter.
/// \returns false (changing nothing) if the event had already been fired,
/// true otherwise.
bool Event::signal(onnxStatus status) {
  std::unique_lock<std::mutex> lock(mutex_);
  if (fired_) {
    return false;
  }
  status_ = status;
  fired_ = true;
  // Release the mutex before notifying, so the notification is issued
  // without the lock held.
  lock.unlock();
  cond_.notify_all();
  return true;
}

void Event::wait() {
/// Block the calling thread until the event has been fired.
/// \returns the status stored in the event.
onnxStatus Event::wait() {
  std::unique_lock<std::mutex> lock(mutex_);
  // Loop to guard against spurious wake-ups of the condition variable.
  while (!fired_) {
    cond_.wait(lock);
  }
  return status_;
}

std::pair<bool, onnxStatus> Event::waitFor(size_t timeoutMs) {
DCHECK_GT(timeoutMs, 0)
<< "0 timeoutMs should instead use Event::wait to wait indefinitely";

auto endTime =
std::chrono::steady_clock::now() + std::chrono::milliseconds(timeoutMs);

std::unique_lock<std::mutex> guard(mutex_);
while (!fired_) {
if (std::cv_status::timeout == cond_.wait_until(guard, endTime)) {
return {/*signalled*/ false, status_};
}
}

return {/*signalled*/ true, status_};
}

onnxStatus Graph::setIOAndRun(uint32_t inputsCount,
@@ -80,10 +80,16 @@ class Event {
public:
Event() : fired_{false} {}
/// Signal the event.
bool signal();
bool signal(onnxStatus status);

/// Wait until the event is signalled.
void wait();
onnxStatus wait();

/// Wait until the event is signalled or until at least \p timeoutMs
/// milliseconds have elapsed. \returns a pair with the first value being a
/// boolean that is true if the event was signalled (no timeout occurred) and
/// the second is the value of the event's status.
std::pair<bool, onnxStatus> waitFor(size_t timeoutMs);

/// Check if event was signalled.
bool isSignalled() { return fired_; }
@@ -92,6 +98,9 @@ class Event {
std::atomic<bool> fired_;
std::mutex mutex_;
std::condition_variable cond_;
/// Used to hold an onnxStatus that will be passed from the signaller of the
/// event to a waiter. Should only be accessed while holding mutex_.
onnxStatus status_ = ONNXIFI_STATUS_SUCCESS;
};

typedef Event *EventPtr;
@@ -124,7 +124,7 @@ onnxStatus HostManagerGraph::run(std::unique_ptr<ExecutionContext> ctx,
// If an Error occurred then log it in errToBool and signal the output
// event.
if (errToBool(std::move(err))) {
outputEvent->signal();
outputEvent->signal(ONNXIFI_STATUS_INTERNAL_ERROR);
return;
}

@@ -148,7 +148,7 @@ onnxStatus HostManagerGraph::run(std::unique_ptr<ExecutionContext> ctx,
}
}

outputEvent->signal();
outputEvent->signal(ONNXIFI_STATUS_SUCCESS);
});

return ONNXIFI_STATUS_SUCCESS;
@@ -93,7 +93,7 @@ onnxStatus InlineGraph::run(std::unique_ptr<ExecutionContext> ctx,
setTraceEvents(traceEvents, traceContext);
}

outputEvent->signal();
outputEvent->signal(ONNXIFI_STATUS_SUCCESS);
return ONNXIFI_STATUS_SUCCESS;
}

@@ -204,7 +204,8 @@ GLOW_ONNXIFI_LIBRARY_FUNCTION_WRAPPER(onnxGetBackendInfo)(
case ONNXIFI_BACKEND_EXTENSIONS:
return setBackendInfoString(
infoValue, infoValueSize,
"onnxSetIOAndRunGraphFunction onnxReleaseTraceEventsFunction");
"onnxSetIOAndRunGraphFunction onnxWaitEventForFunction "
"onnxReleaseTraceEventsFunction");
default:
return ONNXIFI_STATUS_UNSUPPORTED_PROPERTY;
}
@@ -296,14 +297,14 @@ GLOW_ONNXIFI_LIBRARY_FUNCTION_WRAPPER(onnxSignalEvent)(onnxEvent event) {
return ONNXIFI_STATUS_INVALID_EVENT;
}

if (!glowEvent->signal()) {
if (!glowEvent->signal(ONNXIFI_STATUS_SUCCESS)) {
return ONNXIFI_STATUS_INVALID_STATE;
}

return ONNXIFI_STATUS_SUCCESS;
}

/// Wait until an ONNXIFI event is signalled.
/// Wait until an ONNXIFI \p event is signalled.
EXTERNC ONNXIFI_PUBLIC ONNXIFI_CHECK_RESULT onnxStatus ONNXIFI_ABI
GLOW_ONNXIFI_LIBRARY_FUNCTION_WRAPPER(onnxWaitEvent)(onnxEvent event) {
auto &manager = glow::onnxifi::GlowOnnxifiManager::get();
@@ -318,6 +319,41 @@ GLOW_ONNXIFI_LIBRARY_FUNCTION_WRAPPER(onnxWaitEvent)(onnxEvent event) {
return ONNXIFI_STATUS_SUCCESS;
}

/// Block on the ONNXIFI \p event until it is signalled or until \p timeoutMs
/// milliseconds have elapsed. A \p timeoutMs of 0 falls back to waiting
/// indefinitely for the event to be signalled.
/// On success, \p eventState reports whether the event was signalled, and
/// \p eventStatus receives the event's status only when it was signalled.
/// \returns ONNXIFI_STATUS_INVALID_POINTER if either output pointer is null,
/// ONNXIFI_STATUS_INVALID_EVENT if the manager does not recognise \p event,
/// and ONNXIFI_STATUS_SUCCESS otherwise (including on timeout).
EXTERNC ONNXIFI_PUBLIC ONNXIFI_CHECK_RESULT onnxStatus ONNXIFI_ABI
GLOW_ONNXIFI_LIBRARY_FUNCTION_WRAPPER(onnxWaitEventFor)(
    onnxEvent event, uint32_t timeoutMs, onnxEventState *eventState,
    onnxStatus *eventStatus) {
  auto &manager = glow::onnxifi::GlowOnnxifiManager::get();

  if (eventState == nullptr || eventStatus == nullptr) {
    return ONNXIFI_STATUS_INVALID_POINTER;
  }

  auto *glowEvent = static_cast<glow::onnxifi::EventPtr>(event);
  if (!manager.isValid(glowEvent)) {
    return ONNXIFI_STATUS_INVALID_EVENT;
  }

  // No timeout requested: block until the event fires.
  if (timeoutMs == 0) {
    const auto signalledStatus = glowEvent->wait();
    *eventState = ONNXIFI_EVENT_STATE_SIGNALLED;
    *eventStatus = signalledStatus;
    return ONNXIFI_STATUS_SUCCESS;
  }

  const auto outcome = glowEvent->waitFor(timeoutMs);
  if (!outcome.first) {
    // Timed out: only the state is reported; eventStatus is not written.
    *eventState = ONNXIFI_EVENT_STATE_NONSIGNALLED;
    return ONNXIFI_STATUS_SUCCESS;
  }

  *eventState = ONNXIFI_EVENT_STATE_SIGNALLED;
  *eventStatus = outcome.second;
  return ONNXIFI_STATUS_SUCCESS;
}

/// Query ONNXIFI event state without blocking.
EXTERNC ONNXIFI_PUBLIC ONNXIFI_CHECK_RESULT onnxStatus ONNXIFI_ABI
GLOW_ONNXIFI_LIBRARY_FUNCTION_WRAPPER(onnxGetEventState)(
@@ -547,6 +583,9 @@ GLOW_ONNXIFI_LIBRARY_FUNCTION_WRAPPER(onnxGetExtensionFunctionAddress)(
{"onnxSetIOAndRunGraphFunction",
reinterpret_cast<onnxExtensionFunctionPointer>(
GLOW_ONNXIFI_LIBRARY_FUNCTION_WRAPPER(onnxSetIOAndRunGraph))},
{"onnxWaitEventForFunction",
reinterpret_cast<onnxExtensionFunctionPointer>(
GLOW_ONNXIFI_LIBRARY_FUNCTION_WRAPPER(onnxWaitEventFor))},
{"onnxReleaseTraceEventsFunction",
reinterpret_cast<onnxExtensionFunctionPointer>(
GLOW_ONNXIFI_LIBRARY_FUNCTION_WRAPPER(onnxReleaseTraceEvents))}};

0 comments on commit 7a9886f

Please sign in to comment.
You can’t perform that action at this time.