Skip to content
Permalink
Browse files

Assorted memory-related fixes for HostManager, EE, RecSys (#3411)

Summary:
4 commits here:
- When calling `ExecutionEngine::clear()`, reset the Module
- Add a device memory parameter to the EE constructor so we don't need to reset the EE afterwards
- Use `HostManager::removeNetwork()` from `HostManager::clearHost()`
- Refactor RecSys so that EEs are cleared once they are done and only the result tensor is saved for comparison.
Pull Request resolved: #3411

Differential Revision: D16796999

Pulled By: jfix71

fbshipit-source-id: 7d701bed2d610d151cff4702c35a35bd09abaf5c
  • Loading branch information...
jfix71 authored and facebook-github-bot committed Aug 14, 2019
1 parent ae4114f commit 4c6e53e5f3d0231e03c290afd5863200ebe4940e
@@ -47,7 +47,7 @@ class ExecutionEngine final {
/// this resets the ExecutionEngine.
std::string backendName_ = "";

/// Size of device memory, if 0 device default is used.
/// Size of device memory in bytes, if 0 device default is used.
uint64_t deviceMemory_{0};

/// The HostManager for executing the compiled functions.
@@ -61,7 +61,10 @@ class ExecutionEngine final {
void runInternal(ExecutionContext &context, llvm::StringRef name);

public:
ExecutionEngine(llvm::StringRef backend = "Interpreter");
/// Constructor for an ExecutionEngine with \p backend and memory \p
/// deviceMemory in bytes.
ExecutionEngine(llvm::StringRef backend = "Interpreter",
uint64_t deviceMemory = 0);

~ExecutionEngine();

@@ -27,16 +27,17 @@

using namespace glow;

ExecutionEngine::ExecutionEngine(llvm::StringRef backend) {
/// Construct an ExecutionEngine for \p backend. \p deviceMemory is stored in
/// deviceMemory_ by the member initializer, and the backend is then selected
/// through setBackendName().
ExecutionEngine::ExecutionEngine(llvm::StringRef backend, uint64_t deviceMemory)
: deviceMemory_(deviceMemory) {
// Runs after deviceMemory_ has been initialized above.
setBackendName(backend);
}

/// Set the code generator to the given \p backend.
void ExecutionEngine::setBackendName(llvm::StringRef backend) {
clear();
module_.reset(new Module);
rawModule_ = module_.get();
backendName_ = backend;
clear();

if (hostManager_) {
EXIT_ON_ERR(hostManager_->clearHost());
@@ -61,6 +62,7 @@ void ExecutionEngine::clear() {
EXIT_ON_ERR(hostManager_->clearHost());
}
compiledFunctions_.clear();
module_.reset(nullptr);
}

void glow::updateInputPlaceholders(PlaceholderBindings &bindings,
@@ -226,20 +226,18 @@ llvm::Error HostManager::clearHost() {
DCHECK_EQ(activeRequestCount_, 0)
<< "All requests should be finished when shutting down HostManager.";

// Remove all networks from the host and device(s).
while (networks_.size() != 0) {
RETURN_IF_ERR(removeNetwork(networks_.begin()->first));
}

// Now it's safe to stop the DeviceManagers.
std::lock_guard<std::mutex> networkLock(networkLock_);
OneErrOnly errContainer;
for (auto &it : devices_) {
errContainer.set(it.second->stop());
}

for (auto &network : networks_) {
for (auto &node : network.second.dag.nodes) {
for (auto device : node->deviceIDs) {
devices_[device]->evictNetwork(node->name);
}
}
}
networks_.clear();
return errContainer.get();
}

@@ -74,7 +74,10 @@ DECLARE_STATELESS_BACKEND_TEST(BackendStatelessTest, std::tuple<std::string>);

class BackendTest : public BackendStatelessTest {
public:
BackendTest() : mod_(EE_.getModule()) { F_ = mod_.createFunction("main"); }
/// Set up the fixture: construct EE_ from getBackendName() and
/// \p deviceMemory, initialize mod_ from EE_.getModule(), and create the
/// function named "main" in that module, storing the result in F_.
BackendTest(uint64_t deviceMemory = 0)
: EE_(getBackendName(), deviceMemory), mod_(EE_.getModule()) {
F_ = mod_.createFunction("main");
}

protected:
ExecutionEngine EE_{getBackendName()};

0 comments on commit 4c6e53e

Please sign in to comment.
You can’t perform that action at this time.