-
Notifications
You must be signed in to change notification settings - Fork 74k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Try to find an allocator when the engine is not assigned a device. #21508
Changes from all commits
eef787e
7baf484
0483e03
6b5be9a
0b31ebb
4684421
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -31,6 +31,9 @@ limitations under the License. | |
#include "tensorflow/contrib/tensorrt/resources/trt_resources.h" | ||
#include "tensorflow/contrib/tensorrt/segment/segment.h" | ||
#include "tensorflow/contrib/tensorrt/test/utils.h" | ||
#include "tensorflow/core/common_runtime/gpu/gpu_id.h" | ||
#include "tensorflow/core/common_runtime/gpu/gpu_id_manager.h" | ||
#include "tensorflow/core/common_runtime/gpu/gpu_process_state.h" | ||
#include "tensorflow/core/framework/function.h" | ||
#include "tensorflow/core/framework/graph_to_functiondef.h" | ||
#include "tensorflow/core/framework/node_def_builder.h" | ||
|
// Picks the CUDA device id and TF allocator to build the TRT engine with.
//
// Returns {cuda_device_id, allocator}; {-1, nullptr} signals that no usable
// GPU device could be resolved. Two paths:
//   1. No cluster / no device set / no device requested by the engine:
//      probe TfGpuId 0..99 for the first id that already has a TF->CUDA
//      mapping and reuse its allocator.
//   2. Otherwise: look the engine's requested device name up in the
//      cluster's DeviceSet and take that device's allocator.
std::pair<int, tensorflow::Allocator*> GetDeviceAndAllocator(
    const ConversionParams& params, const EngineInfo& engine) {
  int cuda_device_id = -1;
  tensorflow::Allocator* dev_allocator = nullptr;
  if (params.cluster == nullptr || params.cluster->GetDeviceSet() == nullptr ||
      engine.device.empty()) {
    // If device is not set, use the first found GPU device for the conversion.
    // NOTE(review): the probe bound of 100 is an arbitrary cap on virtual TF
    // GPU ids; TfGpuId values start at 0, so the first mapped id is normally
    // found immediately.
    for (int tf_gpu_id_value = 0; tf_gpu_id_value < 100; ++tf_gpu_id_value) {
      TfGpuId tf_gpu_id(tf_gpu_id_value);
      CudaGpuId cuda_gpu_id;
      Status s = GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id);
      if (s.ok()) {
        VLOG(1) << "Found TF GPU " << tf_gpu_id.value() << " at cuda device "
                << cuda_gpu_id.value();
        cuda_device_id = cuda_gpu_id.value();
        GPUOptions gpu_options;
        // If the TF to Cuda gpu id mapping exist, the device and corresponding
        // allocator must have been initialized already, so the
        // GetGPUAllocator() call won't create a new allocator.
        dev_allocator = GPUProcessState::singleton()->GetGPUAllocator(
            gpu_options, tf_gpu_id, 1);
        break;
      }
      LOG(ERROR) << "TF GPU with id " << tf_gpu_id_value << " does not exist "
                 << s;
    }
    return std::make_pair(cuda_device_id, dev_allocator);
  }

  // Use the device requested by the engine.
  auto device_set = params.cluster->GetDeviceSet();
  std::vector<tensorflow::Device*> devices;
  DeviceNameUtils::ParsedName parsed_name;
  if (DeviceNameUtils::ParseFullName(engine.device, &parsed_name) &&
      parsed_name.has_id) {
    device_set->FindMatchingDevices(parsed_name, &devices);
  }
  if (!devices.empty()) {
    if (devices.size() > 1) {
      // Ambiguous name: warn and fall through to the first match.
      string msg = "Found multiple matching devices using name '";
      StrAppend(&msg, engine.device, "': ");
      for (auto d : devices) StrAppend(&msg, d->name(), ", ");
      StrAppend(&msg, ". Will get the allocator from first one.");
      LOG(WARNING) << msg;
    }
    tensorflow::AllocatorAttributes alloc_attr;
    cuda_device_id = devices[0]->tensorflow_gpu_device_info()->gpu_id;
    dev_allocator = devices[0]->GetAllocator(alloc_attr);
    VLOG(1) << "Using allocator " << dev_allocator->Name()
            << " and cuda_device_id " << cuda_device_id;
  } else {
    // Requested device not in the cluster: leave {-1, nullptr} for the caller.
    LOG(WARNING) << "Cluster is set but device '" << engine.device
                 << "' is not found in the cluster";
  }
  return std::make_pair(cuda_device_id, dev_allocator);
}
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,140 @@ | ||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. | ||
|
||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
|
||
http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
==============================================================================*/ | ||
|
||
#include "tensorflow/contrib/tensorrt/convert/convert_graph.h" | ||
|
||
#include "tensorflow/contrib/tensorrt/convert/convert_nodes.h" | ||
#include "tensorflow/core/common_runtime/device_mgr.h" | ||
#include "tensorflow/core/common_runtime/device_set.h" | ||
#include "tensorflow/core/grappler/clusters/cluster.h" | ||
#include "tensorflow/core/grappler/grappler_item.h" | ||
#include "tensorflow/core/lib/core/status.h" | ||
#include "tensorflow/core/lib/core/status_test_util.h" | ||
#include "tensorflow/core/platform/test.h" | ||
#include "tensorflow/core/protobuf/config.pb.h" // NOLINT | ||
#include "tensorflow/core/public/session.h" | ||
|
||
#if GOOGLE_CUDA | ||
#if GOOGLE_TENSORRT | ||
|
||
namespace tensorflow { | ||
namespace tensorrt { | ||
namespace convert { | ||
|
||
class FakeCluster : public grappler::Cluster { | ||
public: | ||
FakeCluster() : Cluster(0) {} | ||
|
||
void SetDeviceSet(const DeviceSet* device_set) { device_set_ = device_set; } | ||
|
||
const DeviceSet* GetDeviceSet() const override { return device_set_; } | ||
|
||
string type() const override { return ""; } | ||
Status Provision() override { return Status::OK(); } | ||
Status Initialize(const grappler::GrapplerItem& item) override { | ||
return Status::OK(); | ||
} | ||
virtual Status Run(const GraphDef& graph_def, | ||
const std::vector<std::pair<string, Tensor>>& feed, | ||
const std::vector<string>& fetch, | ||
RunMetadata* metadata) override { | ||
return Status::OK(); | ||
} | ||
|
||
private: | ||
const DeviceSet* device_set_; | ||
}; | ||
|
||
// Exercises every branch of GetDeviceAndAllocator(): no cluster, null device
// set, unset engine device, a valid requested device, and a nonexistent
// requested device. Requires a CUDA build with at least one physical GPU
// (two virtual GPUs are carved out of it below).
TEST(ConvertGraphTest, GetDeviceAndAllocator) {
  ConversionParams params;
  EngineInfo engine_info;
  {
    // params.cluster is not set, and no gpu device is available.
    auto result = GetDeviceAndAllocator(params, engine_info);
    EXPECT_EQ(-1, result.first);
    EXPECT_EQ(nullptr, result.second);
  }

  // Create a session with two (virtual) gpu devices, which initializes the
  // per-process GPU id mapping and allocators.
  SessionOptions options;
  ConfigProto* config = &options.config;
  GPUOptions* gpu_options = config->mutable_gpu_options();
  auto virtual_devices =
      gpu_options->mutable_experimental()->add_virtual_devices();
  virtual_devices->add_memory_limit_mb(200);
  virtual_devices->add_memory_limit_mb(200);
  std::unique_ptr<Session> session(NewSession(options));

  {
    // params.cluster is not set, should find and return first gpu id and
    // corresponding allocator.
    auto result = GetDeviceAndAllocator(params, engine_info);
    EXPECT_EQ(0, result.first);
    EXPECT_NE(nullptr, result.second);
    EXPECT_EQ("GPU_0_bfc", result.second->Name());
  }

  FakeCluster cluster;
  params.cluster = &cluster;
  {
    // params.cluster->GetDeviceSet() returns null, should find and return first
    // gpu id and corresponding allocator.
    auto result = GetDeviceAndAllocator(params, engine_info);
    EXPECT_EQ(0, result.first);
    EXPECT_NE(nullptr, result.second);
    EXPECT_EQ("GPU_0_bfc", result.second->Name());
  }

  // Build the DeviceSet from the session's local devices and hand it to the
  // fake cluster (non-owning; device_mgr stays owned by the session).
  DeviceSet device_set;
  const DeviceMgr* device_mgr = nullptr;
  TF_ASSERT_OK(session->LocalDeviceManager(&device_mgr));
  for (auto d : device_mgr->ListDevices()) {
    device_set.AddDevice(d);
  }
  cluster.SetDeviceSet(&device_set);
  {
    // engine_info.device is not set, should find and return first gpu id and
    // corresponding allocator.
    auto result = GetDeviceAndAllocator(params, engine_info);
    EXPECT_EQ(0, result.first);
    EXPECT_NE(nullptr, result.second);
    EXPECT_EQ("GPU_0_bfc", result.second->Name());
  }

  engine_info.device = "/GPU:1";
  {
    // Set to use second device. cuda_device_id stays 0 because both virtual
    // devices map to the same physical CUDA device.
    auto result = GetDeviceAndAllocator(params, engine_info);
    EXPECT_EQ(0, result.first);
    EXPECT_NE(nullptr, result.second);
    EXPECT_EQ("GPU_1_bfc", result.second->Name());
  }

  engine_info.device = "/GPU:3";
  {
    // Set to use nonexistent device, should fail with {-1, nullptr}.
    auto result = GetDeviceAndAllocator(params, engine_info);
    EXPECT_EQ(-1, result.first);
    EXPECT_EQ(nullptr, result.second);
  }
}
|
||
} // namespace convert | ||
} // namespace tensorrt | ||
} // namespace tensorflow | ||
|
||
#endif // GOOGLE_TENSORRT | ||
#endif // GOOGLE_CUDA |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The max is hard coded to 100.
How would this work in a virtual environment (e.g. with kubernetes) where a single GPU can be shared by possibly more than 100 users?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's a good point. Currently TfGpuId always starts from 0, so if any GPU device was initialized before, GPU 0 should always be available. Note that TfGpuId is a virtual identifier of a GPU device owned by the process, not the physical GPU id. But if we hard-coded 0 here, changes to the BaseGpuDevice initialization flow could break the integration, so I added the loop here to reduce that risk.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Got it. Looks okay.