[ROCm] Grappler unit tests and _FusedConv2D #36639

Merged: 2 commits, Mar 12, 2020
8 changes: 7 additions & 1 deletion tensorflow/core/grappler/clusters/single_machine_test.cc
@@ -40,7 +40,12 @@ class SingleMachineTest : public ::testing::Test {
// Provision a single machine with 3 CPU cores and a short timeout of 5
// seconds: there isn't much work involved in processing a test graph, so
// that should be plenty.
#if TENSORFLOW_USE_ROCM
// ROCm takes longer to start up
int timeout_s = 10;
#else
int timeout_s = 5;
#endif
#ifdef THREAD_SANITIZER
timeout_s *= 5;
#endif
@@ -348,10 +353,11 @@ static void RunInfiniteTFLoop() {
}

TEST_F(SingleMachineTest, InfiniteLoops) {
#if !(TENSORFLOW_USE_ROCM) // fails with ROCm (investigate)
// The RunInfiniteTFLoop function creates its own cluster.
TF_CHECK_OK(cluster_->Shutdown());

EXPECT_EXIT(RunInfiniteTFLoop(), ::testing::ExitedWithCode(0), ".*");
#endif
}

TEST_F(SingleMachineTest, InitializationMemory) {
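Note that the two preprocessor blocks in the timeout hunk above compose: a ROCm build running under ThreadSanitizer ends up with a 10 * 5 = 50 second timeout. For reference, a minimal sketch of the provisioning call the comment describes (argument order inferred from the SingleMachine usage in auto_mixed_precision_test.cc below, so treat it as illustrative rather than a verbatim excerpt):

// Illustrative sketch, not a verbatim excerpt: provision a single
// machine with 3 CPU cores, no GPUs, and the build-dependent timeout
// selected by the preprocessor blocks above.
cluster_.reset(new SingleMachine(timeout_s, /*num_cpu_cores=*/3, /*num_gpus=*/0));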
28 changes: 23 additions & 5 deletions tensorflow/core/grappler/devices.cc
@@ -19,7 +19,7 @@ limitations under the License.
#include "tensorflow/core/platform/byte_order.h"
#include "tensorflow/core/platform/cpu_info.h"

#if GOOGLE_CUDA
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
#include "tensorflow/core/common_runtime/gpu/gpu_init.h"
#include "tensorflow/core/platform/stream_executor.h"
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
@@ -30,12 +30,22 @@ namespace grappler {
int GetNumAvailableGPUs(
const std::pair<int, int>& min_cuda_compute_capability) {
int num_eligible_gpus = 0;
#if GOOGLE_CUDA

#if TENSORFLOW_USE_ROCM
if (min_cuda_compute_capability.first != 0 ||
    min_cuda_compute_capability.second != 0) {
LOG(ERROR) << "GetNumAvailableGPUs() should receive a zero "
              "min_cuda_compute_capability";
return 0;
}
#endif
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
if (ValidateGPUMachineManager().ok()) {
se::Platform* gpu_manager = GPUMachineManager();
if (gpu_manager != nullptr) {
int num_gpus = gpu_manager->VisibleDeviceCount();
for (int i = 0; i < num_gpus; i++) {
#if GOOGLE_CUDA
auto desc_status = gpu_manager->DescriptionForDevice(i);
if (desc_status.ok()) {
auto desc = desc_status.ConsumeValueOrDie();
@@ -49,25 +59,33 @@ int GetNumAvailableGPUs(
num_eligible_gpus++;
}
}
#else
num_eligible_gpus++;
#endif
}
}
}
#if GOOGLE_CUDA
LOG(INFO)
<< "Number of eligible GPUs (core count >= 8, compute capability >= "
<< min_cuda_compute_capability.first << "."
<< min_cuda_compute_capability.second << "): " << num_eligible_gpus;
#else
LOG(INFO) << "Number of eligible GPUs: " << num_eligible_gpus;
#endif

#else // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
LOG(INFO)
<< "Number of eligible GPUs (core count >= 8, compute capability >= "
<< min_cuda_compute_capability.first << "."
<< min_cuda_compute_capability.second << "): " << num_eligible_gpus
<< " (Note: TensorFlow was not compiled with CUDA or ROCm support)";
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
return num_eligible_gpus;
}

int64 AvailableGPUMemory(int gpu_id) {
#if GOOGLE_CUDA
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
// Look up the device, to see its attributes.
se::Platform* gpu_platform = GPUMachineManager();
CHECK_LT(gpu_id, gpu_platform->VisibleDeviceCount());
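The net effect in devices.cc is a split contract for GetNumAvailableGPUs(): CUDA builds can gate on a real minimum compute capability, while ROCm builds must be passed {0, 0}, since HIP devices report no CUDA compute capability. A hedged sketch of a caller honoring that contract (the wrapper function is hypothetical; the signature comes from the diff above):

#include <utility>

// Hypothetical call site; GetNumAvailableGPUs() is declared in
// tensorflow/core/grappler/devices.h.
int CountUsableGpus() {
#if GOOGLE_CUDA
  // CUDA builds may require a minimum compute capability, e.g. 7.0.
  return tensorflow::grappler::GetNumAvailableGPUs({7, 0});
#else
  // ROCm (and CPU-only) builds must pass {0, 0}; on ROCm anything else
  // logs an error and reports zero eligible GPUs, per the check above.
  return tensorflow::grappler::GetNumAvailableGPUs({0, 0});
#endif
}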
4 changes: 4 additions & 0 deletions tensorflow/core/grappler/optimizers/auto_mixed_precision.cc
@@ -44,7 +44,11 @@ namespace tensorflow {
namespace grappler {
namespace {

#if GOOGLE_CUDA
const std::pair<int, int> kMinGPUArch = {7, 0};
#else
const std::pair<int, int> kMinGPUArch = {0, 0};
#endif

const char kSuffix[] = "AutoMixedPrecision";
const char kCastToFp16[] = "CastToFp16";
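For context, {7, 0} is CUDA compute capability 7.0 (Volta), the first NVIDIA generation with float16 Tensor Cores; the fallback value {0, 0} disables the gate, which is what the ROCm path relies on. Because std::pair compares lexicographically, the eligibility test reduces to a plain >=; a minimal standalone illustration (the assertions are hypothetical, kMinGPUArch matches the CUDA value above):

#include <cassert>
#include <utility>

int main() {
  const std::pair<int, int> kMinGPUArch = {7, 0};  // CUDA value from the diff
  // std::pair compares lexicographically: major version first, then minor.
  assert(std::make_pair(7, 5) >= kMinGPUArch);     // e.g. Turing 7.5 passes
  assert(!(std::make_pair(6, 1) >= kMinGPUArch));  // e.g. Pascal 6.1 fails
  // With the fallback value {0, 0}, every device satisfies the check.
  return 0;
}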
14 changes: 9 additions & 5 deletions tensorflow/core/grappler/optimizers/auto_mixed_precision_test.cc
@@ -17,7 +17,7 @@ limitations under the License.
// otherwise the optimizer will not turn clearlist nodes to float16. When
// looking at clearlist nodes, this optimizer checks if the nodes have a float16
// GPU OpKernel, but without CUDA there are no GPU OpKernels at all.
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

#include "tensorflow/core/grappler/optimizers/auto_mixed_precision.h"

@@ -76,16 +76,20 @@ class AutoMixedPrecisionTest : public GrapplerTest {
void SetUp() override {
int num_gpus = GetNumAvailableGPUs();
// If GPUs are available, require that they all satisfy the min arch.
gpu_available_ = (num_gpus > 0);
#if GOOGLE_CUDA
gpu_available_ = gpu_available_ &&
(num_gpus == GetNumAvailableGPUs(kMinGPUArch));
#endif
if (gpu_available_) {
virtual_cluster_.reset(new SingleMachine(/* timeout_s = */ 10, 1, 1));
} else {
DeviceProperties device_properties;
device_properties.set_type("GPU");
#if GOOGLE_CUDA
device_properties.mutable_environment()->insert({"architecture", "7"});
device_properties.mutable_environment()->insert({"cuda", "9010"});
#endif
virtual_cluster_.reset(
new VirtualCluster({{"/GPU:1", device_properties}}));
}
@@ -1078,4 +1082,4 @@ TEST_F(AutoMixedPrecisionTest, TanhOp) {
} // namespace grappler
} // namespace tensorflow

#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
tensorflow/core/grappler/optimizers/generic_layout_optimizer_test.cc
@@ -179,9 +179,9 @@ void VerifyDataFormatAttributeMatch(const utils::NodeView* node,
}

TEST_F(GenericLayoutOptimizerTest, OptimizeSimpleConv2DGraph) {
#if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
// A simple graph contains 1 "NHWC" Conv2D node, 2 input and 1 output nodes.
Scope scope = Scope::NewRootScope();

@@ -245,9 +245,9 @@ TEST_F(GenericLayoutOptimizerTest, PreserveFetch) {
}

TEST_F(GenericLayoutOptimizerTest, EmptyDevice) {
#if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
tensorflow::Scope s = tensorflow::Scope::NewRootScope();
auto conv = SimpleConv2D(&s, 4, 2, "VALID", "");
Output fetch = ops::Identity(s.WithOpName("Fetch"), {conv});
@@ -267,9 +267,9 @@ TEST_F(GenericLayoutOptimizerTest, EmptyDevice) {
}

TEST_F(GenericLayoutOptimizerTest, GPUDevice) {
#if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
tensorflow::Scope s = tensorflow::Scope::NewRootScope();
auto conv =
SimpleConv2D(&s, 4, 2, "VALID", "/job:w/replica:0/task:0/device:GPU:0");
@@ -290,9 +290,9 @@ TEST_F(GenericLayoutOptimizerTest, GPUDevice) {
}

TEST_F(GenericLayoutOptimizerTest, CPUDevice) {
#if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
tensorflow::Scope s = tensorflow::Scope::NewRootScope();
auto conv = SimpleConv2D(&s, 4, 2, "VALID", "/CPU:0");
Output fetch = ops::Identity(s.WithOpName("Fetch"), {conv});
@@ -312,9 +312,9 @@ TEST_F(GenericLayoutOptimizerTest, CPUDevice) {
}

TEST_F(GenericLayoutOptimizerTest, Connectivity) {
#if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
Scope scope = Scope::NewRootScope();
auto conv = SimpleConv2D(&scope, 4, 2, "VALID", "/device:GPU:0");
auto i1 = ops::Identity(scope.WithOpName("i1"), conv);
@@ -349,9 +349,9 @@ TEST_F(GenericLayoutOptimizerTest, Connectivity) {
}

TEST_F(GenericLayoutOptimizerTest, Conv2DBackpropInputNonConstInputSizes) {
#if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
Scope s = Scope::NewRootScope();
auto conv = SimpleConv2DBackpropInput(&s, 7, 2, "SAME", /*dilated=*/false);
Output fetch = ops::Identity(s.WithOpName("Fetch"), {conv});
@@ -381,9 +381,9 @@ TEST_F(GenericLayoutOptimizerTest, Conv2DBackpropInputNonConstInputSizes) {
}

TEST_F(GenericLayoutOptimizerTest, Conv2DDataFormatVecPermuteCollapse) {
#if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
Scope scope = Scope::NewRootScope().WithDevice("/device:GPU:0");
auto conv = SimpleConv2D(&scope, 4, 2, "VALID", "/device:GPU:0");
auto shape = ops::Shape(scope.WithOpName("shape"), conv);
@@ -434,9 +434,9 @@ TEST_F(GenericLayoutOptimizerTest, Conv2DDataFormatVecPermuteCollapse) {
}

TEST_F(GenericLayoutOptimizerTest, DoNotPruneNonAddedCancellableTransposes) {
#if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item;
{
Scope scope = Scope::NewRootScope().WithDevice("/device:GPU:0");
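The same three-line guard opens every test in this file; a possible consolidation (purely hypothetical, not part of this PR) would hoist it into a single helper macro:

// Hypothetical helper, not in the PR. GTEST_SKIP() returns from the
// enclosing test body, so the macro must be the first statement.
#if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
#define SKIP_IF_NO_GPU() GTEST_SKIP() << "Neither CUDA nor ROCm is enabled"
#else
#define SKIP_IF_NO_GPU() \
  do {                   \
  } while (0)
#endif

TEST_F(GenericLayoutOptimizerTest, OptimizeSimpleConv2DGraph) {
  SKIP_IF_NO_GPU();
  // ... test body unchanged ...
}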