[ROCm] Grappler unit tests and _FusedConv2D #36639

Merged: 2 commits, Mar 12, 2020
8 changes: 7 additions & 1 deletion tensorflow/core/grappler/clusters/single_machine_test.cc
@@ -40,7 +40,12 @@ class SingleMachineTest : public ::testing::Test {
// Provision a single machine with 3 CPU cores and a short timeout of 5
// seconds: there isn't much work involved in processing a test graph, so
// that should be plenty.
#if TENSORFLOW_USE_ROCM
// ROCm takes longer to start up
int timeout_s = 10;
#else
int timeout_s = 5;
#endif
#ifdef THREAD_SANITIZER
timeout_s *= 5;
#endif
@@ -348,10 +353,11 @@ static void RunInfiniteTFLoop() {
}

TEST_F(SingleMachineTest, InfiniteLoops) {
#if !(TENSORFLOW_USE_ROCM) // fails with ROCm (investigate)
// The RunInfiniteTFLoop function creates its own cluster.
TF_CHECK_OK(cluster_->Shutdown());

EXPECT_EXIT(RunInfiniteTFLoop(), ::testing::ExitedWithCode(0), ".*");
#endif
}

TEST_F(SingleMachineTest, InitializationMemory) {
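Note that the two preprocessor blocks in the timeout hunk above compose: a ROCm build running under ThreadSanitizer ends up with a 10 * 5 = 50 second timeout. For reference, a minimal sketch of the provisioning call the comment describes (argument order inferred from the SingleMachine usage in auto_mixed_precision_test.cc below, so treat it as illustrative rather than a verbatim excerpt):

// Illustrative sketch, not a verbatim excerpt: provision a single
// machine with 3 CPU cores, no GPUs, and the build-dependent timeout
// selected by the preprocessor blocks above.
cluster_.reset(new SingleMachine(timeout_s, /*num_cpu_cores=*/3, /*num_gpus=*/0));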
28 changes: 23 additions & 5 deletions tensorflow/core/grappler/devices.cc
@@ -19,7 +19,7 @@ limitations under the License.
#include "tensorflow/core/platform/byte_order.h"
#include "tensorflow/core/platform/cpu_info.h"

#if GOOGLE_CUDA
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
#include "tensorflow/core/common_runtime/gpu/gpu_init.h"
#include "tensorflow/core/platform/stream_executor.h"
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
@@ -30,12 +30,22 @@ namespace grappler {
int GetNumAvailableGPUs(
const std::pair<int, int>& min_cuda_compute_capability) {
int num_eligible_gpus = 0;
#if GOOGLE_CUDA

#if TENSORFLOW_USE_ROCM
if (min_cuda_compute_capability.first != 0 ||
    min_cuda_compute_capability.second != 0) {
LOG(ERROR) << "GetNumAvailableGPUs() should receive a zero "
              "min_cuda_compute_capability";
return 0;
}
#endif
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
if (ValidateGPUMachineManager().ok()) {
se::Platform* gpu_manager = GPUMachineManager();
if (gpu_manager != nullptr) {
int num_gpus = gpu_manager->VisibleDeviceCount();
for (int i = 0; i < num_gpus; i++) {
#if GOOGLE_CUDA
auto desc_status = gpu_manager->DescriptionForDevice(i);
if (desc_status.ok()) {
auto desc = desc_status.ConsumeValueOrDie();
@@ -49,25 +59,33 @@ int GetNumAvailableGPUs(
num_eligible_gpus++;
}
}
#else
num_eligible_gpus++;
#endif
}
}
}
#if GOOGLE_CUDA
LOG(INFO)
<< "Number of eligible GPUs (core count >= 8, compute capability >= "
<< min_cuda_compute_capability.first << "."
<< min_cuda_compute_capability.second << "): " << num_eligible_gpus;
#else
LOG(INFO) << "Number of eligible GPUs: " << num_eligible_gpus;
#endif

#else // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
LOG(INFO)
<< "Number of eligible GPUs (core count >= 8, compute capability >= "
<< min_cuda_compute_capability.first << "."
<< min_cuda_compute_capability.second << "): " << num_eligible_gpus
<< " (Note: TensorFlow was not compiled with CUDA or ROCm support)";
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
return num_eligible_gpus;
}

int64 AvailableGPUMemory(int gpu_id) {
#if GOOGLE_CUDA
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
// Look up the device, to see its attributes.
se::Platform* gpu_platform = GPUMachineManager();
CHECK_LT(gpu_id, gpu_platform->VisibleDeviceCount());
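The net effect in devices.cc is a split contract for GetNumAvailableGPUs(): CUDA builds can gate on a real minimum compute capability, while ROCm builds must be passed {0, 0}, since HIP devices report no CUDA compute capability. A hedged sketch of a caller honoring that contract (the wrapper function is hypothetical; the signature comes from the diff above):

#include <utility>

// Hypothetical call site; GetNumAvailableGPUs() is declared in
// tensorflow/core/grappler/devices.h.
int CountUsableGpus() {
#if GOOGLE_CUDA
  // CUDA builds may require a minimum compute capability, e.g. 7.0.
  return tensorflow::grappler::GetNumAvailableGPUs({7, 0});
#else
  // ROCm (and CPU-only) builds must pass {0, 0}; on ROCm anything else
  // logs an error and reports zero eligible GPUs, per the check above.
  return tensorflow::grappler::GetNumAvailableGPUs({0, 0});
#endif
}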
4 changes: 4 additions & 0 deletions tensorflow/core/grappler/optimizers/auto_mixed_precision.cc
@@ -44,7 +44,11 @@ namespace tensorflow {
namespace grappler {
namespace {

#if GOOGLE_CUDA
const std::pair<int, int> kMinGPUArch = {7, 0};
#else
const std::pair<int, int> kMinGPUArch = {0, 0};
#endif

const char kSuffix[] = "AutoMixedPrecision";
const char kCastToFp16[] = "CastToFp16";
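For context, {7, 0} is CUDA compute capability 7.0 (Volta), the first NVIDIA generation with float16 Tensor Cores; the fallback value {0, 0} disables the gate, which is what the ROCm path relies on. Because std::pair compares lexicographically, the eligibility test reduces to a plain >=; a minimal standalone illustration (the assertions are hypothetical, kMinGPUArch matches the CUDA value above):

#include <cassert>
#include <utility>

int main() {
  const std::pair<int, int> kMinGPUArch = {7, 0};  // CUDA value from the diff
  // std::pair compares lexicographically: major version first, then minor.
  assert(std::make_pair(7, 5) >= kMinGPUArch);     // e.g. Turing 7.5 passes
  assert(!(std::make_pair(6, 1) >= kMinGPUArch));  // e.g. Pascal 6.1 fails
  // With the fallback value {0, 0}, every device satisfies the check.
  return 0;
}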
14 changes: 9 additions & 5 deletions tensorflow/core/grappler/optimizers/auto_mixed_precision_test.cc
@@ -17,7 +17,7 @@ limitations under the License.
// otherwise the optimizer will not turn clearlist nodes to float16. When
// looking at clearlist nodes, this optimizer checks if the nodes have a float16
// GPU OpKernel, but without CUDA there are no GPU OpKernels at all.
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

#include "tensorflow/core/grappler/optimizers/auto_mixed_precision.h"

@@ -76,16 +76,20 @@ class AutoMixedPrecisionTest : public GrapplerTest {
void SetUp() override {
int num_gpus = GetNumAvailableGPUs();
// If GPUs are available, require that they all satisfy the min arch.
gpu_available_ = (num_gpus > 0);
#if GOOGLE_CUDA
gpu_available_ = gpu_available_ &&
(num_gpus == GetNumAvailableGPUs(kMinGPUArch));
#endif
if (gpu_available_) {
virtual_cluster_.reset(new SingleMachine(/* timeout_s = */ 10, 1, 1));
} else {
DeviceProperties device_properties;
device_properties.set_type("GPU");
#if GOOGLE_CUDA
device_properties.mutable_environment()->insert({"architecture", "7"});
device_properties.mutable_environment()->insert({"cuda", "9010"});
#endif
virtual_cluster_.reset(
new VirtualCluster({{"/GPU:1", device_properties}}));
}
@@ -1078,4 +1082,4 @@ TEST_F(AutoMixedPrecisionTest, TanhOp) {
} // namespace grappler
} // namespace tensorflow

#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
tensorflow/core/grappler/optimizers/generic_layout_optimizer_test.cc
@@ -179,9 +179,9 @@ void VerifyDataFormatAttributeMatch(const utils::NodeView* node,
}

TEST_F(GenericLayoutOptimizerTest, OptimizeSimpleConv2DGraph) {
#if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
// A simple graph contains 1 "NHWC" Conv2D node, 2 input and 1 output nodes.
Scope scope = Scope::NewRootScope();

@@ -245,9 +245,9 @@ TEST_F(GenericLayoutOptimizerTest, PreserveFetch) {
}

TEST_F(GenericLayoutOptimizerTest, EmptyDevice) {
#if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
tensorflow::Scope s = tensorflow::Scope::NewRootScope();
auto conv = SimpleConv2D(&s, 4, 2, "VALID", "");
Output fetch = ops::Identity(s.WithOpName("Fetch"), {conv});
@@ -267,9 +267,9 @@ TEST_F(GenericLayoutOptimizerTest, EmptyDevice) {
}

TEST_F(GenericLayoutOptimizerTest, GPUDevice) {
#if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
tensorflow::Scope s = tensorflow::Scope::NewRootScope();
auto conv =
SimpleConv2D(&s, 4, 2, "VALID", "/job:w/replica:0/task:0/device:GPU:0");
@@ -290,9 +290,9 @@ TEST_F(GenericLayoutOptimizerTest, GPUDevice) {
}

TEST_F(GenericLayoutOptimizerTest, CPUDevice) {
#if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
tensorflow::Scope s = tensorflow::Scope::NewRootScope();
auto conv = SimpleConv2D(&s, 4, 2, "VALID", "/CPU:0");
Output fetch = ops::Identity(s.WithOpName("Fetch"), {conv});
@@ -312,9 +312,9 @@ TEST_F(GenericLayoutOptimizerTest, CPUDevice) {
}

TEST_F(GenericLayoutOptimizerTest, Connectivity) {
#if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
Scope scope = Scope::NewRootScope();
auto conv = SimpleConv2D(&scope, 4, 2, "VALID", "/device:GPU:0");
auto i1 = ops::Identity(scope.WithOpName("i1"), conv);
@@ -349,9 +349,9 @@ TEST_F(GenericLayoutOptimizerTest, Connectivity) {
}

TEST_F(GenericLayoutOptimizerTest, Conv2DBackpropInputNonConstInputSizes) {
#if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
Scope s = Scope::NewRootScope();
auto conv = SimpleConv2DBackpropInput(&s, 7, 2, "SAME", /*dilated=*/false);
Output fetch = ops::Identity(s.WithOpName("Fetch"), {conv});
@@ -381,9 +381,9 @@ TEST_F(GenericLayoutOptimizerTest, Conv2DBackpropInputNonConstInputSizes) {
}

TEST_F(GenericLayoutOptimizerTest, Conv2DDataFormatVecPermuteCollapse) {
#if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
Scope scope = Scope::NewRootScope().WithDevice("/device:GPU:0");
auto conv = SimpleConv2D(&scope, 4, 2, "VALID", "/device:GPU:0");
auto shape = ops::Shape(scope.WithOpName("shape"), conv);
@@ -434,9 +434,9 @@ TEST_F(GenericLayoutOptimizerTest, Conv2DDataFormatVecPermuteCollapse) {
}

TEST_F(GenericLayoutOptimizerTest, DoNotPruneNonAddedCancellableTransposes) {
#if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GTEST_SKIP() << "Neither CUDA nor ROCm is enabled";
#endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
GrapplerItem item;
{
Scope scope = Scope::NewRootScope().WithDevice("/device:GPU:0");
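The same three-line guard opens every test in this file; a possible consolidation (purely hypothetical, not part of this PR) would hoist it into a single helper macro:

// Hypothetical helper, not in the PR. GTEST_SKIP() returns from the
// enclosing test body, so the macro must be the first statement.
#if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
#define SKIP_IF_NO_GPU() GTEST_SKIP() << "Neither CUDA nor ROCm is enabled"
#else
#define SKIP_IF_NO_GPU() \
  do {                   \
  } while (0)
#endif

TEST_F(GenericLayoutOptimizerTest, OptimizeSimpleConv2DGraph) {
  SKIP_IF_NO_GPU();
  // ... test body unchanged ...
}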