Skip to content

Commit

Permalink
Merge pull request tensorflow#32474 from ROCmSoftwarePlatform/r2.0-ro…
Browse files Browse the repository at this point in the history
…cm-upstream-squashed

[ROCM] Patch to enable rocm for r2.0 release branch
  • Loading branch information
goldiegadde committed Oct 8, 2019
2 parents 64c3d38 + c4604a3 commit 1cf0898
Show file tree
Hide file tree
Showing 43 changed files with 206 additions and 62 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@ The TensorFlow project strives to abide by generally accepted best practices in

Build Type | Status | Artifacts
--------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------
**Linux AMD ROCm GPU** Nightly | [![Build Status](http://ml-ci.amd.com:21096/job/tensorflow-rocm-nightly/badge/icon)](http://ml-ci.amd.com:21096/job/tensorflow-rocm-nightly) | [Nightly](http://ml-ci.amd.com:21096/job/tensorflow-rocm-nightly/lastSuccessfulBuild/)
**Linux AMD ROCm GPU** Stable Release | [![Build Status](http://ml-ci.amd.com:21096/job/tensorflow-rocm-release/badge/icon)](http://ml-ci.amd.com:21096/job/tensorflow-rocm-release/) | [Release](http://ml-ci.amd.com:21096/job/tensorflow-rocm-release/lastSuccessfulBuild/)
**Linux s390x** Nightly | [![Build Status](http://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_CI/badge/icon)](http://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_CI/) | [Nightly](http://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_CI/)
**Linux s390x CPU** Stable Release | [![Build Status](http://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_Release_Build/badge/icon)](https://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_Release_Build/) | [Release](https://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_Release_Build/)
**Linux ppc64le CPU** Nightly | [![Build Status](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Build/badge/icon)](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Build/) | [Nightly](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Nightly_Artifact/)
Expand Down
3 changes: 3 additions & 0 deletions tensorflow/c/eager/c_api_experimental_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,10 @@ void ExecuteWithProfiling(bool async) {
if (!gpu_device_name.empty()) {
EXPECT_TRUE(HasSubstr(profile_proto_str, "/device:GPU:0"));
// device name with "stream:all" is collected by Device Tracer.
#ifndef TENSORFLOW_USE_ROCM
// ROCm platform does not yet support stream level tracing
EXPECT_TRUE(HasSubstr(profile_proto_str, "stream:all"));
#endif
}
// "/host:CPU" is collected by TraceMe
EXPECT_TRUE(HasSubstr(profile_proto_str, "/host:CPU"));
Expand Down
1 change: 1 addition & 0 deletions tensorflow/cc/profiler/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ tf_cuda_cc_test(
name = "profiler_test",
srcs = ["profiler_test.cc"],
tags = [
"no_rocm", # stream level tracing not supported on ROCm
"nogpu", # b/77649654
],
deps = [
Expand Down
3 changes: 2 additions & 1 deletion tensorflow/core/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -4605,7 +4605,7 @@ tf_cc_test(
size = "small",
srcs = ["common_runtime/constant_folding_test.cc"],
linkstatic = tf_kernel_tests_linkstatic(),
tags = tf_cuda_tests_tags(),
tags = tf_cuda_tests_tags() + ["no_rocm"],
deps = [
":core",
":core_cpu",
Expand Down Expand Up @@ -4671,6 +4671,7 @@ tf_cuda_cc_test(
size = "small",
srcs = ["common_runtime/process_function_library_runtime_test.cc"],
linkstatic = tf_kernel_tests_linkstatic(),
tags = ["no_rocm"],
deps = [
":core_cpu",
":core_cpu_internal",
Expand Down
10 changes: 9 additions & 1 deletion tensorflow/core/common_runtime/direct_session_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,11 @@ limitations under the License.
#include "tensorflow/core/public/session_options.h"
#include "tensorflow/core/util/device_name_utils.h"

#ifdef GOOGLE_CUDA
#if GOOGLE_CUDA
#include "third_party/gpus/cuda/include/cuda.h"
#include "third_party/gpus/cuda/include/cuda_runtime_api.h"
#elif TENSORFLOW_USE_ROCM
#include "rocm/include/hip/hip_runtime.h"
#endif // GOOGLE_CUDA

namespace tensorflow {
Expand Down Expand Up @@ -2089,6 +2091,12 @@ bool IsCUDATensor(const Tensor& t) {
if (err == cudaErrorInvalidValue) return false;
CHECK_EQ(cudaSuccess, err) << cudaGetErrorString(err);
return (attributes.memoryType == cudaMemoryTypeDevice);
#elif TENSORFLOW_USE_ROCM
hipPointerAttribute_t attributes;
hipError_t err = hipPointerGetAttributes(&attributes, t.tensor_data().data());
if (err == hipErrorInvalidValue) return false;
CHECK_EQ(hipSuccess, err) << hipGetErrorString(err);
return (attributes.memoryType == hipMemoryTypeDevice);
#else
return false;
#endif
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,11 @@ limitations under the License.
#include "tensorflow/core/public/session_options.h"
#include "tensorflow/core/public/version.h"

#ifdef GOOGLE_CUDA
#if GOOGLE_CUDA
#include "third_party/gpus/cuda/include/cuda.h"
#include "third_party/gpus/cuda/include/cuda_runtime_api.h"
#elif TENSORFLOW_USE_ROCM
#include "rocm/include/hip/hip_runtime.h"
#endif // GOOGLE_CUDA

namespace tensorflow {
Expand Down Expand Up @@ -122,7 +124,7 @@ class ProcessFunctionLibraryRuntimeTest : public ::testing::Test {
}

Tensor GPUToCPU(const Tensor& device_tensor) {
#ifdef GOOGLE_CUDA
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
CHECK(gpu_device_);
CHECK(gpu_device_->tensorflow_gpu_device_info() != nullptr);
DeviceContext* device_context =
Expand All @@ -146,7 +148,7 @@ class ProcessFunctionLibraryRuntimeTest : public ::testing::Test {
}

Tensor CPUToGPU(const Tensor& cpu_tensor) {
#ifdef GOOGLE_CUDA
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
CHECK(gpu_device_);
CHECK(gpu_device_->tensorflow_gpu_device_info() != nullptr);
DeviceContext* device_context =
Expand Down Expand Up @@ -461,6 +463,12 @@ bool IsCUDATensor(const Tensor& t) {
if (err == cudaErrorInvalidValue) return false;
CHECK_EQ(cudaSuccess, err) << cudaGetErrorString(err);
return (attributes.memoryType == cudaMemoryTypeDevice);
#elif TENSORFLOW_USE_ROCM
hipPointerAttribute_t attributes;
hipError_t err = hipPointerGetAttributes(&attributes, t.tensor_data().data());
if (err == hipErrorInvalidValue) return false;
CHECK_EQ(hipSuccess, err) << hipGetErrorString(err);
return (attributes.memoryType == hipMemoryTypeDevice);
#else
CHECK(false)
<< "IsCUDATensor should not be called when CUDA is not available";
Expand Down
20 changes: 19 additions & 1 deletion tensorflow/core/grappler/clusters/utils_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,18 @@ TEST(UtilsTest, GetLocalGPUInfo) {
properties = GetLocalGPUInfo(PlatformGpuId(0));
EXPECT_EQ("GPU", properties.type());
EXPECT_EQ("NVIDIA", properties.vendor());
#elif TENSORFLOW_USE_ROCM
LOG(INFO) << "ROCm is enabled.";
DeviceProperties properties;

// Invalid platform GPU ID.
properties = GetLocalGPUInfo(PlatformGpuId(100));
EXPECT_EQ("UNKNOWN", properties.type());

// Succeed when a valid platform GPU id was inserted.
properties = GetLocalGPUInfo(PlatformGpuId(0));
EXPECT_EQ("GPU", properties.type());
EXPECT_EQ("Advanced Micro Devices, Inc", properties.vendor());
#else
LOG(INFO) << "CUDA is not enabled.";
DeviceProperties properties;
Expand Down Expand Up @@ -73,6 +85,8 @@ TEST(UtilsTest, GetDeviceInfo) {
EXPECT_EQ("GPU", properties.type());
#if GOOGLE_CUDA
EXPECT_EQ("NVIDIA", properties.vendor());
#elif TENSORFLOW_USE_ROCM
EXPECT_EQ("Advanced Micro Devices, Inc", properties.vendor());
#endif

// TF to platform GPU id mapping entry doesn't exist.
Expand All @@ -81,7 +95,7 @@ TEST(UtilsTest, GetDeviceInfo) {
properties = GetDeviceInfo(device);
EXPECT_EQ("UNKNOWN", properties.type());

#if GOOGLE_CUDA
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
// Invalid platform GPU id.
TF_ASSERT_OK(
GpuIdManager::InsertTfPlatformGpuIdPair(TfGpuId(0), PlatformGpuId(100)));
Expand All @@ -94,7 +108,11 @@ TEST(UtilsTest, GetDeviceInfo) {
device.id = 1;
properties = GetDeviceInfo(device);
EXPECT_EQ("GPU", properties.type());
#if GOOGLE_CUDA
EXPECT_EQ("NVIDIA", properties.vendor());
#elif TENSORFLOW_USE_ROCM
EXPECT_EQ("Advanced Micro Devices, Inc", properties.vendor());
#endif
#endif
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ TEST_F(PinToHostOptimizerTest, Identity) {
// If CUDA, then there is a GPU kernel registration that is pinned to Host
// memory. Consequently, `b` will be mapped to Host correct if there is
// a GPU kernel registered.
#if GOOGLE_CUDA
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
EXPECT_EQ(node.device(), "/device:CPU:0");
#else
EXPECT_TRUE(node.device().empty());
Expand Down
5 changes: 5 additions & 0 deletions tensorflow/core/kernels/conv_ops_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1001,6 +1001,10 @@ class FusedConv2DWithBatchNormOpTest : public FusedConv2DOpTest<T> {};
TYPED_TEST_SUITE_P(FusedConv2DWithBiasOpTest);
TYPED_TEST_SUITE_P(FusedConv2DWithBatchNormOpTest);

// ROCm does not yet support the _FusedConv2D op,
// Therefore disable tests that check _FusedConv2D, when building with ROCm

#ifndef TENSORFLOW_USE_ROCM
// -------------------------------------------------------------------------- //
// Conv2D + BiasAdd + {Activation} //
// -------------------------------------------------------------------------- //
Expand Down Expand Up @@ -1165,4 +1169,5 @@ using FusedBatchNormDataTypes = ::testing::Types<float>;
INSTANTIATE_TYPED_TEST_SUITE_P(Test, FusedConv2DWithBatchNormOpTest,
FusedBatchNormDataTypes);

#endif // TENSORFLOW_USE_ROCM
} // namespace tensorflow
4 changes: 2 additions & 2 deletions tensorflow/core/kernels/in_topk_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ REGISTER_KERNEL_BUILDER(Name("InTopKV2")
.TypeConstraint<int64>("T"),
InTopK<CPUDevice, float, int64>);

#if GOOGLE_CUDA
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

// Forward declarations of the functor specializations for GPU.
namespace functor {
Expand All @@ -142,6 +142,6 @@ REGISTER_KERNEL_BUILDER(
Name("InTopKV2").Device(DEVICE_GPU).TypeConstraint<int64>("T"),
InTopK<GPUDevice, float, int64>);

#endif // GOOGLE_CUDA
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

} // namespace tensorflow
4 changes: 2 additions & 2 deletions tensorflow/core/kernels/in_topk_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ limitations under the License.
#ifndef TENSORFLOW_CORE_KERNELS_IN_TOPK_OP_H_
#define TENSORFLOW_CORE_KERNELS_IN_TOPK_OP_H_

#if GOOGLE_CUDA
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
#define EIGEN_USE_GPU
#endif // GOOGLE_CUDA
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/bounds_check.h"
Expand Down
8 changes: 4 additions & 4 deletions tensorflow/core/kernels/in_topk_op_gpu.cu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA)
#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || TENSORFLOW_USE_ROCM

#define EIGEN_USE_GPU

Expand Down Expand Up @@ -41,7 +41,7 @@ __global__ void ComputePredictionMaskKernel(
const TargetT* targets, // dims: [ num_targets ]
int64* mask, // dims: [ num_targets x num_classes ]
int num_targets, int num_classes) {
CUDA_1D_KERNEL_LOOP(i, num_targets * num_classes) {
GPU_1D_KERNEL_LOOP(i, num_targets * num_classes) {
const int batch_index = i / num_classes;
TargetT target_idx = ldg(targets + batch_index);

Expand Down Expand Up @@ -118,7 +118,7 @@ struct InTopKFunctor<GPUDevice, T, TargetT> {
const auto& d = context->eigen_device<GPUDevice>();

// Compute a mask for all predictions.
CudaLaunchConfig config = GetGpuLaunchConfig(num_targets * num_classes, d);
GpuLaunchConfig config = GetGpuLaunchConfig(num_targets * num_classes, d);
OP_REQUIRES_OK(
context, GpuLaunchKernel(ComputePredictionMaskKernel<T, TargetT>,
config.block_count, config.thread_per_block, 0,
Expand Down Expand Up @@ -173,4 +173,4 @@ DEFINE_GPU_KERNELS(float, int64);

} // end namespace tensorflow

#endif // GOOGLE_CUDA
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
2 changes: 1 addition & 1 deletion tensorflow/core/kernels/reduction_ops_all.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ REGISTER_KERNEL_BUILDER(
.HostMemory("reduction_indices"),
ReductionOp<CPUDevice, bool, int64, Eigen::internal::AndReducer>);

#if GOOGLE_CUDA
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
REGISTER_KERNEL_BUILDER(
Name("All")
.TypeConstraint<int32>("Tidx")
Expand Down
2 changes: 1 addition & 1 deletion tensorflow/core/kernels/reduction_ops_any.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ REGISTER_KERNEL_BUILDER(
.HostMemory("reduction_indices"),
ReductionOp<CPUDevice, bool, int64, Eigen::internal::OrReducer>);

#if GOOGLE_CUDA
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
REGISTER_KERNEL_BUILDER(
Name("Any")
.TypeConstraint<int32>("Tidx")
Expand Down
4 changes: 2 additions & 2 deletions tensorflow/core/kernels/reduction_ops_common_gpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ limitations under the License.
#ifndef TENSORFLOW_CORE_KERNELS_REDUCTION_OPS_COMMON_GPU_H_
#define TENSORFLOW_CORE_KERNELS_REDUCTION_OPS_COMMON_GPU_H_

#if !GOOGLE_CUDA
#error This file must only be included when building with Cuda support
#if !GOOGLE_CUDA && !TENSORFLOW_USE_ROCM
#error This file must only be included when building with GPU support
#endif

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
Expand Down
4 changes: 3 additions & 1 deletion tensorflow/core/kernels/reduction_ops_euclidean.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ namespace tensorflow {
TF_CALL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
#undef REGISTER_CPU_KERNELS

#if GOOGLE_CUDA
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

#define REGISTER_GPU_KERNELS(type) \
REGISTER_KERNEL_BUILDER(Name("EuclideanNorm") \
Expand All @@ -51,8 +51,10 @@ TF_CALL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
ReductionOp<GPUDevice, type, int64, \
functor::EuclideanNormReducer<type>>);
TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS);
#if GOOGLE_CUDA
TF_CALL_complex64(REGISTER_GPU_KERNELS);
TF_CALL_complex128(REGISTER_GPU_KERNELS);
#endif
#undef REGISTER_GPU_KERNELS

#endif
Expand Down
4 changes: 2 additions & 2 deletions tensorflow/core/kernels/reduction_ops_gpu_bool.cu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#if GOOGLE_CUDA
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

#define EIGEN_USE_GPU

Expand Down Expand Up @@ -59,4 +59,4 @@ DEFINE_FOR_TYPE_AND_R(bool, Eigen::internal::OrReducer);
} // end namespace functor
} // end namespace tensorflow

#endif // GOOGLE_CUDA
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
4 changes: 2 additions & 2 deletions tensorflow/core/kernels/reduction_ops_gpu_double.cu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#if GOOGLE_CUDA
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

#define EIGEN_USE_GPU

Expand Down Expand Up @@ -67,4 +67,4 @@ DEFINE_FOR_ALL_REDUCERS(double);
} // end namespace functor
} // end namespace tensorflow

#endif // GOOGLE_CUDA
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
4 changes: 2 additions & 2 deletions tensorflow/core/kernels/reduction_ops_gpu_float.cu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#if GOOGLE_CUDA
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

#define EIGEN_USE_GPU

Expand Down Expand Up @@ -67,4 +67,4 @@ DEFINE_FOR_ALL_REDUCERS(float);
} // end namespace functor
} // end namespace tensorflow

#endif // GOOGLE_CUDA
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
4 changes: 2 additions & 2 deletions tensorflow/core/kernels/reduction_ops_gpu_int.cu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#if GOOGLE_CUDA
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

#define EIGEN_USE_GPU

Expand Down Expand Up @@ -68,4 +68,4 @@ DEFINE_FOR_ALL_REDUCERS(int64);
} // end namespace functor
} // end namespace tensorflow

#endif // GOOGLE_CUDA
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
4 changes: 2 additions & 2 deletions tensorflow/core/kernels/reduction_ops_half_mean_sum.cu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#if GOOGLE_CUDA
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

#define EIGEN_USE_GPU

Expand Down Expand Up @@ -64,4 +64,4 @@ DEFINE_FOR_ALL_REDUCERS(Eigen::half);
} // end namespace functor
} // end namespace tensorflow

#endif // GOOGLE_CUDA
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
4 changes: 2 additions & 2 deletions tensorflow/core/kernels/reduction_ops_half_prod_max_min.cu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#if GOOGLE_CUDA
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

#define EIGEN_USE_GPU

Expand Down Expand Up @@ -64,4 +64,4 @@ DEFINE_FOR_ALL_REDUCERS(Eigen::half);
} // end namespace functor
} // end namespace tensorflow

#endif // GOOGLE_CUDA
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
2 changes: 1 addition & 1 deletion tensorflow/core/kernels/reduction_ops_max.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ namespace tensorflow {
TF_CALL_REAL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
#undef REGISTER_CPU_KERNELS

#if GOOGLE_CUDA
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

#define REGISTER_GPU_KERNELS(type) \
REGISTER_KERNEL_BUILDER( \
Expand Down
Loading

0 comments on commit 1cf0898

Please sign in to comment.