Skip to content

Commit

Permalink
Address code review feedback.
Browse files Browse the repository at this point in the history
  • Loading branch information
whchung committed Jun 26, 2019
1 parent b7d4805 commit 11864b1
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 5 deletions.
10 changes: 6 additions & 4 deletions tensorflow/core/kernels/conv_grad_ops_3d.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1355,8 +1355,6 @@ class Conv3DBackpropInputOp<GPUDevice, T> : public OpKernel {
using se::dnn::AlgorithmDesc;
using se::dnn::ProfileResult;
AlgorithmConfig algorithm_config;
ProfileResult best_result;
ProfileResult best_result_no_scratch;
if (cudnn_use_autotune_ && !AutoTuneConv3dBwdData::GetInstance()->Find(
conv_parameters, &algorithm_config)) {
#if GOOGLE_CUDA
Expand All @@ -1365,6 +1363,8 @@ class Conv3DBackpropInputOp<GPUDevice, T> : public OpKernel {
conv_parameters.ShouldIncludeWinogradNonfusedAlgo<T>(
stream->parent()),
&algorithms));
ProfileResult best_result;
ProfileResult best_result_no_scratch;
for (auto profile_algorithm : algorithms) {
// TODO(zhengxq): profile each algorithm multiple times for better
// accuracy.
Expand Down Expand Up @@ -1405,6 +1405,7 @@ class Conv3DBackpropInputOp<GPUDevice, T> : public OpKernel {
#elif TENSORFLOW_USE_ROCM
DnnScratchAllocator scratch_allocator(ConvolveBackwardDataScratchSize,
context);
ProfileResult best_result;
bool miopen_find_status =
stream
->ThenConvolveBackwardDataWithAlgorithm(
Expand Down Expand Up @@ -1777,8 +1778,6 @@ class Conv3DBackpropFilterOp<GPUDevice, T> : public OpKernel {
using se::dnn::AlgorithmDesc;
using se::dnn::ProfileResult;
AlgorithmConfig algorithm_config;
ProfileResult best_result;
ProfileResult best_result_no_scratch;
if (cudnn_use_autotune_ && !AutoTuneConv3dBwdFilter::GetInstance()->Find(
conv_parameters, &algorithm_config)) {
#if GOOGLE_CUDA
Expand All @@ -1787,6 +1786,8 @@ class Conv3DBackpropFilterOp<GPUDevice, T> : public OpKernel {
conv_parameters.ShouldIncludeWinogradNonfusedAlgo<T>(
stream->parent()),
&algorithms));
ProfileResult best_result;
ProfileResult best_result_no_scratch;
for (auto profile_algorithm : algorithms) {
// TODO(zhengxq): profile each algorithm multiple times for better
// accuracy.
Expand Down Expand Up @@ -1828,6 +1829,7 @@ class Conv3DBackpropFilterOp<GPUDevice, T> : public OpKernel {
#elif TENSORFLOW_USE_ROCM
DnnScratchAllocator scratch_allocator(ConvolveBackwardFilterScratchSize,
context);
ProfileResult best_result;
bool miopen_find_status =
stream
->ThenConvolveBackwardFilterWithAlgorithm(
Expand Down
2 changes: 1 addition & 1 deletion tensorflow/stream_executor/dnn.h
Original file line number Diff line number Diff line change
Expand Up @@ -823,7 +823,7 @@ class ProfileResult {
// algorithm_no_scratch: a secondary algorithm that should be used, if
// the allocation for the scratch memory fails.
// scratch_size: specify the size of scratch memory in bytes needed for the
// algorithm used
// algorithm used.
//
// On CUDA platform with CUDNN library, algorithm and algorithm_no_scratch
// would be used. On ROCm platform with MIOpen library, algorithm and
Expand Down

0 comments on commit 11864b1

Please sign in to comment.