Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
TensorFlow: Upstream changes to git.
Change 109730179 Add support for selecting partition strategy in tf.nn.embedding_lookup and related ops, and allow unequally-sized shards to be used as input. Change 109729548 TensorFlow: add RELEASE.md notes for 0.6.0. Change 109728185 Make seq2seq_test non-flaky by setting python and numpy random seed. Change 109725913 Refactor slot creation in optimizers and moving averages to separate file Change 109718024 TensorFlow: reduce runtime of seq2seq_test from ~30s to ~18s. Change 109712251 More performance improvement for convnet on GPU. + Switch forward convolution format to NCHW. + Allocate scratch space for forward- and backward- convolutions. + Users can use "TF_CUDNN_WORKSPACE_LIMIT_IN_MB" to configure the scratch space limit. The default limit is 1GB. Change 109710898 Added extract_sub_graph utility function Base CL: 109731609
- Loading branch information
Vijay Vasudevan
committed
Dec 8, 2015
1 parent
ddd4aaf
commit 2c3738d
Showing
14 changed files
with
764 additions
and
138 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,30 @@ | ||
# Release 0.6.0 | ||
|
||
## Major Features and Improvements | ||
|
||
* Python 3.3+ support via changes to python codebase and ability | ||
to specify python version via ./configure. | ||
|
||
* Some improvements to GPU performance and memory usage: | ||
[convnet benchmarks](https://github.com/soumith/convnet-benchmarks/issues/66) | ||
roughly equivalent with native cudnn v2 performance. Improvements mostly due | ||
to moving to 32-bit indices, faster shuffling kernels. More improvements to | ||
come in later releases. | ||
|
||
|
||
## Bug fixes | ||
|
||
* Lots of fixes to documentation and tutorials, many contributed | ||
by the public. | ||
|
||
* 271 closed issues on github issues. | ||
|
||
## Backwards-incompatible changes | ||
|
||
* tf.nn.fixed_unigram_candidate_sampler changed its default 'distortion' | ||
attribute from 0.0 to 1.0. This was a bug in the original release | ||
that is now fixed. | ||
|
||
# Release 0.5.0 | ||
|
||
Initial release of TensorFlow. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
/* Copyright 2015 Google Inc. All Rights Reserved. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
==============================================================================*/ | ||
|
||
#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_CONV_OPS_GPU_H_ | ||
#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_CONV_OPS_GPU_H_ | ||
|
||
#if GOOGLE_CUDA | ||
|
||
#include "tensorflow/stream_executor/scratch_allocator.h" | ||
#include "tensorflow/core/common_runtime/gpu_device_context.h" | ||
|
||
namespace tensorflow { | ||
|
||
// TODO(zhengxq): move this to gpu_util.h. The use of such wrapers is wide | ||
// spread. | ||
template <typename T> | ||
perftools::gputools::DeviceMemory<T> AsDeviceMemory(const T* cuda_memory, | ||
uint64 size) { | ||
perftools::gputools::DeviceMemoryBase wrapped(const_cast<T*>(cuda_memory), | ||
size * sizeof(T)); | ||
perftools::gputools::DeviceMemory<T> typed(wrapped); | ||
return typed; | ||
} | ||
|
||
// Get the Cudnn workspace limit from the environment variable, which is in MB.
// Return the workspace memory limit in bytes. If no value is set, return the
// default value.
// `envvar_in_mb` names the environment variable to read (e.g.
// "TF_CUDNN_WORKSPACE_LIMIT_IN_MB"); its value is interpreted as megabytes
// and converted to bytes in the returned value.
int64 GetCudnnWorkspaceLimit(const string& envvar_in_mb,
                             int64 default_value_in_bytes);
|
||
// A class to provide scratch-space allocator for Stream-Executor Cudnn | ||
// callback. TensorFlow is responsible for releasing the temporary buffers after | ||
// the kernel finishes. | ||
class CudnnScratchAllocator : public perftools::gputools::ScratchAllocator { | ||
public: | ||
virtual ~CudnnScratchAllocator() {} | ||
CudnnScratchAllocator(int64 memory_limit, OpKernelContext* context) | ||
: memory_limit_(memory_limit), context_(context) {} | ||
virtual int64 GetMemoryLimitInBytes( | ||
perftools::gputools::Stream* stream) override { | ||
return memory_limit_; | ||
} | ||
virtual perftools::gputools::port::StatusOr< | ||
perftools::gputools::DeviceMemory<uint8>> | ||
AllocateBytes(perftools::gputools::Stream* stream, int64 byte_size) override { | ||
Tensor temporary_memory; | ||
|
||
Status allocation_status(context_->allocate_temp( | ||
DT_UINT8, TensorShape({byte_size}), &temporary_memory)); | ||
if (!allocation_status.ok()) { | ||
LOG(WARNING) << allocation_status; | ||
context_->SetStatus(allocation_status); | ||
return perftools::gputools::port::StatusOr< | ||
perftools::gputools::DeviceMemory<uint8>>(); | ||
} | ||
|
||
return perftools::gputools::port::StatusOr< | ||
perftools::gputools::DeviceMemory<uint8>>( | ||
AsDeviceMemory(temporary_memory.flat<uint8>().data(), | ||
temporary_memory.flat<uint8>().size())); | ||
} | ||
|
||
private: | ||
int64 memory_limit_; | ||
OpKernelContext* context_; | ||
}; | ||
|
||
} // namespace tensorflow | ||
|
||
#endif // GOOGLE_CUDA | ||
|
||
#endif // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_CONV_OPS_GPU_H_ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.