Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Branch 163409348 #11837

Merged
merged 31 commits into from Jul 28, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
32e198f
[TF:XLA] Add tf.cross support.
cdleary Jul 27, 2017
fd5de46
[XLA] Add regression test for a corner case using Reduce that current…
tensorflower-gardener Jul 27, 2017
43036ac
Remove unnecessary break statements.
tensorflower-gardener Jul 27, 2017
4404aa7
[XLA] Add TODO comment explaining why the IsScalar check exists.
tensorflower-gardener Jul 27, 2017
a49fe03
Remove platform bridge for grpc_response_reader.
Jul 27, 2017
a524701
Sets the incarnation number even when the attribute is set.
tensorflower-gardener Jul 27, 2017
631a364
[XLA] Add Reduce, DynamicSlice and DynamicSliceUpdate to HloEvaluator.
kayzhu Jul 27, 2017
dd1f0cd
Supports lookup devices by fullname either in the canonical form or the
tensorflower-gardener Jul 27, 2017
cda80a7
[tpu profiler] Dump HLO graphs in profile responses to the log direct…
Jul 27, 2017
722f6f3
Fix TensorForest's saveable object names so loading a savedmodel works.
tensorflower-gardener Jul 27, 2017
e4a5c53
["Variable", "VariableV2", "VarHandleOp"] is the default for ps_ops=None
tfboyd Jul 27, 2017
34cbf16
Update Dataset API documentation.
jsimsa Jul 27, 2017
a36bca2
Remove ShapeWithoutPadding() utility function, as it is no longer nee…
tayo Jul 27, 2017
ddd8e21
[XLA] Consolidate all similar main()s in tests into a single target.
eliben Jul 27, 2017
2139e7d
[tf.contrib.data] map expects a nested structure.
tensorflower-gardener Jul 27, 2017
376bb06
Look inside functions to see which node types are used.
petewarden Jul 27, 2017
86ca350
Further BUILD cleanup
tensorflower-gardener Jul 27, 2017
ae3119d
Small changes to op framework.
tensorflower-gardener Jul 27, 2017
3b97f1f
Change to only run one round of matmul benchmark.
Jul 27, 2017
8bc0236
PiperOrigin-RevId: 163366493
tensorflower-gardener Jul 27, 2017
9f131bd
Internal change
tensorflower-gardener Jul 27, 2017
4653d37
[XLA] Change type to appease GPU builds.
eliben Jul 27, 2017
613bf1c
fix asan test failure in SingleMachineTest::ReleaseMemoryAfterDestruc…
Jul 27, 2017
2265108
C API: Groundwork for experimenting with TF_Tensor in device memory.
asimshankar Jul 27, 2017
e5353c9
Don't prune nodes that have reference inputs.
tensorflower-gardener Jul 27, 2017
28373cf
Adds preliminary support for Cloud TPUs with Cluster Resolvers. This …
Jul 27, 2017
6b7314d
Consolidating the code to fill the partition's function library
tensorflower-gardener Jul 27, 2017
ce1c7f0
Properly include logging header in xla_internal_test_main
eliben Jul 27, 2017
d5cc143
Increase timeout to deflake the test.
tensorflower-gardener Jul 27, 2017
905abb1
Test asserts should have `expected` first.
tensorflower-gardener Jul 27, 2017
5a32c7d
Merge commit for internal changes
Jul 28, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions tensorflow/c/BUILD
Expand Up @@ -61,6 +61,7 @@ tf_cuda_library(
"//tensorflow/cc:grad_ops",
"//tensorflow/cc:scope_internal",
"//tensorflow/core:core_cpu",
"//tensorflow/core:core_cpu_internal",
"//tensorflow/core:framework",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:lib",
Expand Down
93 changes: 71 additions & 22 deletions tensorflow/c/c_api.cc
Expand Up @@ -27,6 +27,7 @@ limitations under the License.
#include "tensorflow/cc/saved_model/loader.h"
#endif
#include "tensorflow/c/c_api_internal.h"
#include "tensorflow/core/common_runtime/device_mgr.h"
#include "tensorflow/core/common_runtime/shape_refiner.h"
#include "tensorflow/core/framework/allocation_description.pb.h"
#include "tensorflow/core/framework/log_memory.h"
Expand Down Expand Up @@ -79,6 +80,7 @@ using tensorflow::TensorId;
using tensorflow::TensorShape;
using tensorflow::TensorShapeProto;
using tensorflow::error::Code;
using tensorflow::errors::FailedPrecondition;
using tensorflow::errors::InvalidArgument;
using tensorflow::gtl::ArraySlice;
using tensorflow::mutex_lock;
Expand Down Expand Up @@ -179,6 +181,26 @@ Status MessageToBuffer(const tensorflow::protobuf::Message& in,

} // namespace

// Wraps a buffer that is not backed by any session-owned device:
// device_owner_ is null (so on_cpu() reports true) and device_index_ gets
// the -1 sentinel.
TF_BufferAndDevice::TF_BufferAndDevice(TensorBuffer* buffer)
: buffer_(buffer), device_owner_(nullptr), device_index_(-1) {}

// Wraps a buffer whose memory belongs to a device owned by `session`
// (session->devices[device_index]). Increments the session's
// num_outstanding_buffers under the session mutex so the session cannot be
// deleted while this wrapper is alive (TF_DeleteSession checks the count).
TF_BufferAndDevice::TF_BufferAndDevice(TensorBuffer* buffer,
TF_Session* session, int device_index)
: buffer_(buffer), device_owner_(session), device_index_(device_index) {
mutex_lock l(device_owner_->mu);
device_owner_->num_outstanding_buffers++;
}

// Drops this wrapper's reference on the TensorBuffer and, if the buffer was
// device-backed, decrements the owning session's outstanding-buffer count
// under the session mutex (balancing the increment in the constructor).
TF_BufferAndDevice::~TF_BufferAndDevice() {
buffer_->Unref();
if (device_owner_ != nullptr) {
mutex_lock l(device_owner_->mu);
device_owner_->num_outstanding_buffers--;
}
}

TF_Tensor::~TF_Tensor() { delete buffer; }

TF_Tensor* TF_AllocateTensor(TF_DataType dtype, const int64_t* dims,
int num_dims, size_t len) {
void* data = allocate_tensor("TF_AllocateTensor", len);
Expand Down Expand Up @@ -211,33 +233,35 @@ TF_Tensor* TF_NewTensor(TF_DataType dtype, const int64_t* dims, int num_dims,
buf->deallocator_ = deallocator;
buf->deallocator_arg_ = deallocator_arg;
}
return new TF_Tensor{dtype, TensorShape(dimvec), buf};
return new TF_Tensor{dtype, TensorShape(dimvec), new TF_BufferAndDevice(buf)};
}

TF_Tensor* TF_TensorMaybeMove(TF_Tensor* tensor) {
// It is safe to move the Tensor if and only if we own the unique reference to
// it. In that case, we might as well not delete and reallocate, but a future
// implementation might need to do so.
if (tensor->buffer->RefCountIsOne() &&
tensor->buffer->root_buffer()->RefCountIsOne() &&
tensor->buffer->OwnsMemory()) {
TensorBuffer* buf = tensor->buffer->buffer();
if (buf->RefCountIsOne() && buf->root_buffer()->RefCountIsOne() &&
buf->OwnsMemory()) {
return tensor;
}
return nullptr;
}

void TF_DeleteTensor(TF_Tensor* t) {
t->buffer->Unref();
delete t;
}
void TF_DeleteTensor(TF_Tensor* t) { delete t; }

// Simple read-only accessors over the TF_Tensor's stored dtype and shape.
TF_DataType TF_TensorType(const TF_Tensor* t) { return t->dtype; }
int TF_NumDims(const TF_Tensor* t) { return t->shape.dims(); }
int64_t TF_Dim(const TF_Tensor* t, int dim_index) {
// Cast to the fixed-width int64_t type exposed by the C API.
return static_cast<int64_t>(t->shape.dim_size(dim_index));
}
size_t TF_TensorByteSize(const TF_Tensor* t) { return t->buffer->size(); }
void* TF_TensorData(const TF_Tensor* t) { return t->buffer->data(); }
// Byte size of the underlying (possibly device-resident) buffer.
size_t TF_TensorByteSize(const TF_Tensor* t) {
return t->buffer->buffer()->size();
}
// Returns a pointer to the tensor's data, or NULL when the backing buffer is
// not in host memory (matches the contract documented in c_api.h).
void* TF_TensorData(const TF_Tensor* t) {
if (t->buffer->on_cpu()) return t->buffer->buffer()->data();
return nullptr;
}

// --------------------------------------------------------------------------
size_t TF_StringEncode(const char* src, size_t src_len, char* dst,
Expand Down Expand Up @@ -396,7 +420,8 @@ namespace tensorflow {

Status TF_TensorToTensor(const TF_Tensor* src, Tensor* dst) {
if (src->dtype != TF_STRING) {
*dst = TensorCApi::MakeTensor(src->dtype, src->shape, src->buffer);
*dst =
TensorCApi::MakeTensor(src->dtype, src->shape, src->buffer->buffer());
return Status::OK();
}
// TF_STRING tensors require copying since Tensor class expects a sequence of
Expand Down Expand Up @@ -437,7 +462,7 @@ TF_Tensor* TF_TensorFromTensor(const tensorflow::Tensor& src) {
TensorBuffer* buf = TensorCApi::Buffer(src);
buf->Ref();
return new TF_Tensor{static_cast<TF_DataType>(src.dtype()), src.shape(),
buf};
new TF_BufferAndDevice(buf)};
}
// DT_STRING tensors require a copying since TF_Tensor.buffer expects a flatly
// encoded sequence of strings.
Expand Down Expand Up @@ -2119,6 +2144,17 @@ void TF_AddGradients(TF_Graph* g, TF_Output* y, int ny, TF_Output* x, int nx,

// TF_Session functions ----------------------------------------------

// Initializes the wrapper around session `s` and graph `g`. Device
// information is filled in best-effort: if Session::LocalDeviceManager
// fails, device_mgr stays null and `devices` stays empty.
TF_Session::TF_Session(tensorflow::Session* s, TF_Graph* g)
: session(s),
graph(g),
last_num_graph_nodes(0),
device_mgr(nullptr),
num_outstanding_buffers(0) {
if (s->LocalDeviceManager(&device_mgr).ok()) {
devices = device_mgr->ListDevices();
}
}

TF_Session* TF_NewSession(TF_Graph* graph, const TF_SessionOptions* opt,
TF_Status* status) {
Session* session;
Expand Down Expand Up @@ -2149,7 +2185,6 @@ TF_Session* TF_LoadSessionFromSavedModel(
return nullptr;
#else
mutex_lock l(graph->mu);

if (!graph->name_map.empty()) {
status->status = InvalidArgument("Graph is non-empty.");
return nullptr;
Expand Down Expand Up @@ -2203,16 +2238,30 @@ void TF_CloseSession(TF_Session* s, TF_Status* status) {
}

void TF_DeleteSession(TF_Session* s, TF_Status* status) {
status->status = Status::OK();
TF_Graph* const graph = s->graph;
if (graph != nullptr) {
graph->mu.lock();
graph->num_sessions -= 1;
const bool del = graph->delete_requested && graph->num_sessions == 0;
graph->mu.unlock();
if (del) delete graph;
{
mutex_lock l(s->mu);
if (s->num_outstanding_buffers > 0) {
// This can probably be relaxed: An alternative might be to mark
// this session for deletion and do the actual delete only when
// the last TF_BufferAndDevice has been deleted.
status->status = FailedPrecondition(
s->num_outstanding_buffers,
" TF_Tensor objects with memory backed by a device "
"owned by this TF_Session are still alive. Release "
"them using TF_DeleteTensor and retry");
return;
}
status->status = Status::OK();
TF_Graph* const graph = s->graph;
if (graph != nullptr) {
graph->mu.lock();
graph->num_sessions -= 1;
const bool del = graph->delete_requested && graph->num_sessions == 0;
graph->mu.unlock();
if (del) delete graph;
}
delete s->session;
}
delete s->session;
delete s;
}

Expand Down
3 changes: 3 additions & 0 deletions tensorflow/c/c_api.h
Expand Up @@ -263,6 +263,9 @@ TF_CAPI_EXPORT extern int64_t TF_Dim(const TF_Tensor* tensor, int dim_index);
TF_CAPI_EXPORT extern size_t TF_TensorByteSize(const TF_Tensor*);

// Return a pointer to the underlying data buffer.
//
// Returns NULL if the underlying data is not in host memory
// (for example, if it refers to addresses in GPU memory).
TF_CAPI_EXPORT extern void* TF_TensorData(const TF_Tensor*);

// --------------------------------------------------------------------------
Expand Down
53 changes: 45 additions & 8 deletions tensorflow/c/c_api_internal.h
Expand Up @@ -18,19 +18,25 @@ limitations under the License.

#include "tensorflow/c/c_api.h"

#include <vector>
#include <unordered_map>
#include <vector>

#include "tensorflow/core/common_runtime/shape_refiner.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/public/session.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/graph/graph.h"
#include "tensorflow/core/graph/graph_constructor.h"
#include "tensorflow/core/graph/node_builder.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/common_runtime/shape_refiner.h"
#include "tensorflow/core/public/session.h"

namespace tensorflow {
class Device;
class DeviceMgr;
} // namespace tensorflow
class TF_BufferAndDevice;

// Internal structures used by the C API. These are likely to change and should
// not be depended on.
Expand All @@ -40,9 +46,11 @@ struct TF_Status {
};

struct TF_Tensor {
~TF_Tensor();

TF_DataType dtype;
tensorflow::TensorShape shape;
tensorflow::TensorBuffer* buffer;
TF_BufferAndDevice* buffer;
};

struct TF_SessionOptions {
Expand Down Expand Up @@ -100,12 +108,19 @@ struct TF_Operation {
};

struct TF_Session {
TF_Session(tensorflow::Session* s, TF_Graph* g)
: session(s), graph(g), last_num_graph_nodes(0) {}
TF_Session(tensorflow::Session* s, TF_Graph* g);

tensorflow::Session* session;
TF_Graph* graph;

tensorflow::mutex mu;
int last_num_graph_nodes;

// NOTE(ashankar): Experimental fields to help keep the
// buffers of a TF_Tensor pinned in device memory.
const tensorflow::DeviceMgr* device_mgr; // Owned by session.
std::vector<tensorflow::Device*> devices; // Owned by device_mgr.
int num_outstanding_buffers GUARDED_BY(mu);
};

struct TF_ImportGraphDefOptions {
Expand All @@ -116,6 +131,28 @@ struct TF_DeviceList {
std::vector<tensorflow::DeviceAttributes> response;
};

// TF_BufferAndDevice encapsulates the memory addresses of data backing a Tensor
// and the device (e.g., GPU or host) whose memory the addresses refer to.
class TF_BufferAndDevice {
public:
// Wraps a buffer with no owning device (treated as host-resident).
explicit TF_BufferAndDevice(tensorflow::TensorBuffer* buffer);
// Wraps a buffer backed by session->devices[device_index]; bumps the
// session's outstanding-buffer count for this wrapper's lifetime.
TF_BufferAndDevice(tensorflow::TensorBuffer* buffer, TF_Session* session,
int device_index);
~TF_BufferAndDevice();

// The wrapped, reference-counted buffer (Unref'd by the destructor).
tensorflow::TensorBuffer* buffer() const { return buffer_; }
// Device whose memory backs buffer(), or null when there is none.
tensorflow::Device* device() const {
if (device_owner_ == nullptr) return nullptr;
return device_owner_->devices[device_index_];
}
// True when the buffer is not pinned to a session-owned device.
bool on_cpu() const { return device() == nullptr; }

private:
tensorflow::TensorBuffer* buffer_;
TF_Session* device_owner_; // Not owned; null for host-resident buffers.
const int device_index_; // Index into device_owner_->devices; -1 if unused.
};

namespace tensorflow {

class TensorCApi {
Expand Down
11 changes: 5 additions & 6 deletions tensorflow/compiler/plugin/executor/executable.cc
Expand Up @@ -30,17 +30,17 @@ ExecutorExecutable::ExecutorExecutable(std::unique_ptr<HloModule> hlo_module)

ExecutorExecutable::~ExecutorExecutable() {}

static se::DeviceMemoryBase AllocateSingleOutput(sep::ExecutorExecutor* executor,
const Literal& literal) {
static se::DeviceMemoryBase AllocateSingleOutput(
sep::ExecutorExecutor* executor, const Literal& literal) {
int64 size(xla::ShapeUtil::ByteSizeOf(literal.shape()));
void* buf = executor->Allocate(size);
const void* src = literal.InternalData();
memcpy(buf, src, size);
return se::DeviceMemoryBase(buf, size);
}

static se::DeviceMemoryBase AllocateOutputBuffer(sep::ExecutorExecutor* executor,
const Literal& literal) {
static se::DeviceMemoryBase AllocateOutputBuffer(
sep::ExecutorExecutor* executor, const Literal& literal) {
const Shape& shape = literal.shape();
if (shape.element_type() != xla::TUPLE) {
return AllocateSingleOutput(executor, literal);
Expand Down Expand Up @@ -97,7 +97,7 @@ StatusOr<se::DeviceMemoryBase> ExecutorExecutable::ExecuteOnStream(
// Execute the graph using the evaluator
HloEvaluator evaluator;
TF_ASSIGN_OR_RETURN(std::unique_ptr<Literal> output,
evaluator.Evaluate(computation, arg_literals_ptrs));
evaluator.Evaluate(*computation, arg_literals_ptrs));

// Copy the result into the return buffer
perftools::gputools::StreamExecutor* executor(stream->parent());
Expand Down Expand Up @@ -140,6 +140,5 @@ StatusOr<se::DeviceMemoryBase> ExecutorExecutable::ExecuteAsyncOnStream(
return ShapeUtil::ByteSizeOf(shape, sizeof(void*));
}


} // namespace executorplugin
} // namespace xla
18 changes: 18 additions & 0 deletions tensorflow/compiler/tests/binary_ops_test.py
Expand Up @@ -773,6 +773,24 @@ def testTranspose(self):
np.array([1, 0], dtype=np.int32),
expected=np.array([[1, 3], [2, 4]], dtype=dtype))

def testCross(self):
  """Checks the XLA tf.cross kernel against hand-computed cross products.

  Covers an all-zero batch, a single pair of 3-vectors, and a batched
  (2, 3) input, for every supported floating-point dtype.
  """
  cases = [
      # (lhs, rhs, expected lhs x rhs); the trailing dimension is always 3.
      (np.zeros((4, 3)), np.zeros((4, 3)), np.zeros((4, 3))),
      ([1, 2, 3], [4, 5, 6], [-3, 6, -3]),
      ([[1, 2, 3], [10, 11, 12]], [[4, 5, 6], [40, 50, 60]],
       [[-3, 6, -3], [60, -120, 60]]),
  ]
  for dtype in self.float_types:
    for lhs, rhs, want in cases:
      self._testBinary(
          gen_math_ops.cross,
          np.array(lhs, dtype=dtype),
          np.array(rhs, dtype=dtype),
          expected=np.array(want, dtype=dtype))


# Standard TensorFlow test-runner entry point.
if __name__ == "__main__":
googletest.main()
1 change: 1 addition & 0 deletions tensorflow/compiler/tf2xla/kernels/BUILD
Expand Up @@ -24,6 +24,7 @@ tf_kernel_library(
"concat_op.cc",
"const_op.cc",
"conv_ops.cc",
"cross_op.cc",
"cwise_ops.cc",
"depthwise_conv_ops.cc",
"diag_op.cc",
Expand Down