Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Branch 155393864 #9850

Merged
merged 15 commits into from May 12, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion WORKSPACE
Expand Up @@ -5,7 +5,7 @@ http_archive(
sha256 = "4be8a887f6f38f883236e77bb25c2da10d506f2bf1a8e5d785c0f35574c74ca4",
strip_prefix = "rules_closure-aac19edc557aec9b603cd7ffe359401264ceff0d",
urls = [
"http://bazel-mirror.storage.googleapis.com/github.com/bazelbuild/rules_closure/archive/aac19edc557aec9b603cd7ffe359401264ceff0d.tar.gz", # 2017-05-10
"http://mirror.bazel.build/github.com/bazelbuild/rules_closure/archive/aac19edc557aec9b603cd7ffe359401264ceff0d.tar.gz", # 2017-05-10
"https://github.com/bazelbuild/rules_closure/archive/aac19edc557aec9b603cd7ffe359401264ceff0d.tar.gz",
],
)
Expand Down
2 changes: 2 additions & 0 deletions tensorflow/BUILD
Expand Up @@ -351,6 +351,8 @@ filegroup(
"//tensorflow/tensorboard/components/tf_globals_d3v4:all_files",
"//tensorflow/tensorboard/components/tf_graph:all_files",
"//tensorflow/tensorboard/components/tf_graph/demo:all_files",
"//tensorflow/tensorboard/components/tf_graph_app:all_files",
"//tensorflow/tensorboard/components/tf_graph_app/demo:all_files",
"//tensorflow/tensorboard/components/tf_graph_board:all_files",
"//tensorflow/tensorboard/components/tf_graph_board/demo:all_files",
"//tensorflow/tensorboard/components/tf_graph_common:all_files",
Expand Down
8 changes: 0 additions & 8 deletions tensorflow/compiler/xla/client/local_client.cc
Expand Up @@ -253,14 +253,6 @@ StatusOr<std::unique_ptr<GlobalData>> LocalClient::AllocateBufferOnDevice(
return std::unique_ptr<GlobalData>(new GlobalData(local_service_, handle));
}

// Resolves the given global-data handles to device memory by delegating to
// the local service. The resulting DeviceMemoryBase values are written
// through `argument_ptrs`; the service's status is returned unchanged.
tensorflow::Status LocalClient::ResolveArguments(
    const tensorflow::gtl::ArraySlice<const GlobalDataHandle*> arguments,
    int device_ordinal,
    std::vector<perftools::gputools::DeviceMemoryBase>* argument_ptrs) {
  tensorflow::Status resolve_status = local_service_->ResolveArguments(
      arguments, device_ordinal, argument_ptrs);
  return resolve_status;
}

// Returns the platform reported by the local service's backend.
se::Platform* LocalClient::platform() const {
  auto&& service_backend = local_service_->backend();
  return service_backend.platform();
}
Expand Down
8 changes: 0 additions & 8 deletions tensorflow/compiler/xla/client/local_client.h
Expand Up @@ -158,14 +158,6 @@ class LocalClient : public Client {
LocalClient(const LocalClient&) = delete;
void operator=(const LocalClient&) = delete;

// Resolves `arguments`, an array of handles to data held on the local
// service, to the device memory backing each of them. The call is forwarded
// to the local service.
//
// Each argument is validated as being placed on the specified
// `device_ordinal`. The DeviceMemoryBase corresponding to each argument is
// returned through the output parameter `argument_ptrs`.
//
// Returns the status produced by the local service.
tensorflow::Status ResolveArguments(
const tensorflow::gtl::ArraySlice<const GlobalDataHandle*> arguments,
int device_ordinal,
std::vector<perftools::gputools::DeviceMemoryBase>* argument_ptrs);

// Return a handle to a buffer large enough to hold shape, allocated
// on device_ordinal on the local service. If
// allocate_space_for_deep_copy, the buffer is large enough to hold
Expand Down
16 changes: 10 additions & 6 deletions tensorflow/compiler/xla/service/elemental_ir_emitter.cc
Expand Up @@ -240,14 +240,18 @@ StatusOr<llvm::Value*> ElementalIrEmitter::EmitFloatBinaryOp(
return ir_builder_->CreateFDiv(lhs_value, rhs_value);
case HloOpcode::kRemainder:
return ir_builder_->CreateFRem(lhs_value, rhs_value);

// The 'O' prefix on the LLVM ops means "ordered" compare where comparisons
// with NAN always return false.
// LLVM comparisons can be "unordered" (U) or "ordered" (O) -- ordered
// comparisons always return false when one of the operands is NaN, whereas
// unordered comparisons return true.
//
// We use ordered comparisons for everything except kNe, where we use an
// unordered comparison. This makes x != y equivalent to !(x == y), and
// matches C++'s semantics.
case HloOpcode::kEq:
return llvm_ir::EmitComparison(llvm::CmpInst::FCMP_OEQ, lhs_value,
rhs_value, ir_builder_);
case HloOpcode::kNe:
return llvm_ir::EmitComparison(llvm::CmpInst::FCMP_ONE, lhs_value,
return llvm_ir::EmitComparison(llvm::CmpInst::FCMP_UNE, lhs_value,
rhs_value, ir_builder_);
case HloOpcode::kLt:
return llvm_ir::EmitComparison(llvm::CmpInst::FCMP_OLT, lhs_value,
Expand Down Expand Up @@ -739,11 +743,11 @@ llvm_ir::ElementGenerator ElementalIrEmitter::MakeElementGenerator(
const HloInstruction* operand = hlo->operand(operand_idx);
auto true_block = llvm_ir::CreateBasicBlock(
exit_block, tensorflow::strings::StrCat(
"concat_index_from_operand", operand_idx),
"concat_index_from_operand", operand_idx),
ir_builder_);
auto false_block = llvm_ir::CreateBasicBlock(
exit_block, tensorflow::strings::StrCat(
"concat_index_not_from_operand", operand_idx),
"concat_index_not_from_operand", operand_idx),
ir_builder_);
auto concat_dim_size =
llvm::ConstantInt::get(source_index[concat_dim]->getType(),
Expand Down
15 changes: 0 additions & 15 deletions tensorflow/compiler/xla/service/local_service.cc
Expand Up @@ -77,21 +77,6 @@ LocalService::LocalService(std::unique_ptr<Backend> execute_backend,
runs_in_client_process_ = true;
}

// Resolves `arguments` to the device memory backing each of them.
//
// The handles are resolved and validated against the execute backend and
// `device_ordinal` via ResolveAndValidateArguments; if that fails, its error
// status is returned and `argument_ptrs` is left untouched.
//
// On success, `argument_ptrs` (an output parameter, dereferenced here and so
// required to be non-null) is resized to hold exactly one DeviceMemoryBase per
// resolved allocation, in the order the allocations were returned, and
// Status::OK() is returned.
tensorflow::Status LocalService::ResolveArguments(
    const tensorflow::gtl::ArraySlice<const GlobalDataHandle*> arguments,
    int device_ordinal,
    std::vector<perftools::gputools::DeviceMemoryBase>* argument_ptrs) {
  TF_ASSIGN_OR_RETURN(std::vector<const Allocation*> arg_allocations,
                      ResolveAndValidateArguments(
                          arguments, execute_backend_.get(), device_ordinal));

  // Bound the loop by the number of resolved allocations (the same count the
  // output vector is resized to) rather than by the input slice, and use an
  // unsigned index so there is no signed/unsigned comparison. This keeps every
  // index in range for both `arg_allocations` and `*argument_ptrs`.
  const size_t allocation_count = arg_allocations.size();
  argument_ptrs->resize(allocation_count);
  for (size_t i = 0; i < allocation_count; ++i) {
    (*argument_ptrs)[i] = arg_allocations[i]->device_memory();
  }
  return tensorflow::Status::OK();
}

namespace {
// Returns the space required to allocate a shape. If
// allocate_space_for_deep_copy the space includes all sub-buffers of
Expand Down
8 changes: 0 additions & 8 deletions tensorflow/compiler/xla/service/local_service.h
Expand Up @@ -43,14 +43,6 @@ class LocalService : public Service {
static StatusOr<std::unique_ptr<LocalService>> NewService(
const ServiceOptions& options);

// Resolves an array of argument handles to the device memory backing them.
//
// Each argument is validated as being placed on the specified
// `device_ordinal`. On success, the output parameter `argument_ptrs` is
// resized and filled with the DeviceMemoryBase corresponding to each
// argument, in order. If resolution or validation fails, the error status is
// returned instead.
tensorflow::Status ResolveArguments(
const tensorflow::gtl::ArraySlice<const GlobalDataHandle*> arguments,
int device_ordinal,
std::vector<perftools::gputools::DeviceMemoryBase>* argument_ptrs);

// Return a handle to a buffer large enough to hold shape, allocated
// on device_ordinal. If allocate_space_for_deep_copy, the buffer is
// large enough to hold all sub-buffers of a tuple shape, otherwise
Expand Down
5 changes: 4 additions & 1 deletion tensorflow/compiler/xla/service_interface.h
Expand Up @@ -21,7 +21,10 @@ limitations under the License.

namespace xla {

// Defines the interface for an XLA service.
// Defines the interface for an XLA service on the client side. This interface
// abstracts over the actual implementation of a service: the service can be
// local (running in the same process) or remote, in which case an RPC stub is
// used as the implementation.
class ServiceInterface {
public:
ServiceInterface() {}
Expand Down
12 changes: 12 additions & 0 deletions tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc
Expand Up @@ -486,6 +486,18 @@ XLA_TEST_F(ArrayElementwiseOpTest, CompareEqZeroElementS32s) {
ComputeAndCompareR1<bool>(&builder, {}, {});
}

// Checks element-wise float "not equal", including NaN operands on either
// side: a comparison against NaN is expected to yield true.
TEST_F(ArrayElementwiseOpTest, CompareNeF32s) {
  // The inputs contain NaNs, so fast-math has to be turned off.
  SetFastMathDisabled(true);

  ComputationBuilder builder(client_, TestName());
  auto left_operand =
      builder.ConstantR1<float>({-2.5f, 25.5f, 2.25f, NAN, 6.0f});
  auto right_operand =
      builder.ConstantR1<float>({10.0f, 25.5f, 1.0f, 10.0f, NAN});
  auto not_equal = builder.Ne(left_operand, right_operand);

  // Only the second pair (25.5 vs 25.5) compares equal; the last two pairs
  // each have a NaN operand.
  ComputeAndCompareR1<bool>(&builder, {true, false, true, true, true}, {});
}

TEST_F(ArrayElementwiseOpTest, CompareNeS32s) {
const int32 min = std::numeric_limits<int32>::min();
const int32 max = std::numeric_limits<int32>::max();
Expand Down