Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow sharing of TensorBundles across big- and little-endian architectures #28490

Closed
wants to merge 12 commits into from
12 changes: 10 additions & 2 deletions tensorflow/core/util/tensor_bundle/BUILD
Expand Up @@ -18,6 +18,8 @@ load(
filegroup(
name = "mobile_srcs",
srcs = [
"byte_swap.h",
"byte_swap.cc",
"naming.cc",
"naming.h",
"tensor_bundle.cc",
Expand All @@ -32,8 +34,14 @@ alias(

cc_library(
name = "tensor_bundle",
srcs = ["tensor_bundle.cc"],
hdrs = ["tensor_bundle.h"],
srcs = [
"byte_swap.cc",
"tensor_bundle.cc",
],
hdrs = [
"byte_swap.h",
"tensor_bundle.h",
],
copts = tf_copts() + if_not_windows(["-Wno-sign-compare"]),
deps = [
":naming",
Expand Down
117 changes: 117 additions & 0 deletions tensorflow/core/util/tensor_bundle/byte_swap.cc
@@ -0,0 +1,117 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/util/tensor_bundle/byte_swap.h"
#include "tensorflow/core/lib/core/status.h"

namespace tensorflow {

// Reverses the byte order of every element of an array, in place.
//
// Args:
//   array: Start of the buffer holding the elements.
//   bytes_per_elem: Width of one element in bytes; 1, 2, 4 and 8 are handled.
//   array_len: Number of elements (not bytes) stored in `array`.
//
// Returns: Status::OK() once all elements have been swapped (1-byte elements
//   are left untouched), or an Unimplemented error for any other element
//   width.
Status ByteSwapArray(char* array, size_t bytes_per_elem, int array_len) {
  switch (bytes_per_elem) {
    case 1:
      // A single byte has no byte order, so there is nothing to swap.
      return Status::OK();

    case 2: {
      uint16_t* const words = reinterpret_cast<uint16_t*>(array);
      for (int idx = 0; idx < array_len; ++idx) {
        words[idx] = BYTE_SWAP_16(words[idx]);
      }
      return Status::OK();
    }

    case 4: {
      uint32_t* const words = reinterpret_cast<uint32_t*>(array);
      for (int idx = 0; idx < array_len; ++idx) {
        words[idx] = BYTE_SWAP_32(words[idx]);
      }
      return Status::OK();
    }

    case 8: {
      uint64_t* const words = reinterpret_cast<uint64_t*>(array);
      for (int idx = 0; idx < array_len; ++idx) {
        words[idx] = BYTE_SWAP_64(words[idx]);
      }
      return Status::OK();
    }

    default:
      return errors::Unimplemented("Byte-swapping of ", bytes_per_elem,
                                   "-byte values not supported.");
  }
}

// Byte-swaps the contents of a tensor's backing buffer in place.
//
// The dtype determines the width of the scalar values to swap. Single-byte
// dtypes and DT_STRING are returned untouched. Complex dtypes are treated as
// twice as many scalars of half the width, so the real and imaginary parts
// are swapped independently.
//
// Args:
//   t: Tensor whose buffer (as returned by tensor_data()) is modified.
//
// Returns: Status::OK() on success, or an Unimplemented error for
//   DT_RESOURCE, DT_VARIANT and any dtype not listed below.
Status ByteSwapTensor(Tensor* t) {
  size_t bytes_per_elem = 0;
  // Keep the count in 64 bits. Narrowing NumElements() to `int` (and then
  // doubling it for complex dtypes) would overflow for very large tensors and
  // swap the wrong number of values.
  int64_t num_values = t->NumElements();

  switch (t->dtype()) {
    // Types that don't need byte-swapping
    case DT_STRING:
    case DT_QINT8:
    case DT_QUINT8:
    case DT_BOOL:
    case DT_UINT8:
    case DT_INT8:
      return Status::OK();

    // 16-bit types
    case DT_BFLOAT16:
    case DT_HALF:
    case DT_QINT16:
    case DT_QUINT16:
    case DT_UINT16:
    case DT_INT16:
      bytes_per_elem = 2;
      break;

    // 32-bit types
    case DT_FLOAT:
    case DT_INT32:
    case DT_QINT32:
    case DT_UINT32:
      bytes_per_elem = 4;
      break;

    // 64-bit types
    case DT_INT64:
    case DT_DOUBLE:
    case DT_UINT64:
      bytes_per_elem = 8;
      break;

    // Complex types need special handling: each element is a pair of scalars
    // that must be swapped separately.
    case DT_COMPLEX64:
      bytes_per_elem = 4;
      num_values *= 2;
      break;

    case DT_COMPLEX128:
      bytes_per_elem = 8;
      num_values *= 2;
      break;

    // Types that ought to be supported in the future
    case DT_RESOURCE:
    case DT_VARIANT:
      return errors::Unimplemented(
          "Byte-swapping not yet implemented for tensors with dtype ",
          t->dtype());

    // Byte-swapping shouldn't make sense for other dtypes.
    default:
      return errors::Unimplemented(
          "Byte-swapping not supported for tensors with dtype ", t->dtype());
  }

  // tensor_data() exposes the buffer as const; the swap is done in place, so
  // the constness is cast away here.
  char* cursor = const_cast<char*>(t->tensor_data().data());

  // ByteSwapArray() takes its length as an `int`, so hand it the buffer in
  // pieces that are guaranteed to fit.
  constexpr int kMaxValuesPerCall = 1 << 30;
  while (num_values > 0) {
    const int chunk_len = num_values < kMaxValuesPerCall
                              ? static_cast<int>(num_values)
                              : kMaxValuesPerCall;
    TF_RETURN_IF_ERROR(ByteSwapArray(cursor, bytes_per_elem, chunk_len));
    cursor += static_cast<size_t>(chunk_len) * bytes_per_elem;
    num_values -= chunk_len;
  }
  return Status::OK();
}

} // namespace tensorflow
127 changes: 127 additions & 0 deletions tensorflow/core/util/tensor_bundle/byte_swap.h
@@ -0,0 +1,127 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_UTIL_TENSOR_BUNDLE_BYTE_SWAP_H_
#define TENSORFLOW_CORE_UTIL_TENSOR_BUNDLE_BYTE_SWAP_H_

#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/platform/byte_order.h"


// Define basic byte swapping operations.
// These operations must be macros to use compiler intrinsics.
// Note that the code here is written for portability, not speed. Byte swapping
// only happens when importing a checkpoint from one hardware architecture onto
// a different architecture. If these operations become part of a fast path,
// then the function ByteSwapArray() below should be rewritten to use
// architecture-appropriate SIMD instructions that swap multiple words at once.

// BYTE_SWAP_16/32/64(x) each evaluate to `x` with its bytes in reverse order.
// `x` is expected to be an unsigned integer of the matching width. Some of the
// definitions below expand `x` more than once, so do not pass an expression
// with side effects.

#if defined(__linux__)

// Use the Gnu byte swap macros when available. See bswap(3) for more info.
#include <byteswap.h>
#define BYTE_SWAP_16(x) bswap_16(x)
#define BYTE_SWAP_32(x) bswap_32(x)
#define BYTE_SWAP_64(x) bswap_64(x)

#elif defined(PLATFORM_WINDOWS)

// On windows, byte-swapping is in winsock.h, and winsock2.h has a version
// of htonl that can byte-swap 64-bit values.
#include <winsock2.h>
#define BYTE_SWAP_16(x) htons(x)
#define BYTE_SWAP_32(x) htonl(x)
// At the moment the 64-bit and 128-bit byte-swapping routines in Winsock2 are
// disabled in TensorFlow's standard Windows build environment, so we use
// htonl() instead of "#define BYTE_SWAP_64(x) htonll (x)".
#define BYTE_SWAP_64(x)                                   \
  ((uint64_t(htonl((x) & 0x00000000ffffffffULL)) << 32) | \
   (htonl(((x) & 0xffffffff00000000ULL) >> 32)))

#elif defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
    __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__

// On non-Linux, non-Windows, but little-endian, environments, use htonl/s,
// which byte-swap when the host byte order is little-endian. POSIX doesn't
// define a 64-bit version of these library functions, so we roll our own.
//
// The defined() checks above matter: an undefined macro evaluates to 0 inside
// #if, so without them a compiler that provides neither macro would compare
// 0 == 0 and land here even on a big-endian host, where htons/htonl do not
// swap anything.
#include <arpa/inet.h>
#define BYTE_SWAP_16(x) htons(x)
#define BYTE_SWAP_32(x) htonl(x)
#define BYTE_SWAP_64(x)                                   \
  ((uint64_t(htonl((x) & 0x00000000ffffffffULL)) << 32) | \
   (htonl(((x) & 0xffffffff00000000ULL) >> 32)))

#else  // not defined(__linux__) and not defined(PLATFORM_WINDOWS)
       // and not known to be little-endian at preprocessing time

// Fall back on a non-optimized implementation on all other targets
// (big-endian, or byte order unknown to the preprocessor). This code swaps
// one byte at a time, works regardless of host byte order, and is probably an
// order of magnitude slower.

#define BYTE_SWAP_16(x) ((((x) & 0x00ff) << 8) | (((x) & 0xff00) >> 8))

#define BYTE_SWAP_32(x)                                        \
  ((((x) & 0x000000ffU) << 24) | (((x) & 0x0000ff00U) << 8) |  \
   (((x) & 0x00ff0000U) >> 8) | (((x) & 0xff000000U) >> 24))

#define BYTE_SWAP_64(x)                      \
  ((((x) & 0x00000000000000ffULL) << 56) |   \
   (((x) & 0x000000000000ff00ULL) << 40) |   \
   (((x) & 0x0000000000ff0000ULL) << 24) |   \
   (((x) & 0x00000000ff000000ULL) << 8) |    \
   (((x) & 0x000000ff00000000ULL) >> 8) |    \
   (((x) & 0x0000ff0000000000ULL) >> 24) |   \
   (((x) & 0x00ff000000000000ULL) >> 40) |   \
   (((x) & 0xff00000000000000ULL) >> 56))

#endif  // defined(__linux__)

namespace tensorflow {

// Byte-swap an entire array of atomic C/C++ types in place.
//
// Note: When calling this function on arrays of std::complex<> types,
// multiply the number of elements by 2 and divide the bytes per element by 2.
//
// NOTE(review): for 2-, 4- and 8-byte elements the implementation accesses
// the buffer through uint16_t/uint32_t/uint64_t pointers, so `array`
// presumably needs to be aligned for that element width -- confirm with
// callers.
//
// Args:
//   array: Pointer to the beginning of the array
//   bytes_per_elem: Number of bytes in each element of the array. Supported
//     values are 1 (no-op), 2, 4 and 8.
//   array_len: Number of elements in the array
//
// Returns: Status::OK() on success, or an Unimplemented error if
//   bytes_per_elem is not one of the supported values.
//
frreiss marked this conversation as resolved.
Show resolved Hide resolved
Status ByteSwapArray(char *array, size_t bytes_per_elem, int array_len);

// Byte-swap a tensor's backing buffer in place.
//
// Tensors of single-byte dtypes and DT_STRING are left unchanged. Complex
// dtypes have their real and imaginary parts swapped as separate values.
//
// Args:
//   t: Tensor to be modified IN PLACE. Any tensors that share a backing
//      buffer with this one will also end up byte-swapped.
// Returns: Status::OK() on success, or an Unimplemented error if the tensor's
//   dtype is not supported (including DT_RESOURCE and DT_VARIANT).
// TODO(frreiss): Should this be a member of the Tensor class?
Status ByteSwapTensor(Tensor *t);

} // namespace tensorflow

#endif // TENSORFLOW_CORE_UTIL_TENSOR_BUNDLE_BYTE_SWAP_H_