Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

handle raw types correctly in helper.make_tensor #4262

Merged
merged 3 commits into from
Jun 10, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
24 changes: 15 additions & 9 deletions onnx/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,14 +332,22 @@ def make_tensor(
if data_type == TensorProto.STRING:
assert not raw, "Can not use raw_data to store string type"

np_dtype = mapping.TENSOR_TYPE_TO_NP_TYPE[data_type]

# Check number of vals specified equals tensor size
storage_type = mapping.TENSOR_TYPE_TO_STORAGE_TENSOR_TYPE[data_type]
expected_size = 1 if (not raw) else (mapping.TENSOR_TYPE_TO_NP_TYPE[storage_type].itemsize)
# Flatten a numpy array if its rank > 1
expected_size = 1
if raw:
# NumPy doesn't have BFLOAT16. TENSOR_TYPE_TO_NP_TYPE maps it to float32,
# which has the wrong itemsize.
if data_type == TensorProto.BFLOAT16:
expected_size = 2
else:
expected_size = np_dtype.itemsize

if type(vals) is np.ndarray and len(vals.shape) > 1:
vals = vals.flatten()
for d in dims:
expected_size = expected_size * d
expected_size *= d

if len(vals) != expected_size:
raise ValueError("Number of values does not match tensor's size. Expected {}, but it is {}. "
Expand All @@ -348,14 +356,12 @@ def make_tensor(
if raw:
tensor.raw_data = vals
else:
if (data_type == TensorProto.COMPLEX64
or data_type == TensorProto.COMPLEX128):
if (data_type == TensorProto.COMPLEX64 or data_type == TensorProto.COMPLEX128):
vals = split_complex_to_pairs(vals)
# float16/bfloat16 are stored as uint16
elif data_type == TensorProto.FLOAT16:
vals = np.array(vals).astype(np.float16).view(dtype=np.uint16).flatten().tolist()
vals = np.array(vals).astype(np_dtype).view(dtype=np.uint16).flatten().tolist()
elif data_type == TensorProto.BFLOAT16:
vals = list(map(float32_to_bfloat16, np.array(vals).astype(np.float32).flatten().tolist()))
vals = list(map(float32_to_bfloat16, np.array(vals).astype(np_dtype).flatten().tolist()))
field = mapping.STORAGE_TENSOR_TYPE_TO_FIELD[
mapping.TENSOR_TYPE_TO_STORAGE_TENSOR_TYPE[data_type]]
getattr(tensor, field).extend(vals)
Expand Down
3 changes: 2 additions & 1 deletion onnx/mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@
# Numpy float32 array is only reversed to TensorProto.FLOAT
NP_TYPE_TO_TENSOR_TYPE = {v: k for k, v in TENSOR_TYPE_TO_NP_TYPE.items() if k != TensorProto.BFLOAT16}

# This map indicates what storage-type is used in the protobuf (serialized) representation for TensorProto
# This is only used to get keys into STORAGE_TENSOR_TYPE_TO_FIELD.
# TODO(https://github.com/onnx/onnx/issues/4261): Remove this.
TENSOR_TYPE_TO_STORAGE_TENSOR_TYPE = {
int(TensorProto.FLOAT): int(TensorProto.FLOAT),
int(TensorProto.UINT8): int(TensorProto.INT32),
Expand Down
27 changes: 25 additions & 2 deletions onnx/test/helper_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,29 @@ def test_make_tensor(self) -> None:
)
self.assertEqual(string_list, list(tensor.string_data))

def test_make_int8_tensor(self) -> None:
    """make_tensor round-trips int8 data through both storage paths."""
    source = np.random.randn(2, 3).astype(np.int8)

    # Typed path: vals given as a numpy array, stored in a typed proto field.
    typed = helper.make_tensor(
        name='test',
        data_type=TensorProto.INT8,
        dims=(2, 3),
        vals=source
    )
    self.assertEqual(typed.name, 'test')
    np.testing.assert_equal(source, numpy_helper.to_array(typed))

    # Raw path: the same values serialized to bytes and stored in raw_data.
    raw = helper.make_tensor(
        name='test',
        data_type=TensorProto.INT8,
        dims=(2, 3),
        vals=source.tobytes(),
        raw=True,
    )
    np.testing.assert_equal(source, numpy_helper.to_array(raw))


def test_make_float16_tensor(self) -> None:
np_array = np.random.randn(2, 3).astype(np.float16)

Expand All @@ -404,7 +427,7 @@ def test_make_float16_tensor(self) -> None:
self.assertEqual(tensor.name, 'test')
np.testing.assert_equal(np_array, numpy_helper.to_array(tensor))

def test_make_float16_tensor_with_raw(self) -> None:
def test_make_float16_tensor_raw(self) -> None:
np_array = np.random.randn(2, 3).astype(np.float16)

tensor = helper.make_tensor(
Expand Down Expand Up @@ -443,7 +466,7 @@ def test_make_bfloat16_tensor(self) -> None:
self.assertEqual(tensor.name, 'test')
np.testing.assert_equal(np_results, numpy_helper.to_array(tensor))

def test_make_bfloat16_tensor_with_raw(self) -> None:
def test_make_bfloat16_tensor_raw(self) -> None:
# numpy doesn't support bf16, so we have to compute the correct result manually
np_array = np.array([[1.0, 2.0], [3.0, 4.0], [0.099853515625, 0.099365234375], [0.0998535081744, 0.1], [np.nan, np.inf]],
dtype=np.float32)
Expand Down