# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
"""data structures for the intermediate representation."""

# NOTES for developers:
# NOTE: None of these classes will have a "to_onnx" or "from_protobuf" method because
# We cannot assume that the build tool chain has protoc installed and would like
# to keep this module protobuf free. This way we separate the concerns of the IR
# and the serialization/deserialization.
#
# NOTE: Do not import pathlib in the IR. It is slow. Use os.path methods instead.

from __future__ import annotations

import abc
import contextlib
import dataclasses
import math
import mmap
import os
import sys
import textwrap
import typing
from typing import (
    AbstractSet,
    Any,
    Collection,
    Generic,
    Iterable,
    Iterator,
    OrderedDict,
    Sequence,
    Union,
)

import numpy as np

from onnxscript.ir import (
    _display,
    _enums,
    _linked_list,
    _metadata,
    _name_authority,
    _protocols,
    _type_casting,
)

if typing.TYPE_CHECKING:
    import numpy.typing as npt
    from typing_extensions import TypeGuard

TArrayCompatible = typing.TypeVar(
    "TArrayCompatible",
    bound=Union[_protocols.ArrayCompatible, _protocols.DLPackCompatible],
)

# System is little endian
_IS_LITTLE_ENDIAN = sys.byteorder == "little"
# Data types that are not supported by numpy
_NON_NUMPY_NATIVE_TYPES = frozenset(
    (
        _enums.DataType.BFLOAT16,
        _enums.DataType.FLOAT8E4M3FN,
        _enums.DataType.FLOAT8E4M3FNUZ,
        _enums.DataType.FLOAT8E5M2,
        _enums.DataType.FLOAT8E5M2FNUZ,
        _enums.DataType.INT4,
        _enums.DataType.UINT4,
    )
)


def _compatible_with_numpy(obj: Any) -> TypeGuard[_protocols.ArrayCompatible]:
    """Use this function to check if an object is compatible with numpy.

    Avoid isinstance checks with the ArrayCompatible protocol for performance reasons.
    """
    return hasattr(obj, "__array__")


def _compatible_with_dlpack(obj: Any) -> TypeGuard[_protocols.DLPackCompatible]:
    """Use this function to check if an object is compatible with DLPack.

    Avoid isinstance checks with the DLPackCompatible protocol for performance reasons.
    """
    return hasattr(obj, "__dlpack__")
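

# NOTE: Minimal usage sketch for the duck-typing helpers above (illustrative only;
# the arrays below are arbitrary examples).
def _example_compatibility_checks() -> None:
    array = np.zeros((2, 3), dtype=np.float32)
    assert _compatible_with_numpy(array)  # ndarray exposes __array__
    assert not _compatible_with_numpy([1, 2, 3])  # plain lists do not
    assert not _compatible_with_dlpack(array.tolist())  # no __dlpack__ either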
class TensorBase(abc.ABC, _protocols.TensorProtocol, _display.PrettyPrintable):
    """Shared convenience methods for classes implementing TensorProtocol."""

    __slots__ = ()

    def _printable_type_shape(self) -> str:
        """Return a string representation of the shape and data type."""
        return f"{self.dtype},{self.shape}"

    def _repr_base(self) -> str:
        """Base string for the repr method.

        Example: Tensor<FLOAT,[5,42]>
        """
        return f"{self.__class__.__name__}<{self._printable_type_shape()}>"

    @property
    def size(self) -> int:
        """The number of elements in the tensor."""
        return np.prod(self.shape.numpy())  # type: ignore[return-value,attr-defined]

    @property
    def nbytes(self) -> int:
        """The number of bytes in the tensor."""
        # Use math.ceil because when dtype is INT4, the itemsize is 0.5
        return math.ceil(self.dtype.itemsize * self.size)
    def display(self, *, page: bool | None = None) -> None:
        rich = _display.require_rich()

        if rich is None:
            status_manager = contextlib.nullcontext()
        else:
            import rich.status  # type: ignore[import-not-found, no-redef] # pylint: disable=import-outside-toplevel

            status_manager = rich.status.Status(f"Computing tensor stats for {self!r}")

        from onnxscript._thirdparty import (  # pylint: disable=import-outside-toplevel
            asciichartpy,
        )

        with status_manager:
            # Construct the text to display
            lines = []
            array = self.numpy().flatten()
            lines.append(repr(self))
            lines.append("")
            nan_values = np.isnan(array)
            nan_count = np.count_nonzero(nan_values)
            inf_count = np.count_nonzero(np.isinf(array))
            numbers = array[~nan_values]
            lines.append(
                f"Min: {np.min(numbers)}, Max: {np.max(numbers)}, "
                f"NaN count: {nan_count}, "
                f"Inf count: {inf_count}"
            )
            # Compute sparsity
            sparse_threshold = 1e-6
            # NOTE: count_nonzero() is faster than sum() for boolean arrays
            sparsity = np.count_nonzero(np.abs(array) < sparse_threshold) / array.size
            lines.append(f"Sparsity (abs<{sparse_threshold}): {sparsity:.2f}")

            # Compute histogram
            finite_numbers = array[np.isfinite(array)]
            lines.append("Histogram:")
            hist, bin_edges = np.histogram(finite_numbers, bins=80, density=False)
            lines.append(
                asciichartpy.plot(
                    hist, bin_edges=bin_edges, cfg={"height": 8, "format": "{:8.0f}"}
                )
            )

            text = "\n".join(lines)

        if rich is None:
            print(text)
        elif page:
            import rich.console  # type: ignore[import-not-found, no-redef] # pylint: disable=import-outside-toplevel

            console = rich.console.Console()
            with console.pager(styles=True):
                console.print(text)
        else:
            rich.print(text)
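

# NOTE: Minimal sketch of how ``size`` and ``nbytes`` interact for sub-byte dtypes
# (illustrative only): INT4 has an itemsize of 0.5, so ``nbytes`` rounds up via math.ceil.
def _example_nbytes_for_int4() -> None:
    packed = np.array([1, -2, 3, -4, 5], dtype=np.int8)  # int4 values stored as int8
    tensor = Tensor(packed, dtype=_enums.DataType.INT4)
    assert tensor.size == 5
    assert tensor.nbytes == 3  # ceil(5 * 0.5)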
def _check_numpy_representation_type(array: np.ndarray, dtype: _enums.DataType) -> None:
    """Check if the numpy array dtype matches the IR data type.

    When the dtype is not one of the numpy native dtypes, the value needs to be:

    - ``int8`` or ``uint8`` for int4, with the sign bit extended to 8 bits.
    - ``uint8`` for uint4.
    - ``uint8`` for 8-bit data types.
    - ``uint16`` for bfloat16.
    """
    if dtype in _NON_NUMPY_NATIVE_TYPES:
        if dtype.itemsize == 2 and array.dtype != np.uint16:
            # TODO(justinchuby): Support the storage dtypes like uint16 for bfloat16.
            raise TypeError(
                f"The numpy array dtype must be uint16 (not {array.dtype}) for IR data type {dtype}."
            )
        if dtype.itemsize == 1 and array.dtype != np.uint8:
            raise TypeError(
                f"The numpy array dtype must be uint8 (not {array.dtype}) for IR data type {dtype}."
            )
        if dtype == _enums.DataType.INT4:
            if array.dtype not in (np.int8, np.uint8):
                raise TypeError(
                    f"The numpy array dtype must be int8 or uint8 (not {array.dtype}) for IR data type {dtype}."
                )
        if dtype == _enums.DataType.UINT4:
            if array.dtype != np.uint8:
                raise TypeError(
                    f"The numpy array dtype must be uint8 (not {array.dtype}) for IR data type {dtype}."
                )
        return

    try:
        dtype_numpy = _enums.DataType.from_numpy(array.dtype)
    except TypeError as e:
        raise TypeError(
            "Failed to convert the numpy dtype to an IR data type. "
            "If you are using a non-native dtype, be sure to specify the corresponding IR dtype when "
            "creating a Tensor."
        ) from e

    if dtype_numpy != dtype:
        raise TypeError(
            f"The numpy array dtype {array.dtype} does not match the IR data type {dtype}."
        )
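

# NOTE: Minimal sketch of the storage-dtype convention enforced above (illustrative only):
# bfloat16 values must be passed as their uint16 bit patterns, not as float32.
def _example_bfloat16_storage_dtype() -> None:
    bits = np.array([16256, 49152], dtype=np.uint16)  # bit patterns of 1.0 and -2.0
    _check_numpy_representation_type(bits, _enums.DataType.BFLOAT16)  # accepted
    floats = np.array([1.0, -2.0], dtype=np.float32)
    try:
        _check_numpy_representation_type(floats, _enums.DataType.BFLOAT16)
    except TypeError:
        pass  # float32 storage is rejected for BFLOAT16
    else:
        raise AssertionError("Expected a TypeError for float32 storage")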
class Tensor(TensorBase, _protocols.TensorProtocol, Generic[TArrayCompatible]):
    """An immutable concrete tensor.

    This class is a wrapper around the raw tensor data. The raw tensor data can be a numpy array
    compatible object (e.g. ``np.ndarray``, ``torch.Tensor``) or a ``DLPack`` compatible object.
    The tensor is immutable and the data is not copied at initialization.

    To create a tensor from a numpy array::

        >>> import numpy as np
        >>> array = np.array([1, 2, 3])
        >>> tensor = Tensor(array)
        >>> # The tensor itself can be treated as a numpy array because it implements the __array__ method
        >>> np.allclose(tensor, array)
        True

    To get a numpy array from the tensor, call :meth:`numpy`. To convert the tensor
    to a byte string for serialization, call :meth:`tobytes`.

    It is recommended to check the size of the tensor first before accessing the
    underlying data, because accessing the data may be expensive and incur IO
    overhead.

    Subclass this class to efficiently handle different types of tensors from different frameworks.

    Attributes:
        name: The name of the tensor.
        shape: The shape of the tensor.
        dtype: The data type of the elements of the tensor. It is an :class:`ir.DataType` enum.
        doc_string: Documentation string.
        raw: The raw data behind this tensor. It can be anything.
        size: The number of elements in the tensor.
        nbytes: The number of bytes in the tensor.
        metadata_props: Metadata that will be serialized to the ONNX file.
        meta: Metadata store for graph transform passes.
    """

    __slots__ = (
        "_raw",
        "_dtype",
        "_shape",
        "name",
        "doc_string",
        "_metadata_props",
        "_metadata",
    )
    def __init__(
        self,
        value: TArrayCompatible,
        dtype: _enums.DataType | None = None,
        *,
        shape: Shape | None = None,
        name: str = "",
        doc_string: str | None = None,
        metadata_props: dict[str, str] | None = None,
    ) -> None:
        """Initialize a tensor.

        Args:
            value: The backing data of the tensor. It can be a numpy array compatible object or a DLPack compatible object.
                When the dtype is not one of the numpy native dtypes, the value needs
                to be ``uint8`` for 4-bit and 8-bit data types, and ``uint16`` for bfloat16
                when the value is a numpy array; ``dtype`` must be specified in this case.
            dtype: The data type of the tensor. It can be None only when value is a numpy array.
                Users are responsible for making sure the dtype matches the value when value is not a numpy array.
            shape: The shape of the tensor. If None, the shape is obtained from the value.
            name: The name of the tensor.
            doc_string: The documentation string.
            metadata_props: The metadata properties.

        Raises:
            TypeError: If the value is not a numpy array compatible or a DLPack compatible object.
            TypeError: If the value is a numpy array and the dtype is specified but does not match the dtype of the array.
            ValueError: If the shape is not specified and the value does not have a shape attribute.
            ValueError: If the dtype is not specified and the value is not a numpy array.
        """
        # NOTE: We should not do any copying here for performance reasons
        if not _compatible_with_numpy(value) and not _compatible_with_dlpack(value):
            raise TypeError(f"Expected an array compatible object, got {type(value)}")
        if shape is None:
            # Obtain the shape from the value
            if not hasattr(value, "shape"):
                raise ValueError(
                    f"Expected an object with a shape attribute, but {type(value)} does not have shape. "
                    "Please specify the shape explicitly."
                )
            self._shape = Shape(getattr(value, "shape"), frozen=True)  # noqa: B009
        else:
            self._shape = shape
            self._shape._frozen = True
        if dtype is None:
            if isinstance(value, np.ndarray):
                self._dtype = _enums.DataType.from_numpy(value.dtype)
            else:
                raise ValueError(
                    "The dtype must be specified when the value is not a numpy array."
                )
        else:
            if isinstance(value, np.ndarray):
                # Make sure the dtype matches the value
                _check_numpy_representation_type(value, dtype)
            # Users are responsible for making sure the dtype matches the value
            # when value is not a numpy array
            self._dtype = dtype
        self._raw = value
        self.name = name
        self.doc_string = doc_string
        self._metadata: _metadata.MetadataStore | None = None
        self._metadata_props = metadata_props
    def __array__(self, dtype: Any = None) -> np.ndarray:
        if isinstance(self._raw, np.ndarray) or _compatible_with_numpy(self._raw):
            return self._raw.__array__(dtype)
        assert _compatible_with_dlpack(
            self._raw
        ), f"Bug: Expected DLPack or Numpy compatible objects, got {type(self._raw)}"
        return np.from_dlpack(self._raw)

    def __dlpack__(self, *, stream: Any = None) -> Any:
        if _compatible_with_dlpack(self._raw):
            return self._raw.__dlpack__(stream=stream)
        return self.__array__().__dlpack__(stream=stream)

    def __dlpack_device__(self) -> tuple[int, int]:
        if _compatible_with_dlpack(self._raw):
            return self._raw.__dlpack_device__()
        return self.__array__().__dlpack_device__()

    def __repr__(self) -> str:
        return f"{self._repr_base()}({self._raw!r}, name={self.name!r})"

    @property
    def dtype(self) -> _enums.DataType:
        """The data type of the tensor. Immutable."""
        return self._dtype

    @property
    def shape(self) -> Shape:
        """The shape of the tensor. Immutable."""
        return self._shape

    @property
    def raw(self) -> TArrayCompatible:
        """Backing data of the tensor. Immutable."""
        return self._raw  # type: ignore[return-value]

    def numpy(self) -> np.ndarray:
        """Return the tensor as a numpy array.

        When the data type is not supported by numpy, the value is the bit representation
        of the dtype:

        - ``int8`` for int4, with the sign bit extended to 8 bits.
        - ``uint8`` for uint4.
        - ``uint8`` for 8-bit data types like float8.
        - ``uint16`` for bfloat16.
        """
        if isinstance(self._raw, np.ndarray):
            return self._raw
        # We do not cache the value to save memory
        return self.__array__()

    def tobytes(self) -> bytes:
        """Returns the value as bytes encoded in little endian.

        Override this method for more efficient serialization when the raw
        value is not a numpy array.
        """
        # TODO(justinchuby): Support DLPack
        array = self.numpy()
        if self.dtype in {_enums.DataType.INT4, _enums.DataType.UINT4}:
            # Pack the array into int4
            array = _type_casting.pack_int4(array)
        else:
            assert self.dtype.itemsize == array.itemsize, "Bug: The itemsize should match"
        if not _IS_LITTLE_ENDIAN:
            array = array.view(array.dtype.newbyteorder("<"))
        return array.tobytes()

    @property
    def metadata_props(self) -> dict[str, str]:
        if self._metadata_props is None:
            self._metadata_props = {}
        return self._metadata_props

    @property
    def meta(self) -> _metadata.MetadataStore:
        """The metadata store for intermediate analysis.

        Write to the :attr:`metadata_props` if you would like the metadata to be serialized
        to the ONNX proto.
        """
        if self._metadata is None:
            self._metadata = _metadata.MetadataStore()
        return self._metadata
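

# NOTE: Minimal usage sketch for ``Tensor`` (illustrative only; the array and name are
# arbitrary examples): the wrapper keeps a reference to the array without copying, and
# ``tobytes()`` produces little-endian bytes for serialization.
def _example_tensor_usage() -> None:
    array = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype=np.float32)
    tensor = Tensor(array, name="weight")
    assert tensor.dtype == _enums.DataType.FLOAT
    assert tensor.shape == Shape([2, 3])
    assert tensor.raw is array  # no copy is made at initialization
    assert np.allclose(tensor, array)  # __array__ makes it numpy compatible
    assert len(tensor.tobytes()) == tensor.nbytes  # 6 elements * 4 bytes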
class ExternalTensor(TensorBase, _protocols.TensorProtocol):
    """An immutable concrete tensor with its data store on disk.

    This class uses memory mapping to avoid loading the tensor into memory,
    when the data type is supported by numpy. Otherwise, the tensor is loaded
    into memory lazily when accessed.

    Calling :attr:`shape` does not incur IO. Checking shape before loading
    the tensor is recommended if IO overhead and memory usage is a concern.

    To obtain an array, call :meth:`numpy`. To obtain the bytes,
    call :meth:`tobytes`.

    The :attr:`path` can be a relative path or an absolute path.
    Serializers should handle the path correctly to conform with the ONNX spec.

    Attributes:
        path: The path to the data file. This can be a relative path or an absolute path.
        offset: The offset in bytes from the start of the file.
        length: The length of the data in bytes.
        dtype: The data type of the tensor.
        shape: The shape of the tensor.
        name: The name of the tensor. It must be specified.
        doc_string: The documentation string.
        metadata_props: The metadata properties.
    """

    __slots__ = (
        "_path",
        "_offset",
        "_length",
        "_dtype",
        "_shape",
        "name",
        "doc_string",
        "_array",
        "raw",
        "_metadata_props",
        "_metadata",
    )
    def __init__(
        self,
        path: os.PathLike | str,
        offset: int | None,
        length: int | None,
        dtype: _enums.DataType,
        *,
        shape: Shape,
        name: str,
        doc_string: str | None = None,
        metadata_props: dict[str, str] | None = None,
    ) -> None:
        self._path = path
        self._offset: int | None = offset
        self._length: int | None = length
        self._dtype: _enums.DataType = dtype
        self.name: str = name  # mutable
        self._shape: Shape = shape
        self._shape._frozen = True
        self.doc_string: str | None = doc_string  # mutable
        self._array: np.ndarray | None = None
        self.raw: mmap.mmap | None = None
        self._metadata_props = metadata_props
        self._metadata: _metadata.MetadataStore | None = None

    @property
    def path(self) -> str | os.PathLike:
        # Immutable
        return self._path

    @property
    def offset(self) -> int | None:
        # Immutable
        return self._offset

    @property
    def length(self) -> int | None:
        # Immutable
        return self._length

    @property
    def dtype(self) -> _enums.DataType:
        # Immutable
        return self._dtype

    @property
    def shape(self) -> Shape:
        # Immutable
        return self._shape
    def _load(self):
        assert self._array is None, "Bug: The array should be loaded only once."
        # Map the whole file into the memory
        # TODO(justinchuby): Verify if this would exhaust the memory address space
        with open(self._path, "rb") as f:
            self.raw = mmap.mmap(
                f.fileno(),
                0,
                access=mmap.ACCESS_READ,
            )
        # Handle the byte order correctly by always using little endian
        dt = np.dtype(self.dtype.numpy()).newbyteorder("<")
        self._array = np.frombuffer(
            self.raw, dtype=dt, offset=self.offset or 0, count=self.size
        ).reshape(self.shape.numpy())

    def __array__(self, dtype: Any = None) -> np.ndarray:
        if self._array is None:
            self._load()
        assert self._array is not None
        return self._array.__array__(dtype)

    def __dlpack__(self, *, stream: Any = None) -> Any:
        return self.numpy().__dlpack__(stream=stream)

    def __repr__(self) -> str:
        return (
            f"{self._repr_base()}(path='{self._path}', name={self.name!r}, "
            f"offset={self._offset!r}, length={self._length!r})"
        )

    def numpy(self) -> np.ndarray:
        """Return the tensor as a numpy array.

        The data will be memory mapped and will not take up physical memory space.
        """
        if self._array is None:
            self._load()
        assert self._array is not None
        return self._array

    def tobytes(self) -> bytes:
        """Return the bytes of the tensor.

        This will load the tensor into memory.
        """
        if self.raw is None:
            self._load()
        assert self.raw is not None
        offset = self._offset or 0
        length = self._length or self.nbytes
        return self.raw[offset : offset + length]

    @property
    def metadata_props(self) -> dict[str, str]:
        if self._metadata_props is None:
            self._metadata_props = {}
        return self._metadata_props

    @property
    def meta(self) -> _metadata.MetadataStore:
        """The metadata store for intermediate analysis.

        Write to the :attr:`metadata_props` if you would like the metadata to be serialized
        to the ONNX proto.
        """
        if self._metadata is None:
            self._metadata = _metadata.MetadataStore()
        return self._metadata
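

# NOTE: Minimal usage sketch for ``ExternalTensor`` (illustrative only; the file path is
# hypothetical): shape and dtype are available without IO; the file is memory mapped only
# when ``numpy()`` or ``tobytes()`` is called.
def _example_external_tensor_usage() -> None:
    tensor = ExternalTensor(
        "model_weights.bin",  # hypothetical external data file
        offset=0,
        length=4 * 256,
        dtype=_enums.DataType.FLOAT,
        shape=Shape([16, 16]),
        name="weight",
    )
    assert tensor.shape == Shape([16, 16])  # no IO performed yet
    assert tensor.nbytes == 4 * 256
    # tensor.numpy() or tensor.tobytes() would mmap "model_weights.bin" at this point.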
class StringTensor(TensorBase, _protocols.TensorProtocol):
    """Multidimensional array of strings (as binary data to match the string_data field in TensorProto)."""

    __slots__ = (
        "_raw",
        "_shape",
        "name",
        "doc_string",
        "_metadata_props",
        "_metadata",
    )

    def __init__(
        self,
        value: Sequence[bytes] | npt.NDArray[np.bytes_],
        *,
        shape: Shape | None = None,
        name: str = "",
        doc_string: str | None = None,
        metadata_props: dict[str, str] | None = None,
    ) -> None:
        """Initialize a tensor.

        Args:
            value: The backing data of the tensor. It can be a numpy array or a Sequence of bytes.
            shape: The shape of the tensor. If None, the shape is obtained from the value.
            name: The name of the tensor.
            doc_string: The documentation string.
            metadata_props: The metadata properties.
        """
        if shape is None:
            if not hasattr(value, "shape"):
                raise ValueError(
                    f"Expected an object with a shape attribute, but {type(value)} does not have shape. "
                    "Please specify the shape explicitly."
                )
            self._shape = Shape(getattr(value, "shape"), frozen=True)  # noqa: B009
        else:
            self._shape = shape
            self._shape._frozen = True
        self._raw = value
        self.name = name
        self.doc_string = doc_string
        self._metadata: _metadata.MetadataStore | None = None
        self._metadata_props = metadata_props

    def __array__(self, dtype: Any = None) -> np.ndarray:
        if isinstance(self._raw, np.ndarray):
            return self._raw
        assert isinstance(
            self._raw, Sequence
        ), f"Bug: Expected a sequence, got {type(self._raw)}"
        return np.array(self._raw, dtype=dtype).reshape(self.shape.numpy())
    def __dlpack__(self, *, stream: Any = None) -> Any:
        del stream  # unused
        raise TypeError("StringTensor does not support DLPack")

    def __dlpack_device__(self) -> tuple[int, int]:
        raise TypeError("StringTensor does not support DLPack")

    def __repr__(self) -> str:
        return f"{self._repr_base()}({self._raw!r}, name={self.name!r})"

    @property
    def dtype(self) -> _enums.DataType:
        """The data type of the tensor. Immutable."""
        return _enums.DataType.STRING

    @property
    def shape(self) -> Shape:
        """The shape of the tensor. Immutable."""
        return self._shape

    @property
    def raw(self) -> Sequence[bytes] | npt.NDArray[np.bytes_]:
        """Backing data of the tensor. Immutable."""
        return self._raw  # type: ignore[return-value]

    def numpy(self) -> npt.NDArray[np.bytes_]:
        """Return the tensor as a numpy array."""
        return self.__array__()

    def tobytes(self) -> bytes:
        raise ValueError("StringTensor does not support tobytes. Use 'string_data' instead.")

    def string_data(self) -> Sequence[bytes]:
        """Return the string data of the tensor."""
        if isinstance(self._raw, np.ndarray):
            return self._raw.flatten().tolist()
        return self._raw

    @property
    def metadata_props(self) -> dict[str, str]:
        if self._metadata_props is None:
            self._metadata_props = {}
        return self._metadata_props

    @property
    def meta(self) -> _metadata.MetadataStore:
        """The metadata store for intermediate analysis.

        Write to the :attr:`metadata_props` if you would like the metadata to be serialized
        to the ONNX proto.
        """
        if self._metadata is None:
            self._metadata = _metadata.MetadataStore()
        return self._metadata
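

# NOTE: Minimal usage sketch for ``StringTensor`` (illustrative only; the values are
# arbitrary examples): string data is kept as bytes to mirror the ``string_data`` field
# of ``TensorProto``, so ``tobytes()`` is intentionally unsupported.
def _example_string_tensor_usage() -> None:
    tensor = StringTensor([b"alpha", b"beta"], shape=Shape([2]), name="labels")
    assert tensor.dtype == _enums.DataType.STRING
    assert tensor.string_data() == [b"alpha", b"beta"]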
class SymbolicDim(_protocols.SymbolicDimProtocol, _display.PrettyPrintable):
    __slots__ = ("_value",)

    def __init__(self, value: str | None) -> None:
        """Initialize a symbolic dimension.

        Args:
            value: The value of the dimension. It should not be an int.
        """
        if isinstance(value, int):
            raise TypeError("The value of a SymbolicDim cannot be an int")
        self._value = value

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, SymbolicDim):
            return self.value == other
        return self.value == other.value

    def __hash__(self) -> int:
        return hash(self.value)

    @property
    def value(self) -> str | None:
        return self._value

    def __str__(self) -> str:
        return f"{self._value}"

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}({self._value})"
class Shape(_protocols.ShapeProtocol, _display.PrettyPrintable):
    __slots__ = ("_dims", "_frozen")

    def __init__(
        self,
        dims: Iterable[int | SymbolicDim | str | None],
        /,
        denotations: Iterable[str | None] | None = None,
        frozen: bool = False,
    ) -> None:
        """Initialize a shape.

        Args:
            dims: The dimensions of the shape. Each dimension can be an integer or a
                SymbolicDim or any Python object. When a ``dim`` is not an integer or a
                SymbolicDim, it is converted to a SymbolicDim.
            denotations: The denotations of the dimensions. If None, the denotations are not set.
                Standard denotation can optionally be used to denote tensor
                dimensions with standard semantic descriptions to ensure
                that operations are applied to the correct axis of a tensor.
                Refer to https://github.com/onnx/onnx/blob/main/docs/DimensionDenotation.md#denotation-definition
                for pre-defined dimension denotations.
            frozen: If True, the shape is immutable and cannot be modified. This
                is useful when the shape is initialized by a Tensor.
        """
        self._dims: list[int | SymbolicDim] = [
            SymbolicDim(dim) if not isinstance(dim, (int, SymbolicDim)) else dim
            for dim in dims
        ]
        self._denotations: list[str | None] = (
            list(denotations) if denotations is not None else [None] * len(self._dims)
        )
        if len(self._denotations) != len(self._dims):
            raise ValueError(
                "The number of denotations, when provided, must be equal to the number of dimensions."
            )
        self._frozen: bool = frozen

    @property
    def dims(self) -> tuple[int | SymbolicDim, ...]:
        """All dimensions in the shape.

        This property is read-only. Use __getitem__ and __setitem__ to modify the shape or create a new shape.
        """
        return tuple(self._dims)

    def rank(self) -> int:
        """The rank of the shape."""
        return len(self._dims)

    def numpy(self) -> tuple[int, ...]:
        if any(not isinstance(dim, int) for dim in self._dims):
            raise ValueError(f"Cannot convert the shape {self} to a tuple of ints")
        return tuple(dim for dim in self._dims)  # type: ignore

    def __len__(self) -> int:
        return len(self._dims)

    def __iter__(self) -> Iterator[int | SymbolicDim]:
        return iter(self._dims)

    @typing.overload
    def __getitem__(self, index: int) -> int | SymbolicDim: ...

    @typing.overload
    def __getitem__(self, index: slice) -> tuple[int | SymbolicDim, ...]: ...

    def __getitem__(self, index):
        return tuple(self._dims)[index]

    def __setitem__(self, index: int, value: int | SymbolicDim | str | None) -> None:
        """Set the dimension at the index.

        Args:
            index: The index of the dimension.
            value: The value of the dimension.

        Raises:
            TypeError: If the shape is frozen and cannot be modified.
            TypeError: If the value is not an int or SymbolicDim.
        """
        if self._frozen:
            raise TypeError("The shape is frozen and cannot be modified.")
        if isinstance(value, str) or value is None:
            value = SymbolicDim(value)
        if not isinstance(value, (int, SymbolicDim)):
            raise TypeError(f"Expected int, str, None or SymbolicDim, got '{type(value)}'")
        self._dims[index] = value

    def get_denotation(self, index: int) -> str | None:
        """Return the denotation of the dimension at the index.

        Args:
            index: The index of the dimension.

        Returns:
            The denotation of the dimension.
        """
        return self._denotations[index]

    def set_denotation(self, index: int, denotation: str | None) -> None:
        """Set the denotation of the dimension at the index.

        Args:
            index: The index of the dimension.
            denotation: The denotation of the dimension.
        """
        self._denotations[index] = denotation

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}({self._dims!r})"

    def __str__(self) -> str:
        """Return a string representation of the shape.

        E.g. [n,1,3]
        """
        return f"[{','.join([str(dim) for dim in self._dims])}]"
    def __eq__(self, other: object) -> bool:
        """Return True if the shapes are equal.

        Two shapes are equal if all their dimensions are equal.
        """
        if isinstance(other, Shape):
            return self._dims == other._dims
        if not isinstance(other, Iterable):
            return False
        return self._dims == list(other)

    def __ne__(self, other: object) -> bool:
        return not self.__eq__(other)
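

# NOTE: Minimal usage sketch for ``Shape`` (illustrative only; the dimensions are arbitrary
# examples): strings and None become SymbolicDim entries, and comparison also works
# against plain iterables.
def _example_shape_usage() -> None:
    shape = Shape(["batch", 3, 224, 224], denotations=("DATA_BATCH", None, None, None))
    assert shape.rank() == 4
    assert shape[0] == "batch" and isinstance(shape[0], SymbolicDim)
    assert shape.get_denotation(0) == "DATA_BATCH"
    shape[0] = 1  # mutable because the shape is not frozen by a tensor
    assert shape == [1, 3, 224, 224]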
def _quoted(string: str) -> str:
    """Return a quoted string.

    This function is used to quote value/node names in the IR for better readability.
    """
    return f'"{string}"'
class Node(_protocols.NodeProtocol, _display.PrettyPrintable):
    """IR Node.

    If the ``graph`` is provided, the node will be added to the graph. Otherwise,
    the user is responsible for calling ``graph.append(node)`` (or other mutation methods
    in :class:`Graph`) to add the node to the graph.

    After the node is initialized, it will add itself as a user of the input values.

    The output values of the node are created during node initialization and are immutable.
    To change the output values, create a new node and replace each of the inputs of ``output.uses()`` with
    the new output values by calling :meth:`replace_input_with` on the using nodes
    of this node's outputs.
    """

    __slots__ = (
        "_name",
        "_domain",
        "_op_type",
        "_inputs",
        "_outputs",
        "_attributes",
        "_overload",
        "_version",
        "doc_string",
        "_metadata",
        "_metadata_props",
        "_graph",
    )
    def __init__(
        self,
        domain: str,
        op_type: str,
        inputs: Iterable[Value | None],
        attributes: Iterable[Attr | RefAttr] = (),
        *,
        overload: str = "",
        num_outputs: int = 1,
        version: int | None = None,
        graph: Graph | None = None,
        name: str | None = None,
        doc_string: str | None = None,
        metadata_props: dict[str, str] | None = None,
    ):
        """Initialize a node and add it as a user of the input values.

        Args:
            domain: The domain of the operator. For onnx operators, this is an empty string.
            op_type: The name of the operator.
            inputs: The input values. When an input is None, it is an empty input.
            attributes: The attributes. RefAttr can be used only when the node is defined in a Function.
            overload: The overload name when the node is invoking a function.
            num_outputs: The number of outputs of the node.
            version: The version of the operator. If None, the version is unspecified and will follow that of the graph.
            graph: The graph that the node belongs to. If None, the node is not added to any graph.
                A `Node` must belong to zero or one graph.
            name: The name of the node. If None, the node is anonymous.
            doc_string: The documentation string.
            metadata_props: The metadata properties.
        """
        self._name = name
        self._domain: str = domain
        self._op_type: str = op_type
        # NOTE: Make inputs immutable with the assumption that they are not mutated
        # very often. This way all mutations can be tracked.
        # If necessary, we can cache the inputs and outputs as tuples.
        self._inputs: tuple[Value | None, ...] = tuple(inputs)
        # Values belong to their defining nodes. The values list is immutable
        self._outputs: tuple[Value, ...] = tuple(
            Value(self, index=i) for i in range(num_outputs)
        )
        attributes = tuple(attributes)
        if attributes and not isinstance(attributes[0], (Attr, RefAttr)):
            raise TypeError(
                f"Expected the attributes to be Attr or RefAttr, got {type(attributes[0])}. "
                "If you are copying the attributes from another node, make sure you call "
                "node.attributes.values() because it is a dictionary."
            )
        self._attributes: OrderedDict[str, Attr | RefAttr] = OrderedDict(
            (attr.name, attr) for attr in attributes
        )
        self._overload: str = overload
        # TODO(justinchuby): Potentially support a version range
        self._version: int | None = version
        self._metadata: _metadata.MetadataStore | None = None
        self._metadata_props: dict[str, str] | None = metadata_props
        self._graph: Graph | None = graph
        self.doc_string = doc_string

        # Add the node as a use of the inputs
        for i, input_value in enumerate(self._inputs):
            if input_value is not None:
                input_value._add_usage(self, i)  # pylint: disable=protected-access

        # Add the node to the graph if graph is specified
        if self._graph is not None:
            self._graph.append(self)
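
    # NOTE: Minimal construction sketch (illustrative only, kept as a comment; ``x`` stands
    # for an existing Value such as a graph input or another node's output):
    #
    #     node = Node("", "Relu", inputs=(x,), num_outputs=1, name="relu_0")
    #     (y,) = node.outputs  # output values are created by the node itself
    #     # the node registered itself as a user of ``x`` during initialization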
    def __str__(self) -> str:
        node_type_text = f"{self._domain}::{self._op_type}" + f":{self._overload}" * (
            self._overload != ""
        )
        inputs_text = (
            "("
            + ", ".join(
                [
                    (
                        f"%{_quoted(x.name) if x.name else 'anonymous:' + str(id(x))}"
                        if x is not None
                        else "None"
                    )
                    for x in self._inputs
                ]
            )
            + ")"
        )
        attributes_text = (
            (" {" + ", ".join([f"{k}={v}" for k, v in self._attributes.items()]) + "}")
            if self._attributes
            else ""
        )
        outputs_text = ", ".join(str(x) for x in self._outputs)
        return f"{outputs_text} ⬅️ {node_type_text}{inputs_text}{attributes_text}"

    def __repr__(self) -> str:
        return (
            f"{self.__class__.__name__}(name={self._name!r}, domain={self._domain!r}, "
            f"op_type={self._op_type!r}, inputs={self._inputs!r}, attributes={self._attributes!r}, "
            f"overload={self._overload!r}, outputs={self._outputs!r}, "
            f"version={self._version!r}, doc_string={self.doc_string!r})"
        )

    @property