/
filewriter.py
1223 lines (1017 loc) · 43.4 KB
/
filewriter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# Copyright 2008-2021 pydicom authors. See LICENSE file for details.
"""Functions related to writing DICOM data."""
from struct import pack
import sys
from typing import (
Union, BinaryIO, Any, cast, Sequence, MutableSequence, Iterable, Optional,
List
)
import warnings
import zlib
from pydicom import config
from pydicom.charset import default_encoding, convert_encodings, encode_string
from pydicom.config import have_numpy
from pydicom.dataelem import DataElement_from_raw, DataElement, RawDataElement
from pydicom.dataset import Dataset, validate_file_meta, FileMetaDataset
from pydicom.filebase import DicomFile, DicomFileLike, DicomBytesIO, DicomIO
from pydicom.fileutil import path_from_pathlike, PathType
from pydicom.multival import MultiValue
from pydicom.tag import (Tag, ItemTag, ItemDelimiterTag, SequenceDelimiterTag,
tag_in_exception)
from pydicom.uid import DeflatedExplicitVRLittleEndian, UID
from pydicom.valuerep import (
PersonName, IS, DSclass, DA, DT, TM, EXPLICIT_VR_LENGTH_32, VR,
AMBIGUOUS_VR, CUSTOMIZABLE_CHARSET_VR, validate_value
)
from pydicom.values import convert_numbers
if have_numpy:
import numpy
# Lookup sets of tags with ambiguous VRs, used when resolving the actual VR
# at write time (see _correct_ambiguous_vr_element).
#
# Tags with VR 'US or SS', resolved via Pixel Representation:
# (0018,9810) Zero Velocity Pixel Value
# (0022,1452) Mapped Pixel Value
# (0028,0104)/(0028,0105) Smallest/Largest Valid Pixel Value
# (0028,0106)/(0028,0107) Smallest/Largest Image Pixel Value
# (0028,0108)/(0028,0109) Smallest/Largest Pixel Value in Series
# (0028,0110)/(0028,0111) Smallest/Largest Image Pixel Value in Plane
# (0028,0120) Pixel Padding Value
# (0028,0121) Pixel Padding Range Limit
# (0028,1101-1103) Red/Green/Blue Palette Color Lookup Table Descriptor
# (0028,3002) LUT Descriptor
# (0040,9216)/(0040,9211) Real World Value First/Last Value Mapped
# (0060,3004)/(0060,3006) Histogram First/Last Bin Value
_us_ss_tags = {
    0x00189810, 0x00221452, 0x00280104, 0x00280105, 0x00280106,
    0x00280107, 0x00280108, 0x00280109, 0x00280110, 0x00280111,
    0x00280120, 0x00280121, 0x00281101, 0x00281102, 0x00281103,
    0x00283002, 0x00409211, 0x00409216, 0x00603004, 0x00603006,
}

# Tags with VR 'OB or OW', resolved via Waveform Bits Allocated:
# (5400,0110) Channel Minimum Value
# (5400,0112) Channel Maximum Value
# (5400,100A) Waveform Padding Data
# (5400,1010) Waveform Data
_ob_ow_tags = {0x54000110, 0x54000112, 0x5400100A, 0x54001010}

# (60xx,3000) Overlay Data - one tag per even repeating group 6000-601E
_overlay_data_tags = {x << 16 | 0x3000 for x in range(0x6000, 0x601F, 2)}
def _correct_ambiguous_vr_element(
    elem: DataElement, ds: Dataset, is_little_endian: bool
) -> DataElement:
    """Implementation for `correct_ambiguous_vr_element`.
    See `correct_ambiguous_vr_element` for description.
    """
    # 'OB or OW': 7fe0,0010 PixelData
    if elem.tag == 0x7fe00010:
        # Compressed Pixel Data
        # PS3.5 Annex A.4
        #   If encapsulated, VR is OB and length is undefined
        if elem.is_undefined_length:
            elem.VR = VR.OB
        elif ds.is_implicit_VR:
            # Non-compressed Pixel Data - Implicit Little Endian
            # PS3.5 Annex A1: VR is always OW
            elem.VR = VR.OW
        else:
            # Non-compressed Pixel Data - Explicit VR
            # PS3.5 Annex A.2:
            #   If BitsAllocated is > 8 then VR shall be OW,
            #   else may be OB or OW.
            #   If we get here, the data has not been written before
            #   or has been converted from Implicit Little Endian,
            #   so we default to OB for BitsAllocated 1 or 8
            elem.VR = VR.OW if cast(int, ds.BitsAllocated) > 8 else VR.OB

    # 'US or SS' and dependent on PixelRepresentation
    elif elem.tag in _us_ss_tags:
        # US if PixelRepresentation value is 0x0000, else SS
        # For references, see the list at
        # https://github.com/pydicom/pydicom/pull/298
        # PixelRepresentation is usually set in the root dataset, so walk
        # up through the ancestor datasets until it is found
        while (
            'PixelRepresentation' not in ds
            and ds.parent_seq is not None
            and ds.parent_seq().parent_dataset()  # type: ignore
        ):
            # Make weakrefs into strong refs (locally here) by calling () them
            ds = ds.parent_seq().parent_dataset()  # type: ignore

        # if no pixel data is present, none of these tags is used,
        # so we can just ignore a missing PixelRepresentation in this case
        if (
            'PixelRepresentation' not in ds
            and 'PixelData' not in ds
            or ds.PixelRepresentation == 0
        ):
            elem.VR = VR.US
            byte_type = 'H'
        else:
            elem.VR = VR.SS
            byte_type = 'h'

        if elem.VM == 0:
            return elem

        # Need to handle type check for elements with VM > 1
        elem_value = (
            elem.value if elem.VM == 1 else cast(Sequence[Any], elem.value)[0]
        )
        if not isinstance(elem_value, int):
            # still raw bytes - decode them with the resolved format
            elem.value = convert_numbers(
                cast(bytes, elem.value), is_little_endian, byte_type
            )

    # 'OB or OW' and dependent on WaveformBitsAllocated
    elif elem.tag in _ob_ow_tags:
        # If WaveformBitsAllocated is > 8 then OW, otherwise may be
        # OB or OW.
        # See PS3.3 C.10.9.1.
        if ds.is_implicit_VR:
            elem.VR = VR.OW
        else:
            elem.VR = (
                VR.OW if cast(int, ds.WaveformBitsAllocated) > 8 else VR.OB
            )

    # 'US or OW': 0028,3006 LUTData
    elif elem.tag == 0x00283006:
        # First value in LUT Descriptor is how many values in
        # LUTData, if there's only one value then must be US
        # As per PS3.3 C.11.1.1.1
        if cast(Sequence[int], ds.LUTDescriptor)[0] == 1:
            elem.VR = VR.US
            if elem.VM == 0:
                return elem

            elem_value = (
                elem.value if elem.VM == 1
                else cast(Sequence[Any], elem.value)[0]
            )
            if not isinstance(elem_value, int):
                # still raw bytes - decode as unsigned shorts
                elem.value = convert_numbers(
                    cast(bytes, elem.value), is_little_endian, 'H'
                )
        else:
            elem.VR = VR.OW

    # 'OB or OW': 60xx,3000 OverlayData and dependent on Transfer Syntax
    elif elem.tag in _overlay_data_tags:
        # Implicit VR must be OW, explicit VR may be OB or OW
        # as per PS3.5 Section 8.1.2 and Annex A
        elem.VR = VR.OW

    return elem
def correct_ambiguous_vr_element(
    elem: DataElement, ds: Dataset, is_little_endian: bool
) -> DataElement:
    """Attempt to correct the ambiguous VR element `elem`.

    When it's not possible to correct the VR, the element will be returned
    unchanged. Currently the only ambiguous VR elements not corrected for are
    all retired or part of DICONDE.

    If the VR is corrected and is 'US' or 'SS' then the value will be updated
    using the :func:`~pydicom.values.convert_numbers` function.

    Parameters
    ----------
    elem : dataelem.DataElement
        The element with an ambiguous VR.
    ds : dataset.Dataset
        The dataset containing `elem`.
    is_little_endian : bool
        The byte ordering of the values in the dataset.

    Returns
    -------
    dataelem.DataElement
        The corrected element

    Raises
    ------
    AttributeError
        If an attribute needed to resolve the ambiguity is missing from `ds`.
    """
    if elem.VR in AMBIGUOUS_VR:
        # convert raw data elements before handling them, and store the
        # converted element back in the dataset
        if isinstance(elem, RawDataElement):
            elem = DataElement_from_raw(elem, dataset=ds)
            ds.__setitem__(elem.tag, elem)

        try:
            _correct_ambiguous_vr_element(elem, ds, is_little_endian)
        except AttributeError as e:
            # chain the cause so the original missing-attribute detail is
            # preserved in the traceback
            raise AttributeError(
                f"Failed to resolve ambiguous VR for tag {elem.tag}: " + str(e)
            ) from e

    return elem
def correct_ambiguous_vr(ds: Dataset, is_little_endian: bool) -> Dataset:
    """Iterate through `ds` correcting ambiguous VR elements (if possible).

    When it's not possible to correct the VR, the element will be returned
    unchanged. Currently the only ambiguous VR elements not corrected for are
    all retired or part of DICONDE.

    If the VR is corrected and is 'US' or 'SS' then the value will be updated
    using the :func:`~pydicom.values.convert_numbers` function.

    Parameters
    ----------
    ds : pydicom.dataset.Dataset
        The dataset containing ambiguous VR elements.
    is_little_endian : bool
        The byte ordering of the values in the dataset.

    Returns
    -------
    ds : dataset.Dataset
        The corrected dataset

    Raises
    ------
    AttributeError
        If a tag is missing in `ds` that is required to resolve the ambiguity.
    """
    for elem in ds:
        if elem.VR == VR.SQ:
            # Recurse into each sequence item; raw data element sequences
            # may be written as-is because the transfer syntax is known
            # not to have changed at this point
            for item in cast(MutableSequence[Dataset], elem.value):
                correct_ambiguous_vr(item, is_little_endian)
            continue

        if elem.VR in AMBIGUOUS_VR:
            correct_ambiguous_vr_element(elem, ds, is_little_endian)

    return ds
def write_numbers(fp: DicomIO, elem: DataElement, struct_format: str) -> None:
    """Write a "value" of type struct_format to the dicom file.

    "Value" can be more than one number.

    Parameters
    ----------
    fp : file-like
        The file-like to write the encoded data to.
    elem : dataelem.DataElement
        The element to encode.
    struct_format : str
        The character format as used by the struct module.

    Raises
    ------
    IOError
        If the value(s) cannot be packed with `struct_format`.
    """
    endianChar = '><'[fp.is_little_endian]
    value = elem.value
    if value is None or value == "":
        return  # don't need to write anything for no or empty value

    format_string = endianChar + struct_format
    try:
        try:
            # works only if list, not if string or number
            value.append
        except AttributeError:  # is a single value - the usual case
            fp.write(pack(format_string, value))
        else:
            for val in cast(Iterable[Any], value):
                fp.write(pack(format_string, val))
    except Exception as e:
        # chain the cause so the underlying struct error remains visible
        raise IOError(
            f"{str(e)}\nfor data_element:\n{str(elem)}"
        ) from e
def write_OBvalue(fp: DicomIO, elem: DataElement) -> None:
    """Write a data_element with VR of 'other byte' (OB)."""
    data = cast(bytes, elem.value)
    fp.write(data)
    if len(data) % 2:
        # encoded values must have even length - pad with a NULL byte
        fp.write(b'\x00')
def write_OWvalue(fp: DicomIO, elem: DataElement) -> None:
    """Write a data_element with VR of 'other word' (OW).

    Note: This **does not currently do the byte swapping** for Endian state.
    """
    # XXX the raw bytes are written unchanged, with no endian correction
    word_bytes = cast(bytes, elem.value)
    fp.write(word_bytes)
def write_UI(fp: DicomIO, elem: DataElement) -> None:
    """Write a data_element with VR of 'unique identifier' (UI)."""
    # UI values are padded to even length with a NULL byte, not a space
    write_string(fp, elem, '\0')
def _is_multi_value(val: Any) -> bool:
    """Return True if `val` is a multi-value container."""
    if isinstance(val, (MultiValue, list, tuple)):
        return True
    # only touch the numpy name when numpy is actually available
    return have_numpy and isinstance(val, numpy.ndarray)
def multi_string(val: Union[str, Iterable[str]]) -> str:
    """Put a string together with delimiter if has more than one value."""
    if not _is_multi_value(val):
        return cast(str, val)
    # multi-valued: components are joined with the DICOM backslash delimiter
    return "\\".join(val)
def write_PN(
    fp: DicomIO, elem: DataElement, encodings: Optional[List[str]] = None
) -> None:
    """Write a data_element with VR of 'person name' (PN).

    Parameters
    ----------
    fp : file-like
        The file-like to write the encoded data to.
    elem : dataelem.DataElement
        The element to encode.
    encodings : list of str, optional
        The character encodings used to encode the value; defaults to the
        default (ISO IR 6) encoding if not given.
    """
    if not encodings:
        encodings = [default_encoding]

    val: List[PersonName]
    if elem.VM == 1:
        val = [cast(PersonName, elem.value)]
    else:
        val = cast(List[PersonName], elem.value)

    # NOTE: loop variable renamed - previously it shadowed the `elem`
    # parameter inside this comprehension
    enc = b'\\'.join([name.encode(encodings) for name in val])
    if len(enc) % 2 != 0:
        enc += b' '  # pad to even length

    fp.write(enc)
def write_string(fp: DicomIO, elem: DataElement, padding: str = ' ') -> None:
    """Write a single or multivalued ASCII string."""
    val = multi_string(cast(Union[str, Iterable[str]], elem.value))
    if val is None:
        return

    if len(val) % 2:
        val += padding  # trailing pad character gives an even length
    if isinstance(val, str):
        val = val.encode(default_encoding)  # type: ignore[assignment]

    fp.write(val)  # type: ignore[arg-type]
def _encode_and_validate_string(vr: str, value: str,
                                encodings: Sequence[str]) -> bytes:
    """Encode `value` using `encodings` and validate the result for `vr`."""
    result = encode_string(value, encodings)
    # validation behavior is controlled by the global writing settings
    validate_value(vr, result, config.settings.writing_validation_mode)
    return result
def write_text(
    fp: DicomIO, elem: DataElement, encodings: Optional[List[str]] = None
) -> None:
    """Write a single or multivalued text string.

    Parameters
    ----------
    fp : file-like
        The file-like to write the encoded data to.
    elem : dataelem.DataElement
        The element to encode.
    encodings : list of str, optional
        The character encodings used to encode the value; defaults to the
        default (ISO IR 6) encoding if not given.
    """
    encodings = encodings or [default_encoding]
    val = elem.value
    if val is not None:
        if _is_multi_value(val):
            val = cast(Union[Sequence[bytes], Sequence[str]], val)
            if isinstance(val[0], str):
                # encode and validate each component separately
                # (loop variable renamed: it previously shadowed `val`)
                val = b'\\'.join(
                    _encode_and_validate_string(elem.VR, v, encodings)
                    for v in cast(Sequence[str], val)
                )
            else:
                # components are already bytes - join them directly instead
                # of copying them through a redundant comprehension
                val = b'\\'.join(cast(Sequence[bytes], val))
        else:
            val = cast(Union[bytes, str], val)
            if isinstance(val, str):
                val = _encode_and_validate_string(elem.VR, val, encodings)

        if len(val) % 2 != 0:
            val = val + b' '  # pad to even length
        fp.write(val)
def write_number_string(fp: DicomIO, elem: DataElement) -> None:
    """Handle IS or DS VR - write a number stored as a string of digits."""
    # Prefer `original_string` when present so that unchanged data elements
    # are written with the exact string they were read with
    val = elem.value
    if _is_multi_value(val):
        val = cast(Union[Sequence[IS], Sequence[DSclass]], val)
        components = [
            x.original_string if hasattr(x, 'original_string') else str(x)
            for x in val
        ]
        val = "\\".join(components)
    else:
        val = cast(Union[IS, DSclass], val)
        if hasattr(val, 'original_string'):
            val = val.original_string
        else:
            val = str(val)

    if len(val) % 2 != 0:
        val += ' '  # pad to even length

    fp.write(bytes(val, default_encoding))
def _format_DA(val: Optional[DA]) -> str:
    """Return `val` formatted as a DICOM 'DA' (date) string."""
    if val is None:
        return ''

    try:
        # keep the exact string the value was originally read with
        return val.original_string
    except AttributeError:
        return val.strftime("%Y%m%d")
def write_DA(fp: DicomIO, elem: DataElement) -> None:
    """Write a data_element with VR of 'date' (DA)."""
    val = elem.value
    if isinstance(val, str):
        # already a plain string - write it unchanged
        write_string(fp, elem)
        return

    if _is_multi_value(val):
        val = "\\".join(
            x if isinstance(x, str) else _format_DA(x)
            for x in cast(Sequence[DA], val)
        )
    else:
        val = _format_DA(cast(DA, val))

    if len(val) % 2 != 0:
        val = val + ' '  # pad to even length
    if isinstance(val, str):
        val = val.encode(default_encoding)

    fp.write(val)
def _format_DT(val: Optional[DT]) -> str:
    """Return `val` formatted as a DICOM 'DT' (datetime) string."""
    if val is None:
        return ''

    try:
        # keep the exact string the value was originally read with
        return val.original_string
    except AttributeError:
        pass

    # only include the fractional seconds component when it is non-zero
    fmt = "%Y%m%d%H%M%S.%f%z" if val.microsecond > 0 else "%Y%m%d%H%M%S%z"
    return val.strftime(fmt)
def write_DT(fp: DicomIO, elem: DataElement) -> None:
    """Write a data_element with VR of 'datetime' (DT)."""
    val = elem.value
    if isinstance(val, str):
        # already a plain string - write it unchanged
        write_string(fp, elem)
        return

    if _is_multi_value(val):
        val = "\\".join(
            x if isinstance(x, str) else _format_DT(x)
            for x in cast(Sequence[DT], val)
        )
    else:
        val = _format_DT(cast(DT, val))

    if len(val) % 2 != 0:
        val = val + ' '  # pad to even length
    if isinstance(val, str):
        val = val.encode(default_encoding)

    fp.write(val)
def _format_TM(val: Optional[TM]) -> str:
    """Return `val` formatted as a DICOM 'TM' (time) string."""
    if val is None:
        return ''

    try:
        # keep the exact string the value was originally read with
        return val.original_string
    except AttributeError:
        pass

    # only include the fractional seconds component when it is non-zero
    fmt = "%H%M%S.%f" if val.microsecond > 0 else "%H%M%S"
    return val.strftime(fmt)
def write_TM(fp: DicomIO, elem: DataElement) -> None:
    """Write a data_element with VR of 'time' (TM)."""
    val = elem.value
    if isinstance(val, str):
        # already a plain string - write it unchanged
        write_string(fp, elem)
        return

    if _is_multi_value(val):
        val = "\\".join(
            x if isinstance(x, str) else _format_TM(x)
            for x in cast(Sequence[TM], val)
        )
    else:
        val = _format_TM(cast(TM, val))

    if len(val) % 2 != 0:
        val = val + ' '  # pad to even length
    if isinstance(val, str):
        val = val.encode(default_encoding)

    fp.write(val)
def write_data_element(
    fp: DicomIO,
    elem: Union[DataElement, RawDataElement],
    encodings: Optional[Union[str, List[str]]] = None
) -> None:
    """Write the data_element to file fp according to
    dicom media storage rules.

    Parameters
    ----------
    fp : DicomIO
        The file-like to encode to; its ``is_implicit_VR`` and
        ``is_little_endian`` attributes determine the encoding used.
    elem : dataelem.DataElement or dataelem.RawDataElement
        The element to encode; raw elements are written byte-for-byte.
    encodings : str or list of str, optional
        The character encodings to use for text VRs (default ISO IR 6).

    Raises
    ------
    ValueError
        If an ambiguous VR remains under an explicit VR transfer syntax, or
        if undefined-length *Pixel Data* is not encapsulated.
    NotImplementedError
        If there is no writer for the element's VR.
    """
    # Write element's tag
    fp.write_tag(elem.tag)

    # write into a buffer to avoid seeking back which can be expensive
    buffer = DicomBytesIO()
    buffer.is_little_endian = fp.is_little_endian
    buffer.is_implicit_VR = fp.is_implicit_VR

    vr: Optional[str] = elem.VR
    # ambiguous VRs are 5+ chars (e.g. 'US or SS') - they must have been
    # resolved to a 2-char VR before an explicit VR encoding is possible
    if not fp.is_implicit_VR and vr and len(vr) != 2:
        msg = (
            f"Cannot write ambiguous VR of '{vr}' for data element with "
            f"tag {repr(elem.tag)}.\nSet the correct VR before "
            f"writing, or use an implicit VR transfer syntax"
        )
        raise ValueError(msg)

    if elem.is_raw:
        elem = cast(RawDataElement, elem)
        # raw data element values can be written as they are
        buffer.write(cast(bytes, elem.value))
        is_undefined_length = elem.length == 0xFFFFFFFF
    else:
        elem = cast(DataElement, elem)
        if vr not in writers:
            raise NotImplementedError(
                f"write_data_element: unknown Value Representation '{vr}'"
            )

        encodings = encodings or [default_encoding]
        encodings = convert_encodings(encodings)
        fn, param = writers[cast(VR, vr)]
        is_undefined_length = elem.is_undefined_length
        if not elem.is_empty:
            if vr in CUSTOMIZABLE_CHARSET_VR or vr == VR.SQ:
                # text and sequence writers need the character encodings
                fn(buffer, elem, encodings=encodings)  # type: ignore[operator]
            else:
                # Many numeric types use the same writer but with
                # numeric format parameter
                if param is not None:
                    fn(buffer, elem, param)  # type: ignore[operator]
                else:
                    fn(buffer, elem)  # type: ignore[operator]

    # valid pixel data with undefined length shall contain encapsulated
    # data, e.g. sequence items - raise ValueError otherwise (see #238)
    if is_undefined_length and elem.tag == 0x7fe00010:
        # the first fragment must start with an Item tag (FFFE,E000)
        encap_item = b'\xfe\xff\x00\xe0'
        if not fp.is_little_endian:
            # Non-conformant endianness
            encap_item = b'\xff\xfe\xe0\x00'
        if not cast(bytes, elem.value).startswith(encap_item):
            raise ValueError(
                "(7FE0,0010) Pixel Data has an undefined length indicating "
                "that it's compressed, but the data isn't encapsulated as "
                "required. See pydicom.encaps.encapsulate() for more "
                "information"
            )

    value_length = buffer.tell()
    if (
        not fp.is_implicit_VR
        and vr not in EXPLICIT_VR_LENGTH_32
        and not is_undefined_length
        and value_length > 0xffff
    ):
        # see PS 3.5, section 6.2.2 for handling of this case
        msg = (
            f"The value for the data element {elem.tag} exceeds the "
            f"size of 64 kByte and cannot be written in an explicit transfer "
            f"syntax. The data element VR is changed from '{vr}' to 'UN' "
            f"to allow saving the data."
        )
        warnings.warn(msg)
        vr = VR.UN

    # write the VR for explicit transfer syntax
    if not fp.is_implicit_VR:
        vr = cast(str, vr)
        fp.write(bytes(vr, default_encoding))

        if vr in EXPLICIT_VR_LENGTH_32:
            fp.write_US(0)  # reserved 2 bytes

    if (
        not fp.is_implicit_VR
        and vr not in EXPLICIT_VR_LENGTH_32
        and not is_undefined_length
    ):
        fp.write_US(value_length)  # Explicit VR length field is 2 bytes
    else:
        # write the proper length of the data_element in the length slot,
        # unless is SQ with undefined length.
        fp.write_UL(0xFFFFFFFF if is_undefined_length else value_length)

    fp.write(buffer.getvalue())
    if is_undefined_length:
        fp.write_tag(SequenceDelimiterTag)
        fp.write_UL(0)  # 4-byte 'length' of delimiter data item
def write_dataset(
    fp: DicomIO,
    dataset: Dataset,
    parent_encoding: Union[str, List[str]] = default_encoding
) -> int:
    """Write a Dataset dictionary to the file.

    Returns
    -------
    int
        The total number of bytes written.
    """
    _harmonize_properties(dataset, fp)

    if None in (dataset.is_little_endian, dataset.is_implicit_VR):
        name = dataset.__class__.__name__
        raise AttributeError(
            f"'{name}.is_little_endian' and '{name}.is_implicit_VR' must "
            f"be set appropriately before saving"
        )

    if not dataset.is_original_encoding:
        # the encoding changed, so ambiguous VRs must be resolved now
        dataset = correct_ambiguous_vr(dataset, fp.is_little_endian)

    dataset_encoding = cast(
        Union[None, str, List[str]],
        dataset.get('SpecificCharacterSet', parent_encoding)
    )

    start_pos = fp.tell()

    # data elements must be written in ascending tag order
    for tag in sorted(dataset.keys()):
        # do not write retired Group Length (see PS3.5, 7.2)
        if tag.element == 0 and tag.group > 6:
            continue
        with tag_in_exception(tag):
            write_data_element(fp, dataset.get_item(tag), dataset_encoding)

    return fp.tell() - start_pos
def _harmonize_properties(ds: Dataset, fp: DicomIO) -> None:
    """Make sure the properties in the dataset and the file pointer are
    consistent, so the user can set both with the same effect.
    Properties set on the destination file object always have preference.

    Parameters
    ----------
    ds : dataset.Dataset
        The dataset to be written; its encoding attributes may be updated.
    fp : DicomIO
        The destination file-like; its settings win when both are set.
    """
    # ensure preference of fp over dataset: copy the fp settings (if any)
    # onto the dataset first...
    if hasattr(fp, 'is_little_endian'):
        ds.is_little_endian = fp.is_little_endian
    if hasattr(fp, 'is_implicit_VR'):
        ds.is_implicit_VR = fp.is_implicit_VR
    # ...then write the properties back to have a consistent state on both
    fp.is_implicit_VR = cast(bool, ds.is_implicit_VR)
    fp.is_little_endian = cast(bool, ds.is_little_endian)
def write_sequence(
    fp: DicomIO, elem: DataElement, encodings: List[str]
) -> None:
    """Write a sequence contained in `elem` to the file-like `fp`.

    Parameters
    ----------
    fp : file-like
        The file-like to write the encoded data to.
    elem : dataelem.DataElement
        The sequence element to write to `fp`.
    encodings : list of str
        The character encodings to use on text values.
    """
    # write_data_element has already emitted the VR ('SQ', if needed) and a
    # length placeholder; only the sequence items are encoded here
    for item in cast(Iterable[Dataset], elem.value):
        write_sequence_item(fp, item, encodings)
def write_sequence_item(
    fp: DicomIO, dataset: Dataset, encodings: List[str]
) -> None:
    """Write a `dataset` in a sequence to the file-like `fp`.

    This is similar to writing a data_element, but with a specific tag for
    Sequence Item.

    See DICOM Standard, Part 5, :dcm:`Section 7.5<sect_7.5.html>`.

    Parameters
    ----------
    fp : file-like
        The file-like to write the encoded data to.
    dataset : Dataset
        The :class:`Dataset<pydicom.dataset.Dataset>` to write to `fp`.
    encodings : list of str
        The character encodings to use on text values.
    """
    fp.write_tag(ItemTag)  # marker for start of Sequence Item
    length_location = fp.tell()  # remember where the length field goes
    fp.write_UL(0xffffffff)  # placeholder, overwritten below if defined

    write_dataset(fp, dataset, parent_encoding=encodings)

    if getattr(dataset, "is_undefined_length_sequence_item", False):
        # keep the undefined length and close with an Item Delimiter
        fp.write_tag(ItemDelimiterTag)
        fp.write_UL(0)  # 4-bytes 'length' field for delimiter item
    else:
        # be nice to readers of this file: seek back and fill in the
        # real item length over the placeholder
        end_of_item = fp.tell()
        fp.seek(length_location)
        fp.write_UL(end_of_item - length_location - 4)  # 4 is length of UL
        fp.seek(end_of_item)  # ready for next data_element
def write_UN(fp: DicomIO, elem: DataElement) -> None:
    """Write a byte string for a DataElement with VR of 'UN' (unknown)."""
    # unknown VR values are opaque and written untouched
    raw = cast(bytes, elem.value)
    fp.write(raw)
def write_ATvalue(fp: DicomIO, elem: DataElement) -> None:
    """Write a data_element tag to a file."""
    try:
        # probe for a multi-valued AT; a single Tag is int-derived and
        # not iterable. Note this would break if Tag were ever derived
        # from a true tuple rather than an int
        iter(cast(Sequence[Any], elem.value))
    except TypeError:
        # single value: make sure it is expressed as a Tag instance
        fp.write_tag(Tag(cast(int, elem.value)))
    else:
        # convert all values up-front so a bad value raises before
        # anything is written
        tags = [Tag(t) for t in cast(Sequence[int], elem.value)]
        for tag in tags:
            fp.write_tag(tag)
def write_file_meta_info(
    fp: DicomIO, file_meta: FileMetaDataset, enforce_standard: bool = True
) -> None:
    """Write the File Meta Information elements in `file_meta` to `fp`.

    If `enforce_standard` is ``True`` then the file-like `fp` should be
    positioned past the 128 byte preamble + 4 byte prefix (which should
    already have been written).

    **DICOM File Meta Information Group Elements**

    From the DICOM standard, Part 10,
    :dcm:`Section 7.1<part10/chapter_7.html#sect_7.1>`, any DICOM file shall
    contain a 128-byte preamble, a 4-byte DICOM prefix 'DICM' and (at a
    minimum) the following Type 1 DICOM Elements (from
    :dcm:`Table 7.1-1<part10/chapter_7.html#table_7.1-1>`):

    * (0002,0000) *File Meta Information Group Length*, UL, 4
    * (0002,0001) *File Meta Information Version*, OB, 2
    * (0002,0002) *Media Storage SOP Class UID*, UI, N
    * (0002,0003) *Media Storage SOP Instance UID*, UI, N
    * (0002,0010) *Transfer Syntax UID*, UI, N
    * (0002,0012) *Implementation Class UID*, UI, N

    If `enforce_standard` is ``True`` then (0002,0000) will be added/updated,
    (0002,0001) and (0002,0012) will be added if not already present and the
    other required elements will be checked to see if they exist. If
    `enforce_standard` is ``False`` then `file_meta` will be written as is
    after minimal validation checking.

    The following Type 3/1C Elements may also be present:

    * (0002,0013) *Implementation Version Name*, SH, N
    * (0002,0016) *Source Application Entity Title*, AE, N
    * (0002,0017) *Sending Application Entity Title*, AE, N
    * (0002,0018) *Receiving Application Entity Title*, AE, N
    * (0002,0102) *Private Information*, OB, N
    * (0002,0100) *Private Information Creator UID*, UI, N

    If `enforce_standard` is ``True`` then (0002,0013) will be added/updated.

    *Encoding*

    The encoding of the *File Meta Information* shall be *Explicit VR Little
    Endian*.

    Parameters
    ----------
    fp : file-like
        The file-like to write the File Meta Information to.
    file_meta : pydicom.dataset.Dataset
        The File Meta Information elements.
    enforce_standard : bool
        If ``False``, then only the *File Meta Information* elements already in
        `file_meta` will be written to `fp`. If ``True`` (default) then a DICOM
        Standards conformant File Meta will be written to `fp`.

    Raises
    ------
    ValueError
        If `enforce_standard` is ``True`` and any of the required *File Meta
        Information* elements are missing from `file_meta`, with the
        exception of (0002,0000), (0002,0001) and (0002,0012).
    ValueError
        If any non-Group 2 Elements are present in `file_meta`.
    """
    validate_file_meta(file_meta, enforce_standard)

    if enforce_standard and 'FileMetaInformationGroupLength' not in file_meta:
        # Will be updated with the actual length later
        file_meta.FileMetaInformationGroupLength = 0

    # Write the File Meta Information Group elements
    # first write into a buffer to avoid seeking back, that can be
    # expensive and is not allowed if writing into a zip file
    buffer = DicomBytesIO()
    buffer.is_little_endian = True  # File Meta is always Explicit VR LE
    buffer.is_implicit_VR = False
    write_dataset(buffer, file_meta)

    # If FileMetaInformationGroupLength is present it will be the first written
    # element and we must update its value to the correct length.
    if 'FileMetaInformationGroupLength' in file_meta:
        # Update the FileMetaInformationGroupLength value, which is the number
        # of bytes from the end of the FileMetaInformationGroupLength element
        # to the end of all the File Meta Information elements.
        # FileMetaInformationGroupLength has a VR of 'UL' and so has a value
        # that is 4 bytes fixed. The total length of when encoded as
        # Explicit VR must therefore be 12 bytes.
        file_meta.FileMetaInformationGroupLength = buffer.tell() - 12
        # re-encode just the group length element over the old value
        buffer.seek(0)
        write_data_element(buffer, file_meta[0x00020000])

    fp.write(buffer.getvalue())
def _write_dataset(
    fp: DicomIO, dataset: Dataset, write_like_original: bool
) -> None:
    """Write the Data Set to a file-like. Assumes the file meta information,
    if any, has been written.

    Parameters
    ----------
    fp : DicomIO
        The file-like to write to; its VR/endianness attributes are set from
        `dataset` before the main Data Set is written.
    dataset : dataset.Dataset
        The dataset to encode.
    write_like_original : bool
        Required to be ``True`` for any Command Set (group 0000) elements
        to be written, since mixing them with other elements is
        non-conformant.
    """
    # if we want to write with the same endianness and VR handling as
    # the read dataset we want to preserve raw data elements for
    # performance reasons (which is done by get_item);
    # otherwise we use the default converting item getter
    if dataset.is_original_encoding:
        get_item = Dataset.get_item
    else:
        get_item = Dataset.__getitem__  # type: ignore[assignment]

    # WRITE DATASET
    # The transfer syntax used to encode the dataset can't be changed
    # within the dataset.
    # Write any Command Set elements now as elements must be in tag order
    # Mixing Command Set with other elements is non-conformant so we
    # require `write_like_original` to be True
    command_set = get_item(dataset, slice(0x00000000, 0x00010000))
    if command_set and write_like_original:
        # Command Set elements are always Implicit VR Little Endian
        fp.is_implicit_VR = True
        fp.is_little_endian = True
        write_dataset(fp, command_set)

    # Set file VR and endianness. MUST BE AFTER writing META INFO (which
    # requires Explicit VR Little Endian) and COMMAND SET (which requires
    # Implicit VR Little Endian)
    fp.is_implicit_VR = cast(bool, dataset.is_implicit_VR)
    fp.is_little_endian = cast(bool, dataset.is_little_endian)

    # Write non-Command Set elements now
    write_dataset(fp, get_item(dataset, slice(0x00010000, None)))
def dcmwrite(
filename: Union[PathType, BinaryIO],
dataset: Dataset,
write_like_original: bool = True
) -> None:
"""Write `dataset` to the `filename` specified.
If `write_like_original` is ``True`` then the :class:`Dataset` will be
written as is (after minimal validation checking) and may or may not
contain all or parts of the *File Meta Information* (and hence may or
may not be conformant with the DICOM File Format).
If `write_like_original` is ``False``, `dataset` will be stored in the
:dcm:`DICOM File Format <part10/chapter_7.html>`. To do
so requires that the ``Dataset.file_meta`` attribute
exists and contains a :class:`Dataset` with the required (Type 1) *File
Meta Information Group* elements. The byte stream of the `dataset` will be
placed into the file after the DICOM *File Meta Information*.
**File Meta Information**
The *File Meta Information* consists of a 128-byte preamble, followed by
a 4 byte ``b'DICM'`` prefix, followed by the *File Meta Information Group*
elements.
**Preamble and Prefix**
The ``dataset.preamble`` attribute shall be 128-bytes long or ``None`` and
is available for use as defined by the Application Profile or specific
implementations. If the preamble is not used by an Application Profile or
specific implementation then all 128 bytes should be set to ``0x00``. The
actual preamble written depends on `write_like_original` and
``dataset.preamble`` (see the table below).
+------------------+------------------------------+
| | write_like_original |
+------------------+-------------+----------------+
| dataset.preamble | True | False |
+==================+=============+================+
| None | no preamble | 128 0x00 bytes |
+------------------+-------------+----------------+
| 128 bytes | dataset.preamble |
+------------------+------------------------------+
The prefix shall be the bytestring ``b'DICM'`` and will be written if and
only if the preamble is present.
**File Meta Information Group Elements**
The preamble and prefix are followed by a set of DICOM elements from the
(0002,eeee) group. Some of these elements are required (Type 1) while
others are optional (Type 3/1C). If `write_like_original` is ``True``
then the *File Meta Information Group* elements are all optional. See
:func:`~pydicom.filewriter.write_file_meta_info` for more information on
which elements are required.
The *File Meta Information Group* elements should be included within their
own :class:`~pydicom.dataset.Dataset` in the ``dataset.file_meta``
attribute.
If (0002,0010) *Transfer Syntax UID* is included then the user must ensure
its value is compatible with the values for the
``dataset.is_little_endian`` and ``dataset.is_implicit_VR`` attributes.
For example, if ``is_little_endian`` and ``is_implicit_VR`` are both
``True`` then the Transfer Syntax UID must be 1.2.840.10008.1.2 *Implicit
VR Little Endian*. See the DICOM Standard, Part 5,
:dcm:`Section 10<part05/chapter_10.html>` for more information on Transfer
Syntaxes.
*Encoding*
The preamble and prefix are encoding independent. The File Meta elements
are encoded as *Explicit VR Little Endian* as required by the DICOM
Standard.
**Dataset**
A DICOM Dataset representing a SOP Instance related to a DICOM Information
Object Definition. It is up to the user to ensure the `dataset` conforms
to the DICOM Standard.
*Encoding*
The `dataset` is encoded as specified by the ``dataset.is_little_endian``
and ``dataset.is_implicit_VR`` attributes. It's up to the user to ensure
these attributes are set correctly (as well as setting an appropriate
value for ``dataset.file_meta.TransferSyntaxUID`` if present).
Parameters
----------
filename : str or PathLike or file-like
Name of file or the file-like to write the new DICOM file to.
dataset : pydicom.dataset.FileDataset
Dataset holding the DICOM information; e.g. an object read with
:func:`~pydicom.filereader.dcmread`.
write_like_original : bool, optional
If ``True`` (default), preserves the following information from
the Dataset (and may result in a non-conformant file):
- preamble -- if the original file has no preamble then none will be
written.
- file_meta -- if the original file was missing any required *File
Meta Information Group* elements then they will not be added or
written.