/
charset.go
114 lines (107 loc) · 3.86 KB
/
charset.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
package dicomio
import (
"fmt"
"github.com/suyashkumar/dicom/legacy/dicomlog"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/htmlindex"
)
// CodingSystem defines how a []byte is translated into a utf8 string.
//
// It bundles up to three decoders, one per way of writing a name (see the
// field comments). Any field may be nil: ParseSpecificCharacterSet leaves all
// three nil when it is given no character set names.
type CodingSystem struct {
	// VR="PN" is the only place where we potentially use all three
	// decoders. For all other VR types, only the Ideographic decoder is
	// used. See P3.5, 6.2.
	//
	// P3.5 6.1 is supposed to define the coding systems in detail, but the
	// spec text is extremely obtuse and its meaning could not be worked out
	// even after hours of trying. This type therefore simply copies what
	// pydicom's charset.py is doing.

	// Alphabetic decodes a name written in (English) alphabets.
	Alphabetic *encoding.Decoder
	// Ideographic decodes a name written in the native writing system
	// (e.g. Kanji). It is also the decoder used for every non-PN VR.
	Ideographic *encoding.Decoder
	// Phonetic decodes a name written in hiragana and/or katakana.
	Phonetic *encoding.Decoder
}
// CodingSystemType defines where the coding system is going to be used. This
// distinction is useful in Japanese, but of little use in other languages.
type CodingSystemType int

// The values are assigned with iota in declaration order, starting at 0 for
// AlphabeticCodingSystem.
const (
	// AlphabeticCodingSystem is for writing a name in (English) alphabets.
	AlphabeticCodingSystem CodingSystemType = iota
	// IdeographicCodingSystem is for writing the name in the native writing
	// system (Kanji).
	IdeographicCodingSystem
	// PhoneticCodingSystem is for hiragana and/or katakana.
	PhoneticCodingSystem
)
// htmlEncodingNames maps a DICOM Specific Character Set defined term to the
// golang encoding/htmlindex name of the matching encoding. Where DICOM defines
// both spellings, the plain ("ISO_IR nnn") and the code-extension
// ("ISO 2022 IR nnn") terms are listed side by side.
//
// A value of "" means 7bit ascii, for which no decoder is created. No entry
// currently uses it.
//
// Every non-empty value must be a label that htmlindex.Get accepts, i.e. a
// label from the WHATWG Encoding Standard; anything else makes the lookup in
// ParseSpecificCharacterSet fail at run time.
var htmlEncodingNames = map[string]string{
	"ISO 2022 IR 6":   "iso-8859-1",
	"ISO_IR 13":       "shift_jis",
	"ISO 2022 IR 13":  "shift_jis",
	"ISO_IR 100":      "iso-8859-1",
	"ISO 2022 IR 100": "iso-8859-1",
	"ISO_IR 101":      "iso-8859-2",
	"ISO 2022 IR 101": "iso-8859-2",
	"ISO_IR 109":      "iso-8859-3",
	"ISO 2022 IR 109": "iso-8859-3",
	"ISO_IR 110":      "iso-8859-4",
	"ISO 2022 IR 110": "iso-8859-4",
	"ISO_IR 126":      "iso-ir-126",
	"ISO 2022 IR 126": "iso-ir-126",
	"ISO_IR 127":      "iso-ir-127",
	"ISO 2022 IR 127": "iso-ir-127",
	"ISO_IR 138":      "iso-ir-138",
	"ISO 2022 IR 138": "iso-ir-138",
	"ISO_IR 144":      "iso-ir-144",
	"ISO 2022 IR 144": "iso-ir-144",
	"ISO_IR 148":      "iso-ir-148",
	"ISO 2022 IR 148": "iso-ir-148",
	"ISO 2022 IR 149": "euc-kr",
	"ISO 2022 IR 159": "iso-2022-jp",
	// ISO-IR 166 is the Thai TIS 620 set. Unlike the other "iso-ir-nnn"
	// values above, "iso-ir-166" is not a WHATWG label, so the lookup would
	// fail; "tis-620" is the label that names this encoding.
	"ISO_IR 166":      "tis-620",
	"ISO 2022 IR 166": "tis-620",
	"ISO 2022 IR 87":  "iso-2022-jp",
	// Multi-byte character sets without code extensions.
	"ISO_IR 192": "utf-8",
	"GB18030":    "gb18030",
}
// ParseSpecificCharacterSet converts DICOM character encoding names, such as
// "ISO_IR 100", to golang decoders. Cf. P3.2 D.6.2.
// http://dicom.nema.org/medical/dicom/2016d/output/chtml/part02/sect_D.6.2.html
//
// encodingNames holds the character set names in the order they were given.
// The decoders are assigned by position:
//
//   - no names: all three decoders are nil (the default, 7bit ASCII, encoding);
//   - one name: the same decoder is used for Alphabetic, Ideographic and
//     Phonetic;
//   - two names: the first is Alphabetic, the second is both Ideographic and
//     Phonetic;
//   - three or more names: the first three are Alphabetic, Ideographic and
//     Phonetic, in that order. Further names are still validated but are
//     otherwise unused.
//
// A zero CodingSystem and a non-nil error are returned when a name is not a
// known DICOM character set, or when no decoder can be found for it.
func ParseSpecificCharacterSet(encodingNames []string) (CodingSystem, error) {
	// The result sets the []byte -> string decoder for the rest of the
	// file. It's sad that SpecificCharacterSet isn't part of metadata, but
	// is part of regular attrs, so we need to watch out for multiple
	// occurrences of this type of element.
	decoders := make([]*encoding.Decoder, 0, len(encodingNames))
	for _, name := range encodingNames {
		dicomlog.Vprintf(2, "dicom.ParseSpecificCharacterSet: Using coding system %s", name)
		htmlName, ok := htmlEncodingNames[name]
		if !ok {
			// TODO(saito) Support more encodings.
			// Report the name that actually failed, not the first one.
			return CodingSystem{}, fmt.Errorf("dicom.ParseSpecificCharacterSet: Unknown character set '%s'. Assuming utf-8", name)
		}
		// An empty htmlName stands for 7bit ASCII, which is represented
		// by a nil decoder.
		var dec *encoding.Decoder
		if htmlName != "" {
			enc, err := htmlindex.Get(htmlName)
			if err != nil {
				// A bad table entry must not crash the caller, so
				// this is surfaced as an ordinary error.
				return CodingSystem{}, fmt.Errorf("dicom.ParseSpecificCharacterSet: encoding name %s (for %s) not found: %v", htmlName, name, err)
			}
			dec = enc.NewDecoder()
		}
		decoders = append(decoders, dec)
	}

	switch len(decoders) {
	case 0:
		return CodingSystem{}, nil
	case 1:
		return CodingSystem{
			Alphabetic:  decoders[0],
			Ideographic: decoders[0],
			Phonetic:    decoders[0],
		}, nil
	case 2:
		return CodingSystem{
			Alphabetic:  decoders[0],
			Ideographic: decoders[1],
			Phonetic:    decoders[1],
		}, nil
	default:
		return CodingSystem{
			Alphabetic:  decoders[0],
			Ideographic: decoders[1],
			Phonetic:    decoders[2],
		}, nil
	}
}