forked from gsiems/go-marc21
-
Notifications
You must be signed in to change notification settings - Fork 0
/
controlfield.go
140 lines (120 loc) · 4.27 KB
/
controlfield.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
// Copyright 2017-2018 Gregory Siems. All rights reserved.
// Use of this source code is governed by the MIT license
// that can be found in the LICENSE file.
package marc21
import (
"fmt"
"log"
"strings"
)
/*
https://www.loc.gov/marc/specifications/specrecstruc.html
Control fields in MARC 21 formats are assigned tags beginning with
two zeroes. They are comprised of data and a field terminator; they
do not contain indicators or subfield codes. The control number
field is assigned tag 001 and contains the control number of the
record. Each record contains only one control number field (with
tag 001), which is to be located at the base address of data.
*/
/*
http://www.loc.gov/marc/bibliographic/bdintro.html
Variable control fields - The 00X fields. These fields are
identified by a field tag in the Directory but they contain neither
indicator positions nor subfield codes. The variable control fields
are structurally different from the variable data fields. They may
contain either a single data element or a series of fixed-length
data elements identified by relative character position.
*/
// http://www.loc.gov/marc/bibliographic/bd00x.html
// http://www.loc.gov/marc/holdings/hd00x.html
// http://www.loc.gov/marc/authority/ad00x.html
// http://www.loc.gov/marc/classification/cd00x.html
// http://www.loc.gov/marc/community/ci00x.html
// extractControlfields extracts the control fields (tags beginning with
// "00") from the raw MARC record bytes.
//
// rawRec is the raw record, baseAddress is added to each directory entry's
// startingPos to locate the field inside rawRec, and dir is the list of
// directory entries to examine. Entries whose tag does not start with "00"
// are ignored.
//
// Malformed entries never abort the extraction. An entry is skipped, and
// its tag reported through the standard logger, when
//   - its byte range is empty or does not lie inside rawRec, or
//   - the last byte of its range is not the field terminator.
//
// A second (or later) 001 field is reported the same way, but is still
// included in the result. The returned error is currently always nil.
func extractControlfields(rawRec []byte, baseAddress int, dir []*directoryEntry) (cfs []*Controlfield, err error) {
	// Some records carry broken 003 and 007 entries (other tags may be
	// affected as well): the first byte of the field is already a
	// terminator although the directory claims the field is longer.
	//
	// The directory of one such record looks like:
	//   001    0    11
	//   003   11    12
	//   005   12    17
	//   007   29    18
	//   008   30    41
	// The 003/005 and 007/008 entries overlap, and neither 003 nor 007
	// has any real data. The rest of the record is usable, so the bad
	// tags are collected and logged instead of failing the whole record.
	var parseErrorTags []string
	var controlNumber string

	for _, d := range dir {
		if !strings.HasPrefix(d.tag, "00") {
			continue
		}

		start := baseAddress + d.startingPos
		end := start + d.fieldLength

		// A directory entry that is empty or points outside the record
		// would make the slice expression below panic; treat it as just
		// another parse error.
		if d.fieldLength < 1 || start < 0 || end > len(rawRec) {
			parseErrorTags = append(parseErrorTags, d.tag)
			continue
		}

		b := rawRec[start:end]
		if b[len(b)-1] != fieldTerminator {
			parseErrorTags = append(parseErrorTags, d.tag)
			continue
		}

		// Drop the trailing field terminator from the stored text.
		text := string(b[:len(b)-1])

		if d.tag == "001" {
			if controlNumber == "" {
				controlNumber = text
			} else {
				// More than one 001 field: keep the first as the control
				// number used in the log message and flag the repeat.
				parseErrorTags = append(parseErrorTags, d.tag)
			}
		}

		cfs = append(cfs, &Controlfield{Tag: d.tag, Text: text})
	}

	if len(parseErrorTags) > 0 {
		badTags := strings.Join(parseErrorTags, ", ")
		log.Printf("Control fields extraction error for ControlNumber %q (fields: %s)\n", controlNumber, badTags)
	}

	return cfs, nil
}
// String implements the fmt.Stringer interface for "pretty-printing".
// The control field is rendered as {Tag: 'Text'}.
func (cf Controlfield) String() string {
	const layout = "{%s: '%s'}"
	return fmt.Sprintf(layout, cf.Tag, cf.Text)
}
// GetControlfields returns the controlfields of the record whose tag
// matches one of the requested tags.
//
// tags is a comma-separated list; each element is compared to the field
// tag exactly as written. An empty string selects every controlfield of
// the record, in which case rec.Controlfields is returned as is.
//
// Otherwise the result is ordered by the position of the tag in tags and
// then by the position of the field in the record. Fields that share both
// tag and text with an already selected field are left out.
func (rec Record) GetControlfields(tags string) (cfs []*Controlfield) {
	if len(tags) == 0 {
		return rec.Controlfields
	}

	// Keys are "tag:text" pairs of the fields collected so far.
	seen := map[string]struct{}{}

	for _, want := range strings.Split(tags, ",") {
		for _, field := range rec.Controlfields {
			if field.Tag != want {
				continue
			}

			key := field.Tag + ":" + field.Text
			if _, dup := seen[key]; dup {
				continue
			}

			seen[key] = struct{}{}
			cfs = append(cfs, field)
		}
	}

	return cfs
}
// GetControlfield looks up a single control field by tag and returns its
// text. Only the first field carrying the tag is considered, so it is
// intended for (presumably) non-repeating tags. The empty string is
// returned when the record has no field with that tag.
func (rec Record) GetControlfield(tag string) string {
	for i := range rec.Controlfields {
		if field := rec.Controlfields[i]; field.Tag == tag {
			return field.Text
		}
	}
	return ""
}
// GetTag returns the tag of the controlfield, i.e. the value of its Tag
// field, unchanged.
func (cf Controlfield) GetTag() string {
return cf.Tag
}
// GetText returns the text of the controlfield, i.e. the value of its Text
// field, unchanged.
func (cf Controlfield) GetText() string {
return cf.Text
}