-
Notifications
You must be signed in to change notification settings - Fork 7
/
holdings.go
134 lines (126 loc) · 4.1 KB
/
holdings.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
// Package kbart implements support for KBART (Knowledge Bases And Related Tools
// working group, http://www.uksg.org/kbart/) holding files
// (http://www.uksg.org/kbart/s5/guidelines/data_format).
//
// > This is a generic format that minimizes the effort involved in receiving and
// loading the data, and reduces the likelihood of errors being introduced during
// exchange. Tab-delimited formats are preferable to comma-separated formats, as
// commas appear regularly within the distributed data and, though they can be
// "commented out", doing so leaves a greater opportunity for error than the use
// of a tab-delimited format. Tab-delimited formats can be easily exported from
// all commonly used spreadsheet programs.
package kbart
import (
"io"
"regexp"
"github.com/miku/span/encoding/tsv"
"github.com/miku/span/licensing"
"github.com/miku/span/xio"
)
// Holdings contains a list of entries about licensed or available content. In
// addition to access to all entries (it is a plain slice of licensing.Entry),
// this type exposes a couple of helper methods: reading entries from
// tab-separated input, building lookup maps keyed by ISSN, publication title
// or WISO database name, and filtering entries with a predicate.
type Holdings []licensing.Entry
// ReadFrom populates the holdings from a reader. It expects tab separated
// content with a single header row and appends one entry per decoded row to h.
//
// The returned count is the number of bytes pulled from r, as tracked by the
// xio.WriteCounter attached to the reader. In line with the io.ReaderFrom
// contract, that count is also reported when decoding fails midway; entries
// decoded before the failure stay appended to h.
//
// TODO: This is not exactly what ReadFrom is for.
func (h *Holdings) ReadFrom(r io.Reader) (int64, error) {
	var wc xio.WriteCounter
	// Everything the decoder reads from r is mirrored into the counter.
	dec := tsv.NewDecoder(io.TeeReader(r, &wc))
	for {
		var entry licensing.Entry
		err := dec.Decode(&entry)
		if err == io.EOF {
			break
		}
		if err != nil {
			// Report what has been consumed so far instead of zero.
			return int64(wc.Count()), err
		}
		*h = append(*h, entry)
	}
	return int64(wc.Count()), nil
}
// SerialNumberMap creates a map from ISSN to associated licensing entries,
// using the ISSNs reported by each entry's ISSNList. This is here for
// performance mostly, so we can access relevant licensing entries by ISSN.
//
// Entries are unique per ISSN and keep the order of their first occurrence in
// the holdings, so repeated calls on the same holdings yield identical
// slices. An ISSN only becomes a key if at least one entry lists it.
//
// TODO: Do not replicate entries, just index into them.
func (h *Holdings) SerialNumberMap() map[string][]licensing.Entry {
	result := make(map[string][]licensing.Entry)
	// seen tracks, per ISSN, which entries have been emitted already.
	seen := make(map[string]map[licensing.Entry]struct{})
	for _, e := range *h {
		for _, issn := range e.ISSNList() {
			if seen[issn] == nil {
				seen[issn] = make(map[licensing.Entry]struct{})
			}
			if _, dup := seen[issn][e]; dup {
				continue
			}
			seen[issn][e] = struct{}{}
			result[issn] = append(result[issn], e)
		}
	}
	return result
}
// TitleMap maps an exact publication title to a list of entries carrying that
// title. Titles are used verbatim as keys (no normalization), so an empty
// title is a key like any other.
//
// Entries are unique per title and keep the order of their first occurrence
// in the holdings, so repeated calls on the same holdings yield identical
// slices.
func (h *Holdings) TitleMap() map[string][]licensing.Entry {
	result := make(map[string][]licensing.Entry)
	// seen tracks, per title, which entries have been emitted already.
	seen := make(map[string]map[licensing.Entry]struct{})
	for _, e := range *h {
		title := e.PublicationTitle
		if seen[title] == nil {
			seen[title] = make(map[licensing.Entry]struct{})
		}
		if _, dup := seen[title][e]; dup {
			continue
		}
		seen[title][e] = struct{}{}
		result[title] = append(result[title], e)
	}
	return result
}
// wisoDatabasePatterns capture a WISO database name (three or four uppercase
// letters) from a title URL. They are compiled once at package initialization
// rather than on every WisoDatabaseMap call.
//
// NOTE(review): the dots in the host name are unescaped and therefore match
// any character; kept as is to not change matching behavior.
var wisoDatabasePatterns = []*regexp.Regexp{
	regexp.MustCompile(`https://www.wiso-net.de/toc_list/([A-Z]{3,4})`),
	regexp.MustCompile(`https://www.wiso-net.de/.*dbShortcut=:2:2:([A-Z]{3,4})`),
	regexp.MustCompile(`https://www.wiso-net.de/.*dbShortcut=([A-Z]{3,4})`),
}

// WisoDatabaseMap derives a structure from the holdings file, that maps WISO
// database names to the associated entries, refs. #9534.
//
// The database name is taken from the first capture group of each pattern
// matching the entry's TitleURL. All patterns are tried for every entry, so an
// entry may be listed under more than one name if different patterns capture
// different names. Entries without any match are left out.
//
// Entries are unique per database name and keep the order of their first
// occurrence in the holdings, so repeated calls on the same holdings yield
// identical slices.
func (h *Holdings) WisoDatabaseMap() map[string][]licensing.Entry {
	result := make(map[string][]licensing.Entry)
	// seen tracks, per database name, which entries have been emitted already.
	seen := make(map[string]map[licensing.Entry]struct{})
	for _, e := range *h {
		for _, p := range wisoDatabasePatterns {
			matches := p.FindStringSubmatch(e.TitleURL)
			if len(matches) < 2 {
				continue
			}
			db := matches[1]
			if seen[db] == nil {
				seen[db] = make(map[licensing.Entry]struct{})
			}
			if _, dup := seen[db][e]; dup {
				continue
			}
			seen[db][e] = struct{}{}
			result[db] = append(result[db], e)
		}
	}
	return result
}
// Filter finds entries with certain characteristics: it returns every distinct
// entry for which f reports true. The predicate is invoked once per element of
// the holdings (a linear scan), so this will be very slow for KBART files with
// thousands of entries if called repeatedly.
//
// Matching entries are deduplicated and returned in the order of their first
// occurrence in the holdings. The result is nil if nothing matches.
func (h *Holdings) Filter(f func(licensing.Entry) bool) (result []licensing.Entry) {
	// seen holds the entries already added to result.
	seen := make(map[licensing.Entry]struct{})
	for _, e := range *h {
		if !f(e) {
			continue
		}
		if _, dup := seen[e]; dup {
			continue
		}
		seen[e] = struct{}{}
		result = append(result, e)
	}
	return result
}