/
XmlTree.java
391 lines (362 loc) · 14.6 KB
/
XmlTree.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
/*
* #%L
* Netarchivesuite - common
* %%
* Copyright (C) 2005 - 2018 The Royal Danish Library,
* the National Library of France and the Austrian National Library.
* %%
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation, either version 2.1 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Lesser Public License for more details.
*
* You should have received a copy of the GNU General Lesser Public
* License along with this program. If not, see
* <http://www.gnu.org/licenses/lgpl-2.1.html>.
* #L%
*/
package dk.netarkivet.common.utils;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.Node;
import dk.netarkivet.common.exceptions.ArgumentNotValid;
import dk.netarkivet.common.exceptions.IllegalState;
/**
* A class that implements the StringTree<T> interface by backing it with XML. The name of each XML node corresponds to
* the identifier of a node in the tree.
*
* @param <T> The type of XmlTree
*/
@SuppressWarnings({"unchecked"})
public class XmlTree<T> implements StringTree<T> {
/** This matches string values that are valid for identifying a field. */
private static final Pattern LEGAL_FIELD_NAME = Pattern.compile("[a-zA-Z0-9.-]*");
/**
* This interface defines how the value of an xml leaf is parsed to get a value of type T.
*/
interface ValueParser<T> {
T parse(String s);
}
;
/**
* A value parser that simply converts an XML node to a string, by trimming the text contents.
*/
private static final ValueParser<String> TRIMMING_STRING_PARSER = new ValueParser<String>() {
public String parse(String s) {
return s.trim();
}
};
/** The element we are encapsulating. */
private final Element element;
/** If this tree is based on a Document, this is non-null. */
private final Document root;
/** The parser for contents of leaf nodes. */
private final ValueParser<T> parser;
/**
* Initialise a node in an XML tree.
*
* @param n The XML node for this node
* @param parser The parser that can convert a leaf node to a value of type T.
* @throws ArgumentNotValid on null argument, or if n is not of type element or document.
*/
private XmlTree(Node n, ValueParser<T> parser) {
ArgumentNotValid.checkNotNull(n, "Node n");
ArgumentNotValid.checkNotNull(parser, "ValueParser<T> parser");
if (n.getNodeType() == Node.DOCUMENT_NODE) {
root = (Document) n;
element = null;
} else if (n.getNodeType() == Node.ELEMENT_NODE) {
element = (Element) n;
root = null;
} else {
throw new ArgumentNotValid("Invalid XML node type '" + n.getNodeTypeName() + "'");
}
this.parser = parser;
}
/**
* Returns a StringTree<String> view of the given XML node.
*
* @param n A part of an XML document structure
* @return A StringTree<String> backed by the given XML part.
* @throws ArgumentNotValid on null argument.
*/
public static StringTree<String> getStringTree(Node n) {
ArgumentNotValid.checkNotNull(n, "Node n");
return new XmlTree<String>(n, TRIMMING_STRING_PARSER);
}
/**
* Returns true if this object is a leaf, and thus if getValue is legal.
*
* @return True if the implementing object is a leaf, false otherwise.
*/
public boolean isLeaf() {
return element != null && elementIsLeaf(element);
}
/**
* Get the value of a named sub-leaf.
*
* @param name Name of the sub-leaf to get the value of. These are strings, and as a shorthand may specify subtrees
* of subtrees by separating each level with '.', i.e. getSubtrees("subtree.subsubtree").
* @return The value of the named leaf of this Tree, if it exists.
* @throws IllegalState if this StringTree does not have exactly one leaf sub-node with the given name.
* @throws ArgumentNotValid if argument is null or empty.
*/
public T getValue(String name) {
ArgumentNotValid.checkNotNullOrEmpty(name, "String name");
Element n = selectSingleNode(name);
if (!elementIsLeaf(n)) {
throw new IllegalState("Subtree '" + name + "' is not a leaf");
}
return parseLeafNode(n);
}
/**
* Get the value of a leaf.
*
* @return The value of this Tree, if it is a leaf.
* @throws IllegalState if this Tree is a node.
*/
public T getValue() {
if (!isLeaf()) {
throw new IllegalState("Node is not text, but "
+ (element != null ? element.getNodeTypeName() : root.getNodeTypeName()));
}
return parseLeafNode(element);
}
/**
* Get the only subtree with the given name.
*
* @param name The name of the subtree. These are strings, and as a shorthand may specify subtrees of subtrees by
* separating each level with '.', i.e. getSubtrees("subtree.subsubtree").
* @return The single subtree with the given name.
* @throws IllegalState if this object is a leaf, or there is not exactly one subtree with the given name.
* @throws ArgumentNotValid if argument is null or empty.
*/
public StringTree<T> getSubTree(String name) {
ArgumentNotValid.checkNotNullOrEmpty(name, "String name");
if (isLeaf()) {
throw new IllegalState("Cannot find subtrees in a leaf");
}
final Element n = selectSingleNode(name);
return new XmlTree<T>(n, parser);
}
/**
* Get the named subtrees.
*
* @param name The name of the subtrees. These are strings, and as a shorthand may specify subtrees of subtrees by
* separating each level with '.', i.e. getSubtrees("subtree.subsubtree").
* @return All subtrees with the given name, or an empty list for none.
* @throws IllegalState if this object is a leaf.
* @throws ArgumentNotValid if argument is null or empty.
*/
public List<StringTree<T>> getSubTrees(String name) {
ArgumentNotValid.checkNotNullOrEmpty(name, "String name");
if (isLeaf()) {
throw new IllegalState("Cannot find subtrees in a leaf");
}
List<Element> nodeList = selectMultipleNodes(name);
List<StringTree<T>> resultList = new ArrayList<StringTree<T>>(nodeList.size());
for (Element n : nodeList) {
resultList.add(new XmlTree<T>(n, parser));
}
return resultList;
}
/**
* Get a map of all the children of this node.
*
* @return Map of children of this node.
* @throws IllegalState if this object is a leaf.
*/
public Map<String, List<StringTree<T>>> getChildMultimap() {
if (isLeaf()) {
throw new IllegalState("Cannot find subtrees in a leaf");
}
Map<String, List<StringTree<T>>> children = new HashMap<String, List<StringTree<T>>>();
List<Element> nodeList = getChildNodes();
if (nodeList != null) {
for (Element n : nodeList) {
List<StringTree<T>> childList = children.get(n.getName());
if (childList == null) {
childList = new ArrayList<StringTree<T>>();
children.put(n.getName(), childList);
}
childList.add(new XmlTree<T>(n, parser));
}
}
return children;
}
/**
* Get a map of all direct subtrees, assuming that all subtrees are uniquely named.
*
* @return Map of all subtrees.
* @throws IllegalState if this object is a leaf, or if the subtrees are not uniquely named.
*/
public Map<String, StringTree<T>> getChildMap() {
if (isLeaf()) {
throw new IllegalState("Cannot find subtrees in a leaf");
}
Map<String, List<StringTree<T>>> map = getChildMultimap();
return convertMultimapToMap(map);
}
/**
* Get a multimap of the names and values of all subtrees, assuming that all subtrees are leafs.
*
* @return Multimap from subtree names to values of their leaves.
* @throws ArgumentNotValid if this object is not a node, or if any of its children are not leaves.
*/
public Map<String, List<T>> getLeafMultimap() {
if (isLeaf()) {
throw new IllegalState("Cannot find subtrees in a leaf");
}
Map<String, List<T>> children = new HashMap<String, List<T>>();
List<Element> nodeList = getChildNodes();
if (nodeList != null) {
for (Element n : nodeList) {
if (!elementIsLeaf(n)) {
throw new IllegalState("Child " + n.getName() + " is not a leaf");
}
List<T> childList = children.get(n.getName());
if (childList == null) {
childList = new ArrayList<T>();
children.put(n.getName(), childList);
}
childList.add(parseLeafNode(n));
}
}
return children;
}
/**
* Get a map of the names and values of all subtrees, assuming that all subtrees are leafs and are uniquely named.
*
* @return Map from subtree names to values of their leaves.
* @throws IllegalState if this object is a leaf or if the subtrees are not uniquely named, or if any of its
* children are not leaves.
*/
public Map<String, T> getLeafMap() {
if (isLeaf()) {
throw new IllegalState("Cannot find subtrees in a leaf");
}
Map<String, List<T>> map = getLeafMultimap();
return convertMultimapToMap(map);
}
/**
* Select a list of nodes from the current tree, resolving dotted paths. Currently, if any part of the dotted path
* has multiple subtrees with the given name, this method will find all that match.
*
* @param name Name of a node (tree or leaf), possibly via a dotted path
* @return A list of nodes found via the given name from the root of this tree, mapping names to nodes.
* @throws ArgumentNotValid on null or empty argument, or if the path is illegal
*/
private List<Element> selectMultipleNodes(String name) {
ArgumentNotValid.checkNotNullOrEmpty(name, "String name");
ArgumentNotValid.checkTrue(LEGAL_FIELD_NAME.matcher(name).matches(),
"Name must contain only alphanumeric, dash and period");
// parts contains the dotted path, split into individual components
String[] parts = name.split("\\.");
// elements contains the root elements to find children from.
List<Element> elements = new ArrayList<Element>();
int i = 0;
if (root != null) {
if (parts[i].equals(root.getRootElement().getName())) {
elements.add(root.getRootElement());
++i;
} // We allow zero-element results here, so a no-match is okay.
} else {
elements.add(element);
}
for (; i < parts.length; ++i) {
// newElements contains the children matching the name from the
// dotted path
List<Element> newElements = new ArrayList<Element>();
final String subname = parts[i];
for (Element n : elements) {
List<Element> childList = n.elements(subname);
if (childList != null) {
newElements.addAll(childList);
}
}
// loop to find children of the found nodes with the next name in
// the dotted paths
elements = newElements;
}
return elements;
}
/**
* Select a single node from the current tree, resolving dotted paths.
*
* @param name Name of a node (tree or leaf), possibly via a dotted path
* @return A single node found via the given name from the root of this tree.
* @throws ArgumentNotValid on null or empty argument, or if the path is illegal
* @throws IllegalState if there is no such node, or if there is more than one candidate for the node.
*/
private Element selectSingleNode(String name) {
ArgumentNotValid.checkNotNullOrEmpty(name, "String name");
List<Element> elements = selectMultipleNodes(name);
if (elements.size() == 0) {
throw new IllegalState("No subtree with name '" + name + "'");
}
if (elements.size() > 1) {
throw new IllegalState("Multiple subtrees with name '" + name + "'");
}
return elements.get(0);
}
/**
* Parse the contents of this leaf node according to the parser.
*
* @param e A leaf node to parse.
* @return The parsed contents of the node.
*/
private T parseLeafNode(Element e) {
return parser.parse(e.getText());
}
/**
* Returns true if the given node is a leaf (i.e. has only text).
*
* @param e A node to check
* @return True if the given node is a leaf.
*/
private boolean elementIsLeaf(Element e) {
return e.isTextOnly();
}
/**
* Get the nodes that are children of the current node. This works for both Documents and Elements.
*
* @return List of Element objects that are children of the current node.
*/
private List<Element> getChildNodes() {
List<Element> nodeList;
if (root != null) {
nodeList = new ArrayList<Element>(1);
nodeList.add(root.getRootElement());
} else {
nodeList = (List<Element>) element.elements();
}
return nodeList;
}
/**
* Convert a multimap into a map, checking that there is only one value for each key.
*
* @param map The multimap to convert from
* @return The map to convert to.
* @throws IllegalState if the map does not contain exactly one value for each key.
*/
private static <K, V> Map<K, V> convertMultimapToMap(Map<K, List<V>> map) {
Map<K, V> result = new HashMap<K, V>();
for (Map.Entry<K, List<V>> entry : map.entrySet()) {
if (entry.getValue().size() != 1) {
throw new IllegalState("More than one value for key '" + entry.getKey() + "' found");
}
result.put(entry.getKey(), entry.getValue().get(0));
}
return result;
}
}