/
RevTree.java
311 lines (277 loc) · 11.4 KB
/
RevTree.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
/* Copyright (c) 2012-2016 Boundless and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* https://www.eclipse.org/org/documents/edl-v10.html
*
* Contributors:
* Gabriel Roldan (Boundless) - initial implementation
*/
package org.locationtech.geogig.model;
import java.util.Collections;
import java.util.Comparator;
import java.util.Optional;
import java.util.SortedSet;
import java.util.function.BiConsumer;
import java.util.function.Consumer;
import org.locationtech.geogig.storage.ObjectStore;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSortedMap;
import com.google.common.collect.Iterables;
/**
* A GeoGig revision tree is an immutable data structure that represents a
* <a href="https://en.wikipedia.org/wiki/Merkle_tree">Merkle Tree</a> (or Hash Tree), holding
* {@link TYPE#FEATURE feature} or {@link TYPE#TREE tree} {@link Node node} pointers to children
* {@link RevTree} or {@link RevFeature} objects in an {@link ObjectStore}, and {@link Bucket}
* pointers to point to subtrees it has been split into when a certain threshold on the allowed max
* capacity of it's feature or tree lists has been surpased.
* <p>
* The {@link #getId() id} of a {@code RevTree} is computed out of its directly contained
* {@link RevTree#trees() tree}, {@link RevTree#features() feature}, and {@link RevTree#getBuckets()
* bucket} pointers as mandated by {@link HashObjectFunnels}.
* <p>
* Two trees that contain the same values hash out to the same {@link ObjectId}, as long as they
* have been built using the same "clustering strategy". That is, the same limits to split a tree
* into subtrees (buckets) and the same ordering of it's nodes have been applied. The two trees,
* hence, will only differ in what have actually changed, as only the changed buckets, or the tree
* itself if it holds nodes, will hash out to a different value, conforming a DAG (Directed Acyclic
* Graph).
* <p>
* This data structure does not impose a split threshold nor a default ordering for nodes, allowing
* the builder to create trees based on different criteria, depending on whether it'll be used to
* represent a tree's canonical form, or an index of some form.
* <p>
* The <b>Canonical </b> structure of a {@code RevTree} is mandated by the
* {@link CanonicalNodeNameOrder} {@link Comparator}, which also defines the per depth level
* {@link CanonicalNodeNameOrder#normalizedSizeLimit(int) split threshold}.
* <p>
* The Canonical representation of a tree is the one that is used to store {@link RevTree} instances
* in the repository, and is optimized for fast computation of differences between any two given
* trees, as such is perhaps the most important operation in geogig.
* <p>
* Every geogig {@link RevCommit commit} points to a root tree, which in turn points to feature
* trees, which in turn point to features and/or other, nested feature trees.
* <p>
* A "feature tree" node will have a {@link Node#getMetadataId() metadata id} that's a pointer to
* the layer's default {@link RevFeatureType Feature Type}. Feature nodes that are children of such
* trees that have a {@link ObjectId#NULL} metadata id are deemed to pertain the their parent tree's
* feature type. The feature nodes that have a non {@link ObjectId#NULL} metadata id are deemed
* pointers to {@link RevFeature features} that comply to a different feature type than the tree's
* default.
* <p>
* That is, the object model allows for nested feature trees (a.k.a layers, feature classes), and
* for a single feature tree to contain features of mixed feature types.
*
* @see Node
* @see Bucket
* @see CanonicalNodeOrder
* @see CanonicalNodeNameOrder
*
* @since 1.0
*/
public interface RevTree extends RevObject {
/**
* The empty tree object id, as results from calling {@link HashObjectFunnels#hashTree
* HashObjectFunnels.hashTree} with empty arguments.
*/
ObjectId EMPTY_TREE_ID = HashObjectFunnels.hashTree(null, null, (Iterable<Bucket>) null);
/**
* A "null object" to represent the empty tree
*/
RevTree EMPTY = new RevTree() {
@Override
public ObjectId getId() {
return EMPTY_TREE_ID;
}
@Override
public ImmutableList<Node> trees() {
return ImmutableList.of();
}
@Override
public long size() {
return 0L;
}
@Override
public int numTrees() {
return 0;
}
@Override
public boolean isEmpty() {
return true;
}
@Override
public ImmutableList<Node> features() {
return ImmutableList.of();
}
@Override
public ImmutableSortedMap<Integer, Bucket> buckets() {
return ImmutableSortedMap.of();
}
@Override
public boolean equals(Object o) {
if (!(o instanceof RevObject)) {
return false;
}
return getId().equals(((RevObject) o).getId());
}
@Override
public String toString() {
return "EMPTY TREE[" + getId() + "]";
}
@Override
public SortedSet<Bucket> getBuckets() {
return Collections.emptySortedSet();
}
};
/**
* @return {@link TYPE#TREE}
*/
@Override
public default TYPE getType() {
return TYPE.TREE;
}
/**
* @return total number of features, including the size of nested trees
*/
public long size();
/**
* <p>
* A {@code RevTree} with an empty {@link #trees() tree list} may still have direct children
* trees if it has been split into bucket trees, hence {@code numTrees()} may be greater than
* zero while the trees list is empty.
*
* @return number of direct child trees
*/
public int numTrees();
/**
* Convenience method to figure out if a tree is completely empty.
* <p>
* A tree is completely empty not only if {@code size() == 0}, but also if it has no child tree
* nodes at all, since {@code size()} could be zero but the tree might have pointers to other
* empty trees.
*
* @apiNote an empty tree id MUST be {@link #EMPTY_TREE_ID}
*
* @return {@code true} if the tree is completely empty (i.e. has no feature nor child tree
* nodes)
*/
public default boolean isEmpty() {
boolean empty = treesSize() == 0 && featuresSize() == 0 && bucketsSize() == 0;
if (empty) {
Preconditions.checkState(size() == 0L);
Preconditions.checkState(EMPTY_TREE_ID.equals(getId()));
}
return empty;
}
/**
* The {@link TYPE#TREE tree} nodes held directly by this tree.
* <p>
* A {@code RevTree} with an empty tree list may still have direct children trees if it has been
* split into bucket trees, hence {@link #numTrees()} may be greater than zero while this list
* is empty.
*
* @apiNote the returned list does not contain {@code null} objects
*/
public ImmutableList<Node> trees();
/**
* @return the number of {@link Node}s in the {@link #trees} property
*/
public default int treesSize() {
return trees().size();
}
public default Node getTree(int index) {
return trees().get(index);
}
/**
* Performs the given action for each element of the {@link #trees} collection respecting its
* iteration order
*/
public default void forEachTree(Consumer<Node> consumer) {
trees().forEach(consumer);
}
/**
* The {@link TYPE#FEATURE feature} nodes held directly by this tree.
* <p>
* A {@code RevTree} instance may hold as many feature nodes as allowed by it's builder before
* being split into bucket trees.
* <p>
* Also, depending on the "clustering strategy" the tree builder employs, a {@code RevTree}
* instance may have both direct feature nodes and {@link #buckets}.
* <p>
* In the former case, where it has features and no buckets, it's a leaf tree, meaning it can't
* be split into buckets because it hasn't overcome the builder's imposed split threshold.
* <p>
* In the later, when it has both features and buckets, it's called a "mixed" tree.
* <p>
* The {@link CanonicalNodeNameOrder canonical} representation of a {@link RevTree} does not
* allow for "mixed trees" (that is, a tree is wither a leaf tree or a pure bucket tree), but
* other kinds of trees could be created that allow for mixed states, such as a tree that's
* built using a quad-tree where nodes whose {@link Node#bounds() bounds} don't fall into any of
* the quadrant buckets is left in the parent's tree.
*
* @apiNote the returned list does not contain {@code null} objects
*/
public ImmutableList<Node> features();
/**
* @return the number of {@link Node}s in the {@link #features} property
*/
public default int featuresSize() {
return features().size();
}
public default Node getFeature(int index) {
return features().get(index);
}
/**
* Performs the given action for each element of the {@link #features} collection respecting its
* iteration order
*/
public default void forEachFeature(Consumer<Node> consumer) {
features().forEach(consumer);
}
/**
* The mapping of (zero-based) bucket index to the bucket (pointer to {@link RevTree} instance)
* this revtree has been split into.
* <p>
* {@code RevTree} doesn't impose into how many subtrees a tree instance is split into. Instead,
* that limit is to be consistently imposed by the tree builder.
* <p>
* For example, the canonical representation of a tree, as mandated by
* {@link CanonicalNodeNameOrder} defines a split factor based on the bucket's
* {@link CanonicalNodeNameOrder#maxBucketsForLevel(int) depth), while a tree built to represent
* a quad-tree would always be split into four subtrees to represent the next set of quadrants.
*
* @apiNote the returned map does not contain {@code null} keys nor values
* @deprecated
*/
public ImmutableSortedMap<Integer, Bucket> buckets();
public Iterable<Bucket> getBuckets();
/**
* @return the number of buckets in the {@link #buckets} property
*/
public default int bucketsSize() {
return Iterables.size(getBuckets());
}
/**
* Performs the given action for each element of the {@link #buckets} collection respecting its
* iteration order, which is the order of the bucket index.
*
* @deprecated
* @param consumer a consumer that accepts a tuple given by the bucket index and the bucket
* itself
*/
public default void forEachBucket(BiConsumer<Integer, Bucket> consumer) {
getBuckets().forEach(b -> consumer.accept(Integer.valueOf(b.getIndex()), b));
}
public default void forEachBucket(Consumer<Bucket> consumer) {
getBuckets().forEach(consumer);
}
public default Optional<Bucket> getBucket(int bucketIndex) {
for (Bucket b : getBuckets()) {
if (bucketIndex == b.getIndex()) {
return Optional.of(b);
}
}
return Optional.empty();
}
}