This repository has been archived by the owner on Sep 3, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 7
/
leaf.rs
314 lines (287 loc) · 10.1 KB
/
leaf.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
use super::leaf_generated::leaf::{self, LeafEntry};
use super::Entry;
use crate::dag::Chunk;
use flatbuffers::FlatBufferBuilder;
use std::cmp::Ordering;
// Leaf is a leaf level node in the map tree structure.
// It wraps a chunk containing a flatbuffer and exposes handy
// utilities to inspect the buffer more easily.
#[derive(Debug, PartialEq)]
pub struct Leaf {
chunk: Chunk,
}
#[derive(Debug, Eq, PartialEq)]
pub enum LoadError {
Corrupt(&'static str),
}
impl Leaf {
pub fn chunk(&self) -> &Chunk {
&self.chunk
}
pub fn load(chunk: Chunk) -> Result<Leaf, LoadError> {
// Validate at load-time so we can assume data is valid thereafter.
let root = leaf::get_root_as_leaf(chunk.data());
let entries = root
.entries()
.ok_or(LoadError::Corrupt("missing entries"))?;
let mut prev: Option<&[u8]> = None;
for e in entries {
if prev.is_some() {
if prev == e.key() {
return Err(LoadError::Corrupt("duplicate key"));
}
if prev > e.key() {
return Err(LoadError::Corrupt("unsorted key"));
}
}
if e.key().is_none() {
return Err(LoadError::Corrupt("missing key"));
}
if e.val().is_none() {
return Err(LoadError::Corrupt("missing val"));
}
prev = e.key();
}
Ok(Leaf { chunk })
}
pub fn new<'a>(entries: impl Iterator<Item = Entry<'a>>) -> Leaf {
let mut builder = FlatBufferBuilder::default();
let entries = entries
.map(|e| {
let builder = &mut builder;
let args = &leaf::LeafEntryArgs {
key: Some(builder.create_vector(e.key)),
val: Some(builder.create_vector(e.val)),
};
leaf::LeafEntry::create(builder, args)
})
.collect::<Vec<flatbuffers::WIPOffset<leaf::LeafEntry>>>();
let entries = builder.create_vector(&entries);
let root = leaf::Leaf::create(
&mut builder,
&leaf::LeafArgs {
entries: Some(entries),
},
);
builder.finish(root, None);
Leaf {
chunk: Chunk::new(builder.collapse(), &[]),
}
}
pub fn iter(s: Option<&Self>) -> impl Iterator<Item = Entry<'_>> {
let root = s.map(|leaf| leaf::get_root_as_leaf(leaf.chunk.data()));
LeafIter {
fb_iter: root.and_then(|r| r.entries()).map(|e| e.iter()),
}
}
pub fn len(&self) -> usize {
let root = leaf::get_root_as_leaf(self.chunk.data());
// load validates that entries is not None.
root.entries().unwrap().len()
}
pub fn get_entry_by_index(&self, idx: usize) -> LeafEntry {
let root = leaf::get_root_as_leaf(self.chunk.data());
root.entries().unwrap().get(idx)
}
// binary_search is not implemented in such a way that it can be reused for
// flatbuffers::Vector (AFAICT). Copy the code and modify it to work on
// flatbuffers::Vector.
// TODO(arv): License
pub fn binary_search(&self, key: &[u8]) -> Result<usize, usize> {
let root = leaf::get_root_as_leaf(self.chunk.data());
let v = match root.entries() {
None => return Err(0),
Some(v) => v,
};
let mut size = v.len();
if size == 0 {
return Err(0);
}
let mut base = 0usize;
while size > 1 {
let half = size / 2;
let mid = base + half;
// mid is always in [0, size), that means mid is >= 0 and < size.
// mid >= 0: by definition
// mid < size: mid = size / 2 + size / 4 + size / 8 ...
let entry = v.get(mid);
// No way that key can be None.
let cmp = entry.key().unwrap().cmp(key);
base = if cmp == Ordering::Greater { base } else { mid };
size -= half;
}
// base is always in [0, size) because base <= mid.
let entry = v.get(base);
let cmp = entry.key().unwrap().cmp(key);
if cmp == Ordering::Equal {
Ok(base)
} else {
Err(base + (cmp == Ordering::Less) as usize)
}
}
}
// LeafIter simplifies iteration over the leaf entries. Unfortunately it needs
// to be generic because the type returned by flatbuffer::Vector<T>::iter() is
// private. The only way to encapsulate that type appears to be by making it
// generic.
#[allow(dead_code)]
struct LeafIter<'a, FBIter: Iterator<Item = leaf::LeafEntry<'a>>> {
fb_iter: Option<FBIter>,
}
impl<'a, FBIter: Iterator<Item = leaf::LeafEntry<'a>>> Iterator for LeafIter<'a, FBIter> {
type Item = Entry<'a>;
fn next(&mut self) -> Option<Entry<'a>> {
match self.fb_iter.as_mut() {
None => None,
Some(fb_iter) => fb_iter.next().map(|e| e.into()),
}
}
}
impl<'a> From<leaf::LeafEntry<'a>> for Entry<'a> {
fn from(leaf_entry: leaf::LeafEntry<'a>) -> Self {
// load() validates that key and val are present.
Entry {
key: leaf_entry.key().unwrap(),
val: leaf_entry.val().unwrap(),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn try_from() {
fn test(input: Chunk, expected: Entry) {
let leaf = Leaf::load(input).unwrap();
let actual = Leaf::iter(Some(&leaf)).next().unwrap();
assert_eq!(actual, expected);
}
// zero-length keys and vals are supported.
test(
make_leaf(vec![vec![].into(), vec![].into()].into()),
Entry {
key: vec![].as_slice().into(),
val: vec![].as_slice().into(),
},
);
// normal non-zero keys and values too.
test(
make_leaf(vec![vec![1].into(), vec![1].into()].into()),
Entry {
key: vec![1].as_slice().into(),
val: vec![1].as_slice().into(),
},
);
test(
make_leaf(vec![vec![1, 2].into(), vec![3, 4].into()].into()),
Entry {
key: vec![1, 2].as_slice().into(),
val: vec![3, 4].as_slice().into(),
},
);
}
#[test]
fn leaf_iter() {
fn test(chunk: Option<Chunk>, expected: Vec<Entry>) {
let leaf = chunk.map(|chunk| Leaf::load(chunk).unwrap());
let it = Leaf::iter(leaf.as_ref());
assert_eq!(it.collect::<Vec<Entry>>(), expected);
}
// None is flattened to empty iterator.
test(None, vec![]);
test(make_leaf(vec![].into()).into(), vec![]);
// Single entry
test(
make_leaf(vec![vec![1].into(), vec![2].into()].into()).into(),
vec![Entry {
key: vec![1].as_slice(),
val: vec![2].as_slice(),
}],
);
// multiple entries
test(
make_leaf(vec![vec![].into(), vec![].into(), vec![1].into(), vec![1].into()].into())
.into(),
vec![
Entry {
key: vec![].as_slice(),
val: vec![].as_slice(),
},
Entry {
key: vec![1].as_slice(),
val: vec![1].as_slice(),
},
],
);
}
#[test]
fn round_trip() {
let k0 = vec![0];
let k1 = vec![1];
let expected = vec![Entry { key: &k0, val: &k0 }, Entry { key: &k1, val: &k1 }];
let expected = Leaf::new(expected.into_iter());
let actual = Leaf::load(Chunk::read(
expected.chunk.hash().to_string(),
expected.chunk.data().to_vec(),
None,
))
.unwrap();
assert_eq!(expected, actual);
assert_eq!(2 as usize, Leaf::iter((&actual).into()).count());
}
#[test]
fn load() {
fn test(kv: Option<Vec<Option<Vec<u8>>>>, expected: Result<Leaf, LoadError>) {
let chunk = make_leaf(kv);
let actual = Leaf::load(chunk);
assert_eq!(expected, actual);
}
test(None, Err(LoadError::Corrupt("missing entries")));
test(
vec![None, None].into(),
Err(LoadError::Corrupt("missing key")),
);
test(
vec![vec![].into(), None].into(),
Err(LoadError::Corrupt("missing val")),
);
test(
vec![vec![1].into(), vec![].into(), vec![1].into(), vec![].into()].into(),
Err(LoadError::Corrupt("duplicate key")),
);
test(
vec![vec![1].into(), vec![].into(), vec![0].into(), vec![].into()].into(),
Err(LoadError::Corrupt("unsorted key")),
);
}
fn make_leaf(kv: Option<Vec<Option<Vec<u8>>>>) -> Chunk {
let mut builder = FlatBufferBuilder::default();
let mut entries: Option<
flatbuffers::WIPOffset<
flatbuffers::Vector<flatbuffers::ForwardsUOffset<leaf::LeafEntry>>,
>,
> = None;
if let Some(kv) = kv {
let mut temp = Vec::<flatbuffers::WIPOffset<leaf::LeafEntry>>::new();
for i in 0..kv.len() / 2 {
let key = &kv[i * 2];
let val = &kv[i * 2 + 1];
let mut args = leaf::LeafEntryArgs {
key: None,
val: None,
};
if let Some(key) = key {
args.key = builder.create_vector(key.as_slice()).into();
}
if let Some(val) = val {
args.val = builder.create_vector(val.as_slice()).into();
}
temp.push(leaf::LeafEntry::create(&mut builder, &args));
}
entries = builder.create_vector(temp.as_slice()).into();
}
let leaf = leaf::Leaf::create(&mut builder, &leaf::LeafArgs { entries });
builder.finish(leaf, None);
Chunk::new(builder.collapse(), vec![].as_slice())
}
}