Skip to content

Commit

Permalink
Merge pull request #15 from sile/unicode-safe-keys
Browse files Browse the repository at this point in the history
Add `PatriciaMap::insert_str()` and `PatriciaSet::insert_str()` methods
  • Loading branch information
sile committed Jan 6, 2023
2 parents 1384257 + 85a6d5a commit 81c1626
Show file tree
Hide file tree
Showing 5 changed files with 155 additions and 8 deletions.
15 changes: 7 additions & 8 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
toolchain: [stable]
# [NOTE] Clippy checks on beta and nightly fails due to a tool's bug
# toolchain: [stable, beta, nightly]
toolchain: [stable, beta, nightly]
steps:
- name: Checkout sources
uses: actions/checkout@v1
Expand All @@ -77,8 +75,9 @@ jobs:
command: fmt
args: --all -- --check

- name: Run cargo clippy
uses: actions-rs/cargo@v1
with:
command: clippy
args: --all-features --all -- -D warnings
# [NOTE] Clippy check fails due to a tool's bug
# - name: Run cargo clippy
# uses: actions-rs/cargo@v1
# with:
# command: clippy
# args: --all-features --all -- -D warnings
51 changes: 51 additions & 0 deletions src/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,37 @@ impl<V> PatriciaMap<V> {
self.tree.insert(key, value)
}

/// Inserts a key-value pair into this map like [`PatriciaMap::insert()`], but respects the UTF-8
/// character boundaries of `key` when node labels are split.
///
/// [`PatriciaMap::insert()`] treats the key as an opaque byte string, so a node label may end in
/// the middle of a multi-byte character. This method only cuts labels between characters, as the
/// example below demonstrates.
///
/// The result of the underlying tree's `insert_str` is returned unchanged: `None` if the key was
/// newly added, or the previously stored value if an existing entry was replaced.
///
/// NOTE(review): the UTF-8 label guarantee presumably only holds while every key in the map has
/// been added through this method; confirm before mixing it with [`PatriciaMap::insert()`].
///
/// # Examples
///
/// ```
/// use patricia_tree::PatriciaMap;
///
/// // Insert keys as opaque byte strings.
/// //
/// // Node labels can be arbitrary byte strings.
/// let mut map = PatriciaMap::new();
/// map.insert("🌏🗻", ()); // [240, 159, 140, 143, 240, 159, 151, 187]
/// map.insert("🌏🍔", ()); // [240, 159, 140, 143, 240, 159, 141, 148]
///
/// let first_label = map.as_ref().child().unwrap().label();
/// assert_eq!(first_label, [240, 159, 140, 143, 240, 159]);
///
/// // Insert keys as UTF-8 strings.
/// //
/// // Node labels are guaranteed to be UTF-8 byte strings.
/// let mut map = PatriciaMap::new();
/// map.insert_str("🌏🗻", ());
/// map.insert_str("🌏🍔", ());
///
/// let first_label = map.as_ref().child().unwrap().label();
/// assert_eq!(first_label, "🌏".as_bytes());
/// ```
pub fn insert_str(&mut self, key: &str, value: V) -> Option<V> {
self.tree.insert_str(key, value)
}

/// Removes a key from this map, returning the value at the key if the key was previously in it.
///
/// # Examples
Expand Down Expand Up @@ -845,4 +876,24 @@ mod tests {

assert!(vec![0 as u16, 1, 2].into_iter().eq(results.into_iter()));
}

#[test]
fn utf8_keys_works() {
    // Byte-oriented insertion: the shared prefix of the two keys ends inside the
    // second character, so the first label is not valid UTF-8.
    let mut byte_keyed = PatriciaMap::new();
    byte_keyed.insert("🌏🗻", ()); // [240,159,140,143,240,159,151,187]
    byte_keyed.insert("🌏🍔", ()); // [240,159,140,143,240,159,141,148]

    let byte_label = byte_keyed.as_ref().child().unwrap().label();
    let decoded = std::str::from_utf8(byte_label);
    assert!(decoded.is_err());
    assert_eq!(byte_label, [240, 159, 140, 143, 240, 159]);

    // Character-oriented insertion: the first label stops at the character boundary.
    let mut str_keyed = PatriciaMap::new();
    str_keyed.insert_str("🌏🗻", ());
    str_keyed.insert_str("🌏🍔", ());

    let str_label = str_keyed.as_ref().child().unwrap().label();
    let decoded = std::str::from_utf8(str_label).ok();
    assert_eq!(decoded, Some("🌏"));
}
}
58 changes: 58 additions & 0 deletions src/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -691,13 +691,71 @@ impl<V> Node<V> {
}
}

/// Inserts `key` with `value` into the subtree starting at this node, matching `key` against node
/// labels one UTF-8 character at a time instead of one byte at a time.
///
/// Returns the value taken from the node whose label path equals `key` exactly (which may itself
/// be `None` if that node held no value), and `None` in every case where a new node is created
/// or an existing node is split.
///
/// NOTE(review): because the common prefix is measured in whole characters, two sibling nodes
/// created here can share their leading byte(s) (e.g. two different 4-byte characters that both
/// start with `0xF0`). Verify that the byte-oriented lookup/removal paths cope with such siblings.
pub(crate) fn insert_str(&mut self, key: &str, value: V) -> Option<V> {
    // First bytes are compared as `Option<&u8>`; `None` (an empty slice) orders before any byte.
    if self.label().first() > key.as_bytes().first() {
        // This node's label starts with a larger byte than `key`: build a new node for `key`
        // and let `self` take over its pointer. The old node is handed to `Node::new` as the
        // last argument (presumably the sibling slot — confirm against `Node::new`).
        let this = Node {
            ptr: self.ptr,
            _value: PhantomData,
        };
        let node = Node::new(key.as_bytes(), Some(value), None, Some(this));
        self.ptr = node.ptr;
        // `self` now holds the pointer that `node` held, so `node` must not run its destructor.
        mem::forget(node);
        return None;
    }

    let common_prefix_len = self.skip_str_common_prefix(key);
    let next = &key[common_prefix_len..];
    let is_label_matched = common_prefix_len == self.label().len();
    if next.is_empty() {
        if is_label_matched {
            // `key` ends exactly at this node: swap in the new value, hand back the old one.
            let old = self.take_value();
            self.set_value(value);
            old
        } else {
            // `key` ends inside this node's label: split the label there and store the value
            // on the resulting prefix node.
            self.split_at(common_prefix_len);
            self.set_value(value);
            None
        }
    } else if is_label_matched {
        // The whole label was consumed; the remainder of `key` belongs below this node.
        if let Some(child) = self.child_mut() {
            return child.insert_str(next, value);
        }
        let child = Node::new(next.as_bytes(), Some(value), None, None);
        self.set_child(child);
        None
    } else if common_prefix_len == 0 {
        // Not even the first character matched (so `next == key`): try the next sibling, or
        // attach a new one when there is none.
        if let Some(sibling) = self.sibling_mut() {
            return sibling.insert_str(next, value);
        }
        let sibling = Node::new(next.as_bytes(), Some(value), None, None);
        self.set_sibling(sibling);
        None
    } else {
        // Partial match: split the label at the shared prefix, then insert the remainder of
        // `key` under the child that `split_at` is expected to have created.
        self.split_at(common_prefix_len);
        assert_some!(self.child_mut()).insert_str(next, value);
        None
    }
}

/// Returns how many leading bytes of this node's label are equal to the leading bytes of `key`.
///
/// The count stops at the first differing byte or when either slice runs out.
fn skip_common_prefix(&self, key: &[u8]) -> usize {
    let mut matched = 0;
    for (label_byte, key_byte) in self.label().iter().zip(key) {
        if label_byte != key_byte {
            break;
        }
        matched += 1;
    }
    matched
}
/// Returns the byte length of the longest prefix of `key`, measured in whole characters, that
/// this node's label also starts with.
///
/// Each character of `key` is compared against the label bytes at the same offset; the byte
/// offset of the first character that is not fully present in the label is returned, or
/// `key.len()` when every character matched.
fn skip_str_common_prefix(&self, key: &str) -> usize {
    let label = self.label();
    let key_bytes = key.as_bytes();
    key.char_indices()
        .find(|&(start, ch)| {
            let end = start + ch.len_utf8();
            // A label that ends before `end` cannot contain this character in full.
            label.get(start..end) != Some(&key_bytes[start..end])
        })
        .map_or(key.len(), |(start, _)| start)
}
/// Returns this node's flags, decoded from the value that `self.ptr` points at.
pub(crate) fn flags(&self) -> Flags {
    // NOTE(review): assumes `self.ptr` always points at a readable flag value — confirm against
    // the code that allocates nodes.
    let raw_bits = unsafe { *self.ptr };
    Flags::from_bits_truncate(raw_bits)
}
Expand Down
31 changes: 31 additions & 0 deletions src/set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,37 @@ impl PatriciaSet {
self.map.insert(value, ()).is_none()
}

/// Adds a value to this set like [`PatriciaSet::insert()`], but respects the UTF-8 character
/// boundaries of `value` when node labels are split.
///
/// [`PatriciaSet::insert()`] treats the value as an opaque byte string, so a node label may end
/// in the middle of a multi-byte character. This method only cuts labels between characters, as
/// the example below demonstrates.
///
/// Returns `true` when the underlying map reports no previous entry for `value` (i.e. the value
/// was newly added), and `false` otherwise.
///
/// NOTE(review): the UTF-8 label guarantee presumably only holds while every value in the set has
/// been added through this method; confirm before mixing it with [`PatriciaSet::insert()`].
///
/// # Examples
///
/// ```
/// use patricia_tree::PatriciaSet;
///
/// // Insert values as opaque byte strings.
/// //
/// // Node labels can be arbitrary byte strings.
/// let mut set = PatriciaSet::new();
/// set.insert("🌏🗻"); // [240, 159, 140, 143, 240, 159, 151, 187]
/// set.insert("🌏🍔"); // [240, 159, 140, 143, 240, 159, 141, 148]
///
/// let first_label = set.as_ref().child().unwrap().label();
/// assert_eq!(first_label, [240, 159, 140, 143, 240, 159]);
///
/// // Insert values as UTF-8 strings.
/// //
/// // Node labels are guaranteed to be UTF-8 byte strings.
/// let mut set = PatriciaSet::new();
/// set.insert_str("🌏🗻");
/// set.insert_str("🌏🍔");
///
/// let first_label = set.as_ref().child().unwrap().label();
/// assert_eq!(first_label, "🌏".as_bytes());
/// ```
pub fn insert_str(&mut self, value: &str) -> bool {
self.map.insert_str(value, ()).is_none()
}

/// Removes a value from the set. Returns `true` if the value was present in this set.
///
/// # Examples
Expand Down
8 changes: 8 additions & 0 deletions src/tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,14 @@ impl<V> PatriciaTree<V> {
None
}
}
/// Inserts `key` with `value` through the root node's character-aware `insert_str`.
///
/// Returns the value the root node reports as replaced. When nothing is reported (`None`), the
/// tree's length counter is incremented.
pub fn insert_str(&mut self, key: &str, value: V) -> Option<V> {
    let replaced = self.root.insert_str(key, value);
    if replaced.is_none() {
        // No previous value came back, so this insertion counts as a new entry.
        self.len += 1;
    }
    replaced
}
/// Looks up `key`, viewed as a byte slice, starting from the root node and returns a reference
/// to the value the root node finds for it, if any.
pub fn get<K: AsRef<[u8]>>(&self, key: K) -> Option<&V> {
    let key_bytes: &[u8] = key.as_ref();
    self.root.get(key_bytes)
}
Expand Down

0 comments on commit 81c1626

Please sign in to comment.