Skip to content

Commit

Permalink
perf(rust, python): use global string cache for rev cat (#6067)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Jan 5, 2023
1 parent 41f0aa6 commit a8e7e4a
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,16 @@ impl RevMapping {
/// str to Categorical
pub fn find(&self, value: &str) -> Option<u32> {
match self {
Self::Global(map, a, _) => {
map.iter()
Self::Global(rev_map, a, id) => {
// fast path: if the global string cache is active and its uuid matches, look the value up there
if using_string_cache() {
let map = crate::STRING_CACHE.read_map();
if map.uuid == *id {
return map.get_cat(value);
}
}
rev_map
.iter()
// Safety:
// value is always within bounds
.find(|(_k, &v)| (unsafe { a.value_unchecked(v as usize) } == value))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,22 @@ impl SCacheInner {
global_idx
}

/// Read-only lookup of `s` in the cache map.
///
/// Returns `Some(idx)` with the `idx` stored in the key whose payload string
/// equals `s`, or `None` when no key matches. Nothing is inserted.
#[inline]
pub(crate) fn get_cat(&self, s: &str) -> Option<u32> {
    let hash = StringCache::get_hash_builder().hash_single(s);
    // Probe through the raw-entry API with the precomputed hash, so the
    // lookup is driven by `hash` plus the comparison closure below.
    let entry = self.map.raw_entry().from_hash(hash, |candidate| {
        // Cheap rejection first: only compare strings when the hashes agree.
        if candidate.hash != hash {
            return false;
        }
        // SAFETY (assumed, TODO confirm against `insert`): every key's `idx`
        // is expected to be a valid position in `self.payloads`.
        let stored = unsafe { self.payloads.get_unchecked(candidate.idx as usize) };
        s == stored.as_str()
    });
    entry.map(|(found, _)| found.idx)
}

#[inline]
pub(crate) fn insert(&mut self, s: &str) -> u32 {
let h = StringCache::get_hash_builder().hash_single(s);
Expand Down

0 comments on commit a8e7e4a

Please sign in to comment.