Skip to content

Commit

Permalink
improve from dictionary -> categorical (#3996)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Jul 13, 2022
1 parent 4d541a0 commit f6d2c66
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 6 deletions.
28 changes: 28 additions & 0 deletions polars/polars-core/src/chunked_array/logical/categorical/from.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use super::*;
use crate::use_string_cache;
use arrow::array::DictionaryArray;
use arrow::datatypes::IntegerType;
use polars_arrow::compute::cast::cast;
Expand Down Expand Up @@ -80,3 +81,30 @@ impl From<&CategoricalChunked> for DictionaryArray<i64> {
}
}
}

impl CategoricalChunked {
    /// Build a [`CategoricalChunked`] named `name` from dictionary `keys` and the
    /// string `values` those keys index into.
    ///
    /// When the string cache is not in use, the keys are cloned into a single chunk
    /// and a clone of `values` becomes the local reverse mapping. When the string
    /// cache is in use, every key is resolved to its string and the result is fed
    /// through a [`CategoricalChunkedBuilder`].
    ///
    /// # Safety
    /// The caller must ensure that every index value in `keys` is within the bounds
    /// of the `values` length.
    pub(crate) unsafe fn from_keys_and_values(
        name: &str,
        keys: &PrimitiveArray<u32>,
        values: &Utf8Array<i64>,
    ) -> Self {
        if !use_string_cache() {
            // No string cache: the keys can be used as they are, with `values`
            // acting as the local reverse mapping.
            return Self::from_chunks_original(
                name,
                vec![Box::new(keys.clone())],
                RevMapping::Local(values.clone()),
            );
        }

        // TODO: this path can probably be made faster, since the keys and the
        // values are already available separately.
        let mut cat_builder = CategoricalChunkedBuilder::new(name, keys.len());
        // Null keys stay null; valid keys are looked up without bounds checks,
        // which is what the safety contract of this function covers.
        let opt_strings = keys.into_iter().map(|opt_key| match opt_key {
            Some(idx) => Some(values.value_unchecked(*idx as usize)),
            None => None,
        });
        cat_builder.drain_iter(opt_strings);
        cat_builder.finish()
    }
}
9 changes: 3 additions & 6 deletions polars/polars-core/src/series/from.rs
Original file line number Diff line number Diff line change
Expand Up @@ -247,12 +247,9 @@ impl Series {
let keys = keys.as_any().downcast_ref::<PrimitiveArray<u32>>().unwrap();
let values = values.as_any().downcast_ref::<Utf8Array<i64>>().unwrap();

let mut builder = CategoricalChunkedBuilder::new(name, keys.len());
let iter = keys
.into_iter()
.map(|opt_key| opt_key.map(|k| values.value_unchecked(*k as usize)));
builder.drain_iter(iter);
Ok(builder.finish().into_series())
// Safety
// the invariants of an Arrow Dictionary guarantee the keys are in bounds
Ok(CategoricalChunked::from_keys_and_values(name, keys, values).into_series())
}
#[cfg(not(feature = "dtype-u8"))]
ArrowDataType::LargeBinary | ArrowDataType::Binary => {
Expand Down

0 comments on commit f6d2c66

Please sign in to comment.