Skip to content

Commit

Permalink
feature gate categorical dtype
Browse files: browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Sep 17, 2021
1 parent 7c0e034 commit fce369f
Show file tree
Hide file tree
Showing 42 changed files with 141 additions and 11 deletions.
4 changes: 3 additions & 1 deletion polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ dtype-i16 = ["polars-core/dtype-i16", "polars-lazy/dtype-i16"]
dtype-u8 = ["polars-core/dtype-u8", "polars-lazy/dtype-u8"]
dtype-u16 = ["polars-core/dtype-u16", "polars-lazy/dtype-u16"]
dtype-u64 = ["polars-core/dtype-u64", "polars-lazy/dtype-u64", "polars-io/dtype-u64"]
# Categorical dtype support. This flag only forwards to polars-core; unlike the
# dtype-* features listed above, it enables no polars-lazy or polars-io feature.
dtype-categorical = ["polars-core/dtype-categorical"]

docs-selection = [
"csv-file",
Expand Down Expand Up @@ -161,7 +162,8 @@ docs-selection = [
"cum_agg",
"rolling_window",
"interpolate",
"diff"
"diff",
"dtype-categorical"
]

[dependencies]
Expand Down
4 changes: 3 additions & 1 deletion polars/polars-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ dtype-i16 = []
dtype-u8 = []
dtype-u16 = []
dtype-u64 = []
# Flag-only feature: it enables no dependencies or other features. It is consumed
# through `#[cfg(feature = "dtype-categorical")]` attributes in the crate sources.
dtype-categorical = []

parquet = ["arrow/io_parquet"]

Expand Down Expand Up @@ -119,7 +120,8 @@ docs-selection = [
"rolling_window",
"interpolate",
"diff",
"moment"
"moment",
"dtype-categorical"
]

[dependencies]
Expand Down
5 changes: 4 additions & 1 deletion polars/polars-core/src/chunked_array/builder/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
// The categorical builder module and the re-export of its
// `CategoricalChunkedBuilder` are compiled only when the `dtype-categorical`
// feature is enabled; both items carry the same gate so they appear or
// disappear together.
#[cfg(feature = "dtype-categorical")]
pub mod categorical;
#[cfg(feature = "dtype-categorical")]
pub use self::categorical::CategoricalChunkedBuilder;
use crate::{
prelude::*,
Expand Down Expand Up @@ -607,7 +609,6 @@ pub fn get_list_builder(
#[cfg(test)]
mod test {
use super::*;
use crate::{reset_string_cache, toggle_string_cache};

#[test]
fn test_primitive_builder() {
Expand Down Expand Up @@ -658,7 +659,9 @@ mod test {
}

#[test]
#[cfg(feature = "dtype-categorical")]
fn test_categorical_builder() {
use crate::{reset_string_cache, toggle_string_cache};
let _lock = crate::SINGLE_LOCK.lock();
for b in &[false, true] {
reset_string_cache();
Expand Down
7 changes: 7 additions & 0 deletions polars/polars-core/src/chunked_array/cast.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
//! Implementations of the ChunkCast Trait.
#[cfg(feature = "dtype-categorical")]
use crate::chunked_array::builder::CategoricalChunkedBuilder;
use crate::chunked_array::kernels::cast_physical;
use crate::prelude::*;
Expand Down Expand Up @@ -70,6 +71,7 @@ macro_rules! cast_with_dtype {
ChunkCast::cast::<Time64NanosecondType>($self).map(|ca| ca.into_series())
}
List(_) => ChunkCast::cast::<ListType>($self).map(|ca| ca.into_series()),
#[cfg(feature = "dtype-categorical")]
Categorical => ChunkCast::cast::<CategoricalType>($self).map(|ca| ca.into_series()),
dt => Err(PolarsError::ComputeError(
format!(
Expand All @@ -83,6 +85,7 @@ macro_rules! cast_with_dtype {
}};
}

#[cfg(feature = "dtype-categorical")]
impl ChunkCast for CategoricalChunked {
fn cast<N>(&self) -> Result<ChunkedArray<N>>
where
Expand Down Expand Up @@ -114,6 +117,7 @@ impl ChunkCast for CategoricalChunked {
ca.field = Arc::new(Field::new(ca.name(), DataType::UInt32));
Ok(ca)
}
#[cfg(feature = "dtype-categorical")]
DataType::Categorical => {
let mut out = ChunkedArray::new_from_chunks(self.name(), self.chunks.clone());
out.categorical_map = self.categorical_map.clone();
Expand All @@ -138,6 +142,7 @@ where
{
use DataType::*;
let ca = match (T::get_dtype(), N::get_dtype()) {
#[cfg(feature = "dtype-categorical")]
(UInt32, Categorical) => {
let mut ca: ChunkedArray<N> = unsafe { std::mem::transmute(self.clone()) };
ca.field = Arc::new(Field::new(ca.name(), DataType::Categorical));
Expand Down Expand Up @@ -179,6 +184,7 @@ impl ChunkCast for Utf8Chunked {
N: PolarsDataType,
{
match N::get_dtype() {
#[cfg(feature = "dtype-categorical")]
DataType::Categorical => {
let iter = self.into_iter();
let mut builder = CategoricalChunkedBuilder::new(self.name(), self.len());
Expand Down Expand Up @@ -300,6 +306,7 @@ mod test {
}

#[test]
#[cfg(feature = "dtype-categorical")]
fn test_cast_noop() {
// check if we can cast categorical twice without panic
let ca = Utf8Chunked::new_from_slice("foo", &["bar", "ham"]);
Expand Down
1 change: 1 addition & 0 deletions polars/polars-core/src/chunked_array/comparison.rs
Original file line number Diff line number Diff line change
Expand Up @@ -770,6 +770,7 @@ impl ChunkEqualElement for Utf8Chunked {
}

// Empty impl: `ListChunked` overrides nothing and therefore uses only the
// default methods that `ChunkEqualElement` provides.
impl ChunkEqualElement for ListChunked {}
#[cfg(feature = "dtype-categorical")]
impl ChunkEqualElement for CategoricalChunked {
unsafe fn equal_element(&self, idx_self: usize, idx_other: usize, other: &Series) -> bool {
let ca_other = other.as_ref().as_ref();
Expand Down
4 changes: 4 additions & 0 deletions polars/polars-core/src/chunked_array/iterator/mod.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
#[cfg(feature = "dtype-categorical")]
use crate::datatypes::CategoricalChunked;
use crate::prelude::*;
use crate::utils::CustomIterTools;
use arrow::array::*;
use std::convert::TryFrom;
#[cfg(feature = "dtype-categorical")]
use std::ops::Deref;

/// Module-private shorthand for `Utf8Array<i64>`.
// NOTE(review): the `i64` parameter is presumably the offset type of the
// string array — confirm against the arrow crate version in use.
type LargeStringArray = Utf8Array<i64>;
Expand Down Expand Up @@ -43,6 +45,7 @@ where
}
}

#[cfg(feature = "dtype-categorical")]
impl<'a> IntoIterator for &'a CategoricalChunked {
type Item = Option<u32>;
type IntoIter = Box<dyn PolarsIterator<Item = Self::Item> + 'a>;
Expand Down Expand Up @@ -362,6 +365,7 @@ where

// `SomeIterator<I>` is an `ExactSizeIterator` whenever `I` is one. The impl
// body is empty, so only the trait's provided methods are used.
impl<I> ExactSizeIterator for SomeIterator<I> where I: ExactSizeIterator {}

#[cfg(feature = "dtype-categorical")]
impl CategoricalChunked {
#[allow(clippy::wrong_self_convention)]
pub fn into_no_null_iter(
Expand Down
2 changes: 0 additions & 2 deletions polars/polars-core/src/chunked_array/list/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,8 +162,6 @@ impl ListChunked {
mod test {
use super::*;
use crate::chunked_array::builder::get_list_builder;
use std::mem::ManuallyDrop;
use std::ops::DerefMut;

#[test]
fn test_iter_list() {
Expand Down
10 changes: 9 additions & 1 deletion polars/polars-core/src/chunked_array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,15 @@ use arrow::array::Array;
pub(crate) mod list;
use polars_arrow::prelude::*;

#[cfg(feature = "dtype-categorical")]
use crate::chunked_array::builder::categorical::RevMapping;
use crate::utils::{slice_offsets, CustomIterTools};
use std::mem;
use std::ops::{Deref, DerefMut};

/// Empty stand-in for `RevMapping` when the `dtype-categorical` feature is off.
///
/// With the feature enabled, the real type is imported from
/// `crate::chunked_array::builder::categorical` instead, so exactly one
/// `RevMapping` is in scope in either configuration.
// NOTE(review): presumably this keeps code that names `RevMapping` (e.g. the
// `categorical_map` field) compiling without the feature — confirm.
#[cfg(not(feature = "dtype-categorical"))]
pub struct RevMapping {}

/// Iterator type obtained by mapping a plain `fn(&ArrayRef) -> usize` over a
/// borrowed slice of `ArrayRef`s; it yields one `usize` per array in the slice.
// NOTE(review): what the yielded `usize` denotes is not visible in this hunk —
// confirm at the place where the iterator is constructed.
pub type ChunkIdIter<'a> = std::iter::Map<std::slice::Iter<'a, ArrayRef>, fn(&ArrayRef) -> usize>;

/// # ChunkedArray
Expand Down Expand Up @@ -436,6 +440,7 @@ impl<T> ChunkedArray<T> {
where
Self: std::marker::Sized,
{
#[cfg(feature = "dtype-categorical")]
if let (Some(rev_map_l), Some(rev_map_r)) = (
self.categorical_map.as_ref(),
other.categorical_map.as_ref(),
Expand Down Expand Up @@ -580,6 +585,7 @@ where
let s = Series::try_from(("", v));
AnyValue::List(s.unwrap())
}
#[cfg(feature = "dtype-categorical")]
DataType::Categorical => {
let v = downcast!(UInt32Array);
AnyValue::Utf8(self.categorical_map.as_ref().expect("should be set").get(v))
Expand Down Expand Up @@ -822,6 +828,7 @@ impl From<UInt32Chunked> for CategoricalChunked {
}
}

#[cfg(feature = "dtype-categorical")]
impl CategoricalChunked {
fn set_state<T>(mut self, other: &ChunkedArray<T>) -> Self {
self.categorical_map = other.categorical_map.clone();
Expand Down Expand Up @@ -878,7 +885,6 @@ impl<T: PolarsNumericType> From<PrimitiveArray<T::Native>> for ChunkedArray<T> {
#[cfg(test)]
pub(crate) mod test {
use crate::prelude::*;
use crate::reset_string_cache;

pub(crate) fn get_chunked_array() -> Int32Chunked {
ChunkedArray::new_from_slice("a", &[1, 2, 3])
Expand Down Expand Up @@ -1055,7 +1061,9 @@ pub(crate) mod test {
}

#[test]
#[cfg(feature = "dtype-categorical")]
fn test_iter_categorical() {
use crate::reset_string_cache;
use crate::SINGLE_LOCK;
let _lock = SINGLE_LOCK.lock();
reset_string_cache();
Expand Down
4 changes: 4 additions & 0 deletions polars/polars-core/src/chunked_array/ops/aggregate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ impl ChunkVar<f64> for Float64Chunked {

// The impls below have empty bodies, so these types get only whatever default
// methods `ChunkVar` provides.
impl ChunkVar<String> for Utf8Chunked {}
impl ChunkVar<Series> for ListChunked {}
// Compiled only when the `dtype-categorical` feature is enabled.
#[cfg(feature = "dtype-categorical")]
impl ChunkVar<u32> for CategoricalChunked {}
// Compiled only when the `object` feature is enabled.
#[cfg(feature = "object")]
impl<T> ChunkVar<Series> for ObjectChunked<T> {}
Expand Down Expand Up @@ -342,6 +343,7 @@ impl VarAggSeries for BooleanChunked {
Self::full_null(self.name(), 1).into_series()
}
}
#[cfg(feature = "dtype-categorical")]
impl VarAggSeries for CategoricalChunked {
fn var_as_series(&self) -> Series {
self.cast::<UInt32Type>().unwrap().var_as_series()
Expand Down Expand Up @@ -439,6 +441,7 @@ impl ChunkAggSeries for Utf8Chunked {
}
}

// Empty impl: `CategoricalChunked` uses only the default methods of
// `ChunkAggSeries`. Compiled only when the `dtype-categorical` feature is
// enabled.
#[cfg(feature = "dtype-categorical")]
impl ChunkAggSeries for CategoricalChunked {}

macro_rules! one_null_list {
Expand Down Expand Up @@ -493,6 +496,7 @@ where
}

// Empty impls: each of these types uses only the default methods that `ArgAgg`
// provides.
impl ArgAgg for BooleanChunked {}
// Compiled only when the `dtype-categorical` feature is enabled.
#[cfg(feature = "dtype-categorical")]
impl ArgAgg for CategoricalChunked {}
impl ArgAgg for Utf8Chunked {}
impl ArgAgg for ListChunked {}
Expand Down
1 change: 1 addition & 0 deletions polars/polars-core/src/chunked_array/ops/any_value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ impl ChunkAnyValue for ListChunked {
}
}

#[cfg(feature = "dtype-categorical")]
impl ChunkAnyValue for CategoricalChunked {
#[inline]
unsafe fn get_any_value_unchecked(&self, index: usize) -> AnyValue {
Expand Down
1 change: 1 addition & 0 deletions polars/polars-core/src/chunked_array/ops/bit_repr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ where
}
}

#[cfg(feature = "dtype-categorical")]
impl ToBitRepr for CategoricalChunked {
fn bit_repr_is_large() -> bool {
// u32
Expand Down
2 changes: 2 additions & 0 deletions polars/polars-core/src/chunked_array/ops/chunkops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ impl ChunkOps for Utf8Chunked {
}
}

#[cfg(feature = "dtype-categorical")]
impl ChunkOps for CategoricalChunked {
fn rechunk(&self) -> Self
where
Expand Down Expand Up @@ -136,6 +137,7 @@ mod test {
use crate::prelude::*;

#[test]
#[cfg(feature = "dtype-categorical")]
fn test_categorical_map_after_rechunk() {
let s = Series::new("", &["foo", "bar", "spam"]);
let mut a = s.cast::<CategoricalType>().unwrap();
Expand Down
2 changes: 2 additions & 0 deletions polars/polars-core/src/chunked_array/ops/compare_inner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ impl<'a> IntoPartialEqInner<'a> for &'a ListChunked {
}
}

#[cfg(feature = "dtype-categorical")]
impl<'a> IntoPartialEqInner<'a> for &'a CategoricalChunked {
fn into_partial_eq_inner(self) -> Box<dyn PartialEqInner> {
unimplemented!()
Expand Down Expand Up @@ -281,6 +282,7 @@ impl<'a> IntoPartialOrdInner<'a> for &'a ListChunked {
}
}

#[cfg(feature = "dtype-categorical")]
impl<'a> IntoPartialOrdInner<'a> for &'a CategoricalChunked {
fn into_partial_ord_inner(self) -> Box<dyn PartialOrdInner> {
unimplemented!()
Expand Down
1 change: 1 addition & 0 deletions polars/polars-core/src/chunked_array/ops/cum_agg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ where
}
}

// Empty impls: these types override none of `ChunkCumAgg`'s methods and so use
// only the trait's defaults. The categorical impl is compiled only when the
// `dtype-categorical` feature is enabled.
#[cfg(feature = "dtype-categorical")]
impl ChunkCumAgg<CategoricalType> for CategoricalChunked {}
impl ChunkCumAgg<Utf8Type> for Utf8Chunked {}
impl ChunkCumAgg<ListType> for ListChunked {}
Expand Down
2 changes: 2 additions & 0 deletions polars/polars-core/src/chunked_array/ops/explode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use arrow::{array::*, bitmap::MutableBitmap, buffer::Buffer};
use polars_arrow::bit_util::unset_bit_raw;
use polars_arrow::prelude::{FromDataUtf8, ValueSize};
use std::convert::TryFrom;
#[cfg(feature = "dtype-categorical")]
use std::ops::Deref;

pub(crate) trait ExplodeByOffsets {
Expand Down Expand Up @@ -168,6 +169,7 @@ impl ExplodeByOffsets for Utf8Chunked {
builder.finish().into()
}
}
#[cfg(feature = "dtype-categorical")]
impl ExplodeByOffsets for CategoricalChunked {
#[inline(never)]
fn explode_by_offsets(&self, offsets: &[i64]) -> Series {
Expand Down
1 change: 1 addition & 0 deletions polars/polars-core/src/chunked_array/ops/fill_null.rs
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@ impl ChunkFillNull for ListChunked {
}
}

#[cfg(feature = "dtype-categorical")]
impl ChunkFillNull for CategoricalChunked {
fn fill_null(&self, _strategy: FillNullStrategy) -> Result<Self> {
Err(PolarsError::InvalidOperation(
Expand Down
2 changes: 2 additions & 0 deletions polars/polars-core/src/chunked_array/ops/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use crate::utils::align_chunks_binary;
#[cfg(feature = "object")]
use arrow::array::Array;
use arrow::compute::filter::filter as filter_fn;
#[cfg(feature = "dtype-categorical")]
use std::ops::Deref;

macro_rules! check_filter_len {
Expand Down Expand Up @@ -92,6 +93,7 @@ impl ChunkFilter<Utf8Type> for Utf8Chunked {
}
}

#[cfg(feature = "dtype-categorical")]
impl ChunkFilter<CategoricalType> for CategoricalChunked {
fn filter(&self, filter: &BooleanChunked) -> Result<ChunkedArray<CategoricalType>>
where
Expand Down
1 change: 1 addition & 0 deletions polars/polars-core/src/chunked_array/ops/interpolate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ macro_rules! interpolate {
// Invoke the `interpolate!` macro (declared earlier in this file) once per
// chunked-array type. The categorical invocation is compiled only when the
// `dtype-categorical` feature is enabled.
interpolate!(Utf8Chunked);
interpolate!(ListChunked);
interpolate!(BooleanChunked);
#[cfg(feature = "dtype-categorical")]
interpolate!(CategoricalChunked);

#[cfg(feature = "object")]
Expand Down
1 change: 1 addition & 0 deletions polars/polars-core/src/chunked_array/ops/is_in.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ impl IsIn for BooleanChunked {
}
}

#[cfg(feature = "dtype-categorical")]
impl IsIn for CategoricalChunked {
fn is_in(&self, other: &Series) -> Result<BooleanChunked> {
self.cast::<UInt32Type>().unwrap().is_in(other)
Expand Down
2 changes: 2 additions & 0 deletions polars/polars-core/src/chunked_array/ops/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -845,6 +845,7 @@ where
}
}

#[cfg(feature = "dtype-categorical")]
impl ChunkReverse<CategoricalType> for CategoricalChunked {
fn reverse(&self) -> ChunkedArray<CategoricalType> {
self.cast::<UInt32Type>().unwrap().reverse().cast().unwrap()
Expand Down Expand Up @@ -928,6 +929,7 @@ impl ChunkExpandAtIndex<Utf8Type> for Utf8Chunked {
}
}

#[cfg(feature = "dtype-categorical")]
impl ChunkExpandAtIndex<CategoricalType> for CategoricalChunked {
fn expand_at_index(&self, index: usize, length: usize) -> CategoricalChunked {
self.cast::<UInt32Type>()
Expand Down
1 change: 1 addition & 0 deletions polars/polars-core/src/chunked_array/ops/peaks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ where

// Empty impls: each of these types uses only the default methods that
// `ChunkPeaks` provides.
impl ChunkPeaks for BooleanChunked {}
impl ChunkPeaks for Utf8Chunked {}
// Compiled only when the `dtype-categorical` feature is enabled.
#[cfg(feature = "dtype-categorical")]
impl ChunkPeaks for CategoricalChunked {}
impl ChunkPeaks for ListChunked {}

Expand Down
1 change: 1 addition & 0 deletions polars/polars-core/src/chunked_array/ops/repeat_by.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ impl RepeatBy for Utf8Chunked {

// Empty impl: `ListChunked` uses only the default methods that `RepeatBy`
// provides.
impl RepeatBy for ListChunked {}

#[cfg(feature = "dtype-categorical")]
impl RepeatBy for CategoricalChunked {
fn repeat_by(&self, by: &UInt32Chunked) -> ListChunked {
let mut ca = self.cast::<UInt32Type>().unwrap().repeat_by(by);
Expand Down

0 comments on commit fce369f

Please sign in to comment.