Skip to content

Commit

Permalink
Redesign logical types (#1489)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Oct 6, 2021
1 parent c9eb93e commit f167d3f
Show file tree
Hide file tree
Showing 37 changed files with 1,998 additions and 1,767 deletions.
42 changes: 20 additions & 22 deletions polars/polars-core/src/chunked_array/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,15 @@ where
Arc::new(array)
}

/// Cast every array in `chunks` to the arrow representation of `dtype`.
///
/// Chunks are processed in order; the first cast that fails aborts the
/// operation and its error is returned (converted by `?` into this crate's
/// error type).
pub(crate) fn cast_chunks(chunks: &[ArrayRef], dtype: DataType) -> Result<Vec<ArrayRef>> {
    let mut casted: Vec<ArrayRef> = Vec::with_capacity(chunks.len());
    for chunk in chunks {
        // The arrow kernel hands back an owned array; `into` turns it into an `ArrayRef`.
        let array = cast::cast(chunk.as_ref(), &dtype.to_arrow())?;
        casted.push(array.into());
    }
    Ok(casted)
}

fn cast_ca<N, T>(ca: &ChunkedArray<T>) -> Result<ChunkedArray<N>>
where
N: PolarsDataType,
Expand All @@ -25,17 +34,11 @@ where
if N::get_dtype() == T::get_dtype() {
return Ok(ChunkedArray::new_from_chunks(ca.name(), ca.chunks.clone()));
};
let chunks = ca
.chunks
.iter()
.map(|arr| cast::cast(arr.as_ref(), &N::get_dtype().to_arrow()))
.map(|arr| arr.map(|x| x.into()))
.collect::<arrow::error::Result<Vec<_>>>()?;

let chunks = cast_chunks(&ca.chunks, N::get_dtype())?;
Ok(ChunkedArray::new_from_chunks(ca.field.name(), chunks))
}

fn cast_from_dtype<N, T>(chunked: &ChunkedArray<T>, dtype: DataType) -> Result<ChunkedArray<N>>
fn cast_from_dtype<T, N>(chunked: &ChunkedArray<T>, dtype: DataType) -> Result<ChunkedArray<N>>
where
N: PolarsNumericType,
T: PolarsNumericType,
Expand Down Expand Up @@ -70,9 +73,13 @@ macro_rules! cast_with_dtype {
Float32 => ChunkCast::cast::<Float32Type>($self).map(|ca| ca.into_series()),
Float64 => ChunkCast::cast::<Float64Type>($self).map(|ca| ca.into_series()),
#[cfg(feature = "dtype-date32")]
Date32 => ChunkCast::cast::<Date32Type>($self).map(|ca| ca.into_series()),
Date32 => {
ChunkCast::cast::<Int32Type>($self).map(|ca| Date32Chunked::new(ca).into_series())
}
#[cfg(feature = "dtype-date64")]
Date64 => ChunkCast::cast::<Date64Type>($self).map(|ca| ca.into_series()),
Date64 => {
ChunkCast::cast::<Int64Type>($self).map(|ca| Date64Chunked::new(ca).into_series())
}
List(_) => ChunkCast::cast::<ListType>($self).map(|ca| ca.into_series()),
#[cfg(feature = "dtype-categorical")]
Categorical => ChunkCast::cast::<CategoricalType>($self).map(|ca| ca.into_series()),
Expand Down Expand Up @@ -153,19 +160,10 @@ where
// paths not supported by arrow kernel
// to float32
(Date32, Float32) | (Date64, Float32) => {
cast_from_dtype::<Float32Type, _>(self, Float32)?.cast::<N>()
}
// to float64
(Date32, Float64) | (Date64, Float64) => {
cast_from_dtype::<Float64Type, _>(self, Float64)?.cast::<N>()
}
// to date64
(Int32, Date64) | (Float64, Date64) | (Float32, Date64) => {
cast_from_dtype::<Date64Type, _>(self, Date64)?.cast::<N>()
cast_from_dtype::<_, Float32Type>(self, Float32)?.cast::<N>()
}
// to date32
(Int64, Date32) | (Float64, Date32) | (Float32, Date32) => {
cast_from_dtype::<Date32Type, _>(self, Date32)?.cast::<N>()
(_, Date32) | (_, Date64) => {
panic!("use cast_with_dtype for casting date types")
}
_ => cast_ca(self),
};
Expand Down
11 changes: 11 additions & 0 deletions polars/polars-core/src/chunked_array/categorical/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,17 @@ impl From<&CategoricalChunked> for DictionaryArray<i64> {
}
}

impl CategoricalChunked {
    /// Borrow the mapping from categorical values to their string values,
    /// or `None` when no mapping is stored.
    pub fn get_categorical_map(&self) -> Option<&Arc<RevMapping>> {
        Option::as_ref(&self.categorical_map)
    }

    /// Store `categorical_map` on this array, replacing any mapping that was
    /// set before.
    pub(crate) fn set_categorical_map(&mut self, categorical_map: Arc<RevMapping>) {
        let mapping = Some(categorical_map);
        self.categorical_map = mapping;
    }
}

#[cfg(test)]
mod test {
use super::*;
Expand Down
22 changes: 22 additions & 0 deletions polars/polars-core/src/chunked_array/logical/date32.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
use super::*;
use crate::prelude::*;

/// Logical `Date32` array: an `Int32Type` chunked array tagged with the
/// `Date32Type` marker.
pub type Date32Chunked = Logical<Date32Type, Int32Type>;

impl From<Int32Chunked> for Date32Chunked {
fn from(ca: Int32Chunked) -> Self {
Date32Chunked::new(ca)
}
}

impl Int32Chunked {
    /// Consume this array and return it wrapped as a `Date32Chunked`.
    pub fn into_date(self) -> Date32Chunked {
        Date32Chunked::from(self)
    }
}

/// Reports `Date32` as the logical data type of `Date32Chunked`.
impl LogicalType for Date32Chunked {
/// Always returns a reference to `DataType::Date32`.
fn dtype(&self) -> &'static DataType {
&DataType::Date32
}
}
22 changes: 22 additions & 0 deletions polars/polars-core/src/chunked_array/logical/date64.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
use super::*;
use crate::prelude::*;

/// Logical `Date64` array: an `Int64Type` chunked array tagged with the
/// `Date64Type` marker.
pub type Date64Chunked = Logical<Date64Type, Int64Type>;

impl From<Int64Chunked> for Date64Chunked {
fn from(ca: Int64Chunked) -> Self {
Date64Chunked::new(ca)
}
}

impl Int64Chunked {
    /// Consume this array and return it wrapped as a `Date64Chunked`.
    pub fn into_date(self) -> Date64Chunked {
        Date64Chunked::from(self)
    }
}

/// Reports `Date64` as the logical data type of `Date64Chunked`.
impl LogicalType for Date64Chunked {
/// Always returns a reference to `DataType::Date64`.
fn dtype(&self) -> &'static DataType {
&DataType::Date64
}
}
53 changes: 53 additions & 0 deletions polars/polars-core/src/chunked_array/logical/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
mod date32;
mod date64;

pub use {date32::*, date64::*};

use crate::prelude::*;
use std::marker::PhantomData;
use std::ops::{Deref, DerefMut};

/// Maps a logical type to a chunked array implementation of the physical type.
/// This saves a lot of compiler bloat and allows us to reuse functionality.
///
/// `K` is the logical type marker and is carried only through `PhantomData`;
/// `T` is the physical type of the wrapped `ChunkedArray`, which is stored in
/// the public first field.
pub struct Logical<K: PolarsDataType, T: PolarsDataType>(pub ChunkedArray<T>, PhantomData<K>);

impl<K: PolarsDataType, T: PolarsDataType> Clone for Logical<K, T> {
fn clone(&self) -> Self {
Logical::<K, _>::new(self.0.clone())
}
}

impl<K: PolarsDataType, T: PolarsDataType> Deref for Logical<K, T> {
type Target = ChunkedArray<T>;

fn deref(&self) -> &Self::Target {
&self.0
}
}

impl<K: PolarsDataType, T: PolarsDataType> DerefMut for Logical<K, T> {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}

impl<K: PolarsDataType, T: PolarsDataType> Logical<K, T> {
    /// Wrap the physical array `ca` in a logical type.
    ///
    /// The marker type of the result is the method-level parameter `J`, not
    /// the impl-level `K`; `J` is normally inferred from the expected return
    /// type.
    pub fn new<J: PolarsDataType>(ca: ChunkedArray<T>) -> Logical<J, T> {
        let marker: PhantomData<J> = PhantomData;
        Logical(ca, marker)
    }
}

/// Implemented by logical wrapper types to report the logical `DataType`
/// they represent.
pub trait LogicalType {
/// Get the logical data type of this array (for example `DataType::Date32`
/// for `Date32Chunked`).
fn dtype(&self) -> &'static DataType;
}

impl<K: PolarsDataType, T: PolarsDataType> Logical<K, T>
where
    Self: LogicalType,
{
    /// Build a `Field` that pairs the name of the wrapped physical array with
    /// the logical data type reported by `LogicalType::dtype`.
    pub fn field(&self) -> Field {
        let physical_field = self.0.ref_field();
        let logical_dtype = <Self as LogicalType>::dtype(self).clone();
        Field::new(physical_field.name(), logical_dtype)
    }
}
5 changes: 1 addition & 4 deletions polars/polars-core/src/chunked_array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ mod bitwise;
#[cfg(feature = "dtype-categorical")]
pub(crate) mod categorical;
pub(crate) mod list;
pub(crate) mod logical;

use polars_arrow::prelude::*;

Expand Down Expand Up @@ -178,10 +179,6 @@ impl<T> ChunkedArray<T> {
self.bit_settings |= 1
}
}
/// Get a reference to the mapping of categorical types to the string values.
pub fn get_categorical_map(&self) -> Option<&Arc<RevMapping>> {
self.categorical_map.as_ref()
}

/// Get the index of the first non null value in this ChunkedArray.
pub fn first_non_null(&self) -> Option<usize> {
Expand Down
18 changes: 10 additions & 8 deletions polars/polars-core/src/chunked_array/temporal/conversion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ impl Utf8Chunked {
None => self.sniff_fmt_date32()?,
};

let mut ca: Date32Chunked = match self.null_count() {
let mut ca: Int32Chunked = match self.null_count() {
0 => self
.into_no_null_iter()
.map(|s| {
Expand All @@ -150,7 +150,7 @@ impl Utf8Chunked {
.collect_trusted(),
};
ca.rename(self.name());
Ok(ca)
Ok(ca.into())
}

#[cfg(feature = "dtype-date64")]
Expand All @@ -160,7 +160,7 @@ impl Utf8Chunked {
None => self.sniff_fmt_date64()?,
};

let mut ca: Date64Chunked = match self.null_count() {
let mut ca: Int64Chunked = match self.null_count() {
0 => self
.into_no_null_iter()
.map(|s| {
Expand All @@ -186,7 +186,7 @@ impl Utf8Chunked {
.collect_trusted(),
};
ca.rename(self.name());
Ok(ca)
Ok(ca.into())
}
}

Expand Down Expand Up @@ -271,11 +271,11 @@ impl Date64Chunked {
.iter()
.map(naive_datetime_to_date64)
.collect_trusted::<AlignedVec<_>>();
ChunkedArray::new_from_aligned_vec(name, vals)
Int64Chunked::new_from_aligned_vec(name, vals).into()
}

pub fn parse_from_str_slice(name: &str, v: &[&str], fmt: &str) -> Self {
ChunkedArray::new_from_opt_iter(
Int64Chunked::new_from_opt_iter(
name,
v.iter().map(|s| {
NaiveDateTime::parse_from_str(s, fmt)
Expand All @@ -284,6 +284,7 @@ impl Date64Chunked {
.map(naive_datetime_to_date64)
}),
)
.into()
}
}

Expand Down Expand Up @@ -344,11 +345,11 @@ impl Date32Chunked {
.iter()
.map(|v| naive_date_to_date32(*v))
.collect::<AlignedVec<_>>();
ChunkedArray::new_from_aligned_vec(name, unit)
Int32Chunked::new_from_aligned_vec(name, unit).into()
}

pub fn parse_from_str_slice(name: &str, v: &[&str], fmt: &str) -> Self {
ChunkedArray::new_from_opt_iter(
Int32Chunked::new_from_opt_iter(
name,
v.iter().map(|s| {
NaiveDate::parse_from_str(s, fmt)
Expand All @@ -357,5 +358,6 @@ impl Date32Chunked {
.map(|v| naive_date_to_date32(*v))
}),
)
.into()
}
}
23 changes: 12 additions & 11 deletions polars/polars-core/src/datatypes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
//!
#[cfg(feature = "dtype-categorical")]
use crate::chunked_array::categorical::RevMapping;
pub use crate::chunked_array::logical::*;
#[cfg(feature = "object")]
use crate::chunked_array::object::PolarsObjectSafe;
use crate::prelude::*;
Expand Down Expand Up @@ -122,8 +123,6 @@ pub type Int64Chunked = ChunkedArray<Int64Type>;
pub type Float32Chunked = ChunkedArray<Float32Type>;
pub type Float64Chunked = ChunkedArray<Float64Type>;
pub type Utf8Chunked = ChunkedArray<Utf8Type>;
pub type Date32Chunked = ChunkedArray<Date32Type>;
pub type Date64Chunked = ChunkedArray<Date64Type>;
pub type CategoricalChunked = ChunkedArray<CategoricalType>;

pub trait NumericNative:
Expand Down Expand Up @@ -188,12 +187,6 @@ impl PolarsNumericType for Float32Type {
impl PolarsNumericType for Float64Type {
type Native = f64;
}
impl PolarsNumericType for Date32Type {
type Native = i32;
}
impl PolarsNumericType for Date64Type {
type Native = i64;
}

pub trait PolarsIntegerType: PolarsNumericType {}
impl PolarsIntegerType for UInt8Type {}
Expand All @@ -204,8 +197,6 @@ impl PolarsIntegerType for Int8Type {}
impl PolarsIntegerType for Int16Type {}
impl PolarsIntegerType for Int32Type {}
impl PolarsIntegerType for Int64Type {}
impl PolarsIntegerType for Date32Type {}
impl PolarsIntegerType for Date64Type {}

pub trait PolarsFloatType: PolarsNumericType {}
impl PolarsFloatType for Float32Type {}
Expand Down Expand Up @@ -320,7 +311,17 @@ where
}

impl<'a> AnyValue<'a> {
pub fn add<'b>(&self, rhs: &AnyValue<'b>) -> AnyValue<'a> {
/// Reinterpret an integer value as the corresponding date value.
///
/// `Int32` becomes `Date32` (with the `dtype-date32` feature) and `Int64`
/// becomes `Date64` (with the `dtype-date64` feature); the inner value is
/// passed through unchanged.
///
/// # Panics
///
/// Panics for every other variant, including the integer variants whose
/// feature is not enabled.
pub(crate) fn into_date(self) -> Self {
match self {
#[cfg(feature = "dtype-date32")]
AnyValue::Int32(v) => AnyValue::Date32(v),
#[cfg(feature = "dtype-date64")]
AnyValue::Int64(v) => AnyValue::Date64(v),
_ => panic!("cannot create date from other type"),
}
}

pub fn add<'b>(&self, rhs: &AnyValue<'b>) -> Self {
use AnyValue::*;
match (self, rhs) {
(Null, _) => Null,
Expand Down
12 changes: 12 additions & 0 deletions polars/polars-core/src/fmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -634,6 +634,18 @@ impl FmtList for CategoricalChunked {
}
}

/// List-formatting support for the logical `Date32Chunked` type.
impl FmtList for Date32Chunked {
/// Render this array as a `String` by delegating to the `impl_fmt_list!` macro.
fn fmt_list(&self) -> String {
impl_fmt_list!(self)
}
}

/// List-formatting support for the logical `Date64Chunked` type.
impl FmtList for Date64Chunked {
/// Render this array as a `String` by delegating to the `impl_fmt_list!` macro.
fn fmt_list(&self) -> String {
impl_fmt_list!(self)
}
}

#[cfg(feature = "object")]
impl<T> FmtList for ObjectChunked<T> {
fn fmt_list(&self) -> String {
Expand Down
5 changes: 3 additions & 2 deletions polars/polars-core/src/frame/asof_join.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use num::Bounded;
use polars_arrow::trusted_len::PushUnchecked;

pub(crate) trait JoinAsof<T: PolarsDataType> {
fn join_asof(&self, _other: &ChunkedArray<T>) -> Result<Vec<Option<u32>>> {
fn join_asof(&self, _other: &Series) -> Result<Vec<Option<u32>>> {
Err(PolarsError::InvalidOperation(
format!(
"asof join not implemented for key with dtype: {:?}",
Expand All @@ -20,7 +20,8 @@ where
T: PolarsNumericType,
T::Native: Bounded + PartialOrd,
{
fn join_asof(&self, other: &ChunkedArray<T>) -> Result<Vec<Option<u32>>> {
fn join_asof(&self, other: &Series) -> Result<Vec<Option<u32>>> {
let other = self.unpack_series_matching_type(other)?;
let mut rhs_iter = other.into_iter();
let mut tuples = Vec::with_capacity(self.len());
if self.null_count() > 0 {
Expand Down

0 comments on commit f167d3f

Please sign in to comment.