Skip to content

Commit

Permalink
cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Apr 24, 2024
1 parent 800257b commit b64c935
Show file tree
Hide file tree
Showing 19 changed files with 230 additions and 445 deletions.
10 changes: 4 additions & 6 deletions crates/polars-core/src/datatypes/dtype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -228,16 +228,14 @@ impl DataType {

/// Check if this [`DataType`] is a basic numeric type (excludes Decimal).
pub fn is_numeric(&self) -> bool {
self.is_float()
|| self.is_integer()
|| self.is_dynamic()
self.is_float() || self.is_integer() || self.is_dynamic()
}

pub fn is_dynamic(&self) -> bool {
matches!(
self,
DataType::Unknown(UnknownKind::Int(_) | UnknownKind::Float | UnknownKind::Str)
)
self,
DataType::Unknown(UnknownKind::Int(_) | UnknownKind::Float | UnknownKind::Str)
)
}

/// Check if this [`DataType`] is a boolean
Expand Down
12 changes: 7 additions & 5 deletions crates/polars-core/src/utils/supertype.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use num_traits::Signed;

use super::*;

/// Given two data types, determine the data type that both types can safely be cast to.
Expand Down Expand Up @@ -256,7 +257,7 @@ pub fn get_supertype(l: &DataType, r: &DataType) -> Option<DataType> {
}
#[cfg(feature = "dtype-struct")]
(Struct(inner), right @ Unknown(UnknownKind::Float | UnknownKind::Int(_))) => {
match inner.get(0) {
match inner.first() {
Some(inner) => get_supertype(&inner.dtype, right),
None => None
}
Expand All @@ -266,18 +267,19 @@ pub fn get_supertype(l: &DataType, r: &DataType) -> Option<DataType> {
UnknownKind::Float | UnknownKind::Int(_) if dt.is_float() | dt.is_string() => Some(dt.clone()),
UnknownKind::Float if dt.is_numeric() => Some(Unknown(UnknownKind::Float)),
UnknownKind::Str if dt.is_string() | dt.is_enum() => Some(dt.clone()),
#[cfg(feature = "dtype-categorical")]
UnknownKind::Str if dt.is_categorical() => {
let Categorical(_, ord) = dt else { unreachable!()};
Some(Categorical(None, *ord))
},
dynam @ _ if dt.is_null() => Some(Unknown(*dynam)),
dynam if dt.is_null() => Some(Unknown(*dynam)),
UnknownKind::Int(v) if dt.is_numeric() => {
let smallest_fitting_dtype = if dt.is_unsigned_integer() && v.is_positive() {
materialize_dyn_int_pos(*v).dtype()
} else {
materialize_smallest_dyn_int(*v).dtype()
};
get_supertype(&dt, &smallest_fitting_dtype)
get_supertype(dt, &smallest_fitting_dtype)
}
_ => Some(Unknown(UnknownKind::Any))
}
Expand Down Expand Up @@ -395,7 +397,7 @@ fn materialize_dyn_int_pos(v: i128) -> AnyValue<'static> {
Some(v) => AnyValue::UInt32(v),
None => match u64::try_from(v).ok() {
Some(v) => AnyValue::UInt64(v),
None => AnyValue::Null
None => AnyValue::Null,
},
},
},
Expand All @@ -415,7 +417,7 @@ fn materialize_smallest_dyn_int(v: i128) -> AnyValue<'static> {
Some(v) => AnyValue::UInt64(v),
None => AnyValue::Null,
},
}
},
},
},
}
Expand Down
7 changes: 6 additions & 1 deletion crates/polars-lazy/src/physical_plan/expressions/literal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,12 @@ impl PhysicalExpr for LiteralExpr {
.into_time()
.into_series(),
Series(series) => series.deref().clone(),
lv @ (Int(_) | Float(_) | StrCat(_)) => polars_core::prelude::Series::from_any_values(LITERAL_NAME, &[lv.to_any_value().unwrap()], false).unwrap(),
lv @ (Int(_) | Float(_) | StrCat(_)) => polars_core::prelude::Series::from_any_values(
LITERAL_NAME,
&[lv.to_any_value().unwrap()],
false,
)
.unwrap(),
};
Ok(s)
}
Expand Down
4 changes: 2 additions & 2 deletions crates/polars-lazy/src/tests/optimization_checks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -295,12 +295,12 @@ pub fn test_predicate_block_cast() -> PolarsResult<()> {
let lf1 = df
.clone()
.lazy()
.with_column(col("value").cast(DataType::Int16) * lit(0.1f32))
.with_column(col("value").cast(DataType::Int16) * lit(0.1).cast(DataType::Float32))
.filter(col("value").lt(lit(2.5f32)));

let lf2 = df
.lazy()
.select([col("value").cast(DataType::Int16) * lit(0.1f32)])
.select([col("value").cast(DataType::Int16) * lit(0.1).cast(DataType::Float32)])
.filter(col("value").lt(lit(2.5f32)));

for lf in [lf1, lf2] {
Expand Down
4 changes: 2 additions & 2 deletions crates/polars-lazy/src/tests/queries.rs
Original file line number Diff line number Diff line change
Expand Up @@ -550,7 +550,7 @@ fn test_simplify_expr() {

let plan = df
.lazy()
.select(&[lit(1.0f32) + lit(1.0f32) + col("sepal_width")])
.select(&[lit(1.0) + lit(1.0) + col("sepal_width")])
.logical_plan;

let mut expr_arena = Arena::new();
Expand All @@ -564,7 +564,7 @@ fn test_simplify_expr() {
.unwrap();
let plan = node_to_lp(lp_top, &expr_arena, &mut lp_arena);
assert!(
matches!(plan, DslPlan::Select{ expr, ..} if matches!(&expr[0], Expr::BinaryExpr{left, ..} if **left == Expr::Literal(LiteralValue::Float32(2.0))))
matches!(plan, DslPlan::Select{ expr, ..} if matches!(&expr[0], Expr::BinaryExpr{left, ..} if **left == Expr::Literal(LiteralValue::Float(2.0))))
);
}

Expand Down
6 changes: 4 additions & 2 deletions crates/polars-plan/src/dsl/function_expr/fill_null.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ pub(super) fn fill_null(s: &[Series]) -> PolarsResult<Series> {
match series.dtype() {
#[cfg(feature = "dtype-categorical")]
// for Categoricals we first need to check if the category already exist
dt @ DataType::Categorical(Some(rev_map), _) => {
DataType::Categorical(Some(rev_map), _) => {
if rev_map.is_local() && fill_value.len() == 1 && fill_value.null_count() == 0 {
let fill_av = fill_value.get(0).unwrap();
let fill_str = fill_av.get_str().unwrap();
Expand All @@ -46,7 +46,9 @@ pub(super) fn fill_null(s: &[Series]) -> PolarsResult<Series> {
}
}
let fill_value = if fill_value.dtype().is_string() {
fill_value.cast(&DataType::Categorical(None, Default::default())).unwrap()
fill_value
.cast(&DataType::Categorical(None, Default::default()))
.unwrap()
} else {
fill_value
};
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-plan/src/dsl/function_expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,7 @@ impl Hash for FunctionExpr {
Sign => {},
#[cfg(feature = "row_hash")]
Hash(a, b, c, d) => (a, b, c, d).hash(state),
FillNull => {},
FillNull => {},
#[cfg(feature = "rolling_window")]
RollingExpr(f) => {
f.hash(state);
Expand Down
39 changes: 15 additions & 24 deletions crates/polars-plan/src/dsl/function_expr/schema.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use polars_core::utils::materialize_dyn_int;

use super::*;

impl FunctionExpr {
Expand Down Expand Up @@ -58,7 +59,7 @@ impl FunctionExpr {
Atan2 => mapper.map_to_float_dtype(),
#[cfg(feature = "sign")]
Sign => mapper.with_dtype(DataType::Int64),
FillNull { .. } => mapper.map_to_supertype(),
FillNull { .. } => mapper.map_to_supertype(),
#[cfg(feature = "rolling_window")]
RollingExpr(rolling_func, ..) => {
use RollingFunction::*;
Expand Down Expand Up @@ -410,7 +411,7 @@ impl<'a> FieldsMapper<'a> {

/// Map the dtype to the "supertype" of all fields.
pub fn map_to_supertype(&self) -> PolarsResult<Field> {
let st = args_to_supertype(&self.fields)?;
let st = args_to_supertype(self.fields)?;
let mut first = self.fields[0].clone();
first.coerce(st);
Ok(first)
Expand Down Expand Up @@ -520,38 +521,28 @@ impl<'a> FieldsMapper<'a> {
}
}


pub(crate) fn args_to_supertype<D: AsRef<DataType>>(dtypes: &[D]) -> PolarsResult<DataType> {
let mut st = dtypes[0].as_ref().clone();
for dt in &dtypes[1..] {
st = try_get_supertype(&st, dt.as_ref())?
}

match (dtypes[0].as_ref(), &st) {
(DataType::Categorical(_, ord), DataType::String) => {
st = DataType::Categorical(None, *ord)
}
#[cfg(feature = "dtype-categorical")]
(DataType::Categorical(_, ord), DataType::String) => st = DataType::Categorical(None, *ord),
_ => {
match st {
DataType::Unknown(kind) => {
match kind {
UnknownKind::Float => {
st = DataType::Float64
},
UnknownKind::Int(v) => {
st = materialize_dyn_int(v).dtype();
},
UnknownKind::Str => {
st = DataType::String
},
_ => {}
}
if let DataType::Unknown(kind) = st {
match kind {
UnknownKind::Float => st = DataType::Float64,
UnknownKind::Int(v) => {
st = materialize_dyn_int(v).dtype();
},
UnknownKind::Str => st = DataType::String,
_ => {},
}
_ => {}
}

}
},
}

Ok(st)
}
}
8 changes: 2 additions & 6 deletions crates/polars-plan/src/dsl/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,7 @@ pub use arity::*;
pub use array::*;
use arrow::legacy::prelude::QuantileInterpolOptions;
pub use expr::*;
pub use function_expr::schema::{
FieldsMapper
};
pub(crate) use function_expr::schema::args_to_supertype;
pub use function_expr::schema::FieldsMapper;
pub use function_expr::*;
pub use functions::*;
pub use list::*;
Expand Down Expand Up @@ -946,8 +943,7 @@ impl Expr {

Expr::Function {
input,
function: FunctionExpr::FillNull
,
function: FunctionExpr::FillNull,
options: FunctionOptions {
collect_groups: ApplyOptions::ElementWise,
cast_to_supertypes: true,
Expand Down
5 changes: 2 additions & 3 deletions crates/polars-plan/src/logical_plan/aexpr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -419,11 +419,10 @@ impl AExpr {

pub(crate) fn is_dynamic_literal(&self) -> bool {
match self {
AExpr::Literal(lv) => lv.is_dynamic(),
_ => false
AExpr::Literal(lv) => lv.is_dynamic(),
_ => false,
}
}

}

impl AAggExpr {
Expand Down
26 changes: 15 additions & 11 deletions crates/polars-plan/src/logical_plan/conversion/expr_to_expr_ir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,11 @@ where
}
}

fn to_aexpr_impl_materialized_lit(expr: Expr, arena: &mut Arena<AExpr>, state: &mut ConversionState) -> Node {
fn to_aexpr_impl_materialized_lit(
expr: Expr,
arena: &mut Arena<AExpr>,
state: &mut ConversionState,
) -> Node {
// Already convert `Lit Float and Lit Int` expressions that are not used in a binary / function expression.
// This means they can be materialized immediately
let e = match expr {
Expand All @@ -95,10 +99,6 @@ fn to_aexpr_impl_materialized_lit(expr: Expr, arena: &mut Arena<AExpr>, state: &
name,
)
},
Expr::Literal(lv @ LiteralValue::Int(_) | lv @ LiteralValue::Float(_)) => {
let av = lv.to_any_value().unwrap();
Expr::Literal(LiteralValue::try_from(av).unwrap())
},
e => e,
};
to_aexpr_impl(e, arena, state)
Expand Down Expand Up @@ -227,13 +227,17 @@ fn to_aexpr_impl(expr: Expr, arena: &mut Arena<AExpr>, state: &mut ConversionSta
quantile: to_aexpr_impl_materialized_lit(owned(quantile), arena, state),
interpol,
},
AggExpr::Sum(expr) => AAggExpr::Sum(to_aexpr_impl_materialized_lit(owned(expr), arena, state)),
AggExpr::Std(expr, ddof) => {
AAggExpr::Std(to_aexpr_impl_materialized_lit(owned(expr), arena, state), ddof)
},
AggExpr::Var(expr, ddof) => {
AAggExpr::Var(to_aexpr_impl_materialized_lit(owned(expr), arena, state), ddof)
AggExpr::Sum(expr) => {
AAggExpr::Sum(to_aexpr_impl_materialized_lit(owned(expr), arena, state))
},
AggExpr::Std(expr, ddof) => AAggExpr::Std(
to_aexpr_impl_materialized_lit(owned(expr), arena, state),
ddof,
),
AggExpr::Var(expr, ddof) => AAggExpr::Var(
to_aexpr_impl_materialized_lit(owned(expr), arena, state),
ddof,
),
AggExpr::AggGroups(expr) => {
AAggExpr::AggGroups(to_aexpr_impl_materialized_lit(owned(expr), arena, state))
},
Expand Down
25 changes: 1 addition & 24 deletions crates/polars-plan/src/logical_plan/expr_expansion.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
//! this contains code used for rewriting projections, expanding wildcards, regex selection etc.
use polars_core::utils::get_supertype;

use super::*;

/// This replace the wildcard Expr with a Column Expr. It also removes the Exclude Expr from the
Expand Down Expand Up @@ -378,28 +376,6 @@ fn expand_function_inputs(expr: Expr, schema: &Schema) -> Expr {
})
}

/// this is determined in type coercion
/// but checking a few types early can improve type stability (e.g. no need for unknown)
fn early_supertype(inputs: &[Expr], schema: &Schema) -> Option<DataType> {
let mut arena = Arena::with_capacity(8);

let mut st = None;
for e in inputs {
let dtype = e
.to_field_amortized(schema, Context::Default, &mut arena)
.ok()?
.dtype;
arena.clear();
match st {
None => {
st = Some(dtype);
},
Some(st_val) => st = get_supertype(&st_val, &dtype),
}
}
st
}

#[derive(Copy, Clone)]
struct ExpansionFlags {
multiple_columns: bool,
Expand Down Expand Up @@ -460,6 +436,7 @@ pub(crate) fn rewrite_projections(
let mut result = Vec::with_capacity(exprs.len() + schema.len());

for mut expr in exprs {
#[cfg(feature = "dtype-struct")]
let result_offset = result.len();

// Functions can have col(["a", "b"]) or col(String) as inputs.
Expand Down

0 comments on commit b64c935

Please sign in to comment.