Skip to content

Commit

Permalink
concat list determine supertype (#3649)
Browse files Browse the repository at this point in the history
* concat list determine supertype
  • Loading branch information
ritchie46 committed Jun 10, 2022
1 parent cea3e89 commit 462f8a9
Show file tree
Hide file tree
Showing 13 changed files with 104 additions and 56 deletions.
13 changes: 6 additions & 7 deletions polars/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "polars"
version = "0.22.2"
version = "0.22.5"
authors = ["ritchie46 <ritchie46@gmail.com>"]
edition = "2021"
keywords = ["dataframe", "query-engine", "arrow"]
Expand Down Expand Up @@ -217,7 +217,6 @@ docs-selection = [
"decompress",
"mode",
"take_opt_iter",
"extract_jsonpath",
"cum_agg",
"rolling_window",
"interpolate",
Expand All @@ -243,11 +242,11 @@ bench = [
]

[dependencies]
polars-core = { version = "0.22.2", path = "./polars-core", features = ["docs", "private"], default-features = false }
polars-io = { version = "0.22.2", path = "./polars-io", features = ["private"], default-features = false, optional = true }
polars-lazy = { version = "0.22.2", path = "./polars-lazy", features = ["private"], default-features = false, optional = true }
polars-ops = { version = "0.22.2", path = "./polars-ops" }
polars-time = { version = "0.22.2", path = "./polars-time", default-features = false, optional = true }
polars-core = { version = "0.22.5", path = "./polars-core", features = ["docs", "private"], default-features = false }
polars-io = { version = "0.22.5", path = "./polars-io", features = ["private"], default-features = false, optional = true }
polars-lazy = { version = "0.22.5", path = "./polars-lazy", features = ["private"], default-features = false, optional = true }
polars-ops = { version = "0.22.5", path = "./polars-ops" }
polars-time = { version = "0.22.5", path = "./polars-time", default-features = false, optional = true }

[dev-dependencies]
ahash = "0.7"
Expand Down
21 changes: 15 additions & 6 deletions polars/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -75,19 +75,28 @@ bench-save:
bench-cmp:
cargo bench --features=random --bench $(BENCH) -- --load-baseline $(FEAT) --baseline $(BASE)

# Build the rustdoc output for each polars crate listed below, one `cargo doc`
# call per package so that every crate gets its own feature selection
# (`--all-features`, `--features=docs-selection`, or the crate's defaults).
# NOTE(review): despite the target name, these recipes run `cargo doc`, not
# `cargo test --doc` — confirm that a documentation build is what is intended.
doctest:
cargo doc --all-features -p polars-arrow
cargo doc --all-features -p polars-utils
cargo doc --features=docs-selection -p polars-core
cargo doc -p polars-time
cargo doc -p polars-ops
cargo doc --all-features -p polars-io
cargo doc --all-features -p polars-lazy
cargo doc --features=docs-selection -p polars

# Publish the polars crates one at a time with `cargo publish`. The order
# appears to follow the inter-crate dependencies declared in the crates'
# Cargo.toml files: polars-arrow and polars-utils first, then polars-core,
# polars-time / polars-ops, polars-io, polars-lazy, and the top-level `polars`
# crate last.
# `--allow-dirty` lets cargo package a working tree with uncommitted changes.
# The `sleep` calls pause between uploads — presumably so the registry index
# can pick up the crate that was just published before a dependent crate is
# uploaded; confirm before tuning the delay.
# polars-time and polars-ops are published back to back with no pause.
publish:
cargo publish --allow-dirty -p polars-arrow
sleep 10
sleep 15
cargo publish --allow-dirty -p polars-utils
sleep 10
sleep 15
cargo publish --allow-dirty -p polars-core
sleep 10
sleep 15
cargo publish --allow-dirty -p polars-time
cargo publish --allow-dirty -p polars-ops
sleep 10
sleep 15
cargo publish --allow-dirty -p polars-io
sleep 10
sleep 15
cargo publish --allow-dirty -p polars-lazy
sleep 10
sleep 15
cargo publish --allow-dirty -p polars
2 changes: 1 addition & 1 deletion polars/polars-arrow/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "polars-arrow"
version = "0.22.2"
version = "0.22.5"
authors = ["ritchie46 <ritchie46@gmail.com>"]
edition = "2021"
license = "MIT"
Expand Down
6 changes: 3 additions & 3 deletions polars/polars-core/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "polars-core"
version = "0.22.3"
version = "0.22.5"
authors = ["ritchie46 <ritchie46@gmail.com>"]
edition = "2021"
license = "MIT"
Expand Down Expand Up @@ -158,8 +158,8 @@ jsonpath_lib = { version = "0.3.0", optional = true, git = "https://github.com/r
ndarray = { version = "0.15", optional = true, default_features = false }
num = "^0.4"
once_cell = "1"
polars-arrow = { version = "0.22.2", path = "../polars-arrow", features = ["compute"] }
polars-utils = { version = "0.22.2", path = "../polars-utils" }
polars-arrow = { version = "0.22.5", path = "../polars-arrow", features = ["compute"] }
polars-utils = { version = "0.22.5", path = "../polars-utils" }
rand = { version = "0.8", optional = true, features = ["small_rng", "std"] }
rand_distr = { version = "0.4", optional = true }
rayon = "1.5"
Expand Down
10 changes: 5 additions & 5 deletions polars/polars-io/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "polars-io"
version = "0.22.2"
version = "0.22.5"
authors = ["ritchie46 <ritchie46@gmail.com>"]
edition = "2021"
license = "MIT"
Expand Down Expand Up @@ -47,10 +47,10 @@ memchr = "2.4"
memmap = { package = "memmap2", version = "0.5.2", optional = true }
num = "^0.4"
once_cell = "1"
polars-arrow = { version = "0.22.2", path = "../polars-arrow" }
polars-core = { version = "0.22.2", path = "../polars-core", features = ["private"], default-features = false }
polars-time = { version = "0.22.2", path = "../polars-time", features = ["private"], default-features = false, optional = true }
polars-utils = { version = "0.22.2", path = "../polars-utils" }
polars-arrow = { version = "0.22.5", path = "../polars-arrow" }
polars-core = { version = "0.22.5", path = "../polars-core", features = ["private"], default-features = false }
polars-time = { version = "0.22.5", path = "../polars-time", features = ["private"], default-features = false, optional = true }
polars-utils = { version = "0.22.5", path = "../polars-utils" }
rayon = "1.5"
regex = "1.5"
serde = { version = "1", features = ["derive"], optional = true }
Expand Down
14 changes: 7 additions & 7 deletions polars/polars-lazy/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "polars-lazy"
version = "0.22.2"
version = "0.22.5"
authors = ["ritchie46 <ritchie46@gmail.com>"]
edition = "2021"
license = "MIT"
Expand Down Expand Up @@ -123,12 +123,12 @@ rayon = "1.5"
regex = { version = "1.5", optional = true }
serde = { version = "1", features = ["derive", "rc"], optional = true }

polars-arrow = { version = "0.22.2", path = "../polars-arrow" }
polars-core = { version = "0.22.2", path = "../polars-core", features = ["lazy", "private", "zip_with", "random"], default-features = false }
polars-io = { version = "0.22.2", path = "../polars-io", features = ["lazy", "csv-file", "private"], default-features = false }
polars-ops = { version = "0.22.2", path = "../polars-ops", default-features = false }
polars-time = { version = "0.22.2", path = "../polars-time", optional = true }
polars-utils = { version = "0.22.2", path = "../polars-utils" }
polars-arrow = { version = "0.22.5", path = "../polars-arrow" }
polars-core = { version = "0.22.5", path = "../polars-core", features = ["lazy", "private", "zip_with", "random"], default-features = false }
polars-io = { version = "0.22.5", path = "../polars-io", features = ["lazy", "csv-file", "private"], default-features = false }
polars-ops = { version = "0.22.5", path = "../polars-ops", default-features = false }
polars-time = { version = "0.22.5", path = "../polars-time", optional = true }
polars-utils = { version = "0.22.5", path = "../polars-utils" }

[package.metadata.docs.rs]
all-features = true
Expand Down
18 changes: 17 additions & 1 deletion polars/polars-lazy/src/dsl/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,23 @@ pub fn concat_lst<E: AsRef<[IE]>, IE: Into<Expr> + Clone>(s: E) -> Expr {
Expr::AnonymousFunction {
input: s,
function,
output_type: GetOutput::same_type(),
output_type: GetOutput::map_dtypes(|dts| {
let mut super_type_inner = None;

for dt in dts {
match dt {
DataType::List(inner) => match super_type_inner {
None => super_type_inner = Some(*inner.clone()),
Some(st_inner) => super_type_inner = get_supertype(&st_inner, inner).ok(),
},
dt => match super_type_inner {
None => super_type_inner = Some((*dt).clone()),
Some(st_inner) => super_type_inner = get_supertype(&st_inner, dt).ok(),
},
}
}
DataType::List(Box::new(super_type_inner.unwrap()))
}),
options: FunctionOptions {
collect_groups: ApplyOptions::ApplyFlat,
input_wildcard_expansion: true,
Expand Down
6 changes: 3 additions & 3 deletions polars/polars-ops/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "polars-ops"
version = "0.22.2"
version = "0.22.5"
authors = ["ritchie46 <ritchie46@gmail.com>"]
edition = "2021"
license = "MIT"
Expand All @@ -10,8 +10,8 @@ description = "More operations on polars data structures"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
polars-arrow = { version = "0.22.2", path = "../polars-arrow", default-features = false }
polars-core = { version = "0.22.2", path = "../polars-core", features = ["private"], default-features = false }
polars-arrow = { version = "0.22.5", path = "../polars-arrow", default-features = false }
polars-core = { version = "0.22.5", path = "../polars-core", features = ["private"], default-features = false }

[features]
dtype-categorical = ["polars-core/dtype-categorical"]
Expand Down
35 changes: 25 additions & 10 deletions polars/polars-ops/src/chunked_array/list/namespace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use polars_arrow::kernels::list::sublist_get;
use polars_arrow::prelude::ValueSize;
use polars_core::chunked_array::builder::get_list_builder;
use polars_core::series::ops::NullBehavior;
use polars_core::utils::CustomIterTools;
use polars_core::utils::{get_supertype, CustomIterTools};
use std::convert::TryFrom;
use std::fmt::Write;

Expand Down Expand Up @@ -216,13 +216,28 @@ pub trait ListNameSpaceImpl: AsList {
let other_len = other.len();
let length = ca.len();
let mut other = other.to_vec();
let dtype = ca.dtype();
let inner_type = ca.inner_dtype();
let mut inner_super_type = ca.inner_dtype();

for s in &other {
match s.dtype() {
DataType::List(inner_type) => {
inner_super_type = get_supertype(&inner_super_type, inner_type)?;
}
dt => {
inner_super_type = get_supertype(&inner_super_type, dt)?;
}
}
}

// cast lhs
let dtype = &DataType::List(Box::new(inner_super_type.clone()));
let ca = ca.cast(dtype)?;
let ca = ca.list().unwrap();

// broadcasting path in case all unit length
// this path will not expand the series, so saves memory
if other.iter().all(|s| s.len() == 1) && ca.len() != 1 {
cast_rhs(&mut other, &inner_type, dtype, length, false)?;
cast_rhs(&mut other, &inner_super_type, dtype, length, false)?;
let to_append = other
.iter()
.flat_map(|s| {
Expand All @@ -235,7 +250,7 @@ pub trait ListNameSpaceImpl: AsList {
return Ok(ListChunked::full_null_with_dtype(
ca.name(),
length,
&inner_type,
&inner_super_type,
));
}

Expand All @@ -245,7 +260,7 @@ pub trait ListNameSpaceImpl: AsList {
.sum::<usize>();

let mut builder = get_list_builder(
&inner_type,
&inner_super_type,
ca.get_values_size() + vals_size_other + 1,
length,
ca.name(),
Expand All @@ -255,7 +270,7 @@ pub trait ListNameSpaceImpl: AsList {
for append in &to_append {
s.append(append).unwrap();
}
match inner_type {
match inner_super_type {
// structs don't have chunks, so we must first rechunk the underlying series
#[cfg(feature = "dtype-struct")]
DataType::Struct(_) => s = s.rechunk(),
Expand All @@ -269,7 +284,7 @@ pub trait ListNameSpaceImpl: AsList {
Ok(builder.finish())
} else {
// normal path which may contain same length list or unit length lists
cast_rhs(&mut other, &inner_type, dtype, length, true)?;
cast_rhs(&mut other, &inner_super_type, dtype, length, true)?;

let vals_size_other = other
.iter()
Expand All @@ -282,7 +297,7 @@ pub trait ListNameSpaceImpl: AsList {
}
let mut first_iter = ca.into_iter();
let mut builder = get_list_builder(
&inner_type,
&inner_super_type,
ca.get_values_size() + vals_size_other + 1,
length,
ca.name(),
Expand Down Expand Up @@ -319,7 +334,7 @@ pub trait ListNameSpaceImpl: AsList {
continue;
}

match inner_type {
match inner_super_type {
// structs don't have chunks, so we must first rechunk the underlying series
#[cfg(feature = "dtype-struct")]
DataType::Struct(_) => acc = acc.rechunk(),
Expand Down
8 changes: 4 additions & 4 deletions polars/polars-time/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "polars-time"
version = "0.22.2"
version = "0.22.5"
authors = ["ritchie46 <ritchie46@gmail.com>"]
edition = "2021"
license = "MIT"
Expand All @@ -11,9 +11,9 @@ description = "Time related code for the polars dataframe library"
[dependencies]
chrono = "0.4"
lexical = { version = "6", default-features = false, features = ["std", "parse-floats", "parse-integers"] }
polars-arrow = { version = "0.22.2", path = "../polars-arrow", features = ["compute", "temporal"] }
polars-core = { version = "0.22.2", path = "../polars-core", default-features = false, features = ["private", "dtype-datetime", "dtype-duration", "dtype-time", "dtype-date"] }
polars-utils = { version = "0.22.2", path = "../polars-utils" }
polars-arrow = { version = "0.22.5", path = "../polars-arrow", features = ["compute", "temporal"] }
polars-core = { version = "0.22.5", path = "../polars-core", default-features = false, features = ["private", "dtype-datetime", "dtype-duration", "dtype-time", "dtype-date"] }
polars-utils = { version = "0.22.5", path = "../polars-utils" }
serde = { version = "1", features = ["derive"], optional = true }

[features]
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-utils/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "polars-utils"
version = "0.22.2"
version = "0.22.5"
authors = ["ritchie46 <ritchie46@gmail.com>"]
edition = "2021"
license = "MIT"
Expand Down
16 changes: 8 additions & 8 deletions py-polars/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions py-polars/tests/test_lists.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,3 +354,12 @@ def test_list_concat_nulls() -> None:
None,
None,
]


def test_list_concat_supertype() -> None:
    """Concatenating a UInt8 column with a UInt16 column into a list column
    must keep the UInt16 values (10000, 20000) intact, which do not fit in
    UInt8."""
    narrow = pl.Series("a", [1, 2], pl.UInt8)
    wide = pl.Series("b", [10000, 20000], pl.UInt16)
    df = pl.DataFrame([narrow, wide])

    # Build the expression separately so the assertion below stays readable.
    concat_expr = pl.concat_list(pl.col(["a", "b"])).alias("concat_list")
    result = df.with_column(concat_expr)

    expected = [[1, 10000], [2, 20000]]
    assert result["concat_list"].to_list() == expected

0 comments on commit 462f8a9

Please sign in to comment.