Skip to content

Commit

Permalink
unset sorted metadata on append (#3610)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Jun 7, 2022
1 parent 9a7c042 commit 1895865
Show file tree
Hide file tree
Showing 7 changed files with 36 additions and 19 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use super::*;
use crate::chunked_array::ops::append::new_chunks;
use crate::series::IsSorted;

impl CategoricalChunked {
pub fn append(&mut self, other: &Self) -> Result<()> {
Expand All @@ -8,6 +9,7 @@ impl CategoricalChunked {

let len = self.len();
new_chunks(&mut self.logical.chunks, &other.logical().chunks, len);
self.logical.set_sorted2(IsSorted::Not);
Ok(())
}
}
11 changes: 10 additions & 1 deletion polars/polars-core/src/chunked_array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -166,15 +166,24 @@ impl<T> ChunkedArray<T> {
/// Record the sort direction in the `bit_settings` flags.
///
/// Bit 0 is the "sorted" flag and bit 1 is the "reverse sorted" flag.
/// The flag for the opposite direction is cleared before the requested
/// one is set, so at most one of the two is raised after this call.
///
/// * `reverse` - `true` marks the array as reverse sorted, `false` as sorted.
pub fn set_sorted(&mut self, reverse: bool) {
    // (flag to clear, flag to raise) for the requested direction
    let (stale, wanted) = if reverse {
        (1, 1 << 1)
    } else {
        (1 << 1, 1)
    };
    // unset the opposite direction
    self.bit_settings &= !stale;
    // set the requested direction
    self.bit_settings |= wanted;
}

pub fn set_sorted2(&mut self, sorted: IsSorted) {
match sorted {
IsSorted::Not => {}
IsSorted::Not => {
self.bit_settings &= !(1 << 1);
self.bit_settings &= !1;
}
IsSorted::Ascending => self.set_sorted(false),
IsSorted::Descending => self.set_sorted(true),
}
Expand Down
4 changes: 4 additions & 0 deletions polars/polars-core/src/chunked_array/ops/append.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use crate::prelude::*;
use crate::series::IsSorted;

pub(crate) fn new_chunks(chunks: &mut Vec<ArrayRef>, other: &[ArrayRef], len: usize) {
// replace an empty array
Expand All @@ -19,6 +20,7 @@ where
/// Append the chunks of `other` to this array.
///
/// The 'sorted' metadata is reset to `IsSorted::Not` afterwards, so a
/// flag set before the append does not carry over to the combined array.
pub fn append(&mut self, other: &Self) {
    // Length before appending; read up front because `self.chunks` is
    // mutably borrowed in the call below.
    let own_len = self.len();
    new_chunks(&mut self.chunks, &other.chunks, own_len);
    // presumably the order across the chunk boundary is unknown — hence unset
    self.set_sorted2(IsSorted::Not);
}
}

Expand All @@ -27,13 +29,15 @@ impl BooleanChunked {
/// Append the chunks of `other` to this boolean array.
///
/// The 'sorted' metadata is reset to `IsSorted::Not` afterwards, so a
/// flag set before the append does not carry over to the combined array.
pub fn append(&mut self, other: &Self) {
    // Length before appending; read up front because `self.chunks` is
    // mutably borrowed in the call below.
    let own_len = self.len();
    new_chunks(&mut self.chunks, &other.chunks, own_len);
    // presumably the order across the chunk boundary is unknown — hence unset
    self.set_sorted2(IsSorted::Not);
}
}
#[doc(hidden)]
impl Utf8Chunked {
    /// Append the chunks of `other` to this string array.
    ///
    /// The 'sorted' metadata is reset to `IsSorted::Not` afterwards, so a
    /// flag set before the append does not carry over to the combined array.
    pub fn append(&mut self, other: &Self) {
        // Length before appending; read up front because `self.chunks` is
        // mutably borrowed in the call below.
        let own_len = self.len();
        new_chunks(&mut self.chunks, &other.chunks, own_len);
        // presumably the order across the chunk boundary is unknown — hence unset
        self.set_sorted2(IsSorted::Not);
    }
}

Expand Down
2 changes: 1 addition & 1 deletion polars/polars-core/src/frame/explode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ impl DataFrame {
if i == 0 {
let row_idx = offsets_to_indexes(&offsets, exploded.len());
let mut row_idx = IdxCa::from_vec("", row_idx);
row_idx.set_sorted(true);
row_idx.set_sorted(false);
// Safety
// We just created indices that are in bounds.
df = unsafe { df.take_unchecked(&row_idx) };
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-lazy/src/dsl/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -526,7 +526,7 @@ pub fn concat<L: AsRef<[LazyFrame]>>(inputs: L, rechunk: bool) -> Result<LazyFra
if rechunk {
Ok(lf.map(
|mut df: DataFrame| {
df.rechunk();
df.as_single_chunk_par();
Ok(df)
},
Some(AllowedOptimizations::default()),
Expand Down
17 changes: 1 addition & 16 deletions polars/polars-lazy/src/physical_plan/expressions/count.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
use crate::physical_plan::state::ExecutionState;
use crate::prelude::*;
use polars_arrow::utils::CustomIterTools;
use polars_core::prelude::*;
use polars_core::utils::NoNull;
use std::borrow::Cow;

const COUNT_NAME: &str = "count";
Expand Down Expand Up @@ -32,20 +30,7 @@ impl PhysicalExpr for CountExpr {
groups: &'a GroupsProxy,
_state: &ExecutionState,
) -> Result<AggregationContext<'a>> {
let mut ca = match groups {
GroupsProxy::Idx(groups) => {
let ca: NoNull<IdxCa> = groups
.all()
.iter()
.map(|g| g.len() as IdxSize)
.collect_trusted();
ca.into_inner()
}
GroupsProxy::Slice { groups, .. } => {
let ca: NoNull<IdxCa> = groups.iter().map(|g| g[1]).collect_trusted();
ca.into_inner()
}
};
let mut ca = groups.group_count();
ca.rename(COUNT_NAME);
let s = ca.into_series();

Expand Down
17 changes: 17 additions & 0 deletions py-polars/tests/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,3 +156,20 @@ def test_categorical_is_in_list() -> None:
"a": [1, 2, 3],
"b": ["a", "b", "c"],
}


def test_unset_sorted_on_append() -> None:
    """Concatenating two frames sorted on a categorical key must still group correctly.

    Each frame is sorted on "key" on its own and the two are then concatenated
    without rechunking. Grouping the result by "key" has to report four rows
    for each of the two keys.
    """
    keys = ["a", "b", "a", "b"]
    sorted_frames = [
        pl.DataFrame(
            [
                pl.Series("key", keys, dtype=pl.Categorical),
                pl.Series("val", vals),
            ]
        ).sort("key")
        for vals in ([1, 2, 3, 4], [5, 6, 7, 8])
    ]
    df = pl.concat(sorted_frames, rechunk=False)
    counts = df.groupby("key").count()["count"].to_list()
    assert counts == [4, 4]

0 comments on commit 1895865

Please sign in to comment.