Skip to content

Commit

Permalink
don't copy the sorted flag on many operations (#3795)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Jun 24, 2022
1 parent b59b6d7 commit c578bfb
Show file tree
Hide file tree
Showing 25 changed files with 149 additions and 91 deletions.
2 changes: 1 addition & 1 deletion polars/polars-core/src/chunked_array/arithmetic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ where
.zip(rhs.downcast_iter())
.map(|(lhs, rhs)| Box::new(kernel(lhs, rhs)) as ArrayRef)
.collect();
lhs.copy_with_chunks(chunks)
lhs.copy_with_chunks(chunks, false)
}
// broadcast right path
(_, 1) => {
Expand Down
17 changes: 6 additions & 11 deletions polars/polars-core/src/chunked_array/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,18 +64,13 @@ where
}
_ => cast_impl_inner(self.name(), &self.chunks, data_type, checked).map(|mut s| {
// maintain sorted if data types remain signed
if self.is_sorted()
|| self.is_sorted_reverse() && (s.null_count() == self.null_count())
// this may still fail with overflow?
if ((self.dtype().is_signed() && data_type.is_signed())
|| (self.dtype().is_unsigned() && data_type.is_unsigned()))
&& (s.null_count() == self.null_count())
{
// this may still fail with overflow?
//
if (self.dtype().is_signed() && data_type.is_signed())
|| (self.dtype().is_unsigned() && data_type.is_unsigned())
{
let reverse = self.is_sorted_reverse();
let inner = s._get_inner_mut();
inner._set_sorted(reverse)
}
let is_sorted = self.is_sorted2();
s.set_sorted(is_sorted)
}
s
}),
Expand Down
18 changes: 16 additions & 2 deletions polars/polars-core/src/chunked_array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,16 @@ impl<T> ChunkedArray<T> {
}
}

pub fn is_sorted2(&self) -> IsSorted {
if self.is_sorted() {
IsSorted::Ascending
} else if self.is_sorted_reverse() {
IsSorted::Descending
} else {
IsSorted::Not
}
}

pub fn set_sorted2(&mut self, sorted: IsSorted) {
match sorted {
IsSorted::Not => {
Expand Down Expand Up @@ -325,14 +335,18 @@ impl<T> ChunkedArray<T> {
}

/// Create a new ChunkedArray from self, where the chunks are replaced.
fn copy_with_chunks(&self, chunks: Vec<ArrayRef>) -> Self {
ChunkedArray {
fn copy_with_chunks(&self, chunks: Vec<ArrayRef>, keep_sorted: bool) -> Self {
let mut out = ChunkedArray {
field: self.field.clone(),
chunks,
phantom: PhantomData,
categorical_map: self.categorical_map.clone(),
bit_settings: self.bit_settings,
};
if !keep_sorted {
out.set_sorted2(IsSorted::Not);
}
out
}

/// Get a mask of the null values.
Expand Down
12 changes: 6 additions & 6 deletions polars/polars-core/src/chunked_array/ops/chunkops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ where
}
#[inline]
fn slice(&self, offset: i64, length: usize) -> Self {
self.copy_with_chunks(slice(&self.chunks, offset, length, self.len()))
self.copy_with_chunks(slice(&self.chunks, offset, length, self.len()), true)
}
}

Expand All @@ -92,7 +92,7 @@ impl ChunkOps for BooleanChunked {
}
#[inline]
fn slice(&self, offset: i64, length: usize) -> Self {
self.copy_with_chunks(slice(&self.chunks, offset, length, self.len()))
self.copy_with_chunks(slice(&self.chunks, offset, length, self.len()), true)
}
}

Expand All @@ -109,12 +109,12 @@ impl ChunkOps for Utf8Chunked {
.as_slice(),
)
.unwrap()];
self.copy_with_chunks(chunks)
self.copy_with_chunks(chunks, true)
}
}
#[inline]
fn slice(&self, offset: i64, length: usize) -> Self {
self.copy_with_chunks(slice(&self.chunks, offset, length, self.len()))
self.copy_with_chunks(slice(&self.chunks, offset, length, self.len()), true)
}
}

Expand All @@ -140,7 +140,7 @@ impl ChunkOps for ListChunked {
}
#[inline]
fn slice(&self, offset: i64, length: usize) -> Self {
self.copy_with_chunks(slice(&self.chunks, offset, length, self.len()))
self.copy_with_chunks(slice(&self.chunks, offset, length, self.len()), true)
}
}

Expand Down Expand Up @@ -183,7 +183,7 @@ where
}
#[inline]
fn slice(&self, offset: i64, length: usize) -> Self {
self.copy_with_chunks(slice(&self.chunks, offset, length, self.len()))
self.copy_with_chunks(slice(&self.chunks, offset, length, self.len()), true)
}
}

Expand Down
6 changes: 3 additions & 3 deletions polars/polars-core/src/chunked_array/ops/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ where
.zip(filter.downcast_iter())
.map(|(left, mask)| filter_fn(left, mask).unwrap())
.collect::<Vec<_>>();
Ok(self.copy_with_chunks(chunks))
Ok(self.copy_with_chunks(chunks, true))
}
}

Expand All @@ -66,7 +66,7 @@ impl ChunkFilter<BooleanType> for BooleanChunked {
.zip(filter.downcast_iter())
.map(|(left, mask)| filter_fn(left, mask).unwrap())
.collect::<Vec<_>>();
Ok(self.copy_with_chunks(chunks))
Ok(self.copy_with_chunks(chunks, true))
}
}

Expand All @@ -88,7 +88,7 @@ impl ChunkFilter<Utf8Type> for Utf8Chunked {
.map(|(left, mask)| filter_fn(left, mask).unwrap())
.collect::<Vec<_>>();

Ok(self.copy_with_chunks(chunks))
Ok(self.copy_with_chunks(chunks, true))
}
}

Expand Down
20 changes: 10 additions & 10 deletions polars/polars-core/src/chunked_array/ops/take/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ where
}
}
};
self.copy_with_chunks(vec![array])
self.copy_with_chunks(vec![array], false)
}
TakeIdx::Iter(iter) => {
if self.is_empty() {
Expand All @@ -122,7 +122,7 @@ where
return ca;
}
};
self.copy_with_chunks(vec![array])
self.copy_with_chunks(vec![array], false)
}
TakeIdx::IterNulls(iter) => {
if self.is_empty() {
Expand All @@ -143,7 +143,7 @@ where
return ca;
}
};
self.copy_with_chunks(vec![array])
self.copy_with_chunks(vec![array], false)
}
}
}
Expand Down Expand Up @@ -192,7 +192,7 @@ impl ChunkTake for BooleanChunked {
}
}
};
self.copy_with_chunks(vec![array])
self.copy_with_chunks(vec![array], false)
}
TakeIdx::Iter(iter) => {
if self.is_empty() {
Expand All @@ -209,7 +209,7 @@ impl ChunkTake for BooleanChunked {
return ca;
}
};
self.copy_with_chunks(vec![array])
self.copy_with_chunks(vec![array], false)
}
TakeIdx::IterNulls(iter) => {
if self.is_empty() {
Expand All @@ -229,7 +229,7 @@ impl ChunkTake for BooleanChunked {
return ca;
}
};
self.copy_with_chunks(vec![array])
self.copy_with_chunks(vec![array], false)
}
}
}
Expand Down Expand Up @@ -278,7 +278,7 @@ impl ChunkTake for Utf8Chunked {
}
}
};
self.copy_with_chunks(vec![array])
self.copy_with_chunks(vec![array], false)
}
TakeIdx::Iter(iter) => {
let array = match (self.has_validity(), self.chunks.len()) {
Expand All @@ -292,7 +292,7 @@ impl ChunkTake for Utf8Chunked {
return ca;
}
};
self.copy_with_chunks(vec![array])
self.copy_with_chunks(vec![array], false)
}
TakeIdx::IterNulls(iter) => {
let array = match (self.has_validity(), self.chunks.len()) {
Expand All @@ -309,7 +309,7 @@ impl ChunkTake for Utf8Chunked {
return ca;
}
};
self.copy_with_chunks(vec![array])
self.copy_with_chunks(vec![array], false)
}
}
}
Expand Down Expand Up @@ -369,7 +369,7 @@ impl ChunkTake for ListChunked {
}
}
};
ca_self.copy_with_chunks(vec![array])
ca_self.copy_with_chunks(vec![array], false)
}
// todo! fast path for single chunk
TakeIdx::Iter(iter) => {
Expand Down
11 changes: 9 additions & 2 deletions polars/polars-core/src/frame/groupby/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,11 @@ impl<'df> GroupBy<'df> {
let mut iter = groups.iter().map(|(first, _idx)| first as usize);
// Safety:
// groups are always in bounds
unsafe { s.take_iter_unchecked(&mut iter) }
let mut out = unsafe { s.take_iter_unchecked(&mut iter) };
if groups.sorted {
out.set_sorted(s.is_sorted());
};
out
}
GroupsProxy::Slice { groups, rolling } => {
if *rolling && !groups.is_empty() {
Expand All @@ -348,7 +352,10 @@ impl<'df> GroupBy<'df> {
let mut iter = groups.iter().map(|&[first, _len]| first as usize);
// Safety:
// groups are always in bounds
unsafe { s.take_iter_unchecked(&mut iter) }
let mut out = unsafe { s.take_iter_unchecked(&mut iter) };
// sliced groups are always in order of discovery
out.set_sorted(s.is_sorted());
out
}
}
})
Expand Down
7 changes: 6 additions & 1 deletion polars/polars-core/src/frame/groupby/proxy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,12 @@ pub type GroupsSlice = Vec<[IdxSize; 2]>;
#[derive(Debug, Clone, PartialEq)]
pub enum GroupsProxy {
Idx(GroupsIdx),
Slice { groups: GroupsSlice, rolling: bool },
Slice {
// the groups slices
groups: GroupsSlice,
// indicates if we do a rolling groupby
rolling: bool,
},
}

impl Default for GroupsProxy {
Expand Down
6 changes: 5 additions & 1 deletion polars/polars-core/src/frame/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1766,7 +1766,11 @@ impl DataFrame {
// not present in the dataframe
let _ = df.apply(&first_by_column, |s| {
let mut s = s.clone();
s.set_sorted(first_reverse);
if first_reverse {
s.set_sorted(IsSorted::Descending)
} else {
s.set_sorted(IsSorted::Ascending)
}
s
});
Ok(df)
Expand Down
4 changes: 2 additions & 2 deletions polars/polars-core/src/series/implementations/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ impl private::PrivateSeries for SeriesWrap<BooleanChunked> {
self.0.explode_by_offsets(offsets)
}

fn _set_sorted(&mut self, reverse: bool) {
self.0.set_sorted(reverse)
fn _set_sorted(&mut self, is_sorted: IsSorted) {
self.0.set_sorted2(is_sorted)
}

unsafe fn equal_element(&self, idx_self: usize, idx_other: usize, other: &Series) -> bool {
Expand Down
4 changes: 2 additions & 2 deletions polars/polars-core/src/series/implementations/categorical.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ impl private::PrivateSeries for SeriesWrap<CategoricalChunked> {
.into_series()
}

fn _set_sorted(&mut self, reverse: bool) {
self.0.logical_mut().set_sorted(reverse)
fn _set_sorted(&mut self, is_sorted: IsSorted) {
self.0.logical_mut().set_sorted2(is_sorted)
}

unsafe fn equal_element(&self, idx_self: usize, idx_other: usize, other: &Series) -> bool {
Expand Down
6 changes: 3 additions & 3 deletions polars/polars-core/src/series/implementations/dates_time.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ macro_rules! impl_dyn_series {
self.0.cummin(reverse).$into_logical().into_series()
}

fn _set_sorted(&mut self, reverse: bool) {
self.0.deref_mut().set_sorted(reverse)
fn _set_sorted(&mut self, is_sorted: IsSorted) {
self.0.deref_mut().set_sorted2(is_sorted)
}

#[cfg(feature = "zip_with")]
Expand Down Expand Up @@ -297,7 +297,7 @@ macro_rules! impl_dyn_series {
let mut out = ChunkTake::take_unchecked(self.0.deref(), idx.into());

if self.0.is_sorted() && (idx.is_sorted() || idx.is_sorted_reverse()) {
out.set_sorted(idx.is_sorted_reverse())
out.set_sorted2(idx.is_sorted2())
}

Ok(out.$into_logical().into_series())
Expand Down
6 changes: 3 additions & 3 deletions polars/polars-core/src/series/implementations/datetime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ impl private::PrivateSeries for SeriesWrap<DatetimeChunked> {
.into_series()
}

fn _set_sorted(&mut self, reverse: bool) {
self.0.deref_mut().set_sorted(reverse)
fn _set_sorted(&mut self, is_sorted: IsSorted) {
self.0.deref_mut().set_sorted2(is_sorted)
}

#[cfg(feature = "zip_with")]
Expand Down Expand Up @@ -332,7 +332,7 @@ impl SeriesTrait for SeriesWrap<DatetimeChunked> {
let mut out = ChunkTake::take_unchecked(self.0.deref(), idx.into());

if self.0.is_sorted() && (idx.is_sorted() || idx.is_sorted_reverse()) {
out.set_sorted(idx.is_sorted_reverse())
out.set_sorted2(idx.is_sorted2())
}

Ok(out
Expand Down
6 changes: 3 additions & 3 deletions polars/polars-core/src/series/implementations/duration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ impl private::PrivateSeries for SeriesWrap<DurationChunked> {
.into_series()
}

fn _set_sorted(&mut self, reverse: bool) {
self.0.deref_mut().set_sorted(reverse)
fn _set_sorted(&mut self, is_sorted: IsSorted) {
self.0.deref_mut().set_sorted2(is_sorted)
}

unsafe fn equal_element(&self, idx_self: usize, idx_other: usize, other: &Series) -> bool {
Expand Down Expand Up @@ -359,7 +359,7 @@ impl SeriesTrait for SeriesWrap<DurationChunked> {
let mut out = ChunkTake::take_unchecked(self.0.deref(), idx.into());

if self.0.is_sorted() && (idx.is_sorted() || idx.is_sorted_reverse()) {
out.set_sorted(idx.is_sorted_reverse())
out.set_sorted2(idx.is_sorted2())
}

Ok(out.into_duration(self.0.time_unit()).into_series())
Expand Down
6 changes: 3 additions & 3 deletions polars/polars-core/src/series/implementations/floats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ macro_rules! impl_dyn_series {
self.0.cummin(reverse).into_series()
}

fn _set_sorted(&mut self, reverse: bool) {
self.0.set_sorted(reverse)
fn _set_sorted(&mut self, is_sorted: IsSorted) {
self.0.set_sorted2(is_sorted)
}

unsafe fn equal_element(
Expand Down Expand Up @@ -279,7 +279,7 @@ macro_rules! impl_dyn_series {
let mut out = ChunkTake::take_unchecked(&self.0, (&*idx).into());

if self.0.is_sorted() && (idx.is_sorted() || idx.is_sorted_reverse()) {
out.set_sorted(idx.is_sorted_reverse())
out.set_sorted2(idx.is_sorted2())
}

Ok(out.into_series())
Expand Down

0 comments on commit c578bfb

Please sign in to comment.