Skip to content

Commit

Permalink
fix groups edge case (#4261)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Aug 4, 2022
1 parent 4e83c02 commit e6dc1c9
Show file tree
Hide file tree
Showing 5 changed files with 98 additions and 31 deletions.
9 changes: 7 additions & 2 deletions polars/polars-core/src/frame/groupby/into_groups.rs
Original file line number Diff line number Diff line change
Expand Up @@ -384,8 +384,10 @@ pub(super) fn pack_utf8_columns(
let lhs = lhs.slice(offset as i64, len);
let rhs = rhs.slice(offset as i64, len);

// the additional 2 is needed for the validity
let size = lhs.get_values_size() + rhs.get_values_size() + lhs.len() * 2 + 1;
// additional bytes reserved per row:
// 2 are needed for the validity
// 1 for the '_' delimiter
let size = lhs.get_values_size() + rhs.get_values_size() + lhs.len() * 3 + 1;

let mut values = Vec::with_capacity(size);
let ptr = values.as_ptr() as usize;
Expand All @@ -397,6 +399,7 @@ pub(super) fn pack_utf8_columns(
let start = values.len();
values.extend_from_slice("11".as_bytes());
values.extend_from_slice(lhs.as_bytes());
values.push(b'_');
values.extend_from_slice(rhs.as_bytes());
// if the buffer were reallocated, the lifetimes of the string slices pointing into it would be invalid
debug_assert_eq!(ptr, values.as_ptr() as usize);
Expand All @@ -417,6 +420,7 @@ pub(super) fn pack_utf8_columns(
(None, Some(rhs)) => {
let start = values.len();
values.extend_from_slice("01".as_bytes());
values.push(b'_');
values.extend_from_slice(rhs.as_bytes());
debug_assert_eq!(ptr, values.as_ptr() as usize);
let end = values.len();
Expand All @@ -432,6 +436,7 @@ pub(super) fn pack_utf8_columns(
let start = values.len();
values.extend_from_slice("10".as_bytes());
values.extend_from_slice(lhs.as_bytes());
values.push(b'_');
debug_assert_eq!(ptr, values.as_ptr() as usize);
let end = values.len();
let str_val: &'static str = unsafe {
Expand Down
6 changes: 3 additions & 3 deletions polars/polars-core/src/vector_hasher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -248,14 +248,14 @@ impl AsU64 for i32 {
#[inline]
fn as_u64(self) -> u64 {
let asu32: u32 = unsafe { std::mem::transmute(self) };
dbg!(asu32 as u64)
asu32 as u64
}
}

impl AsU64 for i64 {
#[inline]
fn as_u64(self) -> u64 {
unsafe { dbg!(std::mem::transmute(self)) }
unsafe { std::mem::transmute(self) }
}
}

Expand Down Expand Up @@ -364,7 +364,7 @@ impl IdxHash {
/// Contains a ptr to the string slice and the precomputed hash of that string.
/// During rehashes, we will rehash the hash instead of the string, which makes rehashing
/// cheap and allows cache coherent small hash tables.
#[derive(Eq, Copy, Clone)]
#[derive(Eq, Copy, Clone, Debug)]
pub(crate) struct StrHash<'a> {
str: Option<&'a str>,
hash: u64,
Expand Down
22 changes: 0 additions & 22 deletions polars/polars-time/src/groupby/dynamic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -653,26 +653,4 @@ mod test {
},
);
}

#[test]
fn test_foo() {
let s = Series::new("a", (0..20i32).collect::<Vec<_>>());
let df = df![
"ints"=> s
]
.unwrap();
let (_, _, groups) = df
.groupby_rolling(
vec![],
&RollingGroupOptions {
index_column: "ints".into(),
period: Duration::parse("2i"),
offset: Duration::parse("-5i"),
closed_window: ClosedWindow::Both,
},
)
.unwrap();

dbg!(groups);
}
}
78 changes: 74 additions & 4 deletions py-polars/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions py-polars/tests/test_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,3 +322,17 @@ def test_when_then_edge_cases_3994() -> None:
.keep_name()
)
).to_dict(False) == {"id": [], "type": []}


def test_edge_cast_string_duplicates_4259() -> None:
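# Carefully constructed data.
# Note that for rows 2 and 3, columns "a" and "b" concatenated give the same string
# ("54612" + "14484" == "546121" + "4484"), although the rows themselves differ.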
df = pl.DataFrame(
{
"a": [99, 54612, 546121],
"b": [1, 14484, 4484],
}
).with_columns(pl.all().cast(pl.Utf8))

mask = df.select(["a", "b"]).is_duplicated()
assert df.filter(pl.lit(mask)).shape == (0, 2)

0 comments on commit e6dc1c9

Please sign in to comment.