Skip to content

Commit

Permalink
fix writing def levels in multi-page nullable nested columns
Browse files Browse the repository at this point in the history
  • Loading branch information
twilson-palantir committed Jan 13, 2023
1 parent 211be21 commit f8b0cca
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 0 deletions.
2 changes: 2 additions & 0 deletions src/io/parquet/write/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,11 +170,13 @@ fn slice_parquet_array<'a>(
// the slice is a bit awkward because we always want the latest value to compute the next length;
l_nested.offsets = &l_nested.offsets
[offset..offset + std::cmp::min(length + 1, l_nested.offsets.len())];
l_nested.validity_offset = offset;
}
Nested::List(l_nested) => {
is_nested = true;
l_nested.offsets = &l_nested.offsets
[offset..offset + std::cmp::min(length + 1, l_nested.offsets.len())];
l_nested.validity_offset = offset;
}
_ => {}
}
Expand Down
31 changes: 31 additions & 0 deletions src/io/parquet/write/nested/def.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ fn single_list_iter<'a, O: Offset>(nested: &ListNested<'a, O>) -> Box<dyn DebugI
(true, Some(validity)) => Box::new(
validity
.iter()
.skip(nested.validity_offset)
// lists have 2 groups, so
// True => 2
// False => 1
Expand Down Expand Up @@ -221,6 +222,7 @@ mod tests {
is_optional: false,
offsets: &[0, 2, 2, 5, 8, 8, 11, 11, 12],
validity: None,
validity_offset: 0,
}),
Nested::Primitive(None, false, 12),
];
Expand All @@ -246,6 +248,7 @@ mod tests {
is_optional: true,
offsets: &[0, 2, 2, 5, 8, 8, 11, 11, 12],
validity: Some(&v0),
validity_offset: 0,
}),
Nested::Primitive(Some(&v1), true, 12),
];
Expand All @@ -261,11 +264,13 @@ mod tests {
is_optional: false,
offsets: &[0, 2, 4],
validity: None,
validity_offset: 0,
}),
Nested::List(ListNested::<i32> {
is_optional: false,
offsets: &[0, 3, 7, 8, 10],
validity: None,
validity_offset: 0,
}),
Nested::Primitive(None, false, 12),
];
Expand All @@ -283,11 +288,13 @@ mod tests {
is_optional: true,
offsets: &[0, 2, 2, 2, 5],
validity: Some(&a),
validity_offset: 0,
}),
Nested::List(ListNested::<i32> {
is_optional: false,
offsets: &[0, 3, 7, 8, 8, 10],
validity: None,
validity_offset: 0,
}),
Nested::Primitive(None, false, 12),
];
Expand All @@ -306,11 +313,13 @@ mod tests {
is_optional: true,
offsets: &[0, 2, 2, 5],
validity: Some(&a),
validity_offset: 0,
}),
Nested::List(ListNested::<i32> {
is_optional: true,
offsets: &[0, 3, 7, 8, 8, 8],
validity: Some(&b),
validity_offset: 0,
}),
Nested::Primitive(None, false, 12),
];
Expand All @@ -330,16 +339,38 @@ mod tests {
is_optional: true,
offsets: &[0, 2, 2, 4],
validity: Some(&a),
validity_offset: 0,
}),
Nested::List(ListNested::<i32> {
is_optional: true,
offsets: &[0, 3, 7, 8, 8],
validity: Some(&b),
validity_offset: 0,
}),
Nested::Primitive(Some(&c), true, 12),
];
let expected = vec![5, 5, 5, 5, 4, 5, 5, 0, 5, 2];

test(nested, expected)
}

/// Checks definition levels for a nullable list whose validity bitmap is
/// consumed starting at a non-zero `validity_offset`.
#[test]
fn list_offset() {
    // Bits 3, 4 and 5 of this bitmap are `false, true, false`.
    let a = Bitmap::from([
        true, false, false, false, true, false, true, false, false, false, true, false, false,
        false, false,
    ]);
    let nested = vec![
        Nested::List(ListNested {
            is_optional: true,
            // Consecutive differences are 0, 3 and 0.
            offsets: &[4, 4, 7, 7],
            // The bitmap declared above; the previous `&validity` named a
            // binding that does not exist in this function.
            validity: Some(&a),
            validity_offset: 3,
        }),
        Nested::Primitive(None, false, 12),
    ];

    // NOTE(review): presumably 0 for each of the two null lists and 2 for each
    // of the three items of the valid list -- confirm against the def-level iterator.
    let expected = vec![0, 2, 2, 2, 0];
    test(nested, expected)
}
}
4 changes: 4 additions & 0 deletions src/io/parquet/write/nested/rep.rs
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ mod tests {
is_optional: false,
offsets: &[0, 2, 2, 5, 8, 8, 11, 11, 12],
validity: None,
validity_offset: 0,
}),
Nested::Primitive(None, false, 12),
];
Expand All @@ -197,11 +198,13 @@ mod tests {
is_optional: false,
offsets: &[0, 2, 2, 4],
validity: None,
validity_offset: 0,
}),
Nested::List(ListNested::<i32> {
is_optional: false,
offsets: &[0, 3, 7, 8, 10],
validity: None,
validity_offset: 0,
}),
Nested::Primitive(None, false, 10),
];
Expand All @@ -222,6 +225,7 @@ mod tests {
is_optional: true,
offsets: &[0i32, 1, 2],
validity: None,
validity_offset: 0,
}),
Nested::Struct(None, true, 2),
Nested::Primitive(None, true, 2),
Expand Down
4 changes: 4 additions & 0 deletions src/io/parquet/write/pages.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ pub struct ListNested<'a, O: Offset> {
pub is_optional: bool,
pub offsets: &'a [O],
pub validity: Option<&'a Bitmap>,
pub validity_offset: usize,
}

impl<'a, O: Offset> ListNested<'a, O> {
Expand All @@ -27,6 +28,7 @@ impl<'a, O: Offset> ListNested<'a, O> {
is_optional,
offsets,
validity,
validity_offset: 0,
}
}
}
Expand Down Expand Up @@ -487,6 +489,7 @@ mod tests {
is_optional: false,
offsets: &[0, 2, 4],
validity: None,
validity_offset: 0,
}),
Nested::Struct(Some(&Bitmap::from([true, true, false, true])), true, 4),
Nested::Primitive(None, false, 4),
Expand All @@ -496,6 +499,7 @@ mod tests {
is_optional: false,
offsets: &[0, 2, 4],
validity: None,
validity_offset: 0,
}),
Nested::Struct(Some(&Bitmap::from([true, true, false, true])), true, 4),
Nested::Primitive(None, false, 4),
Expand Down

0 comments on commit f8b0cca

Please sign in to comment.