Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

fix: stack overflow when deserializing Parquet files with large string list columns #1575

Merged
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 19 additions & 15 deletions src/io/parquet/read/deserialize/binary/nested.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,22 +170,26 @@
type Item = Result<(NestedState, Box<dyn Array>)>;

fn next(&mut self) -> Option<Self::Item> {
let maybe_state = next(
&mut self.iter,
&mut self.items,
&mut self.dict,
&mut self.remaining,
&self.init,
self.chunk_size,
&BinaryDecoder::<O>::default(),
);
match maybe_state {
MaybeNext::Some(Ok((nested, decoded))) => {
Some(finish(&self.data_type, decoded.0, decoded.1).map(|array| (nested, array)))
loop {
let maybe_state = next(
&mut self.iter,
&mut self.items,
&mut self.dict,
&mut self.remaining,
&self.init,
self.chunk_size,
&BinaryDecoder::<O>::default(),
);
match maybe_state {
MaybeNext::Some(Ok((nested, decoded))) => {
return Some(
finish(&self.data_type, decoded.0, decoded.1).map(|array| (nested, array)),
)
}
MaybeNext::Some(Err(e)) => return Some(Err(e)),
MaybeNext::None => return None,

Check warning on line 190 in src/io/parquet/read/deserialize/binary/nested.rs

View check run for this annotation

Codecov / codecov/patch

src/io/parquet/read/deserialize/binary/nested.rs#L189-L190

Added lines #L189 - L190 were not covered by tests
MaybeNext::More => continue, // Using continue in a loop instead of calling next helps prevent stack overflow.
}
MaybeNext::Some(Err(e)) => Some(Err(e)),
MaybeNext::None => None,
MaybeNext::More => self.next(),
}
}
}
Loading