Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
fix: fix deserialization of parquets with large string list columns causing stack overflow (#1575)
Browse files Browse the repository at this point in the history
  • Loading branch information
ByteNybbler committed Oct 7, 2023
1 parent 63e99ad commit ced0938
Showing 1 changed file with 19 additions and 15 deletions.
34 changes: 19 additions & 15 deletions src/io/parquet/read/deserialize/binary/nested.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,22 +170,26 @@ impl<O: Offset, I: Pages> Iterator for NestedIter<O, I> {
type Item = Result<(NestedState, Box<dyn Array>)>;

fn next(&mut self) -> Option<Self::Item> {
let maybe_state = next(
&mut self.iter,
&mut self.items,
&mut self.dict,
&mut self.remaining,
&self.init,
self.chunk_size,
&BinaryDecoder::<O>::default(),
);
match maybe_state {
MaybeNext::Some(Ok((nested, decoded))) => {
Some(finish(&self.data_type, decoded.0, decoded.1).map(|array| (nested, array)))
loop {
let maybe_state = next(
&mut self.iter,
&mut self.items,
&mut self.dict,
&mut self.remaining,
&self.init,
self.chunk_size,
&BinaryDecoder::<O>::default(),
);
match maybe_state {
MaybeNext::Some(Ok((nested, decoded))) => {
return Some(
finish(&self.data_type, decoded.0, decoded.1).map(|array| (nested, array)),
)
}
MaybeNext::Some(Err(e)) => return Some(Err(e)),
MaybeNext::None => return None,
MaybeNext::More => continue, // Using continue in a loop instead of calling next helps prevent stack overflow.
}
MaybeNext::Some(Err(e)) => Some(Err(e)),
MaybeNext::None => None,
MaybeNext::More => self.next(),
}
}
}

0 comments on commit ced0938

Please sign in to comment.