Skip to content

Commit

Permalink
rechunk before writing to parquet (#2875)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Mar 11, 2022
1 parent ef66b85 commit 255fb9a
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 9 deletions.
10 changes: 5 additions & 5 deletions polars/polars-io/src/parquet/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ mod test {
s.cast(&DataType::Datetime(TimeUnit::Nanoseconds, None))
})?;

ParquetWriter::new(&mut f).finish(&df)?;
ParquetWriter::new(&mut f).finish(&mut df)?;

f.seek(SeekFrom::Start(0))?;

Expand All @@ -68,10 +68,10 @@ mod test {
#[test]
fn test_read_parquet_with_projection() {
let mut buf: Cursor<Vec<u8>> = Cursor::new(Vec::new());
let df = df!("a" => [1, 2, 3], "b" => [2, 3, 4], "c" => [3, 4, 5]).unwrap();
let mut df = df!("a" => [1, 2, 3], "b" => [2, 3, 4], "c" => [3, 4, 5]).unwrap();

ParquetWriter::new(&mut buf)
.finish(&df)
.finish(&mut df)
.expect("parquet writer");
buf.set_position(0);

Expand All @@ -87,10 +87,10 @@ mod test {
#[test]
fn test_read_parquet_with_columns() {
let mut buf: Cursor<Vec<u8>> = Cursor::new(Vec::new());
let df = df!("a" => [1, 2, 3], "b" => [2, 3, 4], "c" => [3, 4, 5]).unwrap();
let mut df = df!("a" => [1, 2, 3], "b" => [2, 3, 4], "c" => [3, 4, 5]).unwrap();

ParquetWriter::new(&mut buf)
.finish(&df)
.finish(&mut df)
.expect("parquet writer");
buf.set_position(0);

Expand Down
3 changes: 2 additions & 1 deletion polars/polars-io/src/parquet/write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,8 @@ where
}

/// Write the given DataFrame in the the writer `W`.
pub fn finish(mut self, df: &DataFrame) -> Result<()> {
pub fn finish(mut self, df: &mut DataFrame) -> Result<()> {
df.rechunk();
let fields = df.schema().to_arrow().fields;
let rb_iter = df.iter_chunks();

Expand Down
2 changes: 1 addition & 1 deletion polars/polars-lazy/src/tests/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ fn init_files() {
let f = std::fs::File::create(&out_path).unwrap();
ParquetWriter::new(f)
.with_statistics(true)
.finish(&df)
.finish(&mut df)
.unwrap();
} else {
let f = std::fs::File::create(&out_path).unwrap();
Expand Down
9 changes: 7 additions & 2 deletions py-polars/src/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -477,7 +477,12 @@ impl PyDataFrame {
}

#[cfg(feature = "parquet")]
pub fn to_parquet(&self, py_f: PyObject, compression: &str, statistics: bool) -> PyResult<()> {
pub fn to_parquet(
&mut self,
py_f: PyObject,
compression: &str,
statistics: bool,
) -> PyResult<()> {
let compression = match compression {
"uncompressed" => ParquetCompression::Uncompressed,
"snappy" => ParquetCompression::Snappy,
Expand All @@ -493,7 +498,7 @@ impl PyDataFrame {
ParquetWriter::new(buf)
.with_compression(compression)
.with_statistics(statistics)
.finish(&self.df)
.finish(&mut self.df)
.map_err(PyPolarsErr::from)?;
Ok(())
}
Expand Down

0 comments on commit 255fb9a

Please sign in to comment.