feat(python): Pass more options to pyarrow in write_parquet (#5278) (#…
ghuls committed Oct 20, 2022
1 parent 1799fad commit c09f326
Showing 1 changed file with 5 additions and 3 deletions.
py-polars/polars/internals/dataframe/frame.py (8 changes: 5 additions & 3 deletions)
@@ -2098,7 +2098,6 @@ def write_parquet(
         Choose "lz4" for fast compression/decompression.
         Choose "snappy" for more backwards compatibility guarantees
         when you deal with older parquet readers.
-        Method "uncompressed" is not supported by pyarrow.
     compression_level
         The level of compression to use. Higher compression means smaller files on
         disk.
@@ -2112,7 +2111,8 @@
         Size of the row groups in number of rows.
         If None (default), the chunks of the `DataFrame` are
         used. Writing in smaller chunks may reduce memory pressure and improve
-        writing speeds. This argument has no effect if 'pyarrow' is used.
+        writing speeds. If None and ``use_pyarrow=True``, the row group size
+        will be the minimum of the DataFrame size and 64 * 1024 * 1024.
     use_pyarrow
         Use C++ parquet implementation vs rust parquet implementation.
         At the moment C++ supports more features.
@@ -2149,7 +2149,9 @@
     pa.parquet.write_table(
         table=tbl,
         where=file,
-        compression=compression,
+        row_group_size=row_group_size,
+        compression=None if compression == "uncompressed" else compression,
+        compression_level=compression_level,
         write_statistics=statistics,
         **(pyarrow_options or {}),
     )
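
For context, a minimal usage sketch of the patched path (the file names and data below are illustrative, not part of the commit): after this change, "uncompressed" is translated to the None value pyarrow expects, and compression_level and row_group_size are forwarded to pyarrow.parquet.write_table.

    import polars as pl

    df = pl.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})

    # "uncompressed" now works with use_pyarrow=True; the wrapper passes
    # compression=None to pyarrow.parquet.write_table instead of the string.
    df.write_parquet("plain.parquet", compression="uncompressed", use_pyarrow=True)

    # compression_level and row_group_size are likewise forwarded to pyarrow.
    df.write_parquet(
        "compressed.parquet",
        compression="zstd",
        compression_level=10,
        row_group_size=1024,
        use_pyarrow=True,
    )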
