deprecate to_<file> and string cache in lazy (#2916)
ritchie46 committed Mar 16, 2022
1 parent 98a3d85 commit b90bcfa
Showing 10 changed files with 171 additions and 49 deletions.
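
The rename is mechanical: every `DataFrame.to_<format>` serializer becomes `DataFrame.write_<format>`, and each old name is kept as a thin wrapper that warns and delegates. A minimal sketch of the migration in Python (the output file names are illustrative only):

import polars as pl

df = pl.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})

# Old spelling: still works, but now emits a deprecation warning.
# df.to_csv("out.csv")

# New spellings introduced by this commit:
df.write_csv("out.csv")
df.write_parquet("out.parquet")
df.write_ipc("out.arrow")
df.write_avro("out.avro")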
8 changes: 6 additions & 2 deletions polars/polars-lazy/src/frame/mod.rs
@@ -100,7 +100,7 @@ pub struct OptState {
pub predicate_pushdown: bool,
pub type_coercion: bool,
pub simplify_expr: bool,
- /// Make sure that all needed columns are scannedn
+ /// Make sure that all needed columns are scanned
pub agg_scan_projection: bool,
pub aggregate_pushdown: bool,
pub global_string_cache: bool,
@@ -114,7 +114,7 @@ impl Default for OptState {
predicate_pushdown: true,
type_coercion: true,
simplify_expr: true,
- global_string_cache: true,
+ global_string_cache: false,
slice_pushdown: true,
// will be toggled by a scan operation such as csv scan or parquet scan
agg_scan_projection: false,
@@ -597,6 +597,10 @@ impl LazyFrame {
pub fn collect(self) -> Result<DataFrame> {
#[cfg(feature = "dtype-categorical")]
let use_string_cache = self.opt_state.global_string_cache;
+ #[cfg(feature = "dtype-categorical")]
+ if use_string_cache {
+ eprint!("global string cache in combination with LazyFrames is deprecated; please set the global string cache globally.")
+ }
let mut expr_arena = Arena::with_capacity(256);
let mut lp_arena = Arena::with_capacity(128);
let lp_top = self.optimize(&mut lp_arena, &mut expr_arena)?;
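
On the Rust side the per-query `global_string_cache` option now defaults to `false` and prints a deprecation notice when enabled; the replacement is to turn the cache on process-wide before running queries that join or compare categoricals. A sketch of the Python-side equivalent, assuming the `pl.toggle_string_cache` and `pl.StringCache` helpers available in py-polars at the time:

import polars as pl

# Enable the cache once, globally, instead of per collect() call...
pl.toggle_string_cache(True)

# ...or scope it with the context manager:
with pl.StringCache():
    left = pl.DataFrame({"key": ["a", "b"]}).with_column(pl.col("key").cast(pl.Categorical))
    right = pl.DataFrame({"key": ["b", "c"]}).with_column(pl.col("key").cast(pl.Categorical))
    joined = left.join(right, on="key")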
10 changes: 5 additions & 5 deletions py-polars/docs/source/reference/io.rst
@@ -12,7 +12,7 @@ CSV

read_csv
scan_csv
- DataFrame.to_csv
+ DataFrame.write_csv

Feather/ IPC
~~~~~~~~~~~~
@@ -22,7 +22,7 @@ Feather/ IPC
read_ipc
scan_ipc
read_ipc_schema
- DataFrame.to_ipc
+ DataFrame.write_ipc

Parquet
~~~~~~~
@@ -31,7 +31,7 @@ Parquet

read_parquet
scan_parquet
- DataFrame.to_parquet
+ DataFrame.write_parquet

SQL
~~~
@@ -46,12 +46,12 @@ JSON
:toctree: api/

read_json
- DataFrame.to_json
+ DataFrame.write_json

AVRO
~~~~
.. autosummary::
:toctree: api/

read_avro
- DataFrame.to_avro
+ DataFrame.write_avro
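
Only the writers are renamed; the `read_*` and `scan_*` entry points are unchanged, so a round trip now pairs a `write_*` call with the matching `read_*`. A small sketch:

import io
import polars as pl

df = pl.DataFrame({"a": [1, 2, 3]})

buf = io.BytesIO()
df.write_parquet(buf)  # formerly df.to_parquet(buf)
buf.seek(0)

assert pl.read_parquet(buf).frame_equal(df)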
127 changes: 120 additions & 7 deletions py-polars/polars/internals/frame.py
@@ -902,6 +902,60 @@ def to_json(
json_lines: bool = False,
*,
to_string: bool = False,
+ ) -> Optional[str]:
+ """
+ .. deprecated:: 0.13.12
+ Please use `write_json`
+ """
+ warnings.warn("'to_json' is deprecated. please use 'write_json'")
+ return self.write_json(
+ file, pretty, row_oriented, json_lines, to_string=to_string
+ )
+
+ @overload
+ def write_json(
+ self,
+ file: Optional[Union[IOBase, str, Path]] = ...,
+ pretty: bool = ...,
+ row_oriented: bool = ...,
+ json_lines: bool = ...,
+ *,
+ to_string: Literal[True],
+ ) -> str:
+ ...
+
+ @overload
+ def write_json(
+ self,
+ file: Optional[Union[IOBase, str, Path]] = ...,
+ pretty: bool = ...,
+ row_oriented: bool = ...,
+ json_lines: bool = ...,
+ *,
+ to_string: Literal[False] = ...,
+ ) -> None:
+ ...
+
+ @overload
+ def write_json(
+ self,
+ file: Optional[Union[IOBase, str, Path]] = ...,
+ pretty: bool = ...,
+ row_oriented: bool = ...,
+ json_lines: bool = ...,
+ *,
+ to_string: bool = ...,
+ ) -> Optional[str]:
+ ...
+
+ def write_json(
+ self,
+ file: Optional[Union[IOBase, str, Path]] = None,
+ pretty: bool = False,
+ row_oriented: bool = False,
+ json_lines: bool = False,
+ *,
+ to_string: bool = False,
) -> Optional[str]:
"""
Serialize to JSON representation.
@@ -970,7 +1024,7 @@ def to_pandas(
tbl = pa.Table.from_batches(record_batches)
return tbl.to_pandas(*args, date_as_object=date_as_object, **kwargs)

- def to_csv(
+ def write_csv(
self,
file: Optional[Union[TextIO, BytesIO, str, Path]] = None,
has_header: bool = True,
@@ -984,7 +1038,7 @@ def to_csv(
file
File path to which the file should be written.
has_header
- Whether or not to include header in the CSV output.
+ Whether to include header in the CSV output.
sep
Separate CSV fields with this symbol.
@@ -998,7 +1052,7 @@ def to_csv(
... "ham": ["a", "b", "c", "d", "e"],
... }
... )
- >>> df.to_csv("new_file.csv", sep=",")
+ >>> df.write_csv("new_file.csv", sep=",")
"""
if file is None:
@@ -1012,7 +1066,20 @@ def to_csv(
self._df.to_csv(file, has_header, ord(sep))
return None

- def to_avro(
+ def to_csv(
+ self,
+ file: Optional[Union[TextIO, BytesIO, str, Path]] = None,
+ has_header: bool = True,
+ sep: str = ",",
+ ) -> Optional[str]:
+ """
+ .. deprecated:: 0.13.12
+ Please use `write_csv`
+ """
+ warnings.warn("'to_csv' is deprecated. please use 'write_csv'")
+ return self.write_csv(file, has_header, sep)
+
+ def write_avro(
self,
file: Union[BinaryIO, BytesIO, str, Path],
compression: Literal["uncompressed", "snappy", "deflate"] = "uncompressed",
@@ -1035,7 +1102,19 @@ def to_avro(

self._df.to_avro(file, compression)

- def to_ipc(
+ def to_avro(
+ self,
+ file: Union[BinaryIO, BytesIO, str, Path],
+ compression: Literal["uncompressed", "snappy", "deflate"] = "uncompressed",
+ ) -> None:
+ """
+ .. deprecated:: 0.13.12
+ Please use `write_avro`
+ """
+ warnings.warn("'to_avro' is deprecated. please use 'write_avro'")
+ return self.write_avro(file, compression)
+
+ def write_ipc(
self,
file: Union[BinaryIO, BytesIO, str, Path],
compression: Optional[Literal["uncompressed", "lz4", "zstd"]] = "uncompressed",
@@ -1060,6 +1139,18 @@ def to_ipc(

self._df.to_ipc(file, compression)

+ def to_ipc(
+ self,
+ file: Union[BinaryIO, BytesIO, str, Path],
+ compression: Optional[Literal["uncompressed", "lz4", "zstd"]] = "uncompressed",
+ ) -> None:
+ """
+ .. deprecated:: 0.13.12
+ Please use `write_ipc`
+ """
+ warnings.warn("'to_ipc' is deprecated. please use 'write_ipc'")
+ return self.write_ipc(file, compression)
+
def to_dicts(self) -> List[Dict[str, Any]]:
pydf = self._df
names = self.columns
@@ -1177,7 +1268,7 @@ def transpose(
df.columns = names
return df

- def to_parquet(
+ def write_parquet(
self,
file: Union[str, Path, BytesIO],
compression: Optional[
@@ -1224,7 +1315,7 @@ def to_parquet(
if use_pyarrow:
if not _PYARROW_AVAILABLE:
raise ImportError( # pragma: no cover
"'pyarrow' is required when using 'to_parquet(..., use_pyarrow=True)'."
"'pyarrow' is required when using 'write_parquet(..., use_pyarrow=True)'."
)

tbl = self.to_arrow()
@@ -1251,6 +1342,28 @@ def to_parquet(
else:
self._df.to_parquet(file, compression, statistics)

+ def to_parquet(
+ self,
+ file: Union[str, Path, BytesIO],
+ compression: Optional[
+ Union[
+ Literal[
+ "uncompressed", "snappy", "gzip", "lzo", "brotli", "lz4", "zstd"
+ ],
+ str,
+ ]
+ ] = "snappy",
+ statistics: bool = False,
+ use_pyarrow: bool = False,
+ **kwargs: Any,
+ ) -> None:
+ """
+ .. deprecated:: 0.13.12
+ Please use `write_parquet`
+ """
+ warnings.warn("'to_parquet' is deprecated. please use 'write_parquet'")
+ return self.write_parquet(file, compression, statistics, use_pyarrow, **kwargs)
+
def to_numpy(self) -> np.ndarray:
"""
Convert DataFrame to a 2d numpy array.
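
Each deprecated method above is a plain wrapper: it calls `warnings.warn` (an ordinary `UserWarning` in this commit, not a `DeprecationWarning`) and then delegates to the new name, so behaviour and return values are unchanged. A sketch of how the warning surfaces:

import io
import warnings
import polars as pl

df = pl.DataFrame({"a": [1, 2, 3]})

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    df.to_csv(io.BytesIO())  # delegates to write_csv

assert any("'to_csv' is deprecated" in str(w.message) for w in caught)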
6 changes: 5 additions & 1 deletion py-polars/polars/internals/lazy_frame.py
@@ -441,6 +441,8 @@ def collect(
simplify_expression
Run simplify expressions optimization.
string_cache
+ This argument is deprecated. Please set the string cache globally.
Use a global string cache in this query.
This is needed if you want to join on categorical columns.
@@ -478,7 +480,7 @@ def fetch(
predicate_pushdown: bool = True,
projection_pushdown: bool = True,
simplify_expression: bool = True,
- string_cache: bool = True,
+ string_cache: bool = False,
no_optimization: bool = False,
slice_pushdown: bool = True,
) -> DF:
@@ -503,6 +505,8 @@ def fetch(
simplify_expression
Run simplify expressions optimization.
string_cache
+ This argument is deprecated. Please set the string cache globally.
Use a global string cache in this query.
This is needed if you want to join on categorical columns.
no_optimization
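
With `string_cache` now defaulting to `False` in `fetch` and deprecated in `collect`, lazy queries should no longer toggle the cache per call. A sketch of the before/after, assuming `pl.toggle_string_cache` as above:

import polars as pl

lf = (
    pl.DataFrame({"key": ["a", "b", "a"]})
    .lazy()
    .with_column(pl.col("key").cast(pl.Categorical))
)

# Deprecated: lf.collect(string_cache=True)
# Preferred: enable the cache globally once, then collect without the flag.
pl.toggle_string_cache(True)
out = lf.collect()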
8 changes: 4 additions & 4 deletions py-polars/tests/io/test_avro.py
@@ -21,7 +21,7 @@ def compressions() -> List[str]:
def test_from_to_buffer(example_df: pl.DataFrame, compressions: List[str]) -> None:
for compression in compressions:
buf = io.BytesIO()
- example_df.to_avro(buf, compression=compression) # type: ignore
+ example_df.write_avro(buf, compression=compression) # type: ignore
buf.seek(0)
read_df = pl.read_avro(buf)
assert example_df.frame_equal(read_df)
@@ -33,7 +33,7 @@ def test_from_to_file(
f = os.path.join(io_test_dir, "small.avro")

for compression in compressions:
- example_df.to_avro(f, compression=compression) # type: ignore
+ example_df.write_avro(f, compression=compression) # type: ignore
df_read = pl.read_avro(str(f))
assert example_df.frame_equal(df_read)

@@ -43,7 +43,7 @@ def test_select_columns() -> None:
expected = pl.DataFrame({"b": [True, False, True], "c": ["a", "b", "c"]})

f = io.BytesIO()
- df.to_avro(f)
+ df.write_avro(f)
f.seek(0)

read_df = pl.read_avro(f, columns=["b", "c"])
@@ -55,7 +55,7 @@ def test_select_projection() -> None:
expected = pl.DataFrame({"b": [True, False, True], "c": ["a", "b", "c"]})

f = io.BytesIO()
- df.to_avro(f)
+ df.write_avro(f)
f.seek(0)

read_df = pl.read_avro(f, columns=[1, 2])
8 changes: 4 additions & 4 deletions py-polars/tests/io/test_csv.py
@@ -15,7 +15,7 @@

def test_to_from_buffer(df: pl.DataFrame) -> None:
buf = io.BytesIO()
- df.to_csv(buf)
+ df.write_csv(buf)
buf.seek(0)

read_df = pl.read_csv(buf, parse_dates=True)
@@ -30,7 +30,7 @@ def test_to_from_file(io_test_dir: str, df: pl.DataFrame) -> None:
df = df.drop("strings_nulls")

f = os.path.join(io_test_dir, "small.csv")
- df.to_csv(f)
+ df.write_csv(f)

read_df = pl.read_csv(f, parse_dates=True)

@@ -353,7 +353,7 @@ def test_csv_schema_offset(foods_csv: str) -> None:
def test_empty_string_missing_round_trip() -> None:
df = pl.DataFrame({"varA": ["A", "", None], "varB": ["B", "", None]})
f = io.BytesIO()
- df.to_csv(f)
+ df.write_csv(f)
f.seek(0)
df_read = pl.read_csv(f)
assert df.frame_equal(df_read)
@@ -362,7 +362,7 @@ def test_write_csv_delimiter() -> None:
def test_write_csv_delimiter() -> None:
df = pl.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
f = io.BytesIO()
- df.to_csv(f, sep="\t")
+ df.write_csv(f, sep="\t")
f.seek(0)
assert f.read() == b"a\tb\n1\t1\n2\t2\n3\t3\n"

