python [breaking]: rename some kwargs/names
ritchie46 committed Dec 9, 2021
1 parent a074b73 commit 222aa3b
Showing 4 changed files with 34 additions and 30 deletions.
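The user-facing effect of the rename, as a minimal sketch (the in-memory CSV, column names, and dtypes here are illustrative, not taken from the commit):

import io

import polars as pl

csv_data = io.StringIO("col1,col2\nfoo,1\nbar,2")

# Before this commit the keyword was `dtype`; after it, the same mapping is
# passed as `dtypes` (a dict of column name -> dtype, or a list of dtypes).
df = pl.read_csv(csv_data, dtypes={"col1": pl.Utf8, "col2": pl.Int32})
print(df.dtypes)  # the overrides are applied: Utf8, Int32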
14 changes: 7 additions & 7 deletions py-polars/polars/internals/frame.py
@@ -377,7 +377,7 @@ def read_csv(
rechunk: bool = True,
encoding: str = "utf8",
n_threads: Optional[int] = None,
- dtype: Union[Dict[str, Type[DataType]], tp.List[Type[DataType]], None] = None,
+ dtypes: Union[Dict[str, Type[DataType]], tp.List[Type[DataType]], None] = None,
low_memory: bool = False,
comment_char: Optional[str] = None,
quote_char: Optional[str] = r'"',
@@ -419,7 +419,7 @@ def read_csv(
Allowed encodings: `utf8`, `utf8-lossy`. Lossy means that invalid utf8 values are replaced with `�` character.
n_threads
Number of threads to use in csv parsing. Defaults to the number of physical cpu's of your system.
- dtype
+ dtypes
Overwrite the dtypes during inference.
low_memory
Reduce memory usage in expense of performance.
@@ -463,13 +463,13 @@ def read_csv(

dtype_list: Optional[tp.List[Tuple[str, Type[DataType]]]] = None
dtype_slice: Optional[tp.List[Type[DataType]]] = None
- if dtype is not None:
- if isinstance(dtype, dict):
+ if dtypes is not None:
+ if isinstance(dtypes, dict):
dtype_list = []
- for k, v in dtype.items():
+ for k, v in dtypes.items():
dtype_list.append((k, py_type_to_dtype(v)))
- elif isinstance(dtype, list):
- dtype_slice = dtype
+ elif isinstance(dtypes, list):
+ dtype_slice = dtypes
else:
raise ValueError("dtype arg should be list or dict")

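The dict-versus-list handling that the hunk above renames, pulled out as a standalone sketch; `py_type_to_dtype` is a hypothetical stand-in for the internal helper of the same name:

from typing import Dict, List, Optional, Tuple, Type, Union


def py_type_to_dtype(v: Type) -> Type:
    # Stand-in: the real helper converts Python types to polars DataType classes.
    return v


def normalize_dtypes(
    dtypes: Union[Dict[str, Type], List[Type], None]
) -> Tuple[Optional[List[Tuple[str, Type]]], Optional[List[Type]]]:
    # Split `dtypes` into the two shapes the CSV reader accepts: named
    # (column, dtype) pairs for a dict, or a positional dtype slice for a list.
    dtype_list: Optional[List[Tuple[str, Type]]] = None
    dtype_slice: Optional[List[Type]] = None
    if dtypes is not None:
        if isinstance(dtypes, dict):
            dtype_list = [(k, py_type_to_dtype(v)) for k, v in dtypes.items()]
        elif isinstance(dtypes, list):
            dtype_slice = dtypes
        else:
            raise ValueError("dtype arg should be list or dict")
    return dtype_list, dtype_slice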
38 changes: 21 additions & 17 deletions py-polars/polars/io.py
@@ -147,7 +147,7 @@ def read_csv(
rechunk: bool = True,
encoding: str = "utf8",
n_threads: Optional[int] = None,
- dtype: Optional[Union[Dict[str, Type[DataType]], List[Type[DataType]]]] = None,
+ dtypes: Optional[Union[Dict[str, Type[DataType]], List[Type[DataType]]]] = None,
new_columns: Optional[List[str]] = None,
use_pyarrow: bool = False,
low_memory: bool = False,
@@ -156,6 +156,7 @@ def read_csv(
storage_options: Optional[Dict] = None,
null_values: Optional[Union[str, List[str], Dict[str, str]]] = None,
parse_dates: bool = False,
+ **kwargs: Any,
) -> DataFrame:
"""
Read into a DataFrame from a csv file.
@@ -197,7 +198,7 @@ def read_csv(
- "utf8-lossy"
n_threads
Number of threads to use in csv parsing. Defaults to the number of physical cpu's of your system.
- dtype
+ dtypes
Overwrite the dtypes during inference.
new_columns
Rename columns to these right after parsing. If the given list is shorted than the width of the DataFrame the
@@ -229,6 +230,9 @@ def read_csv(
-------
DataFrame
"""
+ # for backward compatibility
+ dtypes = kwargs.get("dtypes", dtypes)

if isinstance(file, bytes) and len(file) == 0:
raise ValueError("no date in bytes")

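The new `**kwargs` plus the fallback above is the commit's backward-compatibility hook for the rename. A generic sketch of that kind of shim, illustrative only and not the commit's exact logic:

from typing import Any, Optional


def read_csv_sketch(source: object, dtypes: Optional[dict] = None, **kwargs: Any) -> Optional[dict]:
    # Accept a legacy keyword through **kwargs and map it onto the new
    # parameter name, preferring the new spelling when both are given.
    if dtypes is None and "dtype" in kwargs:
        dtypes = kwargs.pop("dtype")
    return dtypes


# Both spellings resolve to the same value under this sketch:
assert read_csv_sketch("data.csv", dtypes={"a": "Int64"}) == {"a": "Int64"}
assert read_csv_sketch("data.csv", dtype={"a": "Int64"}) == {"a": "Int64"}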
@@ -249,7 +253,7 @@

if (
use_pyarrow
- and dtype is None
+ and dtypes is None
and stop_after_n_rows is None
and n_threads is None
and encoding == "utf8"
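Only the parameter name changes in this condition; the pyarrow fast path is still taken only when none of the listed overrides are set (the hunk is truncated, so the real condition may contain further clauses). As a standalone predicate, roughly:

def can_use_pyarrow_path(
    use_pyarrow: bool,
    dtypes: object,
    stop_after_n_rows: object,
    n_threads: object,
    encoding: str,
) -> bool:
    # Mirrors the clauses visible in the hunk above.
    return (
        use_pyarrow
        and dtypes is None
        and stop_after_n_rows is None
        and n_threads is None
        and encoding == "utf8"
    )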
@@ -296,11 +300,11 @@ def read_csv(
return update_columns(df, new_columns) # type: ignore
return df # type: ignore

- if new_columns and dtype and isinstance(dtype, dict):
+ if new_columns and dtypes and isinstance(dtypes, dict):
current_columns = None

# As new column names are not available yet while parsing the CSV file, rename column names in
- # dtype to old names (if possible) so they can be used during CSV parsing.
+ # dtypes to old names (if possible) so they can be used during CSV parsing.
if columns:
if len(columns) < len(new_columns):
raise ValueError(
@@ -330,28 +334,28 @@
else:
# When a header is present, column names are not known yet.

- if len(dtype) <= len(new_columns):
- # If dtype dictionary contains less or same amount of values than new column names
- # a list of dtypes can be created if all listed column names in dtype dictionary
+ if len(dtypes) <= len(new_columns):
+ # If dtypes dictionary contains less or same amount of values than new column names
+ # a list of dtypes can be created if all listed column names in dtypes dictionary
# appear in the first consecutive new column names.
dtype_list = [
- dtype[new_column_name]
- for new_column_name in new_columns[0 : len(dtype)]
- if new_column_name in dtype
+ dtypes[new_column_name]
+ for new_column_name in new_columns[0 : len(dtypes)]
+ if new_column_name in dtypes
]

- if len(dtype_list) == len(dtype):
- dtype = dtype_list
+ if len(dtype_list) == len(dtypes):
+ dtypes = dtype_list

- if current_columns and isinstance(dtype, dict):
+ if current_columns and isinstance(dtypes, dict):
new_to_current = {
new_column: current_column
for new_column, current_column in zip(new_columns, current_columns)
}
# Change new column names to current column names in dtype.
- dtype = {
+ dtypes = {
new_to_current.get(column_name, column_name): column_dtype
- for column_name, column_dtype in dtype.items()
+ for column_name, column_dtype in dtypes.items()
}

with _prepare_file_arg(file, **storage_options) as data:
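The bookkeeping in the hunk above boils down to re-keying the dtype overrides onto the column names the parser will actually see. A minimal worked example with hypothetical names:

new_columns = ["A", "B", "C"]
current_columns = ["a", "b", "c"]
dtypes = {"A": "Utf8", "C": "Float32"}  # keyed on the *new* (renamed) columns

new_to_current = {
    new_column: current_column
    for new_column, current_column in zip(new_columns, current_columns)
}
# Translate the override keys back to the current (pre-rename) column names.
dtypes = {
    new_to_current.get(column_name, column_name): column_dtype
    for column_name, column_dtype in dtypes.items()
}
assert dtypes == {"a": "Utf8", "c": "Float32"}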
@@ -369,7 +373,7 @@ def read_csv(
rechunk=rechunk,
encoding=encoding,
n_threads=n_threads,
- dtype=dtype,
+ dtypes=dtypes,
low_memory=low_memory,
comment_char=comment_char,
quote_char=quote_char,
2 changes: 1 addition & 1 deletion py-polars/tests/test_df.py
@@ -700,7 +700,7 @@ def test_read_csv_categorical() -> None:
f = BytesIO()
f.write(b"col1,col2,col3,col4,col5,col6\n'foo',2,3,4,5,6\n'bar',8,9,10,11,12")
f.seek(0)
- df = pl.DataFrame.read_csv(f, has_headers=True, dtype={"col1": pl.Categorical})
+ df = pl.DataFrame.read_csv(f, has_headers=True, dtypes={"col1": pl.Categorical})
assert df["col1"].dtype == pl.Categorical


10 changes: 5 additions & 5 deletions py-polars/tests/test_io.py
@@ -189,7 +189,7 @@ def test_partial_dtype_overwrite() -> None:
1,2,3
"""
f = io.StringIO(csv)
- df = pl.read_csv(f, dtype=[pl.Utf8])
+ df = pl.read_csv(f, dtypes=[pl.Utf8])
assert df.dtypes == [pl.Utf8, pl.Int64, pl.Int64]


@@ -216,7 +216,7 @@ def test_column_rename_and_dtype_overwrite() -> None:
df = pl.read_csv(
f,
new_columns=["A", "B", "C"],
dtype={"A": pl.Utf8, "B": pl.Int64, "C": pl.Float32},
dtypes={"A": pl.Utf8, "B": pl.Int64, "C": pl.Float32},
)
assert df.dtypes == [pl.Utf8, pl.Int64, pl.Float32]

@@ -225,7 +225,7 @@ def test_column_rename_and_dtype_overwrite() -> None:
f,
columns=["a", "c"],
new_columns=["A", "C"],
dtype={"A": pl.Utf8, "C": pl.Float32},
dtypes={"A": pl.Utf8, "C": pl.Float32},
)
assert df.dtypes == [pl.Utf8, pl.Float32]

@@ -237,7 +237,7 @@ def test_column_rename_and_dtype_overwrite() -> None:
df = pl.read_csv(
f,
new_columns=["A", "B", "C"],
dtype={"A": pl.Utf8, "C": pl.Float32},
dtypes={"A": pl.Utf8, "C": pl.Float32},
has_headers=False,
)
assert df.dtypes == [pl.Utf8, pl.Int64, pl.Float32]
@@ -396,7 +396,7 @@ def test_ignore_parse_dates() -> None:
dtypes: Dict[str, Type[DataType]] = {
k: pl.Utf8 for k in headers
} # Forces Utf8 type for every column
- df = pl.read_csv(csv, columns=headers, dtype=dtypes)
+ df = pl.read_csv(csv, columns=headers, dtypes=dtypes)
assert df.dtypes == [pl.Utf8, pl.Utf8, pl.Utf8]

