Skip to content

Commit

Permalink
Overload pl.from_arrow type hints (#4236)
Browse files — browse the repository at this point in the history
  • Loading branch information
matteosantama committed Aug 3, 2022
1 parent c4fc26c commit 20032d1
Show file tree
Hide file tree
Showing 7 changed files with 27 additions and 18 deletions.
13 changes: 12 additions & 1 deletion py-polars/polars/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,18 @@ def from_numpy(
return DataFrame._from_numpy(data, columns=columns, orient=orient)


# Note that we cannot overload because pyarrow has no stubs :(
@overload
def from_arrow(a: pa.Table, rechunk: bool = True) -> DataFrame:
...


@overload
def from_arrow( # type: ignore[misc]
a: pa.Array | pa.ChunkedArray, rechunk: bool = True
) -> Series:
...


def from_arrow(
a: pa.Table | pa.Array | pa.ChunkedArray, rechunk: bool = True
) -> DataFrame | Series:
Expand Down
4 changes: 1 addition & 3 deletions py-polars/polars/internals/anonymous_scan.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,7 @@ def _scan_ds_impl(
"""
if not _PYARROW_AVAILABLE: # pragma: no cover
raise ImportError("'pyarrow' is required for scanning from pyarrow datasets.")
return pl.from_arrow( # type: ignore[return-value]
ds.to_table(columns=with_columns)
)
return pl.from_arrow(ds.to_table(columns=with_columns))


def _scan_ds(ds: pa.dataset.dataset) -> pli.LazyFrame:
Expand Down
8 changes: 4 additions & 4 deletions py-polars/polars/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from io import BytesIO, IOBase, StringIO
from pathlib import Path
from typing import Any, BinaryIO, Callable, Mapping, TextIO, cast
from typing import Any, BinaryIO, Callable, Mapping, TextIO

from polars.utils import format_path, handle_projection_columns

Expand Down Expand Up @@ -271,7 +271,7 @@ def read_csv(
[f"column_{int(column[1:]) + 1}" for column in tbl.column_names]
)

df = cast(DataFrame, from_arrow(tbl, rechunk))
df = from_arrow(tbl, rechunk)
if new_columns:
return _update_columns(df, new_columns)
return df
Expand Down Expand Up @@ -909,7 +909,7 @@ def read_parquet(
" 'read_parquet(..., use_pyarrow=True)'."
)

return from_arrow( # type: ignore[return-value]
return from_arrow(
pa.parquet.read_table(
source_prep,
memory_map=memory_map,
Expand Down Expand Up @@ -1029,7 +1029,7 @@ def read_sql(
partition_num=partition_num,
protocol=protocol,
)
return cast(DataFrame, from_arrow(tbl))
return from_arrow(tbl)
else:
raise ImportError(
"connectorx is not installed. Please run `pip install connectorx>=0.2.2`."
Expand Down
2 changes: 1 addition & 1 deletion py-polars/tests/io/test_other.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def test_categorical_round_trip() -> None:
tbl = df.to_arrow()
assert "dictionary" in str(tbl["cat"].type)

df2: pl.DataFrame = pl.from_arrow(tbl) # type: ignore[assignment]
df2 = pl.from_arrow(tbl)
assert df2.dtypes == [pl.Int64, pl.Categorical]


Expand Down
2 changes: 1 addition & 1 deletion py-polars/tests/test_datelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -531,7 +531,7 @@ def test_microseconds_accuracy() -> None:
),
)

assert pl.from_arrow(a)["timestamp"].to_list() == timestamps # type: ignore[index]
assert pl.from_arrow(a)["timestamp"].to_list() == timestamps


def test_cast_time_units() -> None:
Expand Down
4 changes: 2 additions & 2 deletions py-polars/tests/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -742,7 +742,7 @@ def test_from_arrow_table() -> None:
data = {"a": [1, 2], "b": [1, 2]}
tbl = pa.table(data)

df: pl.DataFrame = pl.from_arrow(tbl) # type: ignore[assignment]
df = pl.from_arrow(tbl)
df.frame_equal(pl.DataFrame(data))


Expand Down Expand Up @@ -800,7 +800,7 @@ def test_column_names() -> None:
"b": pa.array([1, 2, 3, 4, 5], pa.int64()),
}
)
df: pl.DataFrame = pl.from_arrow(tbl) # type: ignore[assignment]
df = pl.from_arrow(tbl)
assert df.columns == ["a", "b"]


Expand Down
12 changes: 6 additions & 6 deletions py-polars/tests/test_interop.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,19 +363,19 @@ def test_from_empty_pandas_strings() -> None:

def test_from_empty_arrow() -> None:
df = pl.from_arrow(pa.table(pd.DataFrame({"a": [], "b": []})))
assert df.columns == ["a", "b"] # type: ignore[union-attr]
assert df.dtypes == [pl.Float64, pl.Float64] # type: ignore[union-attr]
assert df.columns == ["a", "b"]
assert df.dtypes == [pl.Float64, pl.Float64]

# 2705
df1 = pd.DataFrame(columns=["b"], dtype=float)
tbl = pa.Table.from_pandas(df1)
out = pl.from_arrow(tbl)
assert out.columns == ["b", "__index_level_0__"] # type: ignore[union-attr]
assert out.dtypes == [pl.Float64, pl.Utf8] # type: ignore[union-attr]
assert out.columns == ["b", "__index_level_0__"]
assert out.dtypes == [pl.Float64, pl.Utf8]
tbl = pa.Table.from_pandas(df1, preserve_index=False)
out = pl.from_arrow(tbl)
assert out.columns == ["b"] # type: ignore[union-attr]
assert out.dtypes == [pl.Float64] # type: ignore[union-attr]
assert out.columns == ["b"]
assert out.dtypes == [pl.Float64]


def test_from_null_column() -> None:
Expand Down

0 comments on commit 20032d1

Please sign in to comment.