Skip to content

Commit

Permalink
Tighten mypy config (#2197)
Browse files Browse the repository at this point in the history
* Tighten mypy config

Two configs:
* `warn_unused_ignores = true` => flags every `# type: ignore` comment in the code that is no longer needed
* `show_error_codes = true` => will print out the error code, so we can more narrowly define what to ignore in the future, if needed

* Fix typing discrepancies between py3.7 and py3.10

* Break out list comprehension in several functions

* Revert to isclass() and issubclass() calls

`typing.cast` calls are needed to resolve the differences in mypy's results between py3.7 and py3.10
  • Loading branch information
zundertj committed Dec 28, 2021
1 parent 3008586 commit e66d7c9
Show file tree
Hide file tree
Showing 10 changed files with 43 additions and 35 deletions.
2 changes: 1 addition & 1 deletion py-polars/polars/internals/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,7 @@ def exclude(
"""
if isinstance(columns, str):
columns = [columns] # type: ignore
columns = [columns]
return wrap_expr(self._pyexpr.exclude(columns))
elif not isinstance(columns, list) and issubclass(columns, DataType): # type: ignore
columns = [columns] # type: ignore
Expand Down
32 changes: 18 additions & 14 deletions py-polars/polars/internals/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1174,7 +1174,7 @@ def _pos_idx(self, idx: int, dim: int) -> int:
# __getitem__() mostly returns a dataframe. The major exception is when a string is passed in. Note that there are
# more subtle cases possible where a non-string value leads to a Series.
@overload
def __getitem__(self, item: str) -> "pli.Series": # type: ignore
def __getitem__(self, item: str) -> "pli.Series":
...

@overload
Expand Down Expand Up @@ -1272,7 +1272,7 @@ def __getitem__(
series_list = [self.to_series(i) for i in col_selection]
df = DataFrame(series_list)
return df[row_selection]
df = self.__getitem__(col_selection) # type: ignore
df = self.__getitem__(col_selection)
return df.__getitem__(row_selection)

# select single column
Expand All @@ -1292,7 +1292,7 @@ def __getitem__(
if isinstance(item, slice):
# special case df[::-1]
if item.start is None and item.stop is None and item.step == -1:
return self.select(pli.col("*").reverse()) # type: ignore
return self.select(pli.col("*").reverse())

if getattr(item, "end", False):
raise ValueError("A slice with steps larger than 1 is not supported.")
Expand All @@ -1312,7 +1312,7 @@ def __getitem__(
else:
# df[start:stop:step]
return self.select(
pli.col("*").slice(start, length).take_every(item.step) # type: ignore
pli.col("*").slice(start, length).take_every(item.step)
)

# select rows by numpy mask or index
Expand Down Expand Up @@ -1743,10 +1743,10 @@ def describe_cast(self: "DataFrame") -> "DataFrame":
describe_cast(self.median()),
]
)
summary.insert_at_idx( # type: ignore
summary.insert_at_idx(
0, pli.Series("describe", ["mean", "std", "min", "max", "median"])
)
return summary # type: ignore
return summary

def replace_at_idx(self, index: int, series: "pli.Series") -> None:
"""
Expand Down Expand Up @@ -4218,20 +4218,24 @@ def agg(
└─────┴─────┘
"""

# A single list comprehension would be cleaner, but mypy reports the typing errors
# on different lines under py3.7 and py3.10, so no single `# type: ignore` works for both.
# This is the same logic, broken down into two small functions.
def _str_to_list(y: Any) -> Any:
return [y] if isinstance(y, str) else y

def _wrangle(x: Any) -> list:
return [(xi[0], _str_to_list(xi[1])) for xi in x]

if isinstance(column_to_agg, pli.Expr):
column_to_agg = [column_to_agg]
if isinstance(column_to_agg, dict):
column_to_agg = [
(column, [agg] if isinstance(agg, str) else agg)
for (column, agg) in column_to_agg.items()
]
column_to_agg = _wrangle(column_to_agg.items())
elif isinstance(column_to_agg, list):

if isinstance(column_to_agg[0], tuple):
column_to_agg = [ # type: ignore[misc]
(column, [agg] if isinstance(agg, str) else agg) # type: ignore[misc]
for (column, agg) in column_to_agg
]
column_to_agg = _wrangle(column_to_agg)

elif isinstance(column_to_agg[0], pli.Expr):
return (
Expand Down
17 changes: 10 additions & 7 deletions py-polars/polars/internals/lazy_functions.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from datetime import date, datetime, timezone
from inspect import isclass
from typing import Any, Callable, List, Optional, Sequence, Type, Union, overload
from typing import Any, Callable, List, Optional, Sequence, Type, Union, cast, overload

import numpy as np

Expand Down Expand Up @@ -126,8 +126,11 @@ def col(
if isinstance(name, pli.Series):
name = name.to_list() # type: ignore

if isclass(name) and issubclass(name, DataType): # type: ignore
name = [name] # type: ignore
# note: we need the typing.cast call here twice to make mypy happy under Python 3.7.
# On Python 3.10 it is not needed. We use cast because it works across versions;
# ignoring the typing error instead would lead to unneeded ignores under Python 3.10.
if isclass(name) and issubclass(cast(type, name), DataType):
name = [cast(type, name)]

if isinstance(name, list):
if len(name) == 0 or isinstance(name[0], str):
Expand Down Expand Up @@ -949,13 +952,13 @@ def _datetime(
day_expr = pli.expr_to_lit_or_expr(day, str_to_lit=False)

if hour is not None:
hour = pli.expr_to_lit_or_expr(hour, str_to_lit=False)._pyexpr # type: ignore
hour = pli.expr_to_lit_or_expr(hour, str_to_lit=False)._pyexpr
if minute is not None:
minute = pli.expr_to_lit_or_expr(minute, str_to_lit=False)._pyexpr # type: ignore
minute = pli.expr_to_lit_or_expr(minute, str_to_lit=False)._pyexpr
if second is not None:
second = pli.expr_to_lit_or_expr(second, str_to_lit=False)._pyexpr # type: ignore
second = pli.expr_to_lit_or_expr(second, str_to_lit=False)._pyexpr
if millisecond is not None:
millisecond = pli.expr_to_lit_or_expr(millisecond, str_to_lit=False)._pyexpr # type: ignore
millisecond = pli.expr_to_lit_or_expr(millisecond, str_to_lit=False)._pyexpr
return pli.wrap_expr(
py_datetime(
year_expr._pyexpr,
Expand Down
2 changes: 1 addition & 1 deletion py-polars/polars/internals/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -3531,7 +3531,7 @@ def concat(self, other: Union[List[Series], Series]) -> "Series":
names.insert(0, s.name)
df = pli.DataFrame(other)
df.insert_at_idx(0, s)
return df.select(pli.concat_list(names))[s.name] # type: ignore
return df.select(pli.concat_list(names))[s.name]

def get(self, index: int) -> "Series":
"""
Expand Down
4 changes: 2 additions & 2 deletions py-polars/polars/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,9 +292,9 @@ def read_csv(
if not has_header:
# Convert 'column_1', 'column_2', ... column names to 'f0', 'f1', ... column names for pyarrow,
# if CSV file does not contain a header.
include_columns = [f"f{int(column[7:]) - 1}" for column in columns] # type: ignore
include_columns = [f"f{int(column[7:]) - 1}" for column in columns]
else:
include_columns = columns # type: ignore
include_columns = columns

if not columns and projection:
# Convert column indices from projection to 'f0', 'f1', ... column names for pyarrow.
Expand Down
2 changes: 2 additions & 0 deletions py-polars/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ profile = "black"

[tool.mypy]
disallow_untyped_defs = true
warn_unused_ignores = true
show_error_codes = true
files = ["polars", "tests"]

[[tool.mypy.overrides]]
Expand Down
11 changes: 5 additions & 6 deletions py-polars/tests/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -743,8 +743,8 @@ def test_from_pandas_nan_to_none() -> None:
"nulls": [None, np.nan, np.nan],
}
)
out_true: pl.DataFrame = pl.from_pandas(df) # type: ignore
out_false: pl.DataFrame = pl.from_pandas(df, nan_to_none=False) # type: ignore
out_true = pl.from_pandas(df)
out_false = pl.from_pandas(df, nan_to_none=False)
df.loc[2, "nulls"] = pd.NA
assert all(val is None for val in out_true["nulls"])
assert all(np.isnan(val) for val in out_false["nulls"][1:])
Expand Down Expand Up @@ -785,7 +785,7 @@ def test_concat() -> None:
assert a.shape == (2, 2)

with pytest.raises(ValueError):
_ = pl.concat([]) # type: ignore
_ = pl.concat([])

with pytest.raises(ValueError):
pl.concat([df, df], how="rubbish")
Expand Down Expand Up @@ -1059,8 +1059,7 @@ def test_rename(df: pl.DataFrame) -> None:
def test_to_json(df: pl.DataFrame) -> None:
# text based conversion loses time info
df = df.select(pl.all().exclude(["cat", "time"]))
s: str = df.to_json(to_string=True) # type: ignore
# TODO add overload on to_json()
s = df.to_json(to_string=True)
out = pl.read_json(s)
assert df.frame_equal(out, null_equal=True)

Expand Down Expand Up @@ -1124,7 +1123,7 @@ def test_join_dates() -> None:
)
dts = (
pl.from_pandas(date_times)
.apply(lambda x: x + np.random.randint(1_000 * 60, 60_000 * 60)) # type: ignore
.apply(lambda x: x + np.random.randint(1_000 * 60, 60_000 * 60))
.cast(pl.Datetime)
)

Expand Down
2 changes: 1 addition & 1 deletion py-polars/tests/test_interop.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def test_from_arrow() -> None:

# if not a PyArrow type, raise a ValueError
with pytest.raises(ValueError):
_ = pl.from_arrow([1, 2]) # type: ignore
_ = pl.from_arrow([1, 2])


def test_from_pandas_dataframe() -> None:
Expand Down
4 changes: 2 additions & 2 deletions py-polars/tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def test_select_columns_and_projection_from_buffer() -> None:
df = pl.DataFrame({"a": [1, 2, 3], "b": [True, False, True], "c": ["a", "b", "c"]})
expected = pl.DataFrame({"b": [True, False, True], "c": ["a", "b", "c"]})
for to_fn, from_fn in zip(
[df.to_parquet, df.to_ipc], [pl.read_parquet, pl.read_ipc] # type: ignore
[df.to_parquet, df.to_ipc], [pl.read_parquet, pl.read_ipc]
):
f = io.BytesIO()
to_fn(f) # type: ignore
Expand All @@ -58,7 +58,7 @@ def test_select_columns_and_projection_from_buffer() -> None:
assert df_1.frame_equal(expected)

for to_fn, from_fn in zip(
[df.to_parquet, df.to_ipc], [pl.read_parquet, pl.read_ipc] # type: ignore
[df.to_parquet, df.to_ipc], [pl.read_parquet, pl.read_ipc]
):
f = io.BytesIO()
to_fn(f) # type: ignore
Expand Down
2 changes: 1 addition & 1 deletion py-polars/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -929,7 +929,7 @@ def test_abs() -> None:
)
testing.assert_series_equal(
pl.select(pl.lit(s).abs()).to_series(), pl.Series([1.0, 2.0, 3.0, 4.0])
) # type: ignore
)


def test_to_dummies() -> None:
Expand Down

0 comments on commit e66d7c9

Please sign in to comment.