Skip to content

Commit

Permalink
TYP: Typ part of python_parser (#44406)
Browse files Browse the repository at this point in the history
  • Loading branch information
phofl committed Nov 28, 2021
1 parent 7f06a8a commit 3d99081
Show file tree
Hide file tree
Showing 4 changed files with 133 additions and 57 deletions.
7 changes: 6 additions & 1 deletion pandas/io/parsers/arrow_parser_wrapper.py
Expand Up @@ -110,7 +110,12 @@ def _finalize_output(self, frame: DataFrame) -> DataFrame:
multi_index_named = False
frame.columns = self.names
# we only need the frame not the names
frame.columns, frame = self._do_date_conversions(frame.columns, frame)
# error: Incompatible types in assignment (expression has type
# "Union[List[Union[Union[str, int, float, bool], Union[Period, Timestamp,
# Timedelta, Any]]], Index]", variable has type "Index") [assignment]
frame.columns, frame = self._do_date_conversions( # type: ignore[assignment]
frame.columns, frame
)
if self.index_col is not None:
for i, item in enumerate(self.index_col):
if is_integer(item):
Expand Down
63 changes: 53 additions & 10 deletions pandas/io/parsers/base_parser.py
Expand Up @@ -10,10 +10,13 @@
Any,
Callable,
DefaultDict,
Hashable,
Iterable,
Mapping,
Sequence,
cast,
final,
overload,
)
import warnings

Expand Down Expand Up @@ -56,6 +59,7 @@
from pandas.core.dtypes.dtypes import CategoricalDtype
from pandas.core.dtypes.missing import isna

from pandas import DataFrame
from pandas.core import algorithms
from pandas.core.arrays import Categorical
from pandas.core.indexes.api import (
Expand Down Expand Up @@ -241,7 +245,7 @@ def _open_handles(
errors=kwds.get("encoding_errors", "strict"),
)

def _validate_parse_dates_presence(self, columns: list[str]) -> Iterable:
def _validate_parse_dates_presence(self, columns: Sequence[Hashable]) -> Iterable:
"""
Check if parse_dates are in columns.
Expand Down Expand Up @@ -337,11 +341,24 @@ def _should_parse_dates(self, i: int) -> bool:

@final
def _extract_multi_indexer_columns(
self, header, index_names, passed_names: bool = False
self,
header,
index_names: list | None,
passed_names: bool = False,
):
"""
extract and return the names, index_names, col_names
header is a list-of-lists returned from the parsers
Extract and return the names, index_names, col_names if the column
names are a MultiIndex.
Parameters
----------
header: list of lists
The header rows
index_names: list, optional
The names of the future index
passed_names: bool, default False
A flag specifying if names were passed
"""
if len(header) < 2:
return header[0], index_names, None, passed_names
Expand Down Expand Up @@ -400,15 +417,15 @@ def extract(r):
return names, index_names, col_names, passed_names

@final
def _maybe_dedup_names(self, names):
def _maybe_dedup_names(self, names: Sequence[Hashable]) -> Sequence[Hashable]:
# see gh-7160 and gh-9424: this helps to provide
# immediate alleviation of the duplicate names
# issue and appears to be satisfactory to users,
# but ultimately, not needing to butcher the names
# would be nice!
if self.mangle_dupe_cols:
names = list(names) # so we can index
counts: DefaultDict[int | str | tuple, int] = defaultdict(int)
counts: DefaultDict[Hashable, int] = defaultdict(int)
is_potential_mi = _is_potential_multi_index(names, self.index_col)

for i, col in enumerate(names):
Expand All @@ -418,6 +435,8 @@ def _maybe_dedup_names(self, names):
counts[col] = cur_count + 1

if is_potential_mi:
# for mypy
assert isinstance(col, tuple)
col = col[:-1] + (f"{col[-1]}.{cur_count}",)
else:
col = f"{col}.{cur_count}"
Expand Down Expand Up @@ -572,7 +591,7 @@ def _agg_index(self, index, try_parse_dates: bool = True) -> Index:
@final
def _convert_to_ndarrays(
self,
dct: dict,
dct: Mapping,
na_values,
na_fvalues,
verbose: bool = False,
Expand Down Expand Up @@ -664,7 +683,7 @@ def _convert_to_ndarrays(

@final
def _set_noconvert_dtype_columns(
self, col_indices: list[int], names: list[int | str | tuple]
self, col_indices: list[int], names: Sequence[Hashable]
) -> set[int]:
"""
Set the columns that should not undergo dtype conversions.
Expand Down Expand Up @@ -848,7 +867,27 @@ def _cast_types(self, values, cast_type, column):
) from err
return values

def _do_date_conversions(self, names, data):
@overload
def _do_date_conversions(
self,
names: Index,
data: DataFrame,
) -> tuple[Sequence[Hashable] | Index, DataFrame]:
...

@overload
def _do_date_conversions(
self,
names: Sequence[Hashable],
data: Mapping[Hashable, ArrayLike],
) -> tuple[Sequence[Hashable], Mapping[Hashable, ArrayLike]]:
...

def _do_date_conversions(
self,
names: Sequence[Hashable] | Index,
data: Mapping[Hashable, ArrayLike] | DataFrame,
) -> tuple[Sequence[Hashable] | Index, Mapping[Hashable, ArrayLike] | DataFrame]:
# returns data, columns

if self.parse_dates is not None:
Expand All @@ -864,7 +903,11 @@ def _do_date_conversions(self, names, data):

return names, data

def _check_data_length(self, columns: list[str], data: list[ArrayLike]) -> None:
def _check_data_length(
self,
columns: Sequence[Hashable],
data: Sequence[ArrayLike],
) -> None:
"""Checks if length of data is equal to length of column names.
One set of trailing commas is allowed. self.index_col not False
Expand Down
8 changes: 4 additions & 4 deletions pandas/io/parsers/c_parser_wrapper.py
Expand Up @@ -279,7 +279,7 @@ def read(self, nrows=None):
data_tups = sorted(data.items())
data = {k: v for k, (i, v) in zip(names, data_tups)}

names, data = self._do_date_conversions(names, data)
names, date_data = self._do_date_conversions(names, data)

else:
# rename dict keys
Expand All @@ -302,13 +302,13 @@ def read(self, nrows=None):

data = {k: v for k, (i, v) in zip(names, data_tups)}

names, data = self._do_date_conversions(names, data)
index, names = self._make_index(data, alldata, names)
names, date_data = self._do_date_conversions(names, data)
index, names = self._make_index(date_data, alldata, names)

# maybe create a mi on the columns
names = self._maybe_make_multi_index_columns(names, self.col_names)

return index, names, data
return index, names, date_data

def _filter_usecols(self, names):
# hackish
Expand Down

0 comments on commit 3d99081

Please sign in to comment.