-
-
Notifications
You must be signed in to change notification settings - Fork 19.1k
Zip Strict specification for pandas/core/indexes
#62533
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
krishna-datta
wants to merge
4
commits into
pandas-dev:main
Choose a base branch
from
krishna-datta:addB905Check
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+27
−26
Open
Changes from all commits
Commits
Show all changes
4 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -720,7 +720,7 @@ def from_frame( | |
if not isinstance(df, ABCDataFrame): | ||
raise TypeError("Input must be a DataFrame") | ||
|
||
column_names, columns = zip(*df.items()) | ||
column_names, columns = zip(*df.items(), strict=True) | ||
names = column_names if names is None else names | ||
return cls.from_arrays(columns, sortorder=sortorder, names=names) | ||
|
||
|
@@ -878,7 +878,10 @@ def levels(self) -> FrozenList: | |
# Use cache_readonly to ensure that self.get_locs doesn't repeatedly | ||
# create new IndexEngine | ||
# https://github.com/pandas-dev/pandas/issues/31648 | ||
result = [x._rename(name=name) for x, name in zip(self._levels, self._names)] | ||
result = [ | ||
x._rename(name=name) | ||
for x, name in zip(self._levels, self._names, strict=True) | ||
] | ||
for level in result: | ||
# disallow midx.levels[0].name = "foo" | ||
level._no_setting_name = True | ||
|
@@ -912,7 +915,7 @@ def _set_levels( | |
else: | ||
level_numbers = [self._get_level_number(lev) for lev in level] | ||
new_levels_list = list(self._levels) | ||
for lev_num, lev in zip(level_numbers, levels): | ||
for lev_num, lev in zip(level_numbers, levels, strict=False): | ||
new_levels_list[lev_num] = ensure_index(lev, copy=copy)._view() | ||
new_levels = FrozenList(new_levels_list) | ||
|
||
|
@@ -1148,13 +1151,13 @@ def _set_codes( | |
if level is None: | ||
new_codes = FrozenList( | ||
_coerce_indexer_frozen(level_codes, lev, copy=copy).view() | ||
for lev, level_codes in zip(self._levels, codes) | ||
for lev, level_codes in zip(self._levels, codes, strict=False) | ||
) | ||
level_numbers = range(len(new_codes)) | ||
else: | ||
level_numbers = [self._get_level_number(lev) for lev in level] | ||
new_codes_list = list(self._codes) | ||
for lev_num, level_codes in zip(level_numbers, codes): | ||
for lev_num, level_codes in zip(level_numbers, codes, strict=False): | ||
lev = self.levels[lev_num] | ||
new_codes_list[lev_num] = _coerce_indexer_frozen( | ||
level_codes, lev, copy=copy | ||
|
@@ -1478,7 +1481,7 @@ def _formatter_func(self, tup): | |
Formats each item in tup according to its level's formatter function. | ||
""" | ||
formatter_funcs = (level._formatter_func for level in self.levels) | ||
return tuple(func(val) for func, val in zip(formatter_funcs, tup)) | ||
return tuple(func(val) for func, val in zip(formatter_funcs, tup, strict=False)) | ||
|
||
def _get_values_for_csv( | ||
self, *, na_rep: str = "nan", **kwargs | ||
|
@@ -1487,7 +1490,7 @@ def _get_values_for_csv( | |
new_codes = [] | ||
|
||
# go through the levels and format them | ||
for level, level_codes in zip(self.levels, self.codes): | ||
for level, level_codes in zip(self.levels, self.codes, strict=False): | ||
level_strs = level._get_values_for_csv(na_rep=na_rep, **kwargs) | ||
# add nan values, if there are any | ||
mask = level_codes == -1 | ||
|
@@ -1527,7 +1530,7 @@ def _format_multi( | |
return [] | ||
|
||
stringified_levels = [] | ||
for lev, level_codes in zip(self.levels, self.codes): | ||
for lev, level_codes in zip(self.levels, self.codes, strict=False): | ||
na = _get_na_rep(lev.dtype) | ||
|
||
if len(lev) > 0: | ||
|
@@ -1550,7 +1553,7 @@ def _format_multi( | |
stringified_levels.append(formatted) | ||
|
||
result_levels = [] | ||
for lev, lev_name in zip(stringified_levels, self.names): | ||
for lev, lev_name in zip(stringified_levels, self.names, strict=False): | ||
level = [] | ||
|
||
if include_names: | ||
|
@@ -1627,7 +1630,7 @@ def _set_names(self, names, *, level=None) -> None: | |
level = (self._get_level_number(lev) for lev in level) | ||
|
||
# set the name | ||
for lev, name in zip(level, names): | ||
for lev, name in zip(level, names, strict=False): | ||
if name is not None: | ||
# GH 20527 | ||
# All items in 'names' need to be hashable: | ||
|
@@ -2094,7 +2097,7 @@ def _sort_levels_monotonic(self, raise_if_incomparable: bool = False) -> MultiIn | |
new_levels = [] | ||
new_codes = [] | ||
|
||
for lev, level_codes in zip(self.levels, self.codes): | ||
for lev, level_codes in zip(self.levels, self.codes, strict=False): | ||
if not lev.is_monotonic_increasing: | ||
try: | ||
# indexer to reorder the levels | ||
|
@@ -2173,7 +2176,7 @@ def remove_unused_levels(self) -> MultiIndex: | |
new_codes = [] | ||
|
||
changed = False | ||
for lev, level_codes in zip(self.levels, self.codes): | ||
for lev, level_codes in zip(self.levels, self.codes, strict=False): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What fails when this is `True`? |
||
# Since few levels are typically unused, bincount() is more | ||
# efficient than unique() - however it only accepts positive values | ||
# (and drops order): | ||
|
@@ -2240,7 +2243,7 @@ def __getitem__(self, key): | |
key = com.cast_scalar_indexer(key) | ||
|
||
retval = [] | ||
for lev, level_codes in zip(self.levels, self.codes): | ||
for lev, level_codes in zip(self.levels, self.codes, strict=False): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What fails when this is `True`? |
||
if level_codes[key] == -1: | ||
retval.append(np.nan) | ||
else: | ||
|
@@ -3078,7 +3081,7 @@ def _partial_tup_index(self, tup: tuple, side: Literal["left", "right"] = "left" | |
|
||
n = len(tup) | ||
start, end = 0, len(self) | ||
zipped = zip(tup, self.levels, self.codes) | ||
zipped = zip(tup, self.levels, self.codes, strict=False) | ||
for k, (lab, lev, level_codes) in enumerate(zipped): | ||
section = level_codes[start:end] | ||
|
||
|
@@ -3362,7 +3365,7 @@ def maybe_mi_droplevels(indexer, levels): | |
"Key for location must have same length as number of levels" | ||
) | ||
result = None | ||
for lev, k in zip(level, key): | ||
for lev, k in zip(level, key, strict=False): | ||
loc, new_index = self._get_loc_level(k, level=lev) | ||
if isinstance(loc, slice): | ||
mask = np.zeros(len(self), dtype=bool) | ||
|
@@ -3948,7 +3951,7 @@ def _union(self, other, sort) -> MultiIndex: | |
if isinstance(result, MultiIndex): | ||
return result | ||
return MultiIndex.from_arrays( | ||
zip(*result), sortorder=None, names=result_names | ||
zip(*result, strict=False), sortorder=None, names=result_names | ||
) | ||
|
||
else: | ||
|
@@ -3995,7 +3998,7 @@ def _maybe_match_names(self, other): | |
if len(self.names) != len(other.names): | ||
return [None] * len(self.names) | ||
names = [] | ||
for a_name, b_name in zip(self.names, other.names): | ||
for a_name, b_name in zip(self.names, other.names, strict=False): | ||
if a_name == b_name: | ||
names.append(a_name) | ||
else: | ||
|
@@ -4092,7 +4095,7 @@ def putmask(self, mask, value: MultiIndex) -> MultiIndex: | |
new_codes = [] | ||
|
||
for i, (value_level, level, level_codes) in enumerate( | ||
zip(subset.levels, self.levels, self.codes) | ||
zip(subset.levels, self.levels, self.codes, strict=False) | ||
): | ||
new_level = level.union(value_level, sort=False) | ||
value_codes = new_level.get_indexer_for(subset.get_level_values(i)) | ||
|
@@ -4123,7 +4126,7 @@ def insert(self, loc: int, item) -> MultiIndex: | |
|
||
new_levels = [] | ||
new_codes = [] | ||
for k, level, level_codes in zip(item, self.levels, self.codes): | ||
for k, level, level_codes in zip(item, self.levels, self.codes, strict=True): | ||
if k not in level: | ||
# have to insert into level | ||
# must insert at end otherwise you have to recompute all the | ||
|
@@ -4219,7 +4222,7 @@ def _lexsort_depth(codes: list[np.ndarray], nlevels: int) -> int: | |
|
||
|
||
def sparsify_labels(label_list, start: int = 0, sentinel: object = ""): | ||
pivoted = list(zip(*label_list)) | ||
pivoted = list(zip(*label_list, strict=True)) | ||
k = len(label_list) | ||
|
||
result = pivoted[: start + 1] | ||
|
@@ -4228,7 +4231,7 @@ def sparsify_labels(label_list, start: int = 0, sentinel: object = ""): | |
for cur in pivoted[start + 1 :]: | ||
sparse_cur = [] | ||
|
||
for i, (p, t) in enumerate(zip(prev, cur)): | ||
for i, (p, t) in enumerate(zip(prev, cur, strict=True)): | ||
if i == k - 1: | ||
sparse_cur.append(t) | ||
result.append(sparse_cur) # type: ignore[arg-type] | ||
|
@@ -4243,7 +4246,7 @@ def sparsify_labels(label_list, start: int = 0, sentinel: object = ""): | |
|
||
prev = cur | ||
|
||
return list(zip(*result)) | ||
return list(zip(*result, strict=True)) | ||
|
||
|
||
def _get_na_rep(dtype: DtypeObj) -> str: | ||
|
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What fails when this is `True`?