Skip to content

Commit edd8a1f

Browse files
author
T. Koskamp
committed
BUG: Inconsistent behavior of Groupby with None values with filter (#62501)
- Add test cases
- Add tuple support
- Incorporate feedback
1 parent 197ac83 commit edd8a1f

File tree

2 files changed

+48
-7
lines changed

2 files changed

+48
-7
lines changed

pandas/core/groupby/groupby.py

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -680,21 +680,32 @@ def get_converter(s):
680680
)
681681
raise ValueError(msg) from err
682682

683-
converters = (get_converter(s) for s in index_sample)
683+
has_nan = any(isna(n) for n in name_sample)
684+
685+
sample = name_sample if has_nan else index_sample
686+
converters = (get_converter(s) for s in sample)
687+
684688
names = (
685689
tuple(f(n) for f, n in zip(converters, name, strict=True))
686690
for name in names
687691
)
688692

689-
elif any(isna(k) for k in self.indices.keys()):
690-
converters = [get_converter(name) for name in names]
691-
names = (converter(name) for converter, name in zip(converters, names))
692-
693+
indices = self.indices
694+
if not self.dropna and has_nan:
695+
indices = {}
696+
for k, v in self.indices.items():
697+
k = tuple(np.nan if isna(e) else e for e in k)
698+
indices[k] = v
693699
else:
694-
converter = get_converter(index_sample)
700+
has_nan = isna(name_sample)
701+
702+
convert_sample = name_sample if has_nan else index_sample
703+
converter = get_converter(convert_sample)
695704
names = (converter(name) for name in names)
696705

697-
indices = {np.nan if isna(k) else k: v for k, v in self.indices.items()}
706+
indices = self.indices
707+
if not self.dropna and has_nan:
708+
indices = {np.nan if isna(k) else k: v for k, v in indices.items()}
698709

699710
return [indices.get(name, []) for name in names]
700711

pandas/tests/groupby/test_filters.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -606,3 +606,33 @@ def test_filter_consistent_result_before_after_agg_func():
606606
grouper.sum()
607607
result = grouper.filter(lambda x: True)
608608
tm.assert_frame_equal(result, expected)
609+
610+
611+
def test_filter_with_non_values():
612+
# GH 62501
613+
df = DataFrame(
614+
[
615+
[1],
616+
[None],
617+
],
618+
columns=["a"],
619+
)
620+
621+
result = df.groupby("a", dropna=False).filter(lambda x: True)
622+
tm.assert_frame_equal(result, df)
623+
624+
625+
def test_filter_with_non_values_multi_index():
626+
# GH 62501
627+
df = DataFrame(
628+
[
629+
[1, 2],
630+
[3, None],
631+
[None, 4],
632+
[None, None],
633+
],
634+
columns=["a", "b"],
635+
)
636+
637+
result = df.groupby(["a", "b"], dropna=False).filter(lambda x: True)
638+
tm.assert_frame_equal(result, df)

0 commit comments

Comments
 (0)