From b5b447e982c43a97d3a667acf42237e08f2b5707 Mon Sep 17 00:00:00 2001
From: "T. Koskamp" <tristan@tristans-mini.home>
Date: Sun, 23 Nov 2025 12:52:15 +0100
Subject: [PATCH 1/4] BUG: Inconsistent behavior of Groupby with None values
 with filter (#62501)

---
 doc/source/whatsnew/v2.3.4.rst |  1 +
 pandas/core/groupby/groupby.py | 10 +++++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v2.3.4.rst b/doc/source/whatsnew/v2.3.4.rst
index 6e729c4bf2e2a..897cbacb03170 100644
--- a/doc/source/whatsnew/v2.3.4.rst
+++ b/doc/source/whatsnew/v2.3.4.rst
@@ -14,6 +14,7 @@ Bug fixes
 ^^^^^^^^^
 - Bug in :meth:`DataFrame.__getitem__` returning modified columns when called with ``slice`` in Python 3.12 (:issue:`57500`)
 - Bug in :meth:`Series.str.replace` raising an error on valid group references (``\1``, ``\2``, etc.) on series converted to PyArrow backend dtype (:issue:`62653`)
+- Bug in :meth:`~DataFrame.groupby` with ``dropna=False`` where ``filter`` handled groups with ``None`` keys inconsistently (:issue:`62501`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_234.contributors:
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 2c8ec599a19ef..62bceace6acbe 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -650,6 +650,8 @@ def get_converter(s):
                 return lambda key: Timestamp(key)
             elif isinstance(s, np.datetime64):
                 return lambda key: Timestamp(key).asm8
+            elif isna(s):
+                return lambda key: np.nan
             else:
                 return lambda key: key
 
@@ -684,11 +686,17 @@ def get_converter(s):
                 for name in names
             )
 
+        elif any(isna(k) for k in self.indices.keys()):
+            converters = [get_converter(name) for name in names]
+            names = (converter(name) for converter, name in zip(converters, names))
+
         else:
             converter = get_converter(index_sample)
             names = (converter(name) for name in names)
 
-        return [self.indices.get(name, []) for name in names]
+        indices = {np.nan if isna(k) else k: v for k, v in self.indices.items()}
+
+        return [indices.get(name, []) for name in names]
 
     @final
     def _get_index(self, name):

From d2046e9fc94d35aef1e8df4d9bfdbd924e955499 Mon Sep 17 00:00:00 2001
From: "T. Koskamp" <tristan@tristans-mini.home>
Date: Tue, 25 Nov 2025 20:51:26 +0100
Subject: [PATCH 2/4] BUG: Inconsistent behavior of Groupby with None values
 with filter (#62501)

- Add test cases
- Add tuple support
- Incorporate feedback
---
 pandas/core/groupby/groupby.py       | 25 ++++++++++++++++-------
 pandas/tests/groupby/test_filters.py | 30 ++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+), 7 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 62bceace6acbe..d71342876280d 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -680,21 +680,32 @@ def get_converter(s):
                     )
                     raise ValueError(msg) from err
 
-            converters = (get_converter(s) for s in index_sample)
+            has_nan = any(isna(n) for n in name_sample)
+
+            sample = name_sample if has_nan else index_sample
+            converters = (get_converter(s) for s in sample)
+
             names = (
                 tuple(f(n) for f, n in zip(converters, name, strict=True))
                 for name in names
             )
 
-        elif any(isna(k) for k in self.indices.keys()):
-            converters = [get_converter(name) for name in names]
-            names = (converter(name) for converter, name in zip(converters, names))
-
+            indices = self.indices
+            if not self.dropna and has_nan:
+                indices = {}
+                for k, v in self.indices.items():
+                    k = tuple(np.nan if isna(e) else e for e in k)
+                    indices[k] = v
         else:
-            converter = get_converter(index_sample)
+            has_nan = isna(name_sample)
+
+            convert_sample = name_sample if has_nan else index_sample
+            converter = get_converter(convert_sample)
             names = (converter(name) for name in names)
 
-        indices = {np.nan if isna(k) else k: v for k, v in self.indices.items()}
+            indices = self.indices
+            if not self.dropna and has_nan:
+                indices = {np.nan if isna(k) else k: v for k, v in indices.items()}
 
         return [indices.get(name, []) for name in names]
 
diff --git a/pandas/tests/groupby/test_filters.py b/pandas/tests/groupby/test_filters.py
index 4fe3aac629513..c20fc9e3d62e7 100644
--- a/pandas/tests/groupby/test_filters.py
+++ b/pandas/tests/groupby/test_filters.py
@@ -606,3 +606,33 @@ def test_filter_consistent_result_before_after_agg_func():
     grouper.sum()
     result = grouper.filter(lambda x: True)
     tm.assert_frame_equal(result, expected)
+
+
+def test_filter_with_non_values():
+    # GH 62501
+    df = DataFrame(
+        [
+            [1],
+            [None],
+        ],
+        columns=["a"],
+    )
+
+    result = df.groupby("a", dropna=False).filter(lambda x: True)
+    tm.assert_frame_equal(result, df)
+
+
+def test_filter_with_non_values_multi_index():
+    # GH 62501
+    df = DataFrame(
+        [
+            [1, 2],
+            [3, None],
+            [None, 4],
+            [None, None],
+        ],
+        columns=["a", "b"],
+    )
+
+    result = df.groupby(["a", "b"], dropna=False).filter(lambda x: True)
+    tm.assert_frame_equal(result, df)

From 74057eb2efeae34e1cd2b59a717a9d1ae668ce95 Mon Sep 17 00:00:00 2001
From: "T. Koskamp" <tristan@tristans-mini.home>
Date: Wed, 26 Nov 2025 23:01:49 +0100
Subject: [PATCH 3/4] Update indices property from groupby

---
 pandas/core/groupby/groupby.py | 50 ++++++++++------------------------
 pandas/core/groupby/ops.py     | 21 ++++++++++++--
 2 files changed, 32 insertions(+), 39 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index d71342876280d..5dd2266d0bb22 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -637,7 +637,7 @@ def indices(self) -> dict[Hashable, npt.NDArray[np.intp]]:
         return self._grouper.indices
 
     @final
-    def _get_indices(self, names):
+    def _get_indices(self, name):
         """
         Safe get multiple indices, translate keys for
         datelike to underlying repr.
@@ -650,28 +650,27 @@ def get_converter(s):
                 return lambda key: Timestamp(key)
             elif isinstance(s, np.datetime64):
                 return lambda key: Timestamp(key).asm8
-            elif isna(s):
-                return lambda key: np.nan
             else:
                 return lambda key: key
 
-        if len(names) == 0:
-            return []
+        if isna(name):
+            return self.indices.get(np.nan, [])
+        if isinstance(name, tuple):
+            name = tuple(np.nan if isna(comp) else comp for comp in name)
 
         if len(self.indices) > 0:
             index_sample = next(iter(self.indices))
         else:
             index_sample = None  # Dummy sample
 
-        name_sample = names[0]
         if isinstance(index_sample, tuple):
-            if not isinstance(name_sample, tuple):
+            if not isinstance(name, tuple):
                 msg = "must supply a tuple to get_group with multiple grouping keys"
                 raise ValueError(msg)
-            if not len(name_sample) == len(index_sample):
+            if not len(name) == len(index_sample):
                 try:
                     # If the original grouper was a tuple
-                    return [self.indices[name] for name in names]
+                    return self.indices[name]
                 except KeyError as err:
                     # turns out it wasn't a tuple
                     msg = (
@@ -680,41 +679,20 @@ def get_converter(s):
                     )
                     raise ValueError(msg) from err
 
-            has_nan = any(isna(n) for n in name_sample)
-
-            sample = name_sample if has_nan else index_sample
-            converters = (get_converter(s) for s in sample)
-
-            names = (
-                tuple(f(n) for f, n in zip(converters, name, strict=True))
-                for name in names
-            )
-
-            indices = self.indices
-            if not self.dropna and has_nan:
-                indices = {}
-                for k, v in self.indices.items():
-                    k = tuple(np.nan if isna(e) else e for e in k)
-                    indices[k] = v
+            converters = (get_converter(s) for s in index_sample)
+            name = tuple(f(n) for f, n in zip(converters, name, strict=True))
         else:
-            has_nan = isna(name_sample)
-
-            convert_sample = name_sample if has_nan else index_sample
-            converter = get_converter(convert_sample)
-            names = (converter(name) for name in names)
-
-            indices = self.indices
-            if not self.dropna and has_nan:
-                indices = {np.nan if isna(k) else k: v for k, v in indices.items()}
+            converter = get_converter(index_sample)
+            name = converter(name)
 
-        return [indices.get(name, []) for name in names]
+        return self.indices.get(name, [])
 
     @final
     def _get_index(self, name):
         """
         Safe get index, translate keys for datelike to underlying repr.
         """
-        return self._get_indices([name])[0]
+        return self._get_indices(name)
 
     @final
     @cache_readonly
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index d86264cb95dc5..2591426906655 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -652,9 +652,24 @@ def indices(self) -> dict[Hashable, npt.NDArray[np.intp]]:
         """dict {group name -> group indices}"""
         if len(self.groupings) == 1 and isinstance(self.result_index, CategoricalIndex):
             # This shows unused categories in indices GH#38642
-            return self.groupings[0].indices
-        codes_list = [ping.codes for ping in self.groupings]
-        return get_indexer_dict(codes_list, self.levels)
+            result = self.groupings[0].indices
+        else:
+            codes_list = [ping.codes for ping in self.groupings]
+            result = get_indexer_dict(codes_list, self.levels)
+        if not self.dropna:
+            has_mi = isinstance(self.result_index, MultiIndex)
+            if not has_mi and self.result_index.hasnans:
+                result = {
+                    np.nan if isna(key) else key: value for key, value in result.items()
+                }
+            elif has_mi:
+                # MultiIndex has no efficient way to tell if there are NAs
+                result = {
+                    tuple(np.nan if isna(comp) else comp for comp in key): value
+                    for key, value in result.items()
+                }
+
+        return result
 
     @final
     @cache_readonly

From f7c5e23876e6f1fea7d4277195893d1b790f2e0a Mon Sep 17 00:00:00 2001
From: "T. Koskamp" <tristan@tristans-mini.home>
Date: Sat, 29 Nov 2025 17:00:27 +0100
Subject: [PATCH 4/4] Incorporate review suggestion for issue #63178

 BUG: Inconsistent behavior of Groupby with None values with filter
---
 pandas/core/groupby/groupby.py | 9 +--------
 pandas/core/groupby/ops.py     | 3 ++-
 2 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 5dd2266d0bb22..4b8b7717ad7ee 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -637,7 +637,7 @@ def indices(self) -> dict[Hashable, npt.NDArray[np.intp]]:
         return self._grouper.indices
 
     @final
-    def _get_indices(self, name):
+    def _get_index(self, name):
         """
         Safe get multiple indices, translate keys for
         datelike to underlying repr.
@@ -687,13 +687,6 @@ def get_converter(s):
 
         return self.indices.get(name, [])
 
-    @final
-    def _get_index(self, name):
-        """
-        Safe get index, translate keys for datelike to underlying repr.
-        """
-        return self._get_indices(name)
-
     @final
     @cache_readonly
     def _selected_obj(self):
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 2591426906655..f6600f39bbc57 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -665,7 +665,8 @@ def indices(self) -> dict[Hashable, npt.NDArray[np.intp]]:
             elif has_mi:
                 # MultiIndex has no efficient way to tell if there are NAs
                 result = {
-                    tuple(np.nan if isna(comp) else comp for comp in key): value
+                    # error: "Hashable" has no attribute "__iter__" (not iterable)
+                    tuple(np.nan if isna(comp) else comp for comp in key): value  # type: ignore[attr-defined]
                     for key, value in result.items()
                 }