From 784ad59440d8da7d122616ceb4c541018453d366 Mon Sep 17 00:00:00 2001
From: parthava-adabala <parthava.adabala@gmail.com>
Date: Sat, 25 Oct 2025 16:41:28 -0500
Subject: [PATCH 1/3] BUG: Handle non-dict items in json_normalize with
 max_level

---
 doc/source/whatsnew/v3.0.0.rst         |  1 +
 pandas/io/json/_normalize.py           |  5 ++++-
 pandas/tests/io/json/test_normalize.py | 13 +++++++++++++
 3 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index a7027b9b26ebb..99c14f84ea4a2 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -1107,6 +1107,7 @@ I/O
 - Bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
 - Bug in :meth:`HDFStore.get` was failing to save data of dtype datetime64[s] correctly (:issue:`59004`)
 - Bug in :meth:`HDFStore.select` causing queries on categorical string columns to return unexpected results (:issue:`57608`)
+- Bug in :func:`pandas.json_normalize` raising ``AttributeError`` when ``max_level`` was set and the input data contained ``NaN`` values (:issue:`62829`)
 - Bug in :meth:`MultiIndex.factorize` incorrectly raising on length-0 indexes (:issue:`57517`)
 - Bug in :meth:`read_csv` causing segmentation fault when ``encoding_errors`` is not a string. (:issue:`59059`)
 - Bug in :meth:`read_csv` for the ``c`` and ``python`` engines where parsing numbers with large exponents caused overflows. Now, numbers with large positive exponents are parsed as ``inf`` or ``-inf`` depending on the sign of the mantissa, while those with large negative exponents are parsed as ``0.0`` (:issue:`62617`, :issue:`38794`, :issue:`62740`)
diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py
index 16ec73ddeb743..4545865b1ec81 100644
--- a/pandas/io/json/_normalize.py
+++ b/pandas/io/json/_normalize.py
@@ -117,6 +117,9 @@ def nested_to_record(
         singleton = True
     new_ds = []
     for d in ds:
+        if not isinstance(d, dict):
+            new_ds.append({})
+            continue
         new_d = copy.deepcopy(d)
         for k, v in d.items():
             # each key gets renamed with prefix
@@ -517,7 +520,7 @@ def _pull_records(js: dict[str, Any], spec: list | str) -> list:
         return DataFrame(_simple_json_normalize(data, sep=sep), index=index)
 
     if record_path is None:
-        if any([isinstance(x, dict) for x in y.values()] for y in data):
+        if any(isinstance(y, dict) for y in data):
             # naive normalization, this is idempotent for flat records
             # and potentially will inflate the data considerably for
             # deeply nested structures:
diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py
index b6212b514673f..cb73b943b1544 100644
--- a/pandas/tests/io/json/test_normalize.py
+++ b/pandas/tests/io/json/test_normalize.py
@@ -511,6 +511,19 @@ def test_max_level_with_records_path(self, max_level, expected):
         expected_df = DataFrame(data=expected, columns=result.columns.values)
         tm.assert_equal(expected_df, result)
 
+    def test_json_normalize_max_level_with_nan(self):
+        # GH 62829 - test for bug where max_level=0 fails with nan in input list
+        d = {
+            1: {"id": 10, "status": "AVAL"},
+            2: {"id": 30, "status": "AVAL", "items": {"id": 12, "size": 20}},
+            3: {"id": 50, "status": "AVAL", "items": {"id": 13, "size": 30}},
+        }
+        df = DataFrame.from_dict(d, orient="index")
+        data_list = df["items"].tolist()
+        expected = DataFrame({"id": [np.nan, 12.0, 13.0], "size": [np.nan, 20.0, 30.0]})
+        result = json_normalize(data_list, max_level=0)
+        tm.assert_frame_equal(result, expected)
+
     def test_nested_flattening_consistent(self):
         # see gh-21537
         df1 = json_normalize([{"A": {"B": 1}}])

From 6ebda259a10e1116f595e5ede8105d70b83c251e Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sat, 25 Oct 2025 21:57:04 +0000
Subject: [PATCH 2/3] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 doc/source/whatsnew/v3.0.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 99c14f84ea4a2..5087fce369977 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -1092,6 +1092,7 @@ I/O
 - Bug in :class:`DataFrame` and :class:`Series` ``repr`` of :py:class:`collections.abc.Mapping` elements. (:issue:`57915`)
 - Fix bug in ``on_bad_lines`` callable when returning too many fields: now emits
   ``ParserWarning`` and truncates extra fields regardless of ``index_col`` (:issue:`61837`)
+- Bug in :func:`pandas.json_normalize` raising ``AttributeError`` when ``max_level`` was set and the input data contained ``NaN`` values (:issue:`62829`)
 - Bug in :meth:`.DataFrame.to_json` when ``"index"`` was a value in the :attr:`DataFrame.column` and :attr:`Index.name` was ``None``. Now, this will fail with a ``ValueError`` (:issue:`58925`)
 - Bug in :meth:`.io.common.is_fsspec_url` not recognizing chained fsspec URLs (:issue:`48978`)
 - Bug in :meth:`DataFrame._repr_html_` which ignored the ``"display.float_format"`` option (:issue:`59876`)
@@ -1107,7 +1108,6 @@ I/O
 - Bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
 - Bug in :meth:`HDFStore.get` was failing to save data of dtype datetime64[s] correctly (:issue:`59004`)
 - Bug in :meth:`HDFStore.select` causing queries on categorical string columns to return unexpected results (:issue:`57608`)
-- Bug in :func:`pandas.json_normalize` raising ``AttributeError`` when ``max_level`` was set and the input data contained ``NaN`` values (:issue:`62829`)
 - Bug in :meth:`MultiIndex.factorize` incorrectly raising on length-0 indexes (:issue:`57517`)
 - Bug in :meth:`read_csv` causing segmentation fault when ``encoding_errors`` is not a string. (:issue:`59059`)
 - Bug in :meth:`read_csv` for the ``c`` and ``python`` engines where parsing numbers with large exponents caused overflows. Now, numbers with large positive exponents are parsed as ``inf`` or ``-inf`` depending on the sign of the mantissa, while those with large negative exponents are parsed as ``0.0`` (:issue:`62617`, :issue:`38794`, :issue:`62740`)

From 3ec415fccfafbd21cf6ba388db40ccdb6a4bc151 Mon Sep 17 00:00:00 2001
From: parthava-adabala <parthava.adabala@gmail.com>
Date: Sat, 25 Oct 2025 16:41:28 -0500
Subject: [PATCH 3/3] BUG: Raise TypeError for non-dict items in json_normalize

---
 doc/source/whatsnew/v3.0.0.rst         |  2 +-
 pandas/io/json/_normalize.py           | 12 ++++++++----
 pandas/tests/io/json/test_normalize.py | 22 ++++++++++------------
 3 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 5087fce369977..bae1334398fcb 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -1092,7 +1092,7 @@ I/O
 - Bug in :class:`DataFrame` and :class:`Series` ``repr`` of :py:class:`collections.abc.Mapping` elements. (:issue:`57915`)
 - Fix bug in ``on_bad_lines`` callable when returning too many fields: now emits
   ``ParserWarning`` and truncates extra fields regardless of ``index_col`` (:issue:`61837`)
-- Bug in :func:`pandas.json_normalize` raising ``AttributeError`` when ``max_level`` was set and the input data contained ``NaN`` values (:issue:`62829`)
+- Bug in :func:`pandas.json_normalize` inconsistently handling non-dict items in ``data`` when ``max_level`` was set. The function now raises a ``TypeError`` if ``data`` is a list or other iterable containing non-dict items (:issue:`62829`)
 - Bug in :meth:`.DataFrame.to_json` when ``"index"`` was a value in the :attr:`DataFrame.column` and :attr:`Index.name` was ``None``. Now, this will fail with a ``ValueError`` (:issue:`58925`)
 - Bug in :meth:`.io.common.is_fsspec_url` not recognizing chained fsspec URLs (:issue:`48978`)
 - Bug in :meth:`DataFrame._repr_html_` which ignored the ``"display.float_format"`` option (:issue:`59876`)
diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py
index 4545865b1ec81..dc03d9141a4e9 100644
--- a/pandas/io/json/_normalize.py
+++ b/pandas/io/json/_normalize.py
@@ -117,9 +117,6 @@ def nested_to_record(
         singleton = True
     new_ds = []
     for d in ds:
-        if not isinstance(d, dict):
-            new_ds.append({})
-            continue
         new_d = copy.deepcopy(d)
         for k, v in d.items():
             # each key gets renamed with prefix
@@ -504,6 +501,13 @@ def _pull_records(js: dict[str, Any], spec: list | str) -> list:
         # GH35923 Fix pd.json_normalize to not skip the first element of a
         # generator input
         data = list(data)
+        for item in data:
+            if not isinstance(item, dict):
+                msg = (
+                    "All items in data must be of type dict, "
+                    f"found {type(item).__name__}"
+                )
+                raise TypeError(msg)
     else:
         raise NotImplementedError
 
@@ -520,7 +524,7 @@ def _pull_records(js: dict[str, Any], spec: list | str) -> list:
         return DataFrame(_simple_json_normalize(data, sep=sep), index=index)
 
     if record_path is None:
-        if any(isinstance(y, dict) for y in data):
+        if any([isinstance(x, dict) for x in y.values()] for y in data):
             # naive normalization, this is idempotent for flat records
             # and potentially will inflate the data considerably for
             # deeply nested structures:
diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py
index cb73b943b1544..f03fd235fef85 100644
--- a/pandas/tests/io/json/test_normalize.py
+++ b/pandas/tests/io/json/test_normalize.py
@@ -511,18 +511,16 @@ def test_max_level_with_records_path(self, max_level, expected):
         expected_df = DataFrame(data=expected, columns=result.columns.values)
         tm.assert_equal(expected_df, result)
 
-    def test_json_normalize_max_level_with_nan(self):
-        # GH 62829 - test for bug where max_level=0 fails with nan in input list
-        d = {
-            1: {"id": 10, "status": "AVAL"},
-            2: {"id": 30, "status": "AVAL", "items": {"id": 12, "size": 20}},
-            3: {"id": 50, "status": "AVAL", "items": {"id": 13, "size": 30}},
-        }
-        df = DataFrame.from_dict(d, orient="index")
-        data_list = df["items"].tolist()
-        expected = DataFrame({"id": [np.nan, 12.0, 13.0], "size": [np.nan, 20.0, 30.0]})
-        result = json_normalize(data_list, max_level=0)
-        tm.assert_frame_equal(result, expected)
+    def test_json_normalize_non_dict_items(self):
+        # gh-62829
+        data_list = [np.nan, {"id": 12}, {"id": 13}]
+        msg = "All items in data must be of type dict, found float"
+
+        with pytest.raises(TypeError, match=msg):
+            json_normalize(data_list, max_level=0)
+
+        with pytest.raises(TypeError, match=msg):
+            json_normalize(data_list)
 
     def test_nested_flattening_consistent(self):
         # see gh-21537