diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index a7027b9b26ebb..bae1334398fcb 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1092,6 +1092,7 @@ I/O - Bug in :class:`DataFrame` and :class:`Series` ``repr`` of :py:class:`collections.abc.Mapping` elements. (:issue:`57915`) - Fix bug in ``on_bad_lines`` callable when returning too many fields: now emits ``ParserWarning`` and truncates extra fields regardless of ``index_col`` (:issue:`61837`) +- Bug in :func:`pandas.json_normalize` inconsistently handling non-dict items in ``data`` when ``max_level`` was set. The function will now raise a ``TypeError`` if ``data`` is a list containing non-dict items (:issue:`62829`) - Bug in :meth:`.DataFrame.to_json` when ``"index"`` was a value in the :attr:`DataFrame.column` and :attr:`Index.name` was ``None``. Now, this will fail with a ``ValueError`` (:issue:`58925`) - Bug in :meth:`.io.common.is_fsspec_url` not recognizing chained fsspec URLs (:issue:`48978`) - Bug in :meth:`DataFrame._repr_html_` which ignored the ``"display.float_format"`` option (:issue:`59876`) diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index 16ec73ddeb743..dc03d9141a4e9 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -501,6 +501,13 @@ def _pull_records(js: dict[str, Any], spec: list | str) -> list: # GH35923 Fix pd.json_normalize to not skip the first element of a # generator input data = list(data) + for item in data: + if not isinstance(item, dict): + msg = ( + "All items in data must be of type dict, " + f"found {type(item).__name__}" + ) + raise TypeError(msg) else: raise NotImplementedError diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index b6212b514673f..f03fd235fef85 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -511,6 +511,17 @@ def test_max_level_with_records_path(self, max_level, expected): expected_df = DataFrame(data=expected, columns=result.columns.values) tm.assert_equal(expected_df, result) + def test_json_normalize_non_dict_items(self): + # gh-62829 + data_list = [np.nan, {"id": 12}, {"id": 13}] + msg = "All items in data must be of type dict, found float" + + with pytest.raises(TypeError, match=msg): + json_normalize(data_list, max_level=0) + + with pytest.raises(TypeError, match=msg): + json_normalize(data_list) + def test_nested_flattening_consistent(self): # see gh-21537 df1 = json_normalize([{"A": {"B": 1}}])