diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 01650940c4692..1795eddb1b587 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -1198,6 +1198,7 @@ Reshaping
 - Bug in :meth:`DataFrame.combine_first` with non-unique columns incorrectly raising (:issue:`29135`)
 - Bug in :meth:`DataFrame.combine` with non-unique columns incorrectly raising (:issue:`51340`)
 - Bug in :meth:`DataFrame.explode` producing incorrect result for :class:`pyarrow.large_list` type (:issue:`61091`)
+- Bug in :meth:`DataFrame.from_dict` where rows corresponding to an empty :class:`Series` or ``dict`` would be dropped when ``orient='index'`` (:issue:`62775`)
 - Bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`)
 - Bug in :meth:`DataFrame.join` when a :class:`DataFrame` with a :class:`MultiIndex` would raise an ``AssertionError`` when :attr:`MultiIndex.names` contained ``None``. (:issue:`58721`)
 - Bug in :meth:`DataFrame.merge` where merging on a column containing only ``NaN`` values resulted in an out-of-bounds array access (:issue:`59421`)
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 90d73ca067260..b8c8c2903989b 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -1931,7 +1931,14 @@ def from_dict(
             if len(data) > 0:
                 # TODO speed up Series case
                 if isinstance(next(iter(data.values())), (Series, dict)):
+                    original_keys = list(data.keys())
                     data = _from_nested_dict(data)
+                    if not data:
+                        # No row defined any column: pass the row labels
+                        # explicitly so the rows are kept, with or without
+                        # user-supplied ``columns``.
+                        index = original_keys
+                        if columns is None:
+                            columns = []
                 else:
                     index = list(data.keys())
                     # error: Incompatible types in assignment (expression has type
@@ -14423,13 +14430,42 @@ def values(self) -> np.ndarray:
 
 def _from_nested_dict(
     data: Mapping[HashableT, Mapping[HashableT2, T]],
-) -> collections.defaultdict[HashableT2, dict[HashableT, T]]:
-    new_data: collections.defaultdict[HashableT2, dict[HashableT, T]] = (
+) -> collections.defaultdict[HashableT2, dict[HashableT, Any]]:
+    """
+    Transpose a row-keyed nested mapping into a column-keyed one.
+
+    Each row key is given an entry under every column seen in any row, so
+    rows whose value is an empty dict/Series are kept rather than dropped.
+    Cells a row does not define are filled with ``np.nan``; a scalar or
+    ``None`` row value is repeated across all columns. If no row defines
+    any column, an empty mapping is returned.
+
+    Raises
+    ------
+    TypeError
+        If a row value is not a dict, Series, scalar or None.
+    """
+    new_data: collections.defaultdict[HashableT2, dict[HashableT, Any]] = (
         collections.defaultdict(dict)
     )
+    # A plain dict serves as an ordered set of column labels (first-seen order).
+    all_cols: dict[HashableT2, None] = {}
+    for s in data.values():
+        if isinstance(s, (dict, ABCSeries)):
+            all_cols.update(dict.fromkeys(s.keys()))
+    if not all_cols:
+        return new_data
     for index, s in data.items():
-        for col, v in s.items():
-            new_data[col][index] = v
+        if isinstance(s, (dict, ABCSeries)):
+            for col in all_cols:
+                # Fill with NaN rather than None so that object columns are
+                # not populated with explicit None placeholders.
+                new_data[col][index] = s.get(col, np.nan)
+        elif s is None or is_scalar(s):
+            for col in all_cols:
+                new_data[col][index] = s
+        else:
+            raise TypeError(f"Value at index {index} is not a dict/Series/scalar/None")
     return new_data
 
 
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index 264011edb65b5..067c79755705d 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -2649,6 +2649,48 @@ def test_error_from_2darray(self, col_a, col_b):
         with pytest.raises(ValueError, match=msg):
             DataFrame({"a": col_a, "b": col_b})
 
+    @pytest.mark.parametrize(
+        "data, expected",
+        [
+            (
+                {
+                    "good": Series({"a": 1, "b": 2}),
+                    "blank": Series(dtype="float64"),
+                },
+                DataFrame(
+                    {"a": [1.0, np.nan], "b": [2.0, np.nan]}, index=["good", "blank"]
+                ),
+            ),
+            (
+                {
+                    "blank": Series(dtype="float64"),
+                    "good": Series({"a": 1, "b": 2}),
+                },
+                DataFrame(
+                    {"a": [np.nan, 1.0], "b": [np.nan, 2.0]}, index=["blank", "good"]
+                ),
+            ),
+            (
+                {"blank": Series(dtype="float64")},
+                DataFrame(index=["blank"], columns=[]),
+            ),
+            (
+                {
+                    "good": Series({"a": 1, "b": 2}),
+                    "blank_dict": {},
+                },
+                DataFrame(
+                    {"a": [1.0, np.nan], "b": [2.0, np.nan]},
+                    index=["good", "blank_dict"],
+                ),
+            ),
+        ],
+    )
+    def test_from_dict_orient_index_empty_series_or_dict(self, data, expected):
+        # GH-62775
+        result = DataFrame.from_dict(data, orient="index")
+        tm.assert_frame_equal(result, expected)
+
     def test_from_dict_with_missing_copy_false(self):
         # GH#45369 filled columns should not be views of one another
         df = DataFrame(index=[1, 2, 3], columns=["a", "b", "c"], copy=False)