Skip to content

Commit

Permalink
Backport PR #52426 on branch 2.0.x (API: Series/DataFrame from empty dict should have RangeIndex) (#52578)
Browse files: browse the repository at this point in the history

Backport PR #52426: API: Series/DataFrame from empty dict should have RangeIndex

Co-authored-by: Terji Petersen <terji78@gmail.com>
  • Loading branch information
meeseeksmachine and topper-123 committed Apr 10, 2023
1 parent bedfb57 commit 865c0d6
Show file tree
Hide file tree
Showing 8 changed files with 40 additions and 42 deletions.
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v2.0.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ Bug fixes

Other
~~~~~
-
- :class:`DataFrame` created from empty dicts had :attr:`~DataFrame.columns` of dtype ``object``. It is now a :class:`RangeIndex` (:issue:`52404`)
- :class:`Series` created from empty dicts had :attr:`~Series.index` of dtype ``object``. It is now a :class:`RangeIndex` (:issue:`52404`)

.. ---------------------------------------------------------------------------
.. _whatsnew_201.contributors:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -465,7 +465,7 @@ def dict_to_mgr(

else:
keys = list(data.keys())
columns = Index(keys)
columns = Index(keys) if keys else default_index(0)
arrays = [com.maybe_iterable_to_list(data[k]) for k in keys]
arrays = [arr if not isinstance(arr, Index) else arr._data for arr in arrays]

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -559,7 +559,7 @@ def _init_dict(
values = []
keys = index
else:
keys, values = (), []
keys, values = default_index(0), []

# Input is now list-like, so rely on "standard" construction:
s = Series(values, index=keys, dtype=dtype)
Expand Down
24 changes: 15 additions & 9 deletions pandas/tests/frame/constructors/test_from_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
DataFrame,
Index,
MultiIndex,
RangeIndex,
Series,
)
import pandas._testing as tm
Expand Down Expand Up @@ -152,21 +153,26 @@ def test_from_dict_columns_parameter(self):
DataFrame.from_dict({"A": [1, 2], "B": [4, 5]}, columns=["one", "two"])

@pytest.mark.parametrize(
"data_dict, keys, orient",
"data_dict, orient, expected",
[
({}, [], "index"),
([{("a",): 1}, {("a",): 2}], [("a",)], "columns"),
([OrderedDict([(("a",), 1), (("b",), 2)])], [("a",), ("b",)], "columns"),
([{("a", "b"): 1}], [("a", "b")], "columns"),
({}, "index", RangeIndex(0)),
(
[{("a",): 1}, {("a",): 2}],
"columns",
Index([("a",)], tupleize_cols=False),
),
(
[OrderedDict([(("a",), 1), (("b",), 2)])],
"columns",
Index([("a",), ("b",)], tupleize_cols=False),
),
([{("a", "b"): 1}], "columns", Index([("a", "b")], tupleize_cols=False)),
],
)
def test_constructor_from_dict_tuples(self, data_dict, keys, orient):
def test_constructor_from_dict_tuples(self, data_dict, orient, expected):
# GH#16769
df = DataFrame.from_dict(data_dict, orient)

result = df.columns
expected = Index(keys, dtype="object", tupleize_cols=False)

tm.assert_index_equal(result, expected)

def test_frame_dict_constructor_empty_series(self):
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ def test_empty_constructor(self, constructor):
],
)
def test_empty_constructor_object_index(self, constructor):
expected = DataFrame(columns=Index([]))
expected = DataFrame(index=RangeIndex(0), columns=RangeIndex(0))
result = constructor()
assert len(result.index) == 0
assert len(result.columns) == 0
Expand Down
12 changes: 4 additions & 8 deletions pandas/tests/io/json/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,9 +215,7 @@ def test_roundtrip_empty(self, orient, convert_axes):
idx = pd.Index([], dtype=(float if convert_axes else object))
expected = DataFrame(index=idx, columns=idx)
elif orient in ["index", "columns"]:
# TODO: this condition is probably a bug
idx = pd.Index([], dtype=(float if convert_axes else object))
expected = DataFrame(columns=idx)
expected = DataFrame()
else:
expected = empty_frame.copy()

Expand Down Expand Up @@ -651,11 +649,9 @@ def test_series_roundtrip_empty(self, orient):
data = empty_series.to_json(orient=orient)
result = read_json(data, typ="series", orient=orient)

expected = empty_series
if orient in ("values", "records"):
expected = expected.reset_index(drop=True)
else:
expected.index = expected.index.astype(float)
expected = empty_series.reset_index(drop=True)
if orient in ("split"):
expected.index = expected.index.astype(np.float64)

tm.assert_series_equal(result, expected)

Expand Down
8 changes: 2 additions & 6 deletions pandas/tests/io/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -1203,9 +1203,8 @@ def test_error_on_using_partition_cols_and_partition_on(

def test_empty_dataframe(self, fp):
# GH #27339
df = pd.DataFrame(index=[], columns=[])
df = pd.DataFrame()
expected = df.copy()
expected.index.name = "index"
check_round_trip(df, fp, expected=expected)

def test_timezone_aware_index(self, fp, timezone_aware_date_list):
Expand Down Expand Up @@ -1260,8 +1259,5 @@ def test_invalid_dtype_backend(self, engine):
def test_empty_columns(self, fp):
# GH 52034
df = pd.DataFrame(index=pd.Index(["a", "b", "c"], name="custom name"))
expected = pd.DataFrame(
columns=pd.Index([], dtype=object),
index=pd.Index(["a", "b", "c"], name="custom name"),
)
expected = pd.DataFrame(index=pd.Index(["a", "b", "c"], name="custom name"))
check_round_trip(df, fp, expected=expected)
29 changes: 14 additions & 15 deletions pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,35 +93,34 @@ def test_unparseable_strings_with_dt64_dtype(self):
Series(np.array(vals, dtype=object), dtype="datetime64[ns]")

@pytest.mark.parametrize(
"constructor,check_index_type",
"constructor",
[
# NOTE: some overlap with test_constructor_empty but that test does not
# test for None or an empty generator.
# test_constructor_pass_none tests None but only with the index also
# passed.
(lambda idx: Series(index=idx), True),
(lambda idx: Series(None, index=idx), True),
(lambda idx: Series({}, index=idx), False), # creates an Index[object]
(lambda idx: Series((), index=idx), True),
(lambda idx: Series([], index=idx), True),
(lambda idx: Series((_ for _ in []), index=idx), True),
(lambda idx: Series(data=None, index=idx), True),
(lambda idx: Series(data={}, index=idx), False), # creates an Index[object]
(lambda idx: Series(data=(), index=idx), True),
(lambda idx: Series(data=[], index=idx), True),
(lambda idx: Series(data=(_ for _ in []), index=idx), True),
(lambda idx: Series(index=idx)),
(lambda idx: Series(None, index=idx)),
(lambda idx: Series({}, index=idx)),
(lambda idx: Series((), index=idx)),
(lambda idx: Series([], index=idx)),
(lambda idx: Series((_ for _ in []), index=idx)),
(lambda idx: Series(data=None, index=idx)),
(lambda idx: Series(data={}, index=idx)),
(lambda idx: Series(data=(), index=idx)),
(lambda idx: Series(data=[], index=idx)),
(lambda idx: Series(data=(_ for _ in []), index=idx)),
],
)
@pytest.mark.parametrize("empty_index", [None, []])
def test_empty_constructor(self, constructor, check_index_type, empty_index):
# TODO: share with frame test of the same name
def test_empty_constructor(self, constructor, empty_index):
# GH 49573 (addition of empty_index parameter)
expected = Series(index=empty_index)
result = constructor(empty_index)

assert result.dtype == object
assert len(result.index) == 0
tm.assert_series_equal(result, expected, check_index_type=check_index_type)
tm.assert_series_equal(result, expected, check_index_type=True)

def test_invalid_dtype(self):
# GH15520
Expand Down

0 comments on commit 865c0d6

Please sign in to comment.