Skip to content

Commit

Permalink
BUG: from_dict ignored order of OrderedDict (pandas-dev#8425)
Browse files Browse the repository at this point in the history
  • Loading branch information
mazayo committed Jun 16, 2019
1 parent 2d2606d commit 1c3b0dc
Show file tree
Hide file tree
Showing 6 changed files with 42 additions and 7 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.25.0.rst
Expand Up @@ -633,6 +633,7 @@ Indexing
- Bug in which :meth:`DataFrame.to_csv` caused a segfault for a reindexed data frame, when the indices were single-level :class:`MultiIndex` (:issue:`26303`).
- Fixed bug where assigning a :class:`arrays.PandasArray` to a :class:`pandas.core.frame.DataFrame` would raise error (:issue:`26390`)
- Allow keyword arguments for callable local reference used in the :meth:`DataFrame.query` string (:issue:`26426`)
- Bug in which :meth:`DataFrame.from_dict` ignored the order of an ``OrderedDict`` when ``orient='index'`` (:issue:`8425`).


Missing
Expand Down
5 changes: 3 additions & 2 deletions pandas/_libs/lib.pyx
Expand Up @@ -240,7 +240,8 @@ def fast_unique_multiple(list arrays, sort: bool=True):

@cython.wraparound(False)
@cython.boundscheck(False)
def fast_unique_multiple_list(lists: list, sort: bool=True) -> list:
def fast_unique_multiple_list(lists: list, sort: bool=True,
ordered: bool=False) -> list:
cdef:
list buf
Py_ssize_t k = len(lists)
Expand All @@ -257,7 +258,7 @@ def fast_unique_multiple_list(lists: list, sort: bool=True) -> list:
if val not in table:
table[val] = stub
uniques.append(val)
if sort:
if sort and not ordered:
try:
uniques.sort()
except Exception:
Expand Down
7 changes: 5 additions & 2 deletions pandas/core/indexes/api.py
Expand Up @@ -125,7 +125,7 @@ def _get_combined_index(indexes, intersect=False, sort=False):
return index


def _union_indexes(indexes, sort=True):
def _union_indexes(indexes, sort=True, ordered=False):
"""
Return the union of indexes.
Expand All @@ -136,6 +136,8 @@ def _union_indexes(indexes, sort=True):
indexes : list of Index or list objects
sort : bool, default True
Whether the result index should come out sorted or not.
ordered : bool, default False
Whether ordered indexes, such as the keys of an OrderedDict, are passed.
Returns
-------
Expand Down Expand Up @@ -171,7 +173,8 @@ def conv(i):
return i

return Index(
lib.fast_unique_multiple_list([conv(i) for i in inds], sort=sort))
lib.fast_unique_multiple_list([conv(i) for i in inds],
sort=sort, ordered=ordered))

if kind == 'special':
result = indexes[0]
Expand Down
5 changes: 4 additions & 1 deletion pandas/core/internals/construction.py
Expand Up @@ -285,13 +285,16 @@ def extract_index(data):
have_raw_arrays = False
have_series = False
have_dicts = False
have_ordered = False

for val in data:
if isinstance(val, ABCSeries):
have_series = True
indexes.append(val.index)
elif isinstance(val, dict):
have_dicts = True
if isinstance(val, OrderedDict):
have_ordered = True
indexes.append(list(val.keys()))
elif is_list_like(val) and getattr(val, 'ndim', 1) == 1:
have_raw_arrays = True
Expand All @@ -302,7 +305,7 @@ def extract_index(data):
' an index')

if have_series or have_dicts:
index = _union_indexes(indexes)
index = _union_indexes(indexes, ordered=have_ordered)

if have_raw_arrays:
lengths = list(set(raw_lengths))
Expand Down
14 changes: 12 additions & 2 deletions pandas/tests/frame/test_constructors.py
Expand Up @@ -1153,7 +1153,7 @@ def test_constructor_list_of_series(self):

sdict = OrderedDict(zip(['x', 'Unnamed 0'], data))
expected = DataFrame.from_dict(sdict, orient='index')
tm.assert_frame_equal(result.sort_index(), expected)
tm.assert_frame_equal(result, expected)

# none named
data = [OrderedDict([['a', 1.5], ['b', 3], ['c', 4], ['d', 6]]),
Expand Down Expand Up @@ -1288,7 +1288,7 @@ def test_constructor_list_of_namedtuples(self):
def test_constructor_orient(self):
data_dict = self.mixed_frame.T._series
recons = DataFrame.from_dict(data_dict, orient='index')
expected = self.mixed_frame.sort_index()
expected = self.mixed_frame
tm.assert_frame_equal(recons, expected)

# dict of sequence
Expand All @@ -1298,6 +1298,16 @@ def test_constructor_orient(self):
xp = DataFrame.from_dict(a).T.reindex(list(a.keys()))
tm.assert_frame_equal(rs, xp)

def test_constructor_from_ordered_dict(self):
    # GH8425
    # Row labels are deliberately not in alphabetical order, so a result
    # that had its index sorted would not match the expected frame.
    rows = [
        ('one', [('col_a', 'foo1'), ('col_b', 'bar1')]),
        ('two', [('col_a', 'foo2'), ('col_b', 'bar2')]),
        ('three', [('col_a', 'foo3'), ('col_b', 'bar3')]),
    ]
    a = OrderedDict((label, OrderedDict(pairs)) for label, pairs in rows)

    result = DataFrame.from_dict(a, orient='index')
    # Building column-wise and transposing gives the reference layout.
    expected = DataFrame.from_dict(a, orient='columns').T
    tm.assert_frame_equal(result, expected)

def test_from_dict_columns_parameter(self):
# GH 18529
# Test new columns parameter for from_dict that was added to make
Expand Down
17 changes: 17 additions & 0 deletions pandas/tests/test_lib.py
Expand Up @@ -203,3 +203,20 @@ def test_get_reverse_indexer(self):
def test_cache_readonly_preserve_docstrings():
    # GH18197
    # The docstring must still be reachable through the class attribute.
    hasnans_doc = Index.hasnans.__doc__
    assert hasnans_doc is not None


def test_fast_unique_multiple_list_with_sort():
    # With sort=True the de-duplicated values are expected in sorted order.
    input_lists = [['p', 'a'], ['n', 'd'], ['a', 's']]
    uniques = lib.fast_unique_multiple_list(input_lists, sort=True)

    tm.assert_numpy_array_equal(
        np.array(uniques),
        np.array(['a', 'd', 'n', 'p', 's']))


def test_fast_unique_multiple_list_with_ordered():
    # GH8425
    # ordered=True is expected to win over sort=True: values come back in
    # first-seen order, with the repeated 'a' dropped.
    input_lists = [['p', 'a'], ['n', 'd'], ['a', 's']]
    uniques = lib.fast_unique_multiple_list(
        input_lists, sort=True, ordered=True)

    tm.assert_numpy_array_equal(
        np.array(uniques),
        np.array(['p', 'a', 'n', 'd', 's']))

0 comments on commit 1c3b0dc

Please sign in to comment.