diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 04c3f80df669a..91e0bf8a7aa69 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -10,6 +10,7 @@ from pandas.core.series import Series from pandas.core.panel import Panel from pandas.util.decorators import cache_readonly, Appender +from pandas.util.compat import OrderedDict import pandas.core.algorithms as algos import pandas.core.common as com import pandas.lib as lib @@ -1525,7 +1526,7 @@ def aggregate(self, arg, *args, **kwargs): if isinstance(arg, basestring): return getattr(self, arg)(*args, **kwargs) - result = {} + result = OrderedDict() if isinstance(arg, dict): if self.axis != 0: # pragma: no cover raise ValueError('Can only pass dict with axis=0') @@ -1533,7 +1534,7 @@ def aggregate(self, arg, *args, **kwargs): obj = self._obj_with_exclusions if any(isinstance(x, (list, tuple, dict)) for x in arg.values()): - new_arg = {} + new_arg = OrderedDict() for k, v in arg.iteritems(): if not isinstance(v, (tuple, list, dict)): new_arg[k] = [v] diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py index 542e5ee964362..eade7d357dfad 100644 --- a/pandas/tests/test_format.py +++ b/pandas/tests/test_format.py @@ -793,7 +793,8 @@ def test_dict_entries(self): df = DataFrame({'A': [{'a':1, 'b':2}]}) val = df.to_string() - self.assertTrue("{'a': 1, 'b': 2}" in val) + self.assertTrue("'a': 1" in val) + self.assertTrue("'b': 2" in val) def test_to_latex(self): # it works! diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 8361bae1f890b..c948fc28ca333 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -1859,6 +1859,7 @@ def test_agg_multiple_functions_too_many_lambdas(self): def test_more_flexible_frame_multi_function(self): from pandas import concat + from pandas.util.compat import OrderedDict grouped = self.df.groupby('A') @@ -1868,8 +1869,8 @@ def test_more_flexible_frame_multi_function(self): expected = concat([exmean, exstd], keys=['mean', 'std'], axis=1) expected = expected.swaplevel(0, 1, axis=1).sortlevel(0, axis=1) - result = grouped.aggregate({'C' : [np.mean, np.std], - 'D' : [np.mean, np.std]}) + d=OrderedDict([['C',[np.mean, np.std]],['D',[np.mean, np.std]]]) + result = grouped.aggregate(d) assert_frame_equal(result, expected) @@ -1884,10 +1885,12 @@ def test_more_flexible_frame_multi_function(self): def foo(x): return np.mean(x) def bar(x): return np.std(x, ddof=1) result = grouped.aggregate({'C' : np.mean, - 'D' : {'foo': np.mean, - 'bar': np.std}}) + 'D' : OrderedDict([['foo', np.mean], + ['bar', np.std]])}) + expected = grouped.aggregate({'C' : [np.mean], 'D' : [foo, bar]}) + assert_frame_equal(result, expected) def test_multi_function_flexible_mix(self): @@ -1896,7 +1899,7 @@ def test_multi_function_flexible_mix(self): grouped = self.df.groupby('A') result = grouped.aggregate({'C' : {'foo' : 'mean', - 'bar' : 'std'}, + 'bar' : 'std'}, 'D' : 'sum'}) result2 = grouped.aggregate({'C' : {'foo' : 'mean', 'bar' : 'std'}, diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 82b3a0fa7115e..bc1770d58b0bc 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -339,7 +339,10 @@ def get_period_alias(offset_str): _offset_map[_name] = offsets.WeekOfMonth(week=_iweek, weekday=_i) _rule_aliases[_name.replace('-', '@')] = _name -_legacy_reverse_map = dict((v, k) for k, v in _rule_aliases.iteritems()) +# Note that _rule_aliases is not 1:1 (d[BA]==d[A@DEC]), and so traversal +# order matters when constructing an inverse. we pick one. #2331 +_legacy_reverse_map = dict((v, k) for k, v in + reversed(sorted(_rule_aliases.iteritems()))) # for helping out with pretty-printing and name-lookups diff --git a/pandas/util/compat.py b/pandas/util/compat.py index 894f94d11a8b8..613f6a06d3f30 100644 --- a/pandas/util/compat.py +++ b/pandas/util/compat.py @@ -12,3 +12,269 @@ def product(*args, **kwds): result = [x + [y] for x in result for y in pool] for prod in result: yield tuple(prod) + + +# OrderedDict Shim from Raymond Hettinger, python core dev +# http://code.activestate.com/recipes/576693-ordered-dictionary-for-py24/ +# here to support versions before 2.6 +import sys +try: + from thread import get_ident as _get_ident +except ImportError: + from dummy_thread import get_ident as _get_ident + +try: + from _abcoll import KeysView, ValuesView, ItemsView +except ImportError: + pass + + +class _OrderedDict(dict): + 'Dictionary that remembers insertion order' + # An inherited dict maps keys to values. + # The inherited dict provides __getitem__, __len__, __contains__, and get. + # The remaining methods are order-aware. + # Big-O running times for all methods are the same as for regular dictionaries. + + # The internal self.__map dictionary maps keys to links in a doubly linked list. + # The circular doubly linked list starts and ends with a sentinel element. + # The sentinel element never gets deleted (this simplifies the algorithm). + # Each link is stored as a list of length three: [PREV, NEXT, KEY]. + + def __init__(self, *args, **kwds): + '''Initialize an ordered dictionary. Signature is the same as for + regular dictionaries, but keyword arguments are not recommended + because their insertion order is arbitrary. + + ''' + if len(args) > 1: + raise TypeError('expected at most 1 arguments, got %d' % len(args)) + try: + self.__root + except AttributeError: + self.__root = root = [] # sentinel node + root[:] = [root, root, None] + self.__map = {} + self.__update(*args, **kwds) + + def __setitem__(self, key, value, dict_setitem=dict.__setitem__): + 'od.__setitem__(i, y) <==> od[i]=y' + # Setting a new item creates a new link which goes at the end of the linked + # list, and the inherited dictionary is updated with the new key/value pair. + if key not in self: + root = self.__root + last = root[0] + last[1] = root[0] = self.__map[key] = [last, root, key] + dict_setitem(self, key, value) + + def __delitem__(self, key, dict_delitem=dict.__delitem__): + 'od.__delitem__(y) <==> del od[y]' + # Deleting an existing item uses self.__map to find the link which is + # then removed by updating the links in the predecessor and successor nodes. + dict_delitem(self, key) + link_prev, link_next, key = self.__map.pop(key) + link_prev[1] = link_next + link_next[0] = link_prev + + def __iter__(self): + 'od.__iter__() <==> iter(od)' + root = self.__root + curr = root[1] + while curr is not root: + yield curr[2] + curr = curr[1] + + def __reversed__(self): + 'od.__reversed__() <==> reversed(od)' + root = self.__root + curr = root[0] + while curr is not root: + yield curr[2] + curr = curr[0] + + def clear(self): + 'od.clear() -> None. Remove all items from od.' + try: + for node in self.__map.itervalues(): + del node[:] + root = self.__root + root[:] = [root, root, None] + self.__map.clear() + except AttributeError: + pass + dict.clear(self) + + def popitem(self, last=True): + '''od.popitem() -> (k, v), return and remove a (key, value) pair. + Pairs are returned in LIFO order if last is true or FIFO order if false. + + ''' + if not self: + raise KeyError('dictionary is empty') + root = self.__root + if last: + link = root[0] + link_prev = link[0] + link_prev[1] = root + root[0] = link_prev + else: + link = root[1] + link_next = link[1] + root[1] = link_next + link_next[0] = root + key = link[2] + del self.__map[key] + value = dict.pop(self, key) + return key, value + + # -- the following methods do not depend on the internal structure -- + + def keys(self): + 'od.keys() -> list of keys in od' + return list(self) + + def values(self): + 'od.values() -> list of values in od' + return [self[key] for key in self] + + def items(self): + 'od.items() -> list of (key, value) pairs in od' + return [(key, self[key]) for key in self] + + def iterkeys(self): + 'od.iterkeys() -> an iterator over the keys in od' + return iter(self) + + def itervalues(self): + 'od.itervalues -> an iterator over the values in od' + for k in self: + yield self[k] + + def iteritems(self): + 'od.iteritems -> an iterator over the (key, value) items in od' + for k in self: + yield (k, self[k]) + + def update(*args, **kwds): + '''od.update(E, **F) -> None. Update od from dict/iterable E and F. + + If E is a dict instance, does: for k in E: od[k] = E[k] + If E has a .keys() method, does: for k in E.keys(): od[k] = E[k] + Or if E is an iterable of items, does: for k, v in E: od[k] = v + In either case, this is followed by: for k, v in F.items(): od[k] = v + + ''' + if len(args) > 2: + raise TypeError('update() takes at most 2 positional ' + 'arguments (%d given)' % (len(args),)) + elif not args: + raise TypeError('update() takes at least 1 argument (0 given)') + self = args[0] + # Make progressively weaker assumptions about "other" + other = () + if len(args) == 2: + other = args[1] + if isinstance(other, dict): + for key in other: + self[key] = other[key] + elif hasattr(other, 'keys'): + for key in other.keys(): + self[key] = other[key] + else: + for key, value in other: + self[key] = value + for key, value in kwds.items(): + self[key] = value + + __update = update # let subclasses override update without breaking __init__ + + __marker = object() + + def pop(self, key, default=__marker): + '''od.pop(k[,d]) -> v, remove specified key and return the corresponding value. + If key is not found, d is returned if given, otherwise KeyError is raised. + + ''' + if key in self: + result = self[key] + del self[key] + return result + if default is self.__marker: + raise KeyError(key) + return default + + def setdefault(self, key, default=None): + 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od' + if key in self: + return self[key] + self[key] = default + return default + + def __repr__(self, _repr_running={}): + 'od.__repr__() <==> repr(od)' + call_key = id(self), _get_ident() + if call_key in _repr_running: + return '...' + _repr_running[call_key] = 1 + try: + if not self: + return '%s()' % (self.__class__.__name__,) + return '%s(%r)' % (self.__class__.__name__, self.items()) + finally: + del _repr_running[call_key] + + def __reduce__(self): + 'Return state information for pickling' + items = [[k, self[k]] for k in self] + inst_dict = vars(self).copy() + for k in vars(OrderedDict()): + inst_dict.pop(k, None) + if inst_dict: + return (self.__class__, (items,), inst_dict) + return self.__class__, (items,) + + def copy(self): + 'od.copy() -> a shallow copy of od' + return self.__class__(self) + + @classmethod + def fromkeys(cls, iterable, value=None): + '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S + and values equal to v (which defaults to None). + + ''' + d = cls() + for key in iterable: + d[key] = value + return d + + def __eq__(self, other): + '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive + while comparison to a regular mapping is order-insensitive. + + ''' + if isinstance(other, OrderedDict): + return len(self)==len(other) and self.items() == other.items() + return dict.__eq__(self, other) + + def __ne__(self, other): + return not self == other + + # -- the following methods are only used in Python 2.7 -- + + def viewkeys(self): + "od.viewkeys() -> a set-like object providing a view on od's keys" + return KeysView(self) + + def viewvalues(self): + "od.viewvalues() -> an object providing a view on od's values" + return ValuesView(self) + + def viewitems(self): + "od.viewitems() -> a set-like object providing a view on od's items" + return ItemsView(self) + +if sys.version_info[:2] < (2,7): + OrderedDict=_OrderedDict +else: + from collections import OrderedDict