-
Notifications
You must be signed in to change notification settings - Fork 3
/
collection.py
129 lines (106 loc) · 3.88 KB
/
collection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
""" Tools for working with collections """
import itertools
import sys
from warnings import warn
if sys.version_info < (3, 3):
from collections import Iterable
else:
from collections.abc import Iterable
__author__ = "Vince Reuter"
__email__ = "vreuter@virginia.edu"
__all__ = ["is_collection_like", "powerset", "asciify_dict", "merge_dicts"]
def merge_dicts(x, y):
"""
Merge dictionaries
:param Mapping x: dict to merge
:param Mapping y: dict to merge
:return Mapping: merged dict
"""
z = x.copy()
z.update(y)
return z
def is_collection_like(c):
"""
Determine whether an object is collection-like.
:param object c: Object to test as collection
:return bool: Whether the argument is a (non-string) collection
"""
return isinstance(c, Iterable) and not isinstance(c, str)
def uniqify(seq): # Dave Kirby
"""
Return only unique items in a sequence, preserving order
:param list seq: List of items to uniqify
:return list[object]: Original list with duplicates removed
"""
# Order preserving
seen = set()
return [x for x in seq if x not in seen and not seen.add(x)]
def powerset(items, min_items=None, include_full_pop=True, nonempty=False):
"""
Build the powerset of a collection of items.
:param Iterable[object] items: "Pool" of all items, the population for
which to build the power set.
:param int min_items: Minimum number of individuals from the population
to allow in any given subset.
:param bool include_full_pop: Whether to include the full population in
the powerset (default True to accord with genuine definition)
:param bool nonempty: force each subset returned to be nonempty
:return list[object]: Sequence of subsets of the population, in
nondecreasing size order
:raise TypeError: if minimum item count is specified but is not an integer
:raise ValueError: if minimum item count is insufficient to guarantee
nonempty subsets
"""
if min_items is None:
min_items = 1 if nonempty else 0
else:
if not isinstance(min_items, int):
raise TypeError(
"Min items count for each subset isn't an integer: "
"{} ({})".format(min_items, type(min_items))
)
if nonempty and min_items < 1:
raise ValueError(
"When minimum item count is {}, nonempty subsets "
"cannot be guaranteed.".format(min_items)
)
# Account for iterable burn possibility; besides, collection should be
# relatively small if building the powerset.
items = list(items)
n = len(items)
if n == 0 or n < min_items:
return []
max_items = len(items) + 1 if include_full_pop else len(items)
return list(
itertools.chain.from_iterable(
itertools.combinations(items, k) for k in range(min_items, max_items)
)
)
def asciify_dict(data):
""" https://gist.github.com/chris-hailstorm/4989643 """
def _asciify_list(lst):
ret = []
for item in lst:
if isinstance(item, unicode):
item = item.encode("utf-8")
elif isinstance(item, list):
item = _asciify_list(item)
elif isinstance(item, dict):
item = asciify_dict(item)
ret.append(item)
return ret
if sys.version_info[0] >= 3:
warn("This operation is supported only in Python2", UserWarning)
return data
ret = {}
for key, value in data.iteritems():
if isinstance(key, unicode):
key = key.encode("utf-8")
if isinstance(value, unicode):
value = value.encode("utf-8")
elif isinstance(value, list):
value = _asciify_list(value)
elif isinstance(value, dict):
value = asciify_dict(value)
ret[key] = value
return ret