In [1]:
# 1.1 Unpacking a sequence into separate variables
p = (4, 5)
x, y = p  # the number of targets must match the number of elements in p

In [2]:
# Show the two values unpacked from p in the previous cell.
print(x, y)

4 5


In [3]:
# 1.2 Unpacking an iterable of arbitrary length into several variables
record = ('Dave', 'dave@example.com', '773-555-1212', '847-555-1212')
name, email, *phone_numbers = record  # the starred target collects the remaining items into a list
print(phone_numbers)

['773-555-1212', '847-555-1212']


In [4]:
# The starred target may sit in the middle: first field, last two fields, and everything between in `fields`.
line = 'nobody:*:-2:-2:Unprivileged User:/var/empty:/usr/bin/false'
uname, *fields, homedir, sh = line.split(':')
print(fields)

['*', '-2', '-2', 'Unprivileged User']


In [5]:
# Use a conventional throwaway name such as _ or ign (ignore) to unpack elements and then discard them
record = ('ACME', 50, 123.45, (12, 18, 2012))
name, *_, (*_, year) = record  # nested pattern: keep the first field and the last element of the trailing tuple
print(name, year)

ACME 2012


In [6]:
# Split a list into its first element and a list of the rest.
items = [1, 10, 7, 4, 5, 9]
head, *tail = items
print(tail)

[10, 7, 4, 5, 9]


In [8]:
# 1.3 Keeping the last N items
from collections import deque
q = deque(maxlen=3)  # bounded deque: once full, each append discards the oldest item
q.append(1)
q.append(2)
q.append(3)
q.append(4)
print(q)

deque([2, 3, 4], maxlen=3)


In [9]:
# Bare expression: the notebook displays the deque's repr as the cell output.
q

deque([2, 3, 4])

In [10]:
# 1.4 Finding the largest / smallest k items
import heapq
nums = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
print(heapq.nlargest(3, nums))
print(heapq.nsmallest(3, nums))

[42, 37, 23]
[-4, 1, 2]


In [14]:
import pprint
portfolio = [
    {'name': 'IBM', 'shares': 100, 'price': 91.1},
    {'name': 'AAPL', 'shares': 50, 'price': 543.22},
    {'name': 'FB', 'shares': 200, 'price': 21.09},
    {'name': 'HPQ', 'shares': 35, 'price': 31.75},
    {'name': 'YHOO', 'shares': 45, 'price': 16.35},
    {'name': 'ACME', 'shares': 75, 'price': 115.65}
]
# Signature: heapq.nlargest(n, iterable, key=None); heapq.nsmallest takes the same arguments.
# The key function picks the field to rank by, here each record's 'price'.
cheap = heapq.nsmallest(3, portfolio, key=lambda s: s['price'])
pprint.pprint(cheap)
expensive = heapq.nlargest(3, portfolio, key=lambda s: s['price'])
pprint.pprint(expensive)

[{'name': 'YHOO', 'price': 16.35, 'shares': 45},
 {'name': 'FB', 'price': 21.09, 'shares': 200},
 {'name': 'HPQ', 'price': 31.75, 'shares': 35}]
[{'name': 'AAPL', 'price': 543.22, 'shares': 50},
 {'name': 'ACME', 'price': 115.65, 'shares': 75},
 {'name': 'IBM', 'price': 91.1, 'shares': 100}]


In [15]:
# 1.5 实现一个优先级队列
import heapq

class PriorityQueue:
    """Priority queue that always pops the item with the highest priority.

    Items pushed with the same priority are returned in the order in which
    they were pushed.
    """

    def __init__(self):
        # Heap of (-priority, index, item) entries managed through heapq.
        self._queue = []
        # Running insertion counter used as the tie-breaker between entries.
        self._index = 0

    def push(self, item, priority):
        """Insert ``item`` with the given ``priority`` (larger pops sooner)."""
        # heapq is a min-heap, so the priority is negated to pop the largest
        # first. The index orders entries of equal priority by insertion and
        # ensures the items themselves never have to be compared.
        entry = (-priority, self._index, item)
        heapq.heappush(self._queue, entry)
        self._index += 1

    def pop(self):
        """Remove and return the item with the highest priority."""
        _neg_priority, _order, item = heapq.heappop(self._queue)
        return item


class Item:
    """Minimal named object used to exercise the priority queue."""

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        # Render as Item(<repr of name>), e.g. Item('foo').
        return f'Item({self.name!r})'

In [17]:
# Push four items with different priorities; the first pop returns the priority-5 item.
q = PriorityQueue()
q.push(Item('foo'), 1)
q.push(Item('foo'), 3)
q.push(Item('oo'), 5)
q.push(Item('foof'), 2)
q.pop()

Item('oo')

In [18]:
# Next highest remaining priority (3).
q.pop()

Item('foo')

In [19]:
# Next highest remaining priority (2).
q.pop()

Item('foof')

In [20]:
# 1.6 Mapping a key to multiple values in a dictionary (multidict)
from collections import defaultdict

d = defaultdict(list)  # a missing key is created with an empty list on first access
d['a'].append(1)
d['a'].append(2)
d['b'].append(4)
print(d)

defaultdict(<class 'list'>, {'a': [1, 2], 'b': [4]})


In [21]:
d = {} # a plain dictionary
# setdefault() returns the existing list for the key, or stores and returns the new [] if the key is absent
d.setdefault('a', []).append(1)
d.setdefault('a', []).append(2)
d.setdefault('b', []).append(4)
print(d)

{'a': [1, 2], 'b': [4]}


In [22]:
# Building a multidict by hand: each key has to be initialised before its first append.
pairs = [('a',3),('a',4),('b',5)]
d = {}
for key, value in pairs:
    if key not in d:
        d[key] = []
    d[key].append(value)
d

{'a': [3, 4], 'b': [5]}

In [23]:
# Same grouping with defaultdict: no explicit initialisation of missing keys is needed.
d = defaultdict(list)
for key, value in pairs:
    d[key].append(value)
d

defaultdict(list, {'a': [3, 4], 'b': [5]})

In [24]:
# 1.7 字典排序 collections.OrderedDict [PASS]

In [25]:
# 1.8 Calculating with dictionaries [minimum, maximum, sorting]
prices = {
    'ACME': 45.23,
    'AAPL': 612.78,
    'IBM': 205.55,
    'HPQ': 37.20,
    'FB': 10.75
}
# zip() yields (value, key) tuples, so min() compares prices first and returns the name with it
min_price = min(zip(prices.values(), prices.keys()))
min_price

(10.75, 'FB')

In [26]:
# Same (value, key) trick for the highest price.
max_price = max(zip(prices.values(), prices.keys()))
max_price

(612.78, 'AAPL')

In [27]:
# Sorting the (value, key) tuples orders the entries by ascending price.
prices_sorted = sorted(zip(prices.values(), prices.keys()))
prices_sorted

[(10.75, 'FB'),
 (37.2, 'HPQ'),
 (45.23, 'ACME'),
 (205.55, 'IBM'),
 (612.78, 'AAPL')]

In [28]:
# Alternative: min() over the keys with a key function returns only the name, not the price.
min(prices, key=lambda k: prices[k])

'FB'

In [29]:
# When several entries share the same value, the key decides the order of the (value, key) tuples.
prices = { 'AAA' : 45.23, 'ZZZ': 45.23 }
prices_sorted = sorted(zip(prices.values(), prices.keys()))
prices_sorted

[(45.23, 'AAA'), (45.23, 'ZZZ')]

In [33]:
# 1.9 Finding what two dictionaries have in common
a = {'x' : 1, 'y' : 2, 'z' : 3}
b = {'w' : 10, 'x' : 11, 'y' : 2}
# keys() and items() views support set operations.
#     values() does not, partly because a values view cannot guarantee that all values are distinct, which would make some set operations problematic
print("key& ", a.keys() & b.keys()) # { 'x', 'y' }
print("key- ", a.keys() - b.keys()) # { 'z' }
print("item& ", a.items() & b.items()) # { ('y', 2) }

key&  {'x', 'y'}
key-  {'z'}
item&  {('y', 2)}


In [35]:
# Useful for altering or filtering dictionary contents: copy `a` without the listed keys
c = {key:a[key] for key in a.keys() - {'z', 'w'}}
c

{'x': 1, 'y': 2}

In [36]:
# 1.10 Removing duplicates from a sequence while maintaining order
# How do you eliminate duplicate values in a sequence while keeping the element order?
# Version for hashable items
def dedupe(items):
    """Yield each distinct item of ``items`` once, in first-seen order.

    Items must be hashable because already-seen values are tracked in a set.
    """
    seen = set()
    for candidate in items:
        if candidate in seen:
            continue
        yield candidate
        seen.add(candidate)
# Duplicates are dropped while the first-seen order is kept.
a = [1, 5, 2, 1, 9, 1, 5, 10]
list(dedupe(a))

[1, 5, 2, 9, 10]

In [46]:
# Version that also handles unhashable items
def dedupe(items, key=None):
    """Yield each distinct item of ``items`` once, in first-seen order.

    ``key``, when given, is called on every item and must return a hashable
    value; two items count as duplicates when their key values are equal.
    Without ``key`` the items themselves are compared and must be hashable.
    """
    seen = set()
    for item in items:
        # key(item) converts the item into the hashable value used for comparison.
        marker = key(item) if key is not None else item
        if marker in seen:
            continue
        yield item
        seen.add(marker)
# Dicts are unhashable, so deduplicate on a hashable (x, y) tuple derived from each one.
a = [ {'x':1, 'y':2}, {'x':1, 'y':3}, {'x':1, 'y':2}, {'x':2, 'y':4}]
list(dedupe(a, key=lambda d: (d['x'], d['y'])))

[{'x': 1, 'y': 2}, {'x': 1, 'y': 3}, {'x': 2, 'y': 4}]

In [40]:
# Deduplicate on the 'x' value alone.
list(dedupe(a, key=lambda d: d['x']))

[{'x': 1, 'y': 2}, {'x': 2, 'y': 4}]

In [47]:
# To eliminate duplicates when element order does not matter, build a set
a = [1, 5, 2, 1, 9, 1, 5, 10]
set(a)

{1, 2, 5, 9, 10}

In [48]:
# 1.11 Naming a slice
######    0123456789012345678901234567890123456789012345678901234567890'
record = '....................100 .......513.25 ..........'
cost = int(record[20:23]) * float(record[31:37])  # hard-coded slice indices are hard to read
cost

51325.0

In [52]:
SHARES = slice(20, 23)  # named slice
PRICE = slice(31, 37)
# record[SHARES] == record[20:23]
cost = int(record[SHARES]) * float(record[PRICE])
cost

51325.0

In [53]:
# A slice object exposes its a.start, a.stop and a.step attributes (step is None when it was not given)
SHARES.start, SHARES.stop, SHARES.step

(20, 23, None)

In [55]:
# 1.12 Determining the most frequently occurring items in a sequence
# collections.Counter
words = [
    'look', 'into', 'my', 'eyes', 'look', 'into', 'my', 'eyes',
    'the', 'eyes', 'the', 'eyes', 'the', 'eyes', 'not', 'around', 'the',
    'eyes', "don't", 'look', 'around', 'the', 'eyes', 'look', 'into',
    'my', 'eyes', "you're", 'under'
]
from collections import Counter
word_counts = Counter(words)
print("word_counts=", word_counts)
top_three = word_counts.most_common(3)  # list of (word, count) pairs, highest count first
print(top_three)

word_counts= Counter({'eyes': 8, 'the': 5, 'look': 4, 'into': 3, 'my': 3, 'around': 2, 'not': 1, "don't": 1, "you're": 1, 'under': 1})
[('eyes', 8), ('the', 5), ('look', 4)]


In [56]:
# update() adds the new words' counts to the existing tallies in place,
# so re-running this cell counts `morewords` again.
morewords = ['why','are','you','not','looking','in','my','eyes']
word_counts.update(morewords)
word_counts

Counter({'look': 4,
         'into': 3,
         'my': 4,
         'eyes': 9,
         'the': 5,
         'not': 2,
         'around': 2,
         "don't": 1,
         "you're": 1,
         'under': 1,
         'why': 1,
         'are': 1,
         'you': 1,
         'looking': 1,
         'in': 1})

In [58]:
# Counters support + and -; subtraction keeps only the entries whose count stays positive.
a = Counter(words)
b = Counter(morewords)
print(a+b)
print(a-b)

Counter({'eyes': 9, 'the': 5, 'look': 4, 'my': 4, 'into': 3, 'not': 2, 'around': 2, "don't": 1, "you're": 1, 'under': 1, 'why': 1, 'are': 1, 'you': 1, 'looking': 1, 'in': 1})
Counter({'eyes': 7, 'the': 5, 'look': 4, 'into': 3, 'my': 2, 'around': 2, "don't": 1, "you're": 1, 'under': 1})


In [61]:
# 1.13 Sorting a list of dictionaries by a common key
# operator.itemgetter
rows = [
    {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003},
    {'fname': 'David', 'lname': 'Beazley', 'uid': 1002},
    {'fname': 'John', 'lname': 'Cleese', 'uid': 1001},
    {'fname': 'Big', 'lname': 'Jones', 'uid': 1004}
]
from operator import itemgetter
# The itemgetter() form tends to run slightly faster than the equivalent lambda
rows_by_fname = sorted(rows, key=itemgetter('fname'))  # sorted(rows, key=lambda r: r['fname'])
rows_by_uid = sorted(rows, key=itemgetter('uid'))
print(rows_by_fname)
print()
print(rows_by_uid)
print(min(rows, key=itemgetter('uid')))

[{'fname': 'Big', 'lname': 'Jones', 'uid': 1004}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}, {'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}]

[{'fname': 'John', 'lname': 'Cleese', 'uid': 1001}, {'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}, {'fname': 'Big', 'lname': 'Jones', 'uid': 1004}]
{'fname': 'John', 'lname': 'Cleese', 'uid': 1001}


In [62]:
# 1.14 排序不支持原生比较的对象
# 你想排序类型相同的对象，但是他们不支持原生的比较操作?
# operator.attrgetter() [PASS]

In [63]:
# 1.15 Grouping records together based on a field
# itertools.groupby()
rows = [
    {'address': '5412 N CLARK', 'date': '07/01/2012'},
    {'address': '5148 N CLARK', 'date': '07/04/2012'},
    {'address': '5800 E 58TH', 'date': '07/02/2012'},
    {'address': '2122 N CLARK', 'date': '07/03/2012'},
    {'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'},
    {'address': '1060 W ADDISON', 'date': '07/02/2012'},
    {'address': '4801 N BROADWAY', 'date': '07/01/2012'},
    {'address': '1039 W GRANVILLE', 'date': '07/04/2012'},
]
from operator import itemgetter
from itertools import groupby

# Sort by the desired field first: groupby() only groups consecutive items with equal keys
rows.sort(key=itemgetter('date'))
# Iterate in groups
for date, items in groupby(rows, key=itemgetter('date')):
    print(date)
    for i in items:
        print(' ', i)

07/01/2012
  {'address': '5412 N CLARK', 'date': '07/01/2012'}
  {'address': '4801 N BROADWAY', 'date': '07/01/2012'}
07/02/2012
  {'address': '5800 E 58TH', 'date': '07/02/2012'}
  {'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'}
  {'address': '1060 W ADDISON', 'date': '07/02/2012'}
07/03/2012
  {'address': '2122 N CLARK', 'date': '07/03/2012'}
07/04/2012
  {'address': '5148 N CLARK', 'date': '07/04/2012'}
  {'address': '1039 W GRANVILLE', 'date': '07/04/2012'}


In [64]:
# 1.16 Filtering sequence elements
# List comprehension
mylist = [1, 4, -5, 10, -7, 2, 3, -1]
[n for n in mylist if n > 0]

[1, 4, 10, 2, 3]

In [65]:
# A potential downside of a list comprehension is that a very large input produces a very large result held in memory
# Generator expression: produces the filtered values one at a time as they are iterated
pos = (n for n in mylist if n > 0)
print(pos)
for x in pos:
    print(x)

<generator object <genexpr> at 0x1135bb740>
1
4
10
2
3


In [66]:
# When the filtering rule is too complex to express directly in a list comprehension or generator expression,
# use the built-in filter(function, iterable)
values = ['1', '2', '-3', '-', '4', 'N/A', '5']
def is_int(val):
    """Return True if ``int(val)`` succeeds, otherwise False.

    Values that ``int()`` rejects with ValueError (e.g. '-' or 'N/A') or with
    TypeError (e.g. None) are reported as False instead of raising, so the
    function can be used as a predicate over mixed or missing data.
    """
    try:
        int(val)  # only the success or failure of the conversion matters
    except (ValueError, TypeError):
        return False
    return True
# filter() returns an iterator, so wrap it in list() to materialise the kept values
ivals = list(filter(is_int, values))
ivals

['1', '2', '-3', '4', '5']

In [68]:
# 1.17 Extracting a subset of a dictionary
prices = {
    'ACME': 45.23,
    'AAPL': 612.78,
    'IBM': 205.55,
    'HPQ': 37.20,
    'FB': 10.75
}
tech_names = { 'AAPL', 'IBM', 'HPQ', 'MSFT' }
# Keep only the entries whose key appears in tech_names
p2 = {key: value for key, value in prices.items() if key in tech_names}
p2

{'AAPL': 612.78, 'IBM': 205.55, 'HPQ': 37.2}

In [70]:
# Alternative: intersect the key view with tech_names, then look each surviving key up again
p2_better = { key:prices[key] for key in prices.keys() & tech_names }
p2_better

{'HPQ': 37.2, 'AAPL': 612.78, 'IBM': 205.55}

In [73]:
# 1.18 Mapping names to sequence elements
from collections import namedtuple
Subscriber = namedtuple('Subscriber', ['addr', 'joined'])
sub = Subscriber('jonesy@example.com', '2012-10-19')
print(sub.addr, sub.joined)
addr, joined = sub  # a namedtuple is interchangeable with a tuple and supports the usual tuple operations such as indexing and unpacking
print(addr, joined)

jonesy@example.com 2012-10-19
jonesy@example.com 2012-10-19


In [74]:
# 1.19 Transforming and reducing data at the same time
nums = [1, 2, 3, 4, 5]
s = sum(x * x for x in nums)  # generator expression passed directly as the sole argument
s

55

In [76]:
# Determine if any .py files exist in a directory
# (the result depends on the directory the notebook is run from)
import os
files = os.listdir('.')
if any(name.endswith('.py') for name in files):
    print('There be python!')
else:
    print('Sorry, no python.')

There be python!


In [77]:
# Convert every field to str on the fly so that join() accepts the mixed-type tuple.
s = ('ACME', 50, 123.45)
print(','.join(str(x) for x in s))

ACME,50,123.45


In [79]:
portfolio = [
    {'name':'GOOG', 'shares': 50},
    {'name':'YHOO', 'shares': 75},
    {'name':'AOL', 'shares': 20},
    {'name':'SCOX', 'shares': 65}
]
# Reducing over the extracted values yields just the smallest share count
min_shares = min(s['shares'] for s in portfolio)
min_shares

20

In [80]:
# With key=, min() returns the whole record that has the smallest share count
min_shares = min(portfolio, key=lambda s: s['shares'])
min_shares

{'name': 'AOL', 'shares': 20}

In [83]:
# 1.20 Combining multiple dictionaries or mappings
# collections.ChainMap works on the original dictionaries; it does not build a new merged dict
a = {'x': 1, 'z': 3 }
b = {'y': 2, 'z': 4 }
from collections import ChainMap
c = ChainMap(a,b)
print(c)
# Updates and deletions always act on the first mapping in the chain
c['z'] = 10
print(c)
del c['x']
print(c)
# 'y' exists only in the second mapping, so deleting it through the ChainMap fails.
# The expected KeyError is caught and displayed so the notebook still runs to
# completion under Restart & Run All.
try:
    del c['y']
except KeyError as exc:
    print('KeyError:', exc)

ChainMap({'x': 1, 'z': 3}, {'y': 2, 'z': 4})
ChainMap({'x': 1, 'z': 10}, {'y': 2, 'z': 4})
ChainMap({'z': 10}, {'y': 2, 'z': 4})


KeyError: "Key not found in the first mapping: 'y'"