### 1.2 解压可迭代对象赋值给多个变量

In [1]:
def drop_first_last(grades):
    """Return the mean of ``grades`` ignoring the first and the last entry.

    Args:
        grades: Any iterable of numbers with at least three entries.

    Returns:
        The arithmetic mean of everything between the first and last entry.

    Raises:
        ValueError: If ``grades`` has fewer than two entries (unpacking fails)
            or exactly two entries (nothing is left to average;
            ``statistics.StatisticsError`` is a ``ValueError`` subclass).
    """
    # Local import keeps this cell self-contained; the original called an
    # ``avg`` helper that is not defined in this file.
    from statistics import mean

    # Star-unpacking collects everything between the two end points as a list.
    _first, *middle, _last = grades
    return mean(middle)

In [2]:
record = ('Dave', 'dave@example.com', '773-555-1212', '847-555-1212')

In [3]:
name, email, *phone_numbers = record

In [5]:
print(name,email,phone_numbers)

Dave dave@example.com ['773-555-1212', '847-555-1212']


## phone_numbers 的类型为list，这一点需要注意 

In [6]:
records = [
    ('foo',1,2),
    ('bar','hello'),
    ('foo',3,4)
]

In [7]:
def do_foo(x, y):
    """Print a 'foo' record: the tag followed by its two payload values."""
    fields = ('foo', x, y)
    print(*fields)

In [8]:
def do_bar(s):
    """Print a 'bar' record: the tag followed by its single payload value."""
    fields = ("bar", s)
    print(*fields)

In [9]:
for tag, *args in records:
    if tag == 'foo':
        do_foo(*args)
    elif tag == 'bar':
        do_bar(*args)

foo 1 2
bar hello
foo 3 4


In [10]:
line = 'nobody:*:-2:-2:Unprivileged User:/var/empty:/usr/bin/false'

In [11]:
uname, *fields, home_dir, sh = line.split(':')
print(uname, home_dir, sh)

nobody /var/empty /usr/bin/false


In [12]:
items = [1,2,3,4,5,6]

In [13]:
head, *tail = items

In [14]:
tail

[2, 3, 4, 5, 6]

In [15]:
tail = [1,2,3]

In [16]:
items

[1, 2, 3, 4, 5, 6]

In [17]:
new_items = items * 2

In [19]:
print(new_items)

[1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6]


In [20]:
def sum(items):
    """Add up the elements of ``items`` (star-unpacking demo).

    NOTE: this deliberately keeps the name ``sum`` so existing calls keep
    working, but it shadows the builtin of the same name for every cell that
    runs afterwards.

    The elements are combined right-to-left, i.e. ``a + (b + (c + ...))``,
    exactly like the recursive ``head + sum(tail)`` formulation, but with a
    loop so long inputs do not hit the recursion limit and the tail is not
    copied at every step.

    Args:
        items: Any iterable (lists, tuples, generators, ...) whose elements
            support ``+`` with each other.

    Returns:
        The combined value, or ``0`` when ``items`` is empty (matching the
        builtin ``sum``).
    """
    values = list(items)
    if not values:
        return 0
    # Split off the last element as the starting value of the right fold.
    *init, total = values
    for value in reversed(init):
        total = value + total
    return total

In [21]:
sum([1,2,3,4,5,6])

21

##  1.3 保留最后 N 个元素

In [22]:
from collections import deque

In [24]:
def search(lines, pattern, history=5):
    """Yield every line containing ``pattern`` together with its preceding lines.

    Args:
        lines: Iterable of strings to scan.
        pattern: Substring to look for (tested with ``in``).
        history: Maximum number of preceding lines to keep; passed to
            ``deque(maxlen=...)``.

    Yields:
        ``(line, prev_lines)`` tuples, where ``prev_lines`` is a deque holding
        up to ``history`` lines seen immediately before ``line``.
    """
    prev_lines = deque(maxlen=history)
    for line in lines:
        if pattern in line:
            # Hand out a snapshot (same type, same maxlen) rather than the
            # live window: the window keeps changing as the scan continues,
            # which would silently rewrite results the caller has collected.
            yield line, prev_lines.copy()
        prev_lines.append(line)

### 在写查询元素的代码时，可以使用包含yield表达式的生成器函数，从而使得搜索过程的代码和使用搜索结果的代码 实现解耦。

## 1.4 查找最大或最小的 N 个元素 

In [25]:
import heapq

In [27]:
help(heapq)

Help on module heapq:

NAME
    heapq - Heap queue algorithm (a.k.a. priority queue).

DESCRIPTION
    Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for
    all k, counting elements from 0.  For the sake of comparison,
    non-existing elements are considered to be infinite.  The interesting
    property of a heap is that a[0] is always its smallest element.
    
    Usage:
    
    heap = []            # creates an empty heap
    heappush(heap, item) # pushes a new item on the heap
    item = heappop(heap) # pops the smallest item from the heap
    item = heap[0]       # smallest item on the heap without popping it
    heapify(x)           # transforms list into a heap, in-place, in linear time
    item = heapreplace(heap, item) # pops and returns smallest item, and adds
                                   # new item; the heap size is unchanged
    
    Our API differs from textbook heap algorithms as follows:
    
    - We use 0-based indexing.  This makes the relat

In [28]:
nums = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]

In [29]:
print(heapq.nlargest(n=3,iterable=nums))

[42, 37, 23]


In [30]:
print(heapq.nsmallest(n=3,iterable=nums))

[-4, 1, 2]


In [31]:
portfolio = [
    {'name': 'IBM', 'shares': 100, 'price': 91.1},
    {'name': 'AAPL', 'shares': 50, 'price': 543.22},
    {'name': 'FB', 'shares': 200, 'price': 21.09},
    {'name': 'HPQ', 'shares': 35, 'price': 31.75},
    {'name': 'YHOO', 'shares': 45, 'price': 16.35},
    {'name': 'ACME', 'shares': 75, 'price': 115.65}
]

In [33]:
print(heapq.nsmallest(3, portfolio, key=lambda s: s['price']))
print(heapq.nlargest(3, portfolio, key=lambda s: s['price']))

[{'name': 'YHOO', 'shares': 45, 'price': 16.35}, {'name': 'FB', 'shares': 200, 'price': 21.09}, {'name': 'HPQ', 'shares': 35, 'price': 31.75}]
[{'name': 'AAPL', 'shares': 50, 'price': 543.22}, {'name': 'ACME', 'shares': 75, 'price': 115.65}, {'name': 'IBM', 'shares': 100, 'price': 91.1}]


## 1.5  实现一个优先级队列

In [23]:
import heapq

In [26]:
## make a PriorityQueue class
class PriorityQueue:
    """Heap-backed queue that pops the item with the highest priority first.

    Items pushed with equal priority come back in insertion order.
    """

    def __init__(self):
        # Heap of (-priority, insertion index, item) tuples.
        self._queue = []
        # Running insertion counter, used as the tie-breaker in heap entries.
        self._index = 0

    def push(self, item, priority):
        """Add ``item`` to the queue with the given ``priority``."""
        # heapq is a min-heap, so the priority is negated to pop the largest
        # first; the index breaks ties before the items are ever compared.
        entry = (-priority, self._index, item)
        heapq.heappush(self._queue, entry)
        self._index += 1

    def pop(self):
        """Remove and return the item with the highest priority."""
        *_, item = heapq.heappop(self._queue)
        return item
    

In [27]:
# Create the queue before using it; without this line the cell fails with
# NameError because ``pq`` is never bound anywhere else.
pq = PriorityQueue()
pq.push('sandy', 2)
pq.push('andy',1)
pq.push('frank',5)
pq.push('addison',2)
# Highest priority comes out first; 'sandy' and 'addison' share priority 2
# and are returned in the order they were pushed.
print(pq.pop())
print(pq.pop())
print(pq.pop())
print(pq.pop())


frank
sandy
addison
andy


## 1.6 字典中的键映射多个值

In [28]:
from collections import defaultdict

In [30]:
d = defaultdict(list) ## 自动使用list来作为所有key的value

In [31]:
d['a'].append(1)

In [32]:
d['a'].append(2)

In [33]:
d['b'].append(4)

In [35]:
# d = defaultdict(list)
# for key, value in pairs:
#     d[key].append(value)

## 1.7 字典排序

In [36]:
from collections import OrderedDict

In [37]:
d = OrderedDict()

In [38]:
d['foo'] = 1
d['bar'] = 2
d['spam'] = 3
d['grok'] = 4

for key in d:
    print(key, d[key])

foo 1
bar 2
spam 3
grok 4


In [39]:
import json

In [41]:
json.dumps(d) ## 用于严格控制顺序的序列化，这会变得很方便

'{"foo": 1, "bar": 2, "spam": 3, "grok": 4}'

### OrderedDict 内部维护着一个链表，用来记录 key 插入的顺序，因此 OrderedDict 相比于普通 dict，大约要占用 2 倍的内存空间。

## 1.8 字典的运算

In [42]:
prices = {
    'ACME': 45.23,
    'AAPL': 612.78,
    'IBM': 205.55,
    'HPQ': 37.20,
    'FB': 10.75
}

### 对字典进行计算操作，通常是先要将key和value反过来，这样sorted之类的函数就可以通过迭代获取到value

In [47]:
for item in zip(prices.values(), prices.keys()):
    print(item)

(45.23, 'ACME')
(612.78, 'AAPL')
(205.55, 'IBM')
(37.2, 'HPQ')
(10.75, 'FB')


In [49]:
min_price = min(zip(prices.values(), prices.keys()))
max_price = max(zip(prices.values(), prices.keys()))

print(min_price)
print(max_price)

(10.75, 'FB')
(612.78, 'AAPL')


In [51]:
prices_sorted = sorted(zip(prices.values(), prices.keys()))
print(type(prices_sorted))
print(prices_sorted)

<class 'list'>
[(10.75, 'FB'), (37.2, 'HPQ'), (45.23, 'ACME'), (205.55, 'IBM'), (612.78, 'AAPL')]


### 需要注意的是，zip创建的是不走回头路，也就是只能访问一次的迭代器。下面的使用方法是错误的：
prices_and_names = zip(prices.values(), prices.keys())
print(min(prices_and_names)) # OK
print(max(prices_and_names)) # ValueError: max() arg is an empty sequence

In [55]:
def func(key):
    """Sort key: return the value stored under ``key`` in the ``prices`` dict."""
    price = prices[key]
    return price

print(sorted(prices,key=func)) ## 只显示key，这不是我们想要的。

['FB', 'HPQ', 'ACME', 'IBM', 'AAPL']


In [57]:
print(type(prices.keys()))

<class 'dict_keys'>


## 1.9 查找两字典的相同点

### 字典的keys()方法和items()方法返回的视图都支持集合的交、并、差操作（values()方法不支持，因为值不保证唯一）。因此可以很方便地求两个字典的交并差。

In [58]:
## 构造一个新的字典
a = {
    'x' : 1,
    'y' : 2,
    'z' : 3
}

b = {
    'w' : 10,
    'x' : 11,
    'y' : 2
}

In [59]:
a.keys() & b.keys()

{'x', 'y'}

In [62]:
c = {key:a[key] for key in a.keys() - b.keys()}

In [63]:
print(c)

{'z': 3}


## 1.10 删除序列相同元素并保持顺序

In [64]:
def dedupe(items, key=None):
    """Yield the items of ``items`` in order, skipping ones already seen.

    Args:
        items: Iterable to de-duplicate.
        key: Optional one-argument function that maps an item to a hashable
            value used for the "already seen" check. Leave it as ``None``
            when the items themselves are hashable; supply it for unhashable
            items such as dicts, e.g. ``key=lambda d: (d['x'], d['y'])``.

    Yields:
        The first occurrence of every distinct item, in original order.
    """
    seen = set()
    for item in items:
        marker = item if key is None else key(item)
        if marker not in seen:
            yield item
            seen.add(marker)

In [67]:
for item in dedupe([1,1,2,3,2,12,3,4,3,2,1,2,44,5,5,4,2,2,4,5,56,677,78,8,6,5,54,6,7,]):
    print(item)
    
## 如果item是hashable的，上面的方法是ok的；如果不是hashable的（例如dict），就会有问题，需要先对item做一些处理

1
2
3
12
4
44
5
56
677
78
8
6
54
7


### 如果item是hashable的，上面的方法是ok的；如果不是hashable的（例如dict），就会有问题，需要先对item做一些处理

## 1.11 命名切片

In [69]:
## 使用slice来生成某个具体范围的切片索引
string = 'hello world'
first_part = slice(0,5)
second_part = slice(6,len(string))
print(string[first_part],'+',string[second_part])

hello + world


In [70]:
## slice创建一个切片对象，所有使用切片的地方都可以使用切片对象

In [71]:
a = slice(5,50,2)

In [72]:
a.step

2

In [73]:
a.start

5

In [74]:
a.stop

50

In [75]:
## slice.indices(size) 可以根据序列长度自动调整切片的边界
s = 'HelloWorld'
b = slice(3,400,3)
b.indices(len(s))

(3, 10, 3)

In [79]:
range_arg = b.indices(len(s))
for ch in range(*range_arg):
    print(s[ch])

l
o
d


## 1.12 序列中出现次数最多的元素

### collections.Counter

In [80]:
words = [
    'look', 'into', 'my', 'eyes', 'look', 'into', 'my', 'eyes',
    'the', 'eyes', 'the', 'eyes', 'the', 'eyes', 'not', 'around', 'the',
    'eyes', "don't", 'look', 'around', 'the', 'eyes', 'look', 'into',
    'my', 'eyes', "you're", 'under'
]

In [81]:
from collections import Counter

In [82]:
word_counts = Counter(words)

In [83]:
word_counts.most_common(3)

[('eyes', 8), ('the', 5), ('look', 4)]

In [84]:
## Counter 本质上是一个字典，将输入的元素与该元素出现的次数建立映射关系，因此要求输入的元素必须是hashable的，
## 通常也就是要求输入的元素是不可变对象


In [85]:
morewords = ['why','are','you','not','looking','in','my','eyes']

In [86]:
word_counts.update(morewords)

In [87]:
word_counts.most_common(4)

[('eyes', 9), ('the', 5), ('look', 4), ('my', 4)]

### 当需要做计数操作的时候，应该首选Counter

## 1.13 通过某个关键字排序一个字典列表

In [91]:
from operator import itemgetter
#from operator import itemgetter

In [92]:
rows = [
    {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003},
    {'fname': 'David', 'lname': 'Beazley', 'uid': 1002},
    {'fname': 'John', 'lname': 'Cleese', 'uid': 1001},
    {'fname': 'Big', 'lname': 'Jones', 'uid': 1004}
]

In [93]:
rows_by_fname = sorted(rows, key=itemgetter('fname'))

In [94]:
print(rows_by_fname)

[{'fname': 'Big', 'lname': 'Jones', 'uid': 1004}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}, {'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}]


In [95]:
### itemgetter也支持多个keys, itemgetter比lambda表达式，运行速度快。
rows_by_lfname = sorted(rows, key=itemgetter('lname','fname'))
print(rows_by_lfname)

[{'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}, {'fname': 'Big', 'lname': 'Jones', 'uid': 1004}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}]


In [97]:
print(min(rows, key=itemgetter('uid')))
print(max(rows, key=itemgetter('uid')))

{'fname': 'John', 'lname': 'Cleese', 'uid': 1001}
{'fname': 'Big', 'lname': 'Jones', 'uid': 1004}


## 1.14 排序不支持原生比较的对象

In [104]:
class User:
    """Small record holding a ``user_id``; it defines no comparison methods."""

    def __init__(self, user_id):
        self.user_id = user_id

    def __repr__(self):
        """Return the ``User: <id>`` form; ``%d`` formatting needs a number."""
        uid = self.user_id
        return 'User: %d' % uid
    

In [105]:
def sort_notcompare():
    """Print three users as created, then sorted by ``user_id`` via a lambda key."""
    users = [User(uid) for uid in (23, 3, 99)]
    print(users)
    by_id = sorted(users, key=lambda user: user.user_id)
    print(by_id)

In [106]:
sort_notcompare()

[User: 23, User: 3, User: 99]
[User: 3, User: 23, User: 99]


In [107]:
## attrgetter 和 itemgetter很类似，只不过区别在于前者是获取类里的属性
from operator import attrgetter

In [108]:
def sort_notcompare2():
    """Print three users as created, then sorted by ``user_id`` via ``attrgetter``."""
    users = [User(uid) for uid in (23, 3, 99)]
    print(users)
    # attrgetter builds the key function that reads ``user_id`` from each user.
    by_id = sorted(users, key=attrgetter('user_id'))
    print(by_id)

In [109]:
sort_notcompare2()

[User: 23, User: 3, User: 99]
[User: 3, User: 23, User: 99]


## 1.15 通过某个字段将记录分组

In [122]:
rows = [
    {'address': '5412 N CLARK', 'date': '07/01/2012'},
    {'address': '5148 N CLARK', 'date': '07/04/2012'},
    {'address': '5800 E 58TH', 'date': '07/02/2012'},
    {'address': '2122 N CLARK', 'date': '07/03/2012'},
    {'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'},
    {'address': '1060 W ADDISON', 'date': '07/02/2012'},
    {'address': '4801 N BROADWAY', 'date': '07/01/2012'},
    {'address': '1039 W GRANVILLE', 'date': '07/04/2012'},
]

In [123]:
from operator import itemgetter
from itertools import groupby

In [124]:
rows.sort(key=itemgetter('date'))

In [125]:
for date, items in groupby(rows, key=itemgetter('date')):
    print(date)
    for i in items:
        print(' ',i)

07/01/2012
  {'address': '5412 N CLARK', 'date': '07/01/2012'}
  {'address': '4801 N BROADWAY', 'date': '07/01/2012'}
07/02/2012
  {'address': '5800 E 58TH', 'date': '07/02/2012'}
  {'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'}
  {'address': '1060 W ADDISON', 'date': '07/02/2012'}
07/03/2012
  {'address': '2122 N CLARK', 'date': '07/03/2012'}
07/04/2012
  {'address': '5148 N CLARK', 'date': '07/04/2012'}
  {'address': '1039 W GRANVILLE', 'date': '07/04/2012'}


### groupby的一个非常重要的步骤，是根据指定的字段将数据进行事先排序。因为groupby仅仅检查连续的元素，
### 如果事先并没有排序完成的话，分组函数将得不到想要的结果！

### 或者还有一个方法，就是用defaultdict

In [128]:
from collections import defaultdict

In [129]:
d = defaultdict(list)
for item in rows:
    d[item['date']].append(item)

In [132]:
for r in d['07/01/2012']:
    print(r)

{'address': '5412 N CLARK', 'date': '07/01/2012'}
{'address': '4801 N BROADWAY', 'date': '07/01/2012'}


## 1.16 过滤序列元素

In [133]:
mylist = [1, 4, -5, 10, -7, 2, 3, -1]

In [143]:
pos = (n for n in mylist if n > 0)

In [144]:
for x in pos:
    print(x)

1
4
10
2
3


In [149]:
clip_neg = [n  if n > 0 else 0 for n in mylist ]
print(clip_neg)

[1, 4, 0, 10, 0, 2, 3, 0]


### 序列过滤问题，首先应该想到用列表生成式或者generator，其次是filter，另外还可以用itertools.compress来做，例如：

In [152]:
addresses = [
    '5412 N CLARK',
    '5148 N CLARK',
    '5800 E 58TH',
    '2122 N CLARK',
    '5645 N RAVENSWOOD',
    '1060 W ADDISON',
    '4801 N BROADWAY',
    '1039 W GRANVILLE',
]
counts = [ 0, 3, 10, 4, 1, 7, 6, 1]

In [153]:
bool_list = [ n > 5 for n in counts ]

In [154]:
print(bool_list)

[False, False, True, False, False, True, True, False]


In [159]:
from itertools import compress
for i in compress(addresses,bool_list):
    print(i)


5800 E 58TH
1060 W ADDISON
4801 N BROADWAY


## 1.17 从字典中提取子集

In [160]:
## 创建字典的任务，最好用字典推导来完成。
prices = {
    'ACME': 45.23,
    'AAPL': 612.78,
    'IBM': 205.55,
    'HPQ': 37.20,
    'FB': 10.75
}
p1 = {key:value for key,value in prices.items() if value > 200}

In [161]:
tech_names = {'AAPL', 'IBM', 'HPQ', 'MSFT'}
p2 = {key:value for key, value in prices.items() if key in tech_names}

In [172]:
print(p1)
print(p2)

{'AAPL': 612.78, 'IBM': 205.55}
{'AAPL': 612.78, 'IBM': 205.55, 'HPQ': 37.2}


## 1.18 映射名称到序列元素

In [173]:
from collections import namedtuple

In [174]:
suscriber = namedtuple('Suscriber',['addr','joined'])

In [175]:
sub = suscriber('jonesy@example.com', '2012-10-19')

In [176]:
sub

Suscriber(addr='jonesy@example.com', joined='2012-10-19')

In [183]:
help(sub._make)

Help on method _make:

_make(iterable, new=<built-in method __new__ of type object at 0x000000006C47C0D0>, len=<built-in function len>) method of builtins.type instance
    Make a new Suscriber object from a sequence or iterable



### namedtuple 实际上是元组的子类，支持索引和解压

In [185]:
len(sub)

2

In [186]:
a, b = sub

In [187]:
a


'jonesy@example.com'

In [188]:
b

'2012-10-19'

In [189]:
from collections import namedtuple

In [190]:
stock = namedtuple('Stock',['name','shares','price'])

In [191]:
def compute_cost(records):
    """Return the total of ``shares * price`` over all ``records``.

    Each record is unpacked positionally into the module-level ``stock``
    namedtuple (fields ``name``, ``shares``, ``price``).

    Returns:
        The running total as a float; ``0.0`` for no records.
    """
    total = 0.0
    for fields in records:
        # Wrap the plain tuple in the namedtuple first so the fields can be
        # read by name instead of by positional index
        # (plain tuple as the record -> namedtuple for access).
        holding = stock(*fields)
        total = total + holding.shares * holding.price
    return total

In [192]:
compute_cost([('a',1,2),('b',2,3)])

8.0

## 1.19 转换并同时计算数据

### 一个非常优雅的方式去结合数据计算与转换，就是使用生成器表达式作为函数参数。比如，如果想计算平方和，可以像下面这样做：

In [194]:
nums = [1,2,3,4,5]
s = sum(x*x for x in nums)
print(s)

55


In [199]:
# import os
# files = os.listdir('dirname')
# if any(name.endswith('.py') for name in files):
#     print('there be a python!')
# else:
#     print("sorry, no python")

s = ('ACME',50,123.45)
print(','.join(str(x) for x in s))
#print(','.join(s)) ## wrong!

ACME,50,123.45


In [202]:
portfolio = [
    {'name':'GOOG', 'shares': 50},
    {'name':'YHOO', 'shares': 75},
    {'name':'AOL', 'shares': 20},
    {'name':'SCOX', 'shares': 65}
]

min_shares = min(item['shares']  for item in portfolio)
print(min_shares)

20


## 1.20 合并多个字典或映射

In [203]:
## collections模块中的ChainMap类，可以将多个字典进行逻辑上的合并，而不需要生成一个新的字典

In [204]:
from collections import ChainMap

In [205]:
a = {'x': 1, 'z': 3 }
b = {'y': 2, 'z': 4 }

In [206]:
c = ChainMap(a,b)

In [209]:
print(list(c.keys()))

['x', 'z', 'y']


In [210]:
c['z'] = 10
c['w'] = 40

In [211]:
a

{'x': 1, 'z': 10, 'w': 40}

In [212]:
del c['x']

In [213]:
a

{'z': 10, 'w': 40}

In [214]:
del c['y']
## 报错了，因为对chainmap的删除或者更新，只会影响列表的第一个字典。

KeyError: "Key not found in the first mapping: 'y'"