# 数据结构

## 列表中出现频率最高的元素

In [1]:
from collections import Counter

# Sample text, already split into individual words (one phrase per line).
words = [
    'look', 'into', 'my', 'eyes',
    'look', 'into', 'my', 'eyes',
    'the', 'eyes', 'the', 'eyes', 'the', 'eyes',
    'not', 'around', 'the', 'eyes',
    "don't", 'look', 'around', 'the', 'eyes',
    'look', 'into', 'my', 'eyes',
    "you're", 'under',
]

# Tally every word, then keep the three most frequent (word, count) pairs.
word_counts = Counter()
word_counts.update(words)
top_three = word_counts.most_common(3)
top_three

[('eyes', 8), ('the', 5), ('look', 4)]

## 列表中最大或最小的几项

In [2]:
import heapq

# Each holding is a plain dict with a ticker name, a share count and a unit price.
portfolio = [
    {'name': 'IBM', 'shares': 100, 'price': 91.1},
    {'name': 'AAPL', 'shares': 50, 'price': 543.22},
    {'name': 'FB', 'shares': 200, 'price': 21.09},
    {'name': 'HPQ', 'shares': 35, 'price': 31.75},
    {'name': 'YHOO', 'shares': 45, 'price': 16.35},
    {'name': 'ACME', 'shares': 75, 'price': 115.65},
]


def price_of(holding):
    """Return the 'price' field of a holding; both rankings sort on it."""
    return holding['price']


# The three lowest-priced and the three highest-priced holdings.
cheap = heapq.nsmallest(3, portfolio, key=price_of)
expensive = heapq.nlargest(3, portfolio, key=price_of)
{'cheap': cheap, 'expensive': expensive}

{'cheap': [{'name': 'YHOO', 'price': 16.35, 'shares': 45},
  {'name': 'FB', 'price': 21.09, 'shares': 200},
  {'name': 'HPQ', 'price': 31.75, 'shares': 35}],
 'expensive': [{'name': 'AAPL', 'price': 543.22, 'shares': 50},
  {'name': 'ACME', 'price': 115.65, 'shares': 75},
  {'name': 'IBM', 'price': 91.1, 'shares': 100}]}

## 对字典作集合运算

In [3]:
# Two small dicts that share some keys and exactly one (key, value) pair.
a = {'x': 1, 'y': 2, 'z': 3}
b = {'w': 10, 'x': 11, 'y': 2}

# The keys() and items() views accept the set operators & and - directly.
shared_keys = a.keys() & b.keys()
only_in_a = a.keys() - b.keys()
shared_pairs = a.items() & b.items()

print('Common keys:', shared_keys)
print('Keys in a not in b:', only_in_a)
print('(key,value) pairs in common:', shared_pairs)

Common keys: {'y', 'x'}
Keys in a not in b: {'z'}
(key,value) pairs in common: {('y', 2)}


## 对列表中的数据分组

In [4]:
from itertools import groupby

# Address records, each stamped with a date string.
rows = [
    {'address': '5412 N CLARK', 'date': '07/01/2012'},
    {'address': '5148 N CLARK', 'date': '07/04/2012'},
    {'address': '5800 E 58TH', 'date': '07/02/2012'},
    {'address': '2122 N CLARK', 'date': '07/03/2012'},
    {'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'},
    {'address': '1060 W ADDISON', 'date': '07/02/2012'},
    {'address': '4801 N BROADWAY', 'date': '07/01/2012'},
    {'address': '1039 W GRANVILLE', 'date': '07/04/2012'},
]


def _date_of(row):
    """Return the 'date' field used both for sorting and for grouping."""
    return row['date']


# groupby only merges adjacent rows with equal keys, so sort by date first.
# The sort is done in place: later cells see rows in date order.
rows.sort(key=_date_of)
for date, same_day in groupby(rows, key=_date_of):
    print(date)
    for row in same_day:
        print('    ', row)

07/01/2012
     {'date': '07/01/2012', 'address': '5412 N CLARK'}
     {'date': '07/01/2012', 'address': '4801 N BROADWAY'}
07/02/2012
     {'date': '07/02/2012', 'address': '5800 E 58TH'}
     {'date': '07/02/2012', 'address': '5645 N RAVENSWOOD'}
     {'date': '07/02/2012', 'address': '1060 W ADDISON'}
07/03/2012
     {'date': '07/03/2012', 'address': '2122 N CLARK'}
07/04/2012
     {'date': '07/04/2012', 'address': '5148 N CLARK'}
     {'date': '07/04/2012', 'address': '1039 W GRANVILLE'}


或者使用 defaultdict 来实现：

In [5]:
from collections import defaultdict
# Bucket the rows by date; an unseen date starts out as an empty list.
rows_by_date = defaultdict(list)
for record in rows:
    bucket = rows_by_date[record['date']]
    bucket.append(record)
rows_by_date

defaultdict(list,
            {'07/01/2012': [{'address': '5412 N CLARK', 'date': '07/01/2012'},
              {'address': '4801 N BROADWAY', 'date': '07/01/2012'}],
             '07/02/2012': [{'address': '5800 E 58TH', 'date': '07/02/2012'},
              {'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'},
              {'address': '1060 W ADDISON', 'date': '07/02/2012'}],
             '07/03/2012': [{'address': '2122 N CLARK', 'date': '07/03/2012'}],
             '07/04/2012': [{'address': '5148 N CLARK', 'date': '07/04/2012'},
              {'address': '1039 W GRANVILLE', 'date': '07/04/2012'}]})

## 优先队列

In [6]:
import heapq

class PriorityQueue:
    """Queue whose pop() returns the item pushed with the largest priority.

    Entries live in a heapq-managed list as (-priority, index, item) tuples.
    Negating the priority makes the largest priority sort first, and the
    running index orders entries with equal priority by insertion, so the
    items themselves are never compared.
    """

    def __init__(self):
        self._index = 0   # insertion counter, used as the tie-breaker
        self._queue = []  # heap of (-priority, index, item) entries

    def push(self, item, priority):
        """Add item to the queue; priority must support unary minus."""
        entry = (-priority, self._index, item)
        heapq.heappush(self._queue, entry)
        self._index += 1

    def pop(self):
        """Remove the entry at the top of the heap and return its item."""
        _, _, item = heapq.heappop(self._queue)
        return item

# Example use
class Item:
    """Minimal named object; its repr shows the name it was created with."""

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        template = 'Item({!r})'
        return template.format(self.name)

q = PriorityQueue()
for name, priority in (('foo', 1), ('bar', 5), ('spam', 4), ('grok', 1)):
    q.push(Item(name), priority)

# Highest priority leaves first; foo and grok tie at 1 and leave in push order.
for label in ("Should be bar:", "Should be spam:",
              "Should be foo:", "Should be grok:"):
    print(label, q.pop())

Should be bar: Item('bar')
Should be spam: Item('spam')
Should be foo: Item('foo')
Should be grok: Item('grok')


## 消除序列中的重复数据，同时保持数据顺序

In [7]:
def dedupe(items, key=None):
    """Yield the items of an iterable in order, skipping repeats.

    Args:
        items: iterable to filter.
        key: optional one-argument function. When given, two items count
            as duplicates if key(item) is equal; otherwise the items
            themselves are compared. The compared value must be hashable.

    Yields:
        The first occurrence of each distinct item, in original order.
    """
    seen = set()
    for item in items:
        marker = key(item) if key is not None else item
        if marker in seen:
            continue
        yield item
        seen.add(marker)


a = [
    {'x': 2, 'y': 3},
    {'x': 1, 'y': 4},
    {'x': 2, 'y': 3},
    {'x': 2, 'y': 3},
    {'x': 10, 'y': 15},
]
# Dicts cannot go into a set, so each one is compared by its (x, y) tuple.
unique_rows = dedupe(a, key=lambda d: (d['x'], d['y']))
print(list(unique_rows))

[{'y': 3, 'x': 2}, {'y': 4, 'x': 1}, {'y': 15, 'x': 10}]


In [8]:
def dedupe(items, key=None):
    """Yield the items of an iterable in order, skipping repeats.

    Called with only ``items`` this compares the items themselves, so they
    must be hashable. The optional ``key`` keeps this definition usable for
    unhashable items as well: defining a ``dedupe`` without it replaces any
    earlier key-aware ``dedupe`` in the notebook namespace and makes calls
    such as ``dedupe(rows, key=...)`` fail with TypeError.

    Args:
        items: iterable to filter.
        key: optional one-argument function mapping an item to the hashable
            value used to detect duplicates. Defaults to the item itself.

    Yields:
        The first occurrence of each distinct item, in original order.
    """
    seen = set()
    for item in items:
        val = item if key is None else key(item)
        if val not in seen:
            yield item
            seen.add(val)

# Plain hashable values: duplicates are dropped, first occurrences keep their order.
a = [1, 5, 2, 1, 9, 1, 5, 10]
unique_values = list(dedupe(a))
print(unique_values)

[1, 5, 2, 9, 10]


## 序列解包 (unpack)

In [9]:
# Tagged records: the first field names the handler, the rest are its arguments.
records = [
    ('foo', 1, 2),
    ('bar', 'hello'),
    ('foo', 3, 4),
]


def do_foo(x, y):
    """Handle a 'foo' record by printing its two values."""
    print('foo', x, y)


def do_bar(s):
    """Handle a 'bar' record by printing its single value."""
    print('bar', s)


# Map each tag to its handler; records whose tag has no handler are skipped.
handlers = {'foo': do_foo, 'bar': do_bar}

# Star-unpacking splits every record into its tag and the remaining fields.
for tag, *args in records:
    handler = handlers.get(tag)
    if handler is not None:
        handler(*args)

foo 1 2
bar hello
foo 3 4


## 组合多个字典当作一个字典使用

In [10]:
from collections import ChainMap

a = {'x': 1, 'z': 3}
b = {'y': 2, 'z': 4}

# (a) Simple example of combining: a is listed first, so it is searched first
c = ChainMap(a, b)
print("c:", c)

# Prints 1 (x from a), 2 (y from b) and 3 (z from a, which hides b's z)
for name in ('x', 'y', 'z'):
    print(c[name])

# Size, keys and values of the combined view
print('len(c):', len(c))
print('c.keys():', list(c.keys()))
print('c.values():', list(c.values()))

# Assignments and deletions act on the first mapping (a); b is left untouched
c['z'] = 10
c['w'] = 40
del c['x']
print("a:", a)

c: ChainMap({'z': 3, 'x': 1}, {'y': 2, 'z': 4})
1
2
3
len(c): 3
c.keys(): ['z', 'y', 'x']
c.values(): [3, 2, 1]
a: {'z': 10, 'w': 40}


## 字典栈

In [11]:
# Example of stacking mappings (like scopes)
values = ChainMap()
values['x'] = 1

# Push two child mappings, each with its own binding for x
for shadow in (2, 3):
    values = values.new_child()
    values['x'] = shadow

print(values)
print(values['x'])

# Discard the newest mapping twice, showing what is visible after each step
for depth in range(2):
    values = values.parents
    print(values)
    print(values['x'])

ChainMap({'x': 3}, {'x': 2}, {'x': 1})
3
ChainMap({'x': 2}, {'x': 1})
2
ChainMap({'x': 1})
1


## flatten 列表

In [12]:
# Iterable must come from collections.abc: the alias that used to be
# importable from collections itself was removed in Python 3.10.
from collections.abc import Iterable


def flatten(items, ignore_types=(str, bytes)):
    """Yield the leaf values of an arbitrarily nested iterable, in order.

    Args:
        items: iterable whose elements may themselves be iterables.
        ignore_types: type or tuple of types that are yielded as a single
            value instead of being descended into. Defaults to str and
            bytes, so text is not split into characters.

    Yields:
        Each element that is not iterable, or that is an instance of
        ignore_types, in depth-first, left-to-right order.
    """
    for x in items:
        if isinstance(x, Iterable) and not isinstance(x, ignore_types):
            # Forward ignore_types so nested levels honor the caller's choice
            # rather than falling back to the default.
            yield from flatten(x, ignore_types)
        else:
            yield x

# Nested numbers: prints 1 2 3 4 5 6 7 8, one value per line
items = [1, 2, [3, 4, [5, 6], 7], 8]
for value in flatten(items):
    print(value)

# Nested names: each string is printed whole, not split into characters
items = ['Dave', 'Paula', ['Thomas', 'Lewis']]
for value in flatten(items):
    print(value)


1
2
3
4
5
6
7
8
Dave
Paula
Thomas
Lewis
