In [1]:
import sys

print(sys.version)

3.9.7 (default, Sep 16 2021, 08:50:36) 
[Clang 10.0.0 ]


## 字典的现代句法

### 字典推导式

In [2]:
dial_codes = [
    (86, 'China'),
    (91, 'India'),
    (1, 'United States'),
    (62, 'Indonesia'),
    (55, 'Brazil'),
    (92, 'Pakistan'),
    (880, 'Bangladesh'),
    (234, 'Nigeria'),
    (7, 'Russia'),
    (81, 'Japan'),
]

In [3]:
country_dial = {country: code for code, country in dial_codes}
country_dial

{'China': 86,
 'India': 91,
 'United States': 1,
 'Indonesia': 62,
 'Brazil': 55,
 'Pakistan': 92,
 'Bangladesh': 880,
 'Nigeria': 234,
 'Russia': 7,
 'Japan': 81}

In [7]:
{code:country.upper()
    for country, code in sorted(country_dial.items())
    if code < 70}

{55: 'BRAZIL', 62: 'INDONESIA', 7: 'RUSSIA', 1: 'UNITED STATES'}

### 映射拆包

In [8]:
def dump(**kwargs):
    """Return the keyword arguments of the call, collected into a dict.

    Used to show how several ``**`` unpackings in one call are merged into
    a single mapping of argument names to values.
    """
    received = kwargs
    return received

In [9]:
dump(**{'x': 1}, y=2, **{'z': 3})

{'x': 1, 'y': 2, 'z': 3}

In [10]:
{'a':0, **{'x': 1}, 'y':2, **{'z': 3, 'x': 4}}

{'a': 0, 'x': 4, 'y': 2, 'z': 3}

### 使用 | 合并映射

In [11]:
d1 = {'a':1, 'b':3}
d2 = {'a':2, 'b':4, 'c':6}
d1 | d2

{'a': 2, 'b': 4, 'c': 6}

In [12]:
d1

{'a': 1, 'b': 3}

In [13]:
d2

{'a': 2, 'b': 4, 'c': 6}

In [14]:
d1 |= d2
d1

{'a': 2, 'b': 4, 'c': 6}

## 使用模式匹配处理映射

In [1]:
# match/case 结构化模式匹配需要 Python 3.10+；本 notebook 运行于 Python 3.9.7，因此下面的示例以注释形式保留

In [2]:
# def get_creators(record: dict) -> list:
#     match record:
#         case {'type': 'book', 'api': 2, 'authors': [*names]}:  # <1>
#             return names
#         case {'type': 'book', 'api': 1, 'author': name}:  # <2>
#             return [name]
#         case {'type': 'book'}:  # <3>
#             raise ValueError(f"Invalid 'book' record: {record!r}")
#         case {'type': 'movie', 'director': name}:  # <4>
#             return [name]
#         case _:  # <5>
#             raise ValueError(f'Invalid record: {record!r}')

## 映射类型的标准 API

In [3]:
my_dict = {}

In [5]:
from collections import abc

isinstance(my_dict, abc.Mapping)

True

In [6]:
isinstance(my_dict, abc.MutableMapping)

True

### 可哈希指什么

In [7]:
tt = (1, 2, (30, 40))

In [8]:
hash(tt)

-3907003130834322577

In [9]:
t1 = (1, 2, [30, 40])

In [10]:
hash(t1)

TypeError: unhashable type: 'list'

In [11]:
# 仅当所有项均可哈希，tuple 对象才是可哈希的

In [13]:
tf = (1, 2, frozenset([30, 40]))
hash(tf)

5149391500123939311

### 插入或更新可变的值

In [12]:
"""
index.setdefault(word, []).append(location)

等价于

occurrences = index.get(word, [])
occurrences.append(location)
index[word] = occurrences

"""
! python index0.py zen.txt

a [(19, 48), (20, 53)]
Although [(11, 1), (16, 1), (18, 1)]
ambiguity [(14, 16)]
and [(15, 23)]
are [(21, 12)]
aren [(10, 15)]
at [(16, 38)]
bad [(19, 50)]
be [(15, 14), (16, 27), (20, 50)]
beats [(11, 23)]
Beautiful [(3, 1)]
better [(3, 14), (4, 13), (5, 11), (6, 12), (7, 9), (8, 11), (17, 8), (18, 25)]
break [(10, 40)]
by [(1, 20)]
cases [(10, 9)]
complex [(5, 23)]
Complex [(6, 1)]
complicated [(6, 24)]
counts [(9, 13)]
dense [(8, 23)]
do [(15, 64), (21, 48)]
Dutch [(16, 61)]
easy [(20, 26)]
enough [(10, 30)]
Errors [(12, 1)]
explain [(19, 34), (20, 34)]
Explicit [(4, 1)]
explicitly [(13, 8)]
face [(14, 8)]
first [(16, 41)]
Flat [(7, 1)]
good [(20, 55)]
great [(21, 28)]
guess [(14, 52)]
hard [(19, 26)]
honking [(21, 20)]
idea [(19, 54), (20, 60), (21, 34)]
If [(19, 1), (20, 1)]
implementation [(19, 8), (20, 8)]
implicit [(4, 25)]
In [(14, 1)]
is [(3, 11), (4, 10), (5, 8), (6, 9), (7, 6), (8, 8), (17, 5), (18, 16), (19, 23), (20, 23)]
it [(15, 67), (19, 43), (20, 43)]
let [(21, 42)]
m

## 自动处理缺失的键

### defaultdict：处理缺失键的一种选择

In [14]:
# 使用 defaultdict 代替 setdefault 方法

In [21]:
"""
构建一个索引映射，列出词出现的位置
"""

import collections
import re
import sys

def list_word_index(file_path):
    """Print each word found in a text file followed by its locations.

    The file is read as UTF-8. A word is any match of the regex ``\\w+``.
    Each location is a ``(line_no, column_no)`` tuple, both counted from 1.
    One line is printed per distinct word, ordered by the upper-cased word.

    Args:
        file_path: path of the text file to index.

    Returns:
        None. The index is written to standard output only.
    """
    word_pattern = re.compile(r'\w+')

    # default_factory=list: a missing word gets an empty list on first access,
    # so locations can be appended without checking for the key first.
    occurrences = collections.defaultdict(list)
    with open(file_path, encoding='utf-8') as text_file:
        for row, text in enumerate(text_file, start=1):
            for found in word_pattern.finditer(text):
                # match.start() is 0-based; columns are reported 1-based.
                occurrences[found.group()].append((row, found.start() + 1))

    # Case-insensitive alphabetical order; words that differ only in case
    # keep the order in which they were first seen (sorted() is stable).
    for word, locations in sorted(occurrences.items(),
                                  key=lambda entry: entry[0].upper()):
        print(word, locations)

In [22]:
list_word_index('zen.txt')

a [(19, 48), (20, 53)]
Although [(11, 1), (16, 1), (18, 1)]
ambiguity [(14, 16)]
and [(15, 23)]
are [(21, 12)]
aren [(10, 15)]
at [(16, 38)]
bad [(19, 50)]
be [(15, 14), (16, 27), (20, 50)]
beats [(11, 23)]
Beautiful [(3, 1)]
better [(3, 14), (4, 13), (5, 11), (6, 12), (7, 9), (8, 11), (17, 8), (18, 25)]
break [(10, 40)]
by [(1, 20)]
cases [(10, 9)]
complex [(5, 23)]
Complex [(6, 1)]
complicated [(6, 24)]
counts [(9, 13)]
dense [(8, 23)]
do [(15, 64), (21, 48)]
Dutch [(16, 61)]
easy [(20, 26)]
enough [(10, 30)]
Errors [(12, 1)]
explain [(19, 34), (20, 34)]
Explicit [(4, 1)]
explicitly [(13, 8)]
face [(14, 8)]
first [(16, 41)]
Flat [(7, 1)]
good [(20, 55)]
great [(21, 28)]
guess [(14, 52)]
hard [(19, 26)]
honking [(21, 20)]
idea [(19, 54), (20, 60), (21, 34)]
If [(19, 1), (20, 1)]
implementation [(19, 8), (20, 8)]
implicit [(4, 25)]
In [(14, 1)]
is [(3, 11), (4, 10), (5, 8), (6, 9), (7, 6), (8, 8), (17, 5), (18, 16), (19, 23), (20, 23)]
it [(15, 67), (19, 43), (20, 43)]
let [(21, 42)]
m

### `__missing__` 方法

In [23]:
class StrKeyDict0(dict):
    """dict subclass whose lookups fall back to the ``str`` form of a missing key.

    Keys are stored exactly as given; only ``[]`` lookup, ``get`` and ``in``
    try ``str(key)`` when the key itself is not found.
    """

    def __missing__(self, key):
        """Handle a failed ``self[key]`` lookup by retrying with ``str(key)``.

        Raises:
            KeyError: if ``key`` is already a ``str``, or if its string form
                is missing as well.
        """
        if not isinstance(key, str):
            return self[str(key)]
        # A str key that reached here has no other form to try.
        raise KeyError(key)

    def get(self, key, default=None):
        """Return ``self[key]``, or ``default`` when that lookup raises KeyError.

        Going through ``self[key]`` lets ``__missing__`` apply its fallback.
        """
        try:
            value = self[key]
        except KeyError:
            return default
        else:
            return value

    def __contains__(self, key):
        """Return True if ``key`` or ``str(key)`` is among the stored keys."""
        # Test against the keys view: ``key in self`` would call this method
        # again and recurse.
        stored_keys = self.keys()
        return key in stored_keys or str(key) in stored_keys

In [24]:
# 搜索非字符串键时，StrKeyDict0 把未找到的键转换成字符串

In [25]:
d = StrKeyDict0([('2', 'two'), ('4', 'four')])

In [26]:
d['2']

'two'

In [27]:
d['4']

'four'

In [31]:
d['1']

KeyError: '1'

In [29]:
d[4]

'four'

In [30]:
d.get(1, 'N/A')

'N/A'

## dict 的变体

### collections.OrderedDict

### collections.ChainMap

In [33]:
# ChainMap 实例存放一组映射，可作为一个整体来搜索。
# 查找操作按照输入映射在构造函数调用中出现的顺序执行，一旦在某个映射中找到指定的键，旋即结束。

In [34]:
d1 = dict(a=1, b=3)
d2 = dict(a=2, b=4, c=6)

from collections import ChainMap
chain = ChainMap(d1, d2)

In [35]:
chain['a']

1

In [36]:
chain['c']

6

In [37]:
# ChainMap 的更新或插入操作只影响第一个输入映射

In [38]:
chain['c'] = -1

In [39]:
d1

{'a': 1, 'b': 3, 'c': -1}

In [40]:
d2

{'a': 2, 'b': 4, 'c': 6}

### collections.Counter

In [41]:
# 这是一种对键计数的映射。更新现有的键，计数随之增加。

In [42]:
ct = collections.Counter('abraaacsdawdkiw')
ct

Counter({'a': 5,
         'b': 1,
         'r': 1,
         'c': 1,
         's': 1,
         'd': 2,
         'w': 2,
         'k': 1,
         'i': 1})

In [43]:
ct.update('bbccrr')

In [44]:
ct

Counter({'a': 5,
         'b': 3,
         'r': 3,
         'c': 3,
         's': 1,
         'd': 2,
         'w': 2,
         'k': 1,
         'i': 1})

In [45]:
ct.most_common(3)

[('a', 5), ('b', 3), ('r', 3)]

### shelve.Shelf

### 子类应继承 UserDict 而不是 dict

In [50]:
# 创建新的映射类型，最好扩展 collections.UserDict，而不是 dict。
# 主要原因是，内置的 dict 在实现上走了一些捷径，如果继承 dict，那就不得不覆盖一些方法，而继承 UserDict 则没有这些问题。

#### StrKeyDict

In [56]:
"""
StrKeyDict 继承 UserDict，实现过程比 StrKeyDict0（示例 3-8）更简洁，而且功能更丰富：
所有键都以 str 类型存储，使用包含非字符串键的数据构建或更新实例不会发生意外情况。
"""

import collections


class StrKeyDict(collections.UserDict):
    """Mapping that stores every key as ``str`` and accepts non-string keys.

    ``__setitem__`` converts the key with ``str()`` before storing it in
    ``self.data``. Lookups, membership tests and deletions apply the same
    conversion, so ``d[2]``, ``2 in d`` and ``del d[2]`` all address the
    entry stored under ``'2'``.
    """

    def __missing__(self, key):
        """Retry a failed lookup with ``str(key)``.

        Raises:
            KeyError: if ``key`` is already a ``str`` (no other form to try),
                or if its string form is missing as well.
        """
        if isinstance(key, str):
            raise KeyError(key)
        return self[str(key)]

    def __contains__(self, key):
        """Return True when ``str(key)`` is a stored key."""
        return str(key) in self.data

    def __setitem__(self, key, item):
        """Store ``item`` under ``str(key)``."""
        self.data[str(key)] = item

    def __delitem__(self, key):
        """Delete the entry stored under ``str(key)``.

        The inherited ``__delitem__`` uses the key exactly as given, so
        without this override ``del d[2]`` and ``d.pop(2)`` raise ``KeyError``
        even though ``2 in d`` is true and ``d[2]`` succeeds.

        Raises:
            KeyError: if ``str(key)`` is not a stored key.
        """
        del self.data[str(key)]

## 不可变映射

### MappingProxyType

In [57]:
# MappingProxyType 根据 dict 对象构建只读的 mappingproxy 实例

In [58]:
from types import MappingProxyType

In [63]:
d = {1:'A'}
d_proxy = MappingProxyType(d)
d_proxy

mappingproxy({1: 'A'})

In [64]:
d_proxy[1]

'A'

In [65]:
d_proxy[2] = 'x'

TypeError: 'mappingproxy' object does not support item assignment

In [66]:
d[2] = 'B'

In [67]:
d_proxy

mappingproxy({1: 'A', 2: 'B'})

In [68]:
d_proxy[2]

'B'

## 字典视图

In [70]:
# dict 的实例方法 .keys()、.values() 和 .items() 分别返回 dict_keys、dict_values 和 dict_items 类的实例

### `.values()` 方法返回 dict 对象的值视图

In [81]:
d = dict(a=10, b=20, c=30)
values = d.values()
values

dict_values([10, 20, 30])

In [82]:
len(values)

3

In [83]:
list(values)

[10, 20, 30]

In [84]:
# 视图实现了 __reversed__ 方法，返回一个自定义迭代器
reversed(values)

<dict_reversevalueiterator at 0x7f8d10ae7f40>

In [85]:
# 不能使用 [] 获取视图中的项
values[0]

TypeError: 'dict_values' object is not subscriptable

In [91]:
# 视图对象是动态代理。更新原 dict 对象后，现有视图立即就能看到变化。

d['z'] = 99
d

{'a': 10, 'b': 20, 'c': 30, 'z': 99}

In [92]:
values

dict_values([10, 20, 30, 99])

In [93]:
# dict_keys、dict_values 和 dict_items 是内部类，不能通过 __builtins__ 或标准库中的任何模块获取，
# 尽管可以得到实例，但是在 Python 代码中不能自己动手创建。

In [94]:
values_class = type({}.values())

In [95]:
v= values_class()

TypeError: cannot create 'dict_values' instances

## 集合 set

In [3]:
# 集合是一组唯一的对象。集合的基本作用是去除重复项。

In [1]:
l = ['spam', 'spam', 'eggs', 'spam', 'bacon', 'eggs']
set(l)

{'bacon', 'eggs', 'spam'}

In [2]:
list(set(l))

['eggs', 'spam', 'bacon']

In [6]:
# 如果想去除重复项，同时保留每一项首次出现位置的顺序，那么现在使用普通的 dict 即可

dict.fromkeys(l).keys()

dict_keys(['spam', 'eggs', 'bacon'])

### set 字面量

In [7]:
# 在 Python 3 中，集合的标准字符串表示形式始终使用 {...} 表示法，唯有空集例外

In [8]:
s = {1}
type(s)

set

In [9]:
s

{1}

In [10]:
s.pop()

1

In [11]:
s

set()

In [13]:
frozenset(range(10))

frozenset({0, 1, 2, 3, 4, 5, 6, 7, 8, 9})

### 集合推导式

In [14]:
from unicodedata import name

{chr(i) for i in range(32, 256) if 'SIGN' in name(chr(i), '')}

{'#',
 '$',
 '%',
 '+',
 '<',
 '=',
 '>',
 '¢',
 '£',
 '¤',
 '¥',
 '§',
 '©',
 '¬',
 '®',
 '°',
 '±',
 'µ',
 '¶',
 '×',
 '÷'}

## 字典视图的集合运算

In [16]:
d1 = dict(a=1, b=2, c=3, d=4)
d2 = dict(b=20, d=40, e=50)

In [17]:
d1.keys() & d2.keys()

{'b', 'd'}