# 字典和集合

In [1]:
from collections import abc

my_dict = {}
isinstance(my_dict, abc.Mapping)

True

In [2]:
# A tuple is hashable when every item inside it is hashable; the nested
# tuple here holds only ints, so hash() succeeds.
tt = (1, 2, (30, 40))
hash(tt)

-3907003130834322577

In [3]:
# A tuple that holds a list cannot be hashed, because hashing the tuple
# requires hashing every item and lists are unhashable.  The expected
# TypeError is caught and printed so that this demonstration does not stop
# a "Restart & Run All" pass of the notebook.
tl = (1, 2, [30, 40])
try:
    hash(tl)
except TypeError as exc:
    print('TypeError:', exc)

TypeError: unhashable type: 'list'

In [5]:
# frozenset is hashable, so a tuple that contains one can itself be hashed.
tf = (1, 2, frozenset([30, 40]))
hash(tf)

5149391500123939311

In [6]:
# Five ways to build the same mapping.
a = dict(one=1, two=2, three=3)  # keyword arguments
b = {'one': 1, 'two': 2, 'three': 3}  # dict literal
c = dict(zip(['one', 'two', 'three'], [1, 2, 3]))  # iterable of pairs produced by zip
d = dict([('two', 2), ('one', 1), ('three', 3)])  # list of (key, value) tuples
e = dict({'three': 3, 'one': 1, 'two': 2})  # copy of another mapping
# Equality compares contents only, so the differing insertion orders above
# do not matter.
a == b == c == d == e

True

In [7]:
# (dial code, country name) pairs.
DIAL_CODES = [
    (86, 'China'),
    (91, 'India'),
    (1, 'United States'),
    (62, 'Indonesia'),
    (55, 'Brazil'),
    (92, 'Pakistan'),
    (880, 'Bangladesh'),
    (234, 'Nigeria'),
    (7, 'Russia'),
    (81, 'Japan'),
]

# Dict comprehension that swaps each pair: country name -> dial code.
country_code = {name: dial for dial, name in DIAL_CODES}
country_code

{'China': 86,
 'India': 91,
 'United States': 1,
 'Indonesia': 62,
 'Brazil': 55,
 'Pakistan': 92,
 'Bangladesh': 880,
 'Nigeria': 234,
 'Russia': 7,
 'Japan': 81}

In [8]:
# Invert country_code into {dial code: COUNTRY NAME IN UPPER CASE},
# keeping only the entries whose dial code is below 66.
{code: country.upper() for country, code in country_code.items() if code < 66}

{1: 'UNITED STATES', 62: 'INDONESIA', 55: 'BRAZIL', 7: 'RUSSIA'}

In [9]:
import sys
import re

# Build an index that maps each word to the list of (line, column)
# positions where it occurs, using dict.get to look up the word's list.
WORD_RE = re.compile(r'\w+')

index = {}
with open('./zen.txt', encoding='utf-8') as fp:
    for line_no, text in enumerate(fp, start=1):
        for hit in WORD_RE.finditer(text):
            word = hit.group()
            # Lines and columns are both reported 1-based.
            position = (line_no, hit.start() + 1)
            # Fetch the existing list (or a fresh one), extend it, and
            # store it back under the word.
            found = index.get(word, [])
            found.append(position)
            index[word] = found

# Print the entries in alphabetical order, ignoring case.
for word, positions in sorted(index.items(), key=lambda pair: pair[0].upper()):
    print(word, positions)

2014 [(3, 15)]
a [(5, 71)]
above [(12, 5)]
all [(12, 76)]
and [(6, 18), (8, 63), (9, 25), (12, 28)]
any [(5, 50)]
associated [(6, 22)]
be [(12, 61)]
c [(3, 12)]
charge [(5, 39)]
conditions [(10, 46)]
copies [(9, 1), (13, 1)]
copy [(5, 73), (8, 9)]
Copyright [(3, 1)]
copyright [(12, 11)]
deal [(6, 74)]
distribute [(8, 39)]
do [(10, 14)]
documentation [(6, 33)]
files [(6, 47)]
following [(10, 36)]
free [(5, 31)]
furnished [(10, 1)]
granted [(5, 22)]
hereby [(5, 15)]
in [(7, 1), (12, 73)]
included [(12, 64)]
including [(7, 38)]
is [(5, 12), (9, 68)]
License [(1, 9)]
limitation [(7, 56)]
Luciano [(3, 20)]
merge [(8, 23)]
MIT [(1, 5), (1, 18)]
modify [(8, 15)]
notice [(12, 21), (12, 48)]
obtaining [(5, 61)]
of [(5, 36), (6, 1), (9, 8), (13, 32)]
or [(8, 67), (13, 8)]
Permission [(5, 1)]
permission [(12, 37)]
permit [(9, 32)]
person [(5, 54)]
persons [(9, 39)]
portions [(13, 23)]
publish [(8, 30)]
Ramalho [(3, 28)]
restriction [(7, 25)]
rights [(7, 71)]
sell [(8, 70)]
shall [(12, 55)]
so [(1

In [10]:
# Build the word -> [(line, column), ...] index with dict.setdefault, which
# returns the word's list and creates it on first sight in a single call.
index = {}
with open('./zen.txt', encoding='utf-8') as fp:
    for line_no, text in enumerate(fp, start=1):
        for hit in WORD_RE.finditer(text):
            # Lines and columns are both reported 1-based.
            position = (line_no, hit.start() + 1)
            index.setdefault(hit.group(), []).append(position)

# Print the entries in alphabetical order, ignoring case.
for word, positions in sorted(index.items(), key=lambda pair: pair[0].upper()):
    print(word, positions)

2014 [(3, 15)]
a [(5, 71)]
above [(12, 5)]
all [(12, 76)]
and [(6, 18), (8, 63), (9, 25), (12, 28)]
any [(5, 50)]
associated [(6, 22)]
be [(12, 61)]
c [(3, 12)]
charge [(5, 39)]
conditions [(10, 46)]
copies [(9, 1), (13, 1)]
copy [(5, 73), (8, 9)]
Copyright [(3, 1)]
copyright [(12, 11)]
deal [(6, 74)]
distribute [(8, 39)]
do [(10, 14)]
documentation [(6, 33)]
files [(6, 47)]
following [(10, 36)]
free [(5, 31)]
furnished [(10, 1)]
granted [(5, 22)]
hereby [(5, 15)]
in [(7, 1), (12, 73)]
included [(12, 64)]
including [(7, 38)]
is [(5, 12), (9, 68)]
License [(1, 9)]
limitation [(7, 56)]
Luciano [(3, 20)]
merge [(8, 23)]
MIT [(1, 5), (1, 18)]
modify [(8, 15)]
notice [(12, 21), (12, 48)]
obtaining [(5, 61)]
of [(5, 36), (6, 1), (9, 8), (13, 32)]
or [(8, 67), (13, 8)]
Permission [(5, 1)]
permission [(12, 37)]
permit [(9, 32)]
person [(5, 54)]
persons [(9, 39)]
portions [(13, 23)]
publish [(8, 30)]
Ramalho [(3, 28)]
restriction [(7, 25)]
rights [(7, 71)]
sell [(8, 70)]
shall [(12, 55)]
so [(1

In [11]:
import collections

# Build the word -> [(line, column), ...] index with a defaultdict(list):
# looking up a missing word creates its empty list automatically.
index = collections.defaultdict(list)
with open('./zen.txt', encoding='utf-8') as fp:
    for line_no, text in enumerate(fp, start=1):
        for hit in WORD_RE.finditer(text):
            # Lines and columns are both reported 1-based.
            position = (line_no, hit.start() + 1)
            index[hit.group()].append(position)

# Print the entries in alphabetical order, ignoring case.
for word, positions in sorted(index.items(), key=lambda pair: pair[0].upper()):
    print(word, positions)

2014 [(3, 15)]
a [(5, 71)]
above [(12, 5)]
all [(12, 76)]
and [(6, 18), (8, 63), (9, 25), (12, 28)]
any [(5, 50)]
associated [(6, 22)]
be [(12, 61)]
c [(3, 12)]
charge [(5, 39)]
conditions [(10, 46)]
copies [(9, 1), (13, 1)]
copy [(5, 73), (8, 9)]
Copyright [(3, 1)]
copyright [(12, 11)]
deal [(6, 74)]
distribute [(8, 39)]
do [(10, 14)]
documentation [(6, 33)]
files [(6, 47)]
following [(10, 36)]
free [(5, 31)]
furnished [(10, 1)]
granted [(5, 22)]
hereby [(5, 15)]
in [(7, 1), (12, 73)]
included [(12, 64)]
including [(7, 38)]
is [(5, 12), (9, 68)]
License [(1, 9)]
limitation [(7, 56)]
Luciano [(3, 20)]
merge [(8, 23)]
MIT [(1, 5), (1, 18)]
modify [(8, 15)]
notice [(12, 21), (12, 48)]
obtaining [(5, 61)]
of [(5, 36), (6, 1), (9, 8), (13, 32)]
or [(8, 67), (13, 8)]
Permission [(5, 1)]
permission [(12, 37)]
permit [(9, 32)]
person [(5, 54)]
persons [(9, 39)]
portions [(13, 23)]
publish [(8, 30)]
Ramalho [(3, 28)]
restriction [(7, 25)]
rights [(7, 71)]
sell [(8, 70)]
shall [(12, 55)]
so [(1

In [12]:
class StrKeyDict0(dict):
    """dict whose lookups fall back to the str() form of a missing key."""

    def __missing__(self, key):
        """Retry a failed lookup with str(key); raise KeyError for str keys."""
        if not isinstance(key, str):
            return self[str(key)]
        # A str key that is missing has no further fallback.
        raise KeyError(key)

    def get(self, key, default=None):
        """Return self[key] (so __missing__ applies), or default on KeyError."""
        try:
            value = self[key]
        except KeyError:
            value = default
        return value

    def __contains__(self, key):
        """Report whether key, or its str() form, is a stored key."""
        # Test against keys() rather than `in self`, which would call this
        # method again.
        stored = self.keys()
        if key in stored:
            return True
        return str(key) in stored
    
    
d = StrKeyDict0([('2', 'two'), ('4', 'four')])
d['2']

'two'

In [13]:
d[4]

'four'

In [14]:
# The lookup below raises KeyError.  The error is caught and printed so
# that this demonstration does not stop a "Restart & Run All" pass.
try:
    d[1]
except KeyError as exc:
    print('KeyError:', exc)

KeyError: '1'

In [15]:
d.get('2')

'two'

In [16]:
d.get(4)

'four'

In [17]:
d.get(1, 'N/A')

'N/A'

In [18]:
4 in d

True

In [19]:
1 in d

False

In [20]:
'4' in d

True

In [21]:
ct = collections.Counter('abracadabra')
ct

Counter({'a': 5, 'b': 2, 'r': 2, 'c': 1, 'd': 1})

In [22]:
ct.update('aaaaazzz')
ct

Counter({'a': 10, 'b': 2, 'r': 2, 'c': 1, 'd': 1, 'z': 3})

In [23]:
ct.most_common(2)

[('a', 10), ('z', 3)]

In [24]:
class StrKeyDict(collections.UserDict):
    """UserDict that stores every key as its str() form."""

    def __missing__(self, key):
        """Retry a failed lookup with str(key); raise KeyError for str keys."""
        if not isinstance(key, str):
            return self[str(key)]
        # A str key that is missing has no further fallback.
        raise KeyError(key)

    def __contains__(self, key):
        """Report whether str(key) is present in the underlying data dict."""
        text_key = str(key)
        return text_key in self.data

    def __setitem__(self, key, item):
        """Store item under str(key) in the underlying data dict."""
        text_key = str(key)
        self.data[text_key] = item

In [25]:
from types import MappingProxyType

# MappingProxyType wraps d in a view: reads go through to d, so later
# changes made to d are visible through d_proxy.
d = {1: 'A'}
d_proxy = MappingProxyType(d)
d_proxy

mappingproxy({1: 'A'})

In [26]:
d_proxy[1]

'A'

In [27]:
# Item assignment through the proxy is rejected with TypeError.  The error
# is caught and printed so that this demonstration does not stop a
# "Restart & Run All" pass.
try:
    d_proxy[2] = 'x'
except TypeError as exc:
    print('TypeError:', exc)

TypeError: 'mappingproxy' object does not support item assignment

In [29]:
d[2] = 'B'
d_proxy

mappingproxy({1: 'A', 2: 'B'})

In [30]:
d_proxy[2]

'B'

In [31]:
l = ['spam', 'spam', 'eggs', 'spam']
set(l)

{'eggs', 'spam'}

In [32]:
list(set(l))

['spam', 'eggs']

In [33]:
s = {1}
type(s)

set

In [34]:
s

{1}

In [35]:
s.pop()

1

In [36]:
s

set()

In [37]:
from dis import dis

dis('{1}')

  1           0 LOAD_CONST               0 (1)
              2 BUILD_SET                1
              4 RETURN_VALUE


In [38]:
dis('set([1])')

  1           0 LOAD_NAME                0 (set)
              2 LOAD_CONST               0 (1)
              4 BUILD_LIST               1
              6 CALL_FUNCTION            1
              8 RETURN_VALUE


In [39]:
frozenset(range(10))

frozenset({0, 1, 2, 3, 4, 5, 6, 7, 8, 9})

In [40]:
from unicodedata import name

{chr(i) for i in range(32, 256) if 'SIGN' in name(chr(i), '')}

{'#',
 '$',
 '%',
 '+',
 '<',
 '=',
 '>',
 '¢',
 '£',
 '¤',
 '¥',
 '§',
 '©',
 '¬',
 '®',
 '°',
 '±',
 'µ',
 '¶',
 '×',
 '÷'}

In [43]:
hash(1)

1

In [46]:
# (dial code, country name) pairs.
DIAL_CODES = [
    (86, 'China'),
    (91, 'India'),
    (1, 'United States'),
    (62, 'Indonesia'),
    (55, 'Brazil'),
    (92, 'Pakistan'),
    (880, 'Bangladesh'),
    (234, 'Nigeria'),
    (7, 'Russia'),
    (81, 'Japan'),
]

# The same pairs loaded in three different orders.
d1 = dict(DIAL_CODES)  # order as listed
d2 = dict(sorted(DIAL_CODES))  # sorted by dial code
d3 = dict(sorted(DIAL_CODES, key=lambda pair: pair[1]))  # sorted by country name

for label, mapping in (('d1:', d1), ('d2:', d2), ('d3:', d3)):
    print(label, mapping.keys())

# Key order differs, yet the dicts compare equal: equality looks at
# contents only.
assert d1 == d2 == d3

d1: dict_keys([86, 91, 1, 62, 55, 92, 880, 234, 7, 81])
d2: dict_keys([1, 7, 55, 62, 81, 86, 91, 92, 234, 880])
d3: dict_keys([880, 55, 86, 91, 62, 81, 234, 92, 7, 1])
