In [6]:
# What is hashable?
# A tuple is hashable only when every item it contains is hashable.
tt = (1, 2, (30, 40))
print(hash(tt))
tf = (1, 2, frozenset([30, 40]))
print(hash(tf))
tl = (1, 2, [30, 40])
# The nested list makes this tuple unhashable. The TypeError is the point of
# the demonstration, so catch and display it instead of aborting the run.
try:
    print(hash(tl))
except TypeError as exc:
    print("TypeError:", exc)

8027212646858338501
-4118419923444501110


TypeError: unhashable type: 'list'

In [9]:
# Creating dictionary: five spellings that all build the same mapping
a = dict(one=1, two=2, three=3)                     # keyword arguments
b = {'one': 1, 'two': 2, 'three': 3}                # literal
c = dict(zip(['one', 'two', 'three'], [1, 2, 3]))   # keys zipped with values
d = dict([('two', 2), ('one', 1), ('three', 3)])    # iterable of (key, value) pairs
e = dict({'three': 3, 'one': 1, 'two': 2})          # copy of another mapping
# Equality ignores the order in which the items were supplied.
a == b == c == d == e

True

In [13]:
# KWO: dict append and update
# The first assignment to '1' inserts the key; the second overwrites its value.
index = {}
for value in (1, 2):
    index['1'] = value
    print(index)

{'1': 1}
{'1': 2}


In [11]:
# Example 3-1. Examples of dict comprehensions
DIAL_CODES = [  # (dial code, country name) pairs
    (86, 'China'),
    (91, 'India'),
    (1, 'United States'),
    (62, 'Indonesia'),
    (55, 'Brazil'),
    (92, 'Pakistan'),
    (880, 'Bangladesh'),
    (234, 'Nigeria'),
    (7, 'Russia'),
    (81, 'Japan'),
]
# Swap each pair so that the country name becomes the key.
country_code = {pair[1]: pair[0] for pair in DIAL_CODES}
print('country_code =', country_code)
print()
# Swap back, upper-casing the names and keeping only dial codes below 66.
{dial: nation.upper() for nation, dial in country_code.items() if dial < 66}

country_code = {'Russia': 7, 'Nigeria': 234, 'Bangladesh': 880, 'United States': 1, 'India': 91, 'Japan': 81, 'China': 86, 'Indonesia': 62, 'Pakistan': 92, 'Brazil': 55}



{1: 'UNITED STATES', 7: 'RUSSIA', 55: 'BRAZIL', 62: 'INDONESIA'}

In [22]:
# Example 3-2 This example generates an index like that in Example 3-3.
# BEGIN INDEX0
"""Build an index mapping word -> list of occurrences"""

import sys
import re

# Raw string: in a plain literal '\w' is an invalid escape sequence, which
# newer interpreters warn about. The compiled pattern is unchanged.
WORD_RE = re.compile(r'\w+')

index = {}
with open('files/Example_3-2.txt', encoding='utf-8') as fp:
    for line_no, line in enumerate(fp, 1):  # line numbers start at 1
        for match in WORD_RE.finditer(line):
            print(match)  # show the raw match object for every word found
            word = match.group()
            column_no = match.start()+1  # start() is 0-based; report 1-based columns
            location = (line_no, column_no)
            # this is ugly; coded like this to make a point
            occurrences = index.get(word, [])  # <1> list for word, or a new [] if absent
            occurrences.append(location)       # <2> record this occurrence
            index[word] = occurrences          # <3> store the list back under word

# print in alphabetical order
for word in sorted(index, key=str.upper):  # <4> upper-cased keys give a case-insensitive sort
    print(word, index[word])
# END INDEX0


<_sre.SRE_Match object; span=(0, 5), match='Wlazł'>
<_sre.SRE_Match object; span=(6, 11), match='kotek'>
<_sre.SRE_Match object; span=(12, 14), match='na'>
<_sre.SRE_Match object; span=(15, 21), match='płotek'>
<_sre.SRE_Match object; span=(0, 1), match='i'>
<_sre.SRE_Match object; span=(2, 7), match='mruga'>
<_sre.SRE_Match object; span=(0, 5), match='ładna'>
<_sre.SRE_Match object; span=(6, 8), match='to'>
<_sre.SRE_Match object; span=(9, 17), match='piosenka'>
<_sre.SRE_Match object; span=(0, 8), match='niedługa'>
<_sre.SRE_Match object; span=(9, 17), match='niedługa'>
<_sre.SRE_Match object; span=(0, 8), match='niedługa'>
<_sre.SRE_Match object; span=(9, 17), match='niedługa'>
<_sre.SRE_Match object; span=(18, 26), match='niedługa'>
i [(2, 1)]
kotek [(1, 7)]
mruga [(2, 3)]
na [(1, 13)]
niedługa [(4, 1), (4, 10), (5, 1), (5, 10), (5, 19)]
piosenka [(3, 10)]
płotek [(1, 16)]
to [(3, 7)]
Wlazł [(1, 1)]
ładna [(3, 1)]


In [26]:
# KWO: match.group
m = re.match(r"(\w+) (\w+)", "Isaac Newton, physicist")
print(m)
# Group 0 is the entire match; groups 1 and 2 are the parenthesized subgroups.
for group_no in (0, 1, 2):
    print(m.group(group_no))
print(m.group(1, 2))    # Multiple arguments give us a tuple.

<_sre.SRE_Match object; span=(0, 12), match='Isaac Newton'>
Isaac Newton
Isaac
Newton
('Isaac', 'Newton')


In [28]:
# Example 3-4. index.py uses dict.setdefault to fetch and update a list of word occurrences
# BEGIN INDEX_DEFAULT
"""Build an index mapping word -> list of occurrences"""

import sys
import re
import collections

# Raw string: in a plain literal '\w' is an invalid escape sequence, which
# newer interpreters warn about. The compiled pattern is unchanged.
WORD_RE = re.compile(r'\w+')

index = {}
with open('files/Example_3-2.txt', encoding='utf-8') as fp:
    for line_no, line in enumerate(fp, 1):  # line numbers start at 1
        for match in WORD_RE.finditer(line):
            word = match.group()
            column_no = match.start()+1  # start() is 0-based; report 1-based columns
            location = (line_no, column_no)
            # <1> fetch the list stored for word (inserting [] first when the
            # word is new) and append to it, all in a single statement
            index.setdefault(word, []).append(location)

# print in alphabetical order
for word in sorted(index, key=str.upper):  # case-insensitive ordering
    print(word, index[word])
# END INDEX_DEFAULT

i [(2, 1)]
kotek [(1, 7)]
mruga [(2, 3)]
na [(1, 13)]
niedługa [(4, 1), (4, 10), (5, 1), (5, 10), (5, 19)]
piosenka [(3, 10)]
płotek [(1, 16)]
to [(3, 7)]
Wlazł [(1, 1)]
ładna [(3, 1)]


In [29]:
# Example 3-5. index.py uses defaultdict to fetch and update a list of word occurrences
# BEGIN INDEX_DEFAULT
"""Build an index mapping word -> list of occurrences"""

import sys
import re
import collections

# Raw string: in a plain literal '\w' is an invalid escape sequence, which
# newer interpreters warn about. The compiled pattern is unchanged.
WORD_RE = re.compile(r'\w+')

index = collections.defaultdict(list)     # <1> missing keys are created with list()
with open('files/Example_3-2.txt', encoding='utf-8') as fp:
    for line_no, line in enumerate(fp, 1):  # line numbers start at 1
        for match in WORD_RE.finditer(line):
            word = match.group()
            column_no = match.start()+1  # start() is 0-based; report 1-based columns
            location = (line_no, column_no)
            index[word].append(location)  # <2> a new word gets an empty list first

# print in alphabetical order
for word in sorted(index, key=str.upper):  # case-insensitive ordering
    print(word, index[word])
# END INDEX_DEFAULT

i [(2, 1)]
kotek [(1, 7)]
mruga [(2, 3)]
na [(1, 13)]
niedługa [(4, 1), (4, 10), (5, 1), (5, 10), (5, 19)]
piosenka [(3, 10)]
płotek [(1, 16)]
to [(3, 7)]
Wlazł [(1, 1)]
ładna [(3, 1)]


In [34]:
# Example 3-6. When searching for a non-string key, StrKeyDict0 converts it to str when it's not found.
class StrKeyDict0(dict):
    """A dict that retries a failed lookup with the key converted to str."""

    def __missing__(self, key):
        """Handle a key that `d[key]` did not find.

        A non-str key is looked up again as `str(key)`. A str key has no
        fallback, so KeyError is raised; this also stops the retry from
        recursing forever.
        """
        if not isinstance(key, str):
            return self[str(key)]
        raise KeyError(key)

    def get(self, key, default=None):
        """Return `self[key]`, or `default` when the lookup raises KeyError.

        Going through `self[key]` lets `__missing__` apply the str fallback.
        """
        try:
            value = self[key]
        except KeyError:
            return default
        return value

    def __contains__(self, key):
        """Return True if `key` or `str(key)` is one of the stored keys.

        The test is made against `self.keys()` rather than `self`, because
        `key in self` would call this method again.
        """
        stored = self.keys()
        return key in stored or str(key) in stored
    
# Tests for item retrieval using `d[key]` notation::
d = StrKeyDict0([('2', 'two'), ('4', 'four')])
print("d['2']", d['2'])
print("d[4]", d[4])
# Neither 1 nor '1' is a key, so this lookup raises KeyError. That failure is
# the demonstration: catch and display it so the rest of the notebook still runs.
try:
    print("d[1]", d[1])
except KeyError as exc:
    print("KeyError:", exc)

d['2'] two
d[4] four


KeyError: '1'

In [36]:
# Example 3-6. - continued
# Tests for item retrieval using `d.get(key)` notation::
for found in (d.get('2'), d.get(4), d.get(1, 'N/A')):
    print(found)

# Tests for the `in` operator::
for present in (2 in d, 1 in d):
    print(present)

two
four
N/A
True
False


In [50]:
# KWO: collections.OrderedDict 
import collections
my_ord_dict = collections.OrderedDict()
for number in (2, 1, 3):            # insert out of numeric order on purpose
    my_ord_dict[number] = number
print('my_ord_dict =', my_ord_dict)
my_ord_dict.popitem(last=False)     # last=False removes the first-inserted pair
print('my_ord_dict =', my_ord_dict)
my_ord_dict.popitem()               # the default removes the last-inserted pair
print('my_ord_dict =', my_ord_dict)


my_ord_dict = OrderedDict([(2, 2), (1, 1), (3, 3)])
my_ord_dict = OrderedDict([(1, 1), (3, 3)])
my_ord_dict = OrderedDict([(1, 1)])


In [51]:
# KWO: plain dict
# NOTE(review): which pair popitem() removes depends on the interpreter.
# Since Python 3.7 a plain dict keeps insertion order and popitem() is LIFO,
# so a fresh run may not match output recorded on an older version - confirm.
my_dict = {}
for number in (2, 1, 3):
    my_dict[number] = number
print('my_dict =', my_dict)
my_dict.popitem()
print('my_dict =', my_dict)
my_dict.popitem()
print('my_dict =', my_dict)


my_dict = {1: 1, 2: 2, 3: 3}
my_dict = {2: 2, 3: 3}
my_dict = {3: 3}


In [85]:
# KWO: ChainMap
from collections import ChainMap

zoo_layers = ({'zebra': 'black'}, {'elephant': 'blue'}, {'lion': 'yellow'})
d = ChainMap(*zoo_layers)
print(d)
# Writes always land in the first mapping. 'lion' already exists two levels
# down, yet it is added to the first mapping and the deeper entry stays as is;
# the brand-new key 'snake' is stored in the first mapping too.
for animal, colour in (('lion', 'orange'), ('snake', 'red')):
    d[animal] = colour
    print(d)

ChainMap({'zebra': 'black'}, {'elephant': 'blue'}, {'lion': 'yellow'})
ChainMap({'zebra': 'black', 'lion': 'orange'}, {'elephant': 'blue'}, {'lion': 'yellow'})
ChainMap({'zebra': 'black', 'lion': 'orange', 'snake': 'red'}, {'elephant': 'blue'}, {'lion': 'yellow'})


In [86]:
# KWO: ChainMap
from collections import ChainMap
c = ChainMap()            # Create root context
print(c)
d = c.new_child()         # Create nested child context on top of c's mapping
for colour in ('orange', 'yellow'):
    c['lion'] = colour    # the second pass overwrites the first value
    print(c)
d['snake'] = 'red'        # stored in the child's own (first) mapping
print(d)
print(d.parents)          # the chain without the child's own mapping

ChainMap({})
ChainMap({'lion': 'orange'})
ChainMap({'lion': 'yellow'})
ChainMap({'snake': 'red'}, {'lion': 'yellow'})
ChainMap({'lion': 'yellow'})


In [91]:
# KWO: collections.Counter
import collections
ct = collections.Counter()
# Feed the counter two batches of letters, showing the tallies after each.
for letters in ('abracadabra', 'aaaaazzz'):
    ct.update(letters)
    print(ct)
print(ct.most_common(2))  # the two most frequent letters with their counts

Counter({'a': 5, 'r': 2, 'b': 2, 'd': 1, 'c': 1})
Counter({'a': 10, 'z': 3, 'b': 2, 'r': 2, 'd': 1, 'c': 1})
[('a', 10), ('z', 3)]


In [112]:
# Example 3-8. StrKeyDict always converts non-string keys to str — on insert, update and lookup.
import collections

class StrKeyDict(collections.UserDict):
    """A UserDict whose keys are always stored as str.

    Keys are converted on item assignment, and lookups with a non-str key
    are retried with `str(key)`.
    """

    def __missing__(self, key):
        """Retry a failed lookup with `str(key)`; raise KeyError for a str key."""
        if not isinstance(key, str):
            return self[str(key)]
        raise KeyError(key)

    def __contains__(self, key):
        """Return True if `str(key)` is a key of the underlying `self.data`."""
        key_as_str = str(key)
        return key_as_str in self.data

    def __setitem__(self, key, item):
        """Store `item` in `self.data` under `str(key)`."""
        key_as_str = str(key)
        self.data[key_as_str] = item

# Test for initializer: keys are converted to `str`.
d = StrKeyDict([(2, 'two'), ('4', 'four')])
print("sorted(d.keys()) = ", sorted(d.keys()))
# Tests for item retrieval using `d[key]` notation::
print("d['2'] =", d['2'])
print("d[4] =", d[4])
# '1' is not a key, so this lookup raises KeyError. That failure is the
# demonstration: catch and display it so the rest of the notebook still runs.
try:
    print("d['1'] =", d['1'])
except KeyError as exc:
    print("KeyError:", exc)


sorted(d.keys()) =  ['2', '4']
d['2'] = two
d[4] = four


KeyError: '1'

In [110]:
# Example 3-8. - continued
# Tests for item retrieval using `d.get(key)` notation, then for the `in` operator::
for label, outcome in (
    ("d.get('2') =", d.get('2')),
    ("d.get(4) =", d.get(4)),
    ("d.get(1, 'N/A') =", d.get(1, 'N/A')),
    ("2 in d =", 2 in d),
    ("1 in d =", 1 in d),
):
    print(label, outcome)
# Test for item assignment using non-string key::
d[0] = 'zero'
print("d['0'] =", d['0'])

d.get('2') = two
d.get(4) = four
d.get(1, 'N/A') = N/A
2 in d = True
1 in d = False
d['0'] = zero


In [114]:
# Example 3-8. - continued
# Tests for update using a `dict` or a sequence of pairs::
d.update({6:'six', '8':'eight'})
print("sorted(d.keys()) = ", sorted(d.keys()))
d.update([(10, 'ten'), ('12', 'twelve')])
print("sorted(d.keys()) = ", sorted(d.keys()))
# A list of bare ints is not a sequence of (key, value) pairs, so update()
# raises TypeError. Catch and display it so the notebook keeps running.
try:
    d.update([1, 3, 5])
except TypeError as exc:
    print("TypeError:", exc)

sorted(d.keys()) =  ['10', '12', '2', '4', '6', '8']
sorted(d.keys()) =  ['10', '12', '2', '4', '6', '8']


TypeError: 'int' object is not iterable

In [120]:
# Example 3-9. MappingProxyType builds a read-only mappingproxy instance from a dict.
from types import MappingProxyType
d = {1: 'A'}
d_proxy = MappingProxyType(d)  # read-only view of d
print("d_proxy =", d_proxy)
print("d_proxy[1] =", d_proxy[1])
d[2] = 'B'  # changes made to the underlying dict show through the proxy
print("d_proxy[2] =", d_proxy[2])
# The proxy rejects item assignment with TypeError. That failure is the
# demonstration: catch and display it so the rest of the notebook still runs.
try:
    d_proxy[2] = 'x'
except TypeError as exc:
    print("TypeError:", exc)

d_proxy = {1: 'A'}
d_proxy[1] = A
d_proxy[2] = B


TypeError: 'mappingproxy' object does not support item assignment

In [121]:
# Set theory: building a set from a list drops the duplicate items
l = ['spam', 'spam', 'eggs', 'spam']
unique_items = set(l)
print("set(l) =", unique_items)
print("list(set(l)) =", list(unique_items))

set(l) = {'eggs', 'spam'}
list(set(l)) = ['eggs', 'spam']


In [20]:
# Example 3-10. Count occurrences of needles in a haystack, both of type set
needles = {1, 2, 3}
haystack = {2, 3, 4}
common = needles & haystack
print("len(needles & haystack) =", len(common))

# Example 3-12. ... for any iterable types
needles = (1, 2, 3)
haystack = [2, 3, 4]
# Operator form: both operands must be converted to sets first.
print("len(set(needles) & set(haystack)) =", len(set(needles) & set(haystack)))
# Method form: the argument may be any iterable.
print("len(set(needles).intersection(haystack)) =", len(set(needles).intersection(haystack)))
print()
print("frozenset(range(10)) =", frozenset(range(10)))

len(needles & haystack) = 2
len(set(needles) & set(haystack)) = 2
len(set(needles).intersection(haystack)) = 2

frozenset(range(10)) = frozenset({0, 1, 2, 3, 4, 5, 6, 7, 8, 9})


In [12]:
# set literals
s = {1}
print("type(s) =", type(s))
print("s =", s)
removed = s.pop()  # takes out the only element, leaving the set empty
print("s.pop() =", removed)
print("s =", s)    # an empty set is displayed as set(), not {}

type(s) = <class 'set'>
s = {1}
s.pop() = 1
s = set()


In [18]:
from dis import dis
# Disassemble both spellings of a one-element set to compare their bytecode.
set_literal, set_call = '{1}', 'set([2])'
dis(set_literal)
print()
dis(set_call)

  1           0 LOAD_CONST               0 (1)
              3 BUILD_SET                1
              6 RETURN_VALUE

  1           0 LOAD_NAME                0 (set)
              3 LOAD_CONST               0 (2)
              6 BUILD_LIST               1
              9 CALL_FUNCTION            1 (1 positional, 0 keyword pair)
             12 RETURN_VALUE


In [34]:
# set comprehensions
# Example 3-13. Build a set of Latin-1 characters that have the word “SIGN” in their Unicode names.
from unicodedata import name
# Characters for code points 32..255; name(char, '') yields '' for unnamed ones.
latin1_chars = [chr(code) for code in range(32, 256)]
chr_set = {char for char in latin1_chars if 'SIGN' in name(char, '')}
print("chr_set =\n", chr_set)
print()
# Same filter, but mapping each character to its Unicode name.
chr_dict = {char: name(char, '') for char in latin1_chars if 'SIGN' in name(char, '')}
print("chr_dict =\n", chr_dict)

chr_set =
 {'°', '×', '¥', '¬', '¢', '¤', '©', '=', '±', '+', 'µ', '%', '£', '¶', '§', '>', '®', '<', '÷', '#', '$'}

chr_dict =
 {'¶': 'PILCROW SIGN', '°': 'DEGREE SIGN', '±': 'PLUS-MINUS SIGN', 'µ': 'MICRO SIGN', '¬': 'NOT SIGN', '¢': 'CENT SIGN', '¤': 'CURRENCY SIGN', '=': 'EQUALS SIGN', '+': 'PLUS SIGN', '%': 'PERCENT SIGN', '÷': 'DIVISION SIGN', '£': 'POUND SIGN', '×': 'MULTIPLICATION SIGN', '¥': 'YEN SIGN', '§': 'SECTION SIGN', '©': 'COPYRIGHT SIGN', '®': 'REGISTERED SIGN', '<': 'LESS-THAN SIGN', '>': 'GREATER-THAN SIGN', '#': 'NUMBER SIGN', '$': 'DOLLAR SIGN'}


In [37]:
# KWO: set methods also accept other iterables (here a tuple and a list)
a, b, c = {1, 2, 3}, (11, 12), [21, 22]
a.union(b, c)

{1, 2, 3, 11, 12, 21, 22}

In [46]:
# KWO: set methods
s1 = {1, 2, 3, 4, 5}
s2 = {3, 4, 5, 6, 7}
# Each operator is shown next to its method equivalent where one is used.
for label, outcome in (
    ("s1 & s2 =", s1 & s2),
    ("s1 | s2 =", s1 | s2),
    ("s1 - s2 =", s1 - s2),
    ("s1.difference(s2) =", s1.difference(s2)),
    ("s1 ^ s2 =", s1 ^ s2),
    ("s1.symmetric_difference(s2) =", s1.symmetric_difference(s2)),
):
    print(label, outcome)

s1 & s2 = {3, 4, 5}
s1 | s2 = {1, 2, 3, 4, 5, 6, 7}
s1 - s2 = {1, 2}
s1.difference(s2) = {1, 2}
s1 ^ s2 = {1, 2, 6, 7}
s1.symmetric_difference(s2) = {1, 2, 6, 7}


In [51]:
# Example 3-17. dialcodes.py fills three dictionaries with the same data
DIAL_CODES = [  # (dial code, country name) pairs
    (86, 'China'),
    (91, 'India'),
    (1, 'United States'),
    (62, 'Indonesia'),
    (55, 'Brazil'),
    (92, 'Pakistan'),
    (880, 'Bangladesh'),
    (234, 'Nigeria'),
    (7, 'Russia'),
    (81, 'Japan'),
]

# <1> pairs taken in the order they are listed
d1 = dict(DIAL_CODES)
print('d1:', d1.keys())
# <2> pairs sorted by dial code
pairs_by_code = sorted(DIAL_CODES)
d2 = dict(pairs_by_code)
print('d2:', d2.keys())
# <3> pairs sorted by country name
pairs_by_country = sorted(DIAL_CODES, key=lambda x: x[1])
d3 = dict(pairs_by_country)
print('d3:', d3.keys())
# <4> the three dicts hold the same items, so they compare equal
assert d1 == d2 == d3

d1: dict_keys([880, 1, 86, 55, 7, 234, 91, 92, 62, 81])
d2: dict_keys([880, 1, 91, 86, 81, 55, 234, 7, 92, 62])
d3: dict_keys([880, 81, 1, 86, 55, 7, 234, 91, 92, 62])
