## Chapter 2. An Array of Sequences

### List Comprehensions and Generator Expressions
#### List Comprehensions and Readability

In [16]:
# Example 2-1. Build a list of Unicode codepoints from a string

symbols = '$¢£¥€¤'
codes = []  # filled one code point at a time by the loop below
for symbol in symbols:
    codes.append(ord(symbol))  # ord() returns the code point of a one-character string
codes

[36, 162, 163, 165, 8364, 164]

In [17]:
# Example 2-2. Build a list of Unicode codepoints from a string, take two

symbols = '$¢£¥€¤'
codes = [ord(symbol) for symbol in symbols]  # listcomp: builds the whole list in one expression, no explicit append loop
codes

[36, 162, 163, 165, 8364, 164]

#### Listcomps Versus map and filter

In [1]:
# Example 2-3. The same list built by a listcomp and a map/filter composition

symbols = '$¢£¥€¤'
# Listcomp: keep only the code points greater than 127.
beyond_ascii = [ord(s) for s in symbols if ord(s) > 127]
print(beyond_ascii)
# Equivalent composition: map() applies ord, filter() keeps the values > 127.
beyond_ascii = list(filter(lambda c: c > 127, map(ord, symbols)))
print(beyond_ascii)

[162, 163, 165, 8364, 164]
[162, 163, 165, 8364, 164]


In [18]:
import timeit

TIMES = 1000000  # how many times each statement is executed per measurement

# Setup code handed to timeit; it defines the names the timed statements use.
SETUP = """
symbols = '$¢£¥€¤'
def non_ascii(c):
    return c > 127
"""

def clock(label, cmd):
    """Time the statement `cmd` and print `label` followed by the timings.

    `cmd` is passed to timeit.repeat together with SETUP; each result is
    the time taken by TIMES executions of `cmd` and is printed with three
    decimal places.
    """
    res = timeit.repeat(cmd, setup=SETUP, number=TIMES)
    print(label, *('{:.3f}'.format(x) for x in res))

# Four ways of building the same list of code points greater than 127.
clock('listcomp        :', '[ord(s) for s in symbols if ord(s) > 127]')
clock('listcomp + func :', '[ord(s) for s in symbols if non_ascii(ord(s))]')
clock('filter + lambda :', 'list(filter(lambda c: c > 127, map(ord, symbols)))')
clock('filter + func   :', 'list(filter(non_ascii, map(ord, symbols)))')

listcomp        : 0.509 0.522 0.542 0.515 0.514
listcomp + func : 0.807 0.839 0.845 0.849 0.853
filter + lambda : 0.774 0.774 0.776 0.784 0.782
filter + func   : 0.692 0.718 0.725 0.726 0.726


#### Cartesian Products

In [19]:
# Example 2-4. Cartesian product using a list comprehension

colors = ['black', 'white']
sizes = ['S', 'M', 'L']
# The first `for` clause is the outer loop, so the pairs are grouped by color, then size.
tshirts = [(color, size) for color in colors for size in sizes]
tshirts

[('black', 'S'),
 ('black', 'M'),
 ('black', 'L'),
 ('white', 'S'),
 ('white', 'M'),
 ('white', 'L')]

In [20]:
# Nested for-loops that visit the (color, size) pairs color by color.
# `colors` and `sizes` must already be defined by an earlier cell.
for color in colors:
    for size in sizes:
        print((color, size))

('black', 'S')
('black', 'M')
('black', 'L')
('white', 'S')
('white', 'M')
('white', 'L')


In [21]:
# Listcomp split over two lines; `colors` and `sizes` come from an earlier cell.
tshirts = [(color, size) for color in colors 
                         for size in sizes]  # one `for` clause per line makes the nesting order easier to read
tshirts

[('black', 'S'),
 ('black', 'M'),
 ('black', 'L'),
 ('white', 'S'),
 ('white', 'M'),
 ('white', 'L')]

#### Generator Expressions

To initialize tuples, arrays, and other types of sequences, you could also start from a
listcomp, but a genexp (generator expression) saves memory because it yields items
one by one using the iterator protocol instead of building a whole list just to feed
another constructor.


In [2]:
# Example 2-5. Initializing a tuple and an array from a generator expression

symbols = '$¢£¥€¤'
# A genexp that is the only argument of a call needs no parentheses of its own.
tuple(ord(symbol) for symbol in symbols)

(36, 162, 163, 165, 8364, 164)

In [3]:
import array
# The call takes two arguments, so the genexp must be wrapped in its own parentheses.
# The first argument is the typecode of the array; `symbols` comes from an earlier cell.
array.array('I', (ord(symbol) for symbol in symbols))

array('I', [36, 162, 163, 165, 8364, 164])

In [4]:
# Example 2-6. Cartesian product in a generator expression

colors = ['black', 'white']
sizes = ['S', 'M', 'L']
# The genexp feeds the loop one formatted string at a time.
for tshirt in ('%s %s' % (c, s) for c in colors for s in sizes):  # a list with all six T-shirts is never built
    print(tshirt)

black S
black M
black L
white S
white M
white L


### Tuples Are Not Just Immutable Lists
#### Tuples as Records with no field names

In [25]:
# Example 2-7. Tuples used as records

lax_coordinates = (33.9425, -118.408056)  # a two-field record: the position of each item gives it its meaning
city, year, pop, chg, area = ('Tokyo', 2003, 32450, 0.66, 8014)  # one record unpacked into five names
traveler_ids = [('USA', '31195855'), ('BRA', 'CE342567'), ('ESP', 'XDA205856')]  # a list of two-field records
for passport in sorted(traveler_ids):  # tuples compare item by item, so the first field drives the order
    print('%s/%s' % passport)  # the % operator fills one %s slot per tuple item

BRA/CE342567
ESP/XDA205856
USA/31195855


In [26]:
# Unpack each record in the loop header; `traveler_ids` comes from an earlier cell.
for country, _ in traveler_ids:  # `_` is a dummy name for the field that is ignored
    print(country)

USA
BRA
ESP


#### Tuples as Immutable Lists

In [4]:
a = (10, 'alpha', [1, 2])  # Tuples with mutable items can be a source of bugs.
b = (10, 'alpha', [1, 2])
print(a == b, a is b)  # equal values, but two distinct tuple objects

print(id(b[-1]), id(b))
b[-1].append(99)  # mutates the list referenced by b; the tuple itself is not replaced
print(id(b[-1]), id(b))  # same ids as before the append
print(a == b, a is b)  # the two tuples no longer compare equal
print(b)

True False
4375043840 4374849216
4375043840 4374849216
False False
(10, 'alpha', [1, 2, 99])


In [5]:
def fixed(o):
    """Report whether `o` is hashable.

    Returns True when hash(o) succeeds and False when it raises TypeError.
    An object is only hashable if its value can never change.
    """
    try:
        hash(o)
    except TypeError:
        hashable = False
    else:
        hashable = True
    return hashable

tf = (10, 'alpha', (1, 2))  # hashable: usable as a dict key or as a set element
tm = (10, 'alpha', [1, 2])  # holds a list, so hashing it fails
print(fixed(tf), fixed(tm))

True False


#### Tuple Unpacking

In [27]:
# `lax_coordinates` is the two-item tuple defined in an earlier cell.
latitude, longitude = lax_coordinates  # tuple unpacking: one target name per item
print(latitude)
print(longitude)

33.9425
-118.408056


In [28]:
a = 3
b = 2
b, a = a, b  # swap without a temporary variable: the right-hand side is evaluated first, then unpacked
a, b

(2, 3)

In [7]:
print(divmod(20, 8))
t = (20, 8)
t = [20, 8]  # t is rebound to a list; the calls below unpack this list
print(divmod(*t))  # unpacking: each item of t becomes one positional argument
quotient, remainder = divmod(*t)  # the returned pair is unpacked into two names
quotient, remainder

(2, 4)
(2, 4)


(2, 4)

In [4]:
import os
# os.path.split returns a (head, tail) pair; the head is discarded via the dummy name `_`.
_, filename = os.path.split('/home/hjk/.ssh/idrsa.pub')
filename

'idrsa.pub'

#### Using * to grab excess Items

In [8]:
# Using * to grab excess items in Python3

a, b, *rest = range(5)  # the starred target collects the leftover items into a list
print((a, b, rest))
a, b, *rest = range(3)
print((a, b, rest))
a, b, *rest = range(2)  # nothing is left over, so rest is an empty list
print((a, b, rest))

(0, 1, [2, 3, 4])
(0, 1, [2])
(0, 1, [])


In [32]:
# The starred target does not have to come last.
a, *body, c, d = range(5)  # starred target in the middle
print((a, body, c, d))
*head, b, c, d = range(5)  # starred target first
print((head, b, c, d))

(0, [1, 2], 3, 4)
([0, 1], 2, 3, 4)


#### Unpacking with * in Function Calls and Sequence Literals

In [9]:
def fun(a, b, c, d, *rest):
    """Return the four required positional arguments plus a tuple of any extras."""
    return a, b, c, d, rest

# Several * unpackings can be mixed with plain arguments in a single call.
print(fun(*[1, 2], 3, *range(4, 7)))
print()

# * also works inside tuple, list and set literals. The results are bound to
# names that do not shadow the built-in tuple/list/set types, so cells that
# call those built-ins afterwards keep working.
unpacked_tuple = *range(4), 4
unpacked_list = [*range(4), 4]
unpacked_set = {*range(4), 4, *(5, 6, 7)}

print(unpacked_tuple)
print(unpacked_list)
print(unpacked_set)

(1, 2, 3, 4, (5, 6))

(0, 1, 2, 3, 4)
[0, 1, 2, 3, 4]
{0, 1, 2, 3, 4, 5, 6, 7}


#### Nested Tuple Unpacking

In [5]:
# Example 2-8. Unpacking nested tuples to access the longitude
# Each record has four fields; the last one is itself a two-item tuple.
metro_areas = [
    ('Tokyo', 'JP', 36.933, (35.689722, 139.691667)),
    ('Delhi NCR', 'IN', 21.935, (28.613889, 77.208889)),
    ('Mexico City', 'MX', 20.142, (19.433333, -99.133333)),
    ('New York-Newark', 'US', 20.104, (40.808611, -74.020386)),
    ('Sao Paulo', 'BR', 19.649, (-23.547778, -46.635833)),
]

print('{:15} | {:^9} | {:^9}'.format('', 'lat.', 'long.'))  # header row with centered column titles
fmt = '{:15} | {:9.4f} | {:9.4f}'

# The nested (latitude, longitude) target unpacks the inner tuple in the same step.
for name, cc, pop, (latitude, longitude) in metro_areas:
    if longitude <= 0:  # only records whose longitude is zero or negative are printed
        print(fmt.format(name, latitude, longitude))

                |   lat.    |   long.  
Mexico City     |   19.4333 |  -99.1333
New York-Newark |   40.8086 |  -74.0204
Sao Paulo       |  -23.5478 |  -46.6358


### Pattern Matching with Sequences

In [8]:
# Example 2-9. Method from an imaginary Robot class

def handle_command(self, message):
    match message:
        case ['BEEPER', frequency, times]:
            self.beep(times, frequency)
        case ['NECK', angle]:
            self.rotate_neck(angle)
        case ['LED', ident, intensity]:
            self.leds[ident].set_brightness(ident, intensity)
        case ['LED', ident, red, green, blue]:
            self.leds[ident].set_color(ident, red, green, blue)
        case _:
            raise InvalidCommand(message)


In [11]:
# Example 2-10. Destructuring nested tuples—requires Python ≥ 3.10

metro_areas = [
    ('Tokyo', 'JP', 36.933, (35.689722, 139.691667)),  # <1>
    ('Delhi NCR', 'IN', 21.935, (28.613889, 77.208889)),
    ('Mexico City', 'MX', 20.142, (19.433333, -99.133333)),
    ('New York-Newark', 'US', 20.104, (40.808611, -74.020386)),
    ('São Paulo', 'BR', 19.649, (-23.547778, -46.635833)),
]

def main():
    print(f'{"":15} | {"latitude":>9} | {"longitude":>9}')
    for record in metro_areas:
        match record:  # <1>
            # case [name, _, _, (lat, lon)] if lon <= 0:  # <2>
            case (name, _, _, (lat, lon)) if lon <= 0:  # <2>  no diff with "( )" instead of "[ ]"
                print(f'{name:15} | {lat:9.4f} | {lon:9.4f}')

if __name__ == '__main__':
    main()

                |  latitude | longitude
Mexico City     |   19.4333 |  -99.1333
New York-Newark |   40.8086 |  -74.0204
São Paulo       |  -23.5478 |  -46.6358


### Slicing
#### Why Slices and Range Exclude the Last Item

In [13]:
l = [10, 20, 30, 40, 50, 60]
print(l[:2])  # the stop index is excluded: the first two items
print(l[2:])  # the rest, starting at index 2, with no overlap
print(l[:3])  # same split, made at index 3
print(l[3:])

[10, 20]
[30, 40, 50, 60]
[10, 20, 30]
[40, 50, 60]


#### Slice Objects

In [14]:
s = 'bicycle'
print(s[::3])  # every third character
print(s[::-1])  # a negative step walks backwards: the reversed string
print(s[::-2])  # backwards, taking every other character

bye
elcycib
eccb


In [19]:
# Example 2-11. Line items from a flat-file invoice
invoice = """
0.....6.................................40........52...55........
1909  Pimoroni PiBrella                     $17.50    3    $52.50
1489  6mm Tactile Switch x20                 $4.95    2     $9.90
1510  Panavise Jr. - PV-201                 $28.00    1    $28.00
1601  PiTFT Mini Kit 320x240                $34.95    1    $34.95
"""

# Named slice objects document which columns hold each field.
SKU = slice(0, 6)
DESCRIPTION = slice(6, 40)
UNIT_PRICE = slice(40, 52)
QUANTITY = slice(52, 55)
ITEM_TOTAL = slice(55, None)
# splitlines() produces no empty string for the final newline, so after
# skipping the leading blank line and the column ruler only real line items
# remain (split('\n') would leave a trailing '' that prints a blank row).
line_items = invoice.splitlines()[2:]

for item in line_items:
    print(item[UNIT_PRICE], item[DESCRIPTION])

    $17.50   Pimoroni PiBrella                 
     $4.95   6mm Tactile Switch x20            
    $28.00   Panavise Jr. - PV-201             
    $34.95   PiTFT Mini Kit 320x240            
 


#### Assigning to Slices

In [22]:
l = list(range(10))
print(l)

l[2:5] = [20, 30]  # three items (2, 3, 4) are replaced by two, so the list shrinks by one
print(l)

del l[5:7]  # removes the items at indexes 5 and 6
print(l)

l[3::2] = [11, 22]  # stepped slice: overwrites the items at indexes 3 and 5
print(l)

# l[2:5] = 100  # TypeError: the right-hand side of a slice assignment must be an iterable

l[2:5] = [100]  # three items (20, 11, 5) are replaced by a single one
print(l)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[0, 1, 20, 30, 5, 6, 7, 8, 9]
[0, 1, 20, 30, 5, 8, 9]
[0, 1, 20, 11, 5, 22, 9]
[0, 1, 100, 22, 9]


### Using + and * with Sequences

In [1]:
l = [1, 2, 3]
print(l * 5)  # a new list with the items of l repeated five times

print(5 * 'abcd')  # the integer may also be the left operand; works for str as well

[1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3]
abcdabcdabcdabcdabcd


In [3]:
# Beware of expressions like a * n when a is a sequence containing "mutable" items

a = [1, 2, 3]
b = [a]  # b holds a reference to a, not a copy of it

d = b * 3  # d holds three references to the same list a
print(d)

a[1] = 4  # all 2 -> 4
print(d)  # the change is visible through every item of d

b[0][0] = 10  # all 1 -> 10
print(d)  # b[0] is that same list, so again every item changes

[[1, 2, 3], [1, 2, 3], [1, 2, 3]]
[[1, 4, 3], [1, 4, 3], [1, 4, 3]]
[[10, 4, 3], [10, 4, 3], [10, 4, 3]]


#### Building Lists of Lists

In [8]:
# Example 2-14. A list with three lists of length 3 can represent a tic-tac-toe board

board = [['_'] * 3 for i in range(3)]  # ['_'] * 3 is evaluated once per iteration: three distinct rows
print(board)
board[1][2] = 'X'  # only row 1 changes
print(board)

# The same construction written as an explicit loop.
board = []
for i in range(3):
    row = ['_'] * 3  # new row in each iteration
    board.append(row)
board[2][0] = 'X'  # only row 2 changes
print(board)
# print(id(board[2][0]), id(board[2][1]), id(board[2][2]))

[['_', '_', '_'], ['_', '_', '_'], ['_', '_', '_']]
[['_', '_', '_'], ['_', '_', 'X'], ['_', '_', '_']]
[['_', '_', '_'], ['_', '_', '_'], ['X', '_', '_']]


In [5]:
# Example 2-15. A list with three references to the same list is useless

weird_board = [['_'] * 3] * 3  # The outer list is made of three references to the same inner list.
print(weird_board)
weird_board[1][2] = 'O'  # appears in all three rows, because they are one and the same list
print(weird_board)

# The same mistake written as an explicit loop: the row is created once, outside the loop.
row = ['_'] * 3
board = []
for i in range(3):
    board.append(row)  # the same row in each iteration
board[2][0] = 'X'  # appears in every row for the same reason
print(board)

[['_', '_', '_'], ['_', '_', '_'], ['_', '_', '_']]
[['_', '_', 'O'], ['_', '_', 'O'], ['_', '_', 'O']]
2094280190128 2094280190128
[['X', '_', '_'], ['X', '_', '_'], ['X', '_', '_']]


### Augmented Assignment with Sequences

In [33]:
l = [1, 2, 3]
print(id(l))

l *= 2  # on a list, *= changes the object in place
print(l)
print(id(l))  # same id, items are appended
print()

t = (1, 2, 3)
print(id(t))

t *= 2  # tuples are immutable: a new tuple is created and bound to t
print(id(t))  # id(t) is different from the above

2273112586432
[1, 2, 3, 1, 2, 3]
2273112586432

2273112846208
2273112818688


### A += Assignment Puzzler

In [10]:
# Example 2-14. A riddle
t = (1, 2, [30, 40])
# t[2] += [50, 60]  # raises TypeError (a tuple does not support item assignment), although the list is extended in place before the error
t[2].extend([50, 60])  # same effect on the list, without the exception

In [42]:
t  # the list inside the tuple now ends with 50, 60; `t[2] += [50, 60]` would leave t in this same state despite raising TypeError

(1, 2, [30, 40, 50, 60])

### list.sort and the sorted Built-In Function

In [44]:
fruits = ['grape', 'raspberry', 'apple', 'banana']
print(fruits)
print(sorted(fruits))  # sorted() returns a new list
print(fruits)  # not changed
print(sorted(fruits, reverse=True))
print(sorted(fruits, key=len))  # ordered by length; items of equal length keep their original relative order
print(sorted(fruits, key=len, reverse=True))
print(fruits)  # not changed
fruits.sort()  # sorts the list in place
print(fruits)  # changed

['grape', 'raspberry', 'apple', 'banana']
['apple', 'banana', 'grape', 'raspberry']
['grape', 'raspberry', 'apple', 'banana']
['raspberry', 'grape', 'banana', 'apple']
['grape', 'apple', 'banana', 'raspberry']
['raspberry', 'banana', 'grape', 'apple']
['grape', 'raspberry', 'apple', 'banana']
['apple', 'banana', 'grape', 'raspberry']


### Managing Ordered Sequences with bisect
#### Searching with bisect

In [73]:
# Example 2-17. bisect finds insertion points for items in a sorted sequence

import bisect
import sys

HAYSTACK = [1, 4, 5, 6, 8, 12, 15, 20, 21, 23, 23, 26, 29, 30]
NEEDLES = [0, 1, 2, 5, 8, 10, 22, 23, 29, 30, 31]

ROW_FMT = '{0:2d} @ {1:2d}    {2}{0:<2d}'

def demo(bisect_fn):
    """Print one row per needle, from the last needle to the first.

    `bisect_fn` is called as bisect_fn(HAYSTACK, needle); the index it
    returns is shown next to the needle and also sets how many vertical
    bars are drawn before the needle is repeated at the end of the row.
    """
    for needle in NEEDLES[::-1]:
        position = bisect_fn(HAYSTACK, needle)
        bars = '  |' * position
        print(ROW_FMT.format(needle, position, bars))

if __name__ == '__main__':

    # bisect_left is used only when the last command-line argument is 'left'.
    bisect_fn = bisect.bisect_left if sys.argv[-1] == 'left' else bisect.bisect

    print('DEMO:', bisect_fn.__name__)
    print('haystack ->', ' '.join('%2d' % n for n in HAYSTACK))
    demo(bisect_fn)

DEMO: bisect_right
haystack ->  1  4  5  6  8 12 15 20 21 23 23 26 29 30
31 @ 14      |  |  |  |  |  |  |  |  |  |  |  |  |  |31
30 @ 14      |  |  |  |  |  |  |  |  |  |  |  |  |  |30
29 @ 13      |  |  |  |  |  |  |  |  |  |  |  |  |29
23 @ 11      |  |  |  |  |  |  |  |  |  |  |23
22 @  9      |  |  |  |  |  |  |  |  |22
10 @  5      |  |  |  |  |10
 8 @  5      |  |  |  |  |8 
 5 @  3      |  |  |5 
 2 @  1      |2 
 1 @  1      |1 
 0 @  0    0 


In [74]:
# Given a test score, grade returns the corresponding letter grade
def grade(score, breakpoints=(60, 70, 80, 90), grades='FDCBA'):
    """Return the letter grade for `score`.

    `breakpoints` must be sorted in ascending order and `grades` must have
    one more item than `breakpoints`: bisect.bisect returns how many
    breakpoints are <= score, and that count is the index into `grades`.
    The default breakpoints are a tuple, so the shared default object can
    never be modified by accident.
    """
    i = bisect.bisect(breakpoints, score)
    return grades[i]

[grade(score) for score in [33, 99, 77, 70, 89, 90, 100]]

['F', 'A', 'C', 'C', 'B', 'A', 'A']

#### Inserting with bisect.insort

In [75]:
#  Insort keeps a sorted sequence always sorted

import bisect
import random

SIZE = 7

random.seed(1729)  # fixed seed, so every run draws the same sequence of items

my_list = []
for i in range(SIZE):
    new_item = random.randrange(SIZE*2)  # random integer from range(SIZE*2)
    bisect.insort(my_list, new_item)  # inserts new_item at the position that keeps my_list sorted
    print('%2d ->' % new_item, my_list)

10 -> [10]
 0 -> [0, 10]
 6 -> [0, 6, 10]
 8 -> [0, 6, 8, 10]
 7 -> [0, 6, 7, 8, 10]
 2 -> [0, 2, 6, 7, 8, 10]
10 -> [0, 2, 6, 7, 8, 10, 10]


### When a List Is Not the Answer
#### Arrays

In [1]:
# Example 2-19. Creating, saving, and loading a large array of floats

from array import array
from random import random  # NOTE: binds `random` to the function; a cell that needs the random module must import it again

# Build the array straight from a genexp: no intermediate list of 10**7 floats.
floats = array('d', (random() for i in range(10**7)))
print(floats[-1])

# `with` closes the file even if tofile()/fromfile() raises.
with open('floats.bin', 'wb') as fp:
    floats.tofile(fp)

floats2 = array('d')
with open('floats.bin', 'rb') as fp:
    floats2.fromfile(fp, 10**7)  # read back 10**7 items from the binary file
print(floats2[-1])
print(floats2 == floats)  # the round trip preserves the contents

0.9824560834335367
0.9824560834335367
True
array('d', [0.6378163349112206, 0.7533430177985135, 0.5106393937430881, 0.6962047547325638, 0.8405665202291518, 0.8153172958729039, 0.966724031581811, 0.2435310007823125, 0.2336750638102738, 0.6908547129621364, 0.4994314235281089, 0.9195040025773801, 0.7275941173249403, 0.6518308766097499, 0.06647998072268169, 0.9394147786398322, 0.5502924000969193, 0.8550257137334796, 0.9460007781304922, 0.32349554397190816, 0.12916565483467168, 0.7447104408643618, 0.9218652339180727, 0.6412653445699467, 0.8532519968360633, 0.48707118995081977, 0.36983461017648256, 0.17721978539710093, 0.6044593661887099, 0.7386615321144616, 0.09652748963042668, 0.4798672764196027, 0.8051676313191761, 0.0013453785583888989, 0.89490240981802, 0.32766458857233205, 0.43681729073565234, 0.24949500752099363, 0.41589030739163724, 0.22093437585345854, 0.8589947934341857, 0.3826215596650788, 0.1660245207977531, 0.23503436942492928, 0.619821345509197, 0.8653200897349237, 0.51284178629

In [4]:
# `array` must be bound to the array class here (as by `from array import array` in an earlier cell).
t = array('B', [1, 1, 2, 3])  # 'B' unsigned char
print(t)

# An integer number (object) has unlimited bound in Python
a = 100000000000000000000000000000 + 20000000000000000000000000000000000
print(a)

array('B', [1, 1, 2, 3])
20000100000000000000000000000000000


#### Memory Views

In [14]:
# Example 2-20. Handling 6 bytes of memory as 1×6, 2×3, and 3×2 views

from array import array
octets = array('B', range(6))  # six items: 0 to 5
m1 = memoryview(octets)  # a view over the array, not a copy
print(m1.tolist())

m2 = m1.cast('B', [2, 3])  # the same items seen as 2 rows x 3 columns
print(m2.tolist())

m3 = m1.cast('B', [3, 2])  # the same items seen as 3 rows x 2 columns
print(m3.tolist())

m2[1, 1] = 22  # row 1, column 1 of the 2x3 view: flat index 4
m3[1, 1] = 33  # row 1, column 1 of the 3x2 view: flat index 3
print(octets)  # both writes show up in the original array

[0, 1, 2, 3, 4, 5]
[[0, 1, 2], [3, 4, 5]]
[[0, 1], [2, 3], [4, 5]]
array('B', [0, 1, 2, 33, 22, 5])


In [78]:
# Example 2-21. Changing the value of an array item by poking one of its bytes
import array  # NOTE: binds `array` to the module, replacing the class bound by `from array import array` in earlier cells

numbers = array.array('h', [-2, -1, 0, 1, 2])  # 16 bit (half)
memv = memoryview(numbers)
print(len(memv))  # five items, like the array
print(memv[0])

memv_oct = memv.cast('B')  # 'B' unsigned char
print(memv_oct.tolist())  # the same memory as ten single bytes
memv_oct[5] = 4  # byte 5 belongs to the item at index 2
print(numbers)  # that item is no longer 0 (see the printed array)

5
-2
[254, 255, 255, 255, 0, 0, 1, 0, 2, 0]
array('h', [-2, -1, 1024, 1, 2])


#### NumPy and SciPy

In [3]:
# Example 2-22. Basic operations with rows and columns in a numpy.ndarray

import numpy

a = numpy.arange(12)  # the integers 0 to 11 in a one-dimensional array
print(a)
print(type(a))
print(a.shape)

a.shape = 3, 4  # reshape in place: 3 rows x 4 columns
print(a)
print(a[2])  # the row at index 2
print(a[2, 1])  # the item at row 2, column 1
print(a[:, 1])  # the column at index 1
print(a.transpose())  # rows and columns swapped

[ 0  1  2  3  4  5  6  7  8  9 10 11]
<class 'numpy.ndarray'>
(12,)
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[ 8  9 10 11]
9
[1 5 9]
[[ 0  4  8]
 [ 1  5  9]
 [ 2  6 10]
 [ 3  7 11]]


#### Deques and Other Queues

In [4]:
# Example 2-23. Working with a deque
from collections import deque

dq = deque(range(10), maxlen=10)  # bounded deque: holds at most 10 items
print(dq)

dq.rotate(3)  # positive n: items move from the right end to the left end
print(dq)

dq.rotate(-4)  # negative n: items move from the left end to the right end
print(dq)

dq.appendleft(-1)  # the deque is full, so the item at the right end is discarded
print(dq)

dq.extend([11, 22, 33])  # three items added on the right push three out on the left
print(dq)

dq.extendleft([10, 20, 30, 40])  # items are added one by one on the left, so they end up in reverse order
print(dq)

deque([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], maxlen=10)
deque([7, 8, 9, 0, 1, 2, 3, 4, 5, 6], maxlen=10)
deque([1, 2, 3, 4, 5, 6, 7, 8, 9, 0], maxlen=10)
deque([-1, 1, 2, 3, 4, 5, 6, 7, 8, 9], maxlen=10)
deque([3, 4, 5, 6, 7, 8, 9, 11, 22, 33], maxlen=10)
deque([40, 30, 20, 10, 3, 4, 5, 6, 7, 8], maxlen=10)
