# Chapter 2: An Array of Sequences
These are the main topics this chapter will cover:
• List comprehensions and the basics of generator expressions
• Using tuples as records versus using tuples as immutable lists
• Sequence unpacking and sequence patterns
• Reading from slices and writing to slices
• Specialized sequence types, like arrays and queues

The standard library offers many sequence types, which can be grouped by how they store their items:
Container sequences (list, tuple, collections.deque) hold references to the objects they contain, which can be of any type
Flat sequences (str, bytes, array.array) store the values of their contents in their own memory space rather than as distinct Python objects. So, flat sequences are more compact but are limited to holding primitive machine values such as bytes, integers, and floats

You can also group sequence types by mutability:
Mutable sequences (list, array, deque)
Immutable sequences (tuple, str, bytes)

## List Comprehensions and Generator Expressions

In [None]:
# list comprehensions
symbols = 'ABCDEF'
codes = [ord(symbol) for symbol in symbols] # compare to a normal for loop and appending to an empty list
codes

# syntax tip: line breaks are ignored inside of [], {}, and (), so long expressions can be split across lines without a backslash

[65, 66, 67, 68, 69, 70]

In [2]:
# local scope
# list comprehensions, generators, and set and dict comprehensions, have a local scope to hold variables assigned in the 'for' clause.
# but, if you assign variables with the walrus operator :=, you can access those variables afterwards.
codes = [last := ord(symbol) for symbol in symbols]
last

70

In [4]:
# list comprehensions vs. map and filter
# both lines build the same list; the comprehension reads left to right, the map/filter version needs a lambda

ascii_comp = [ord(s) for s in symbols if ord(s) > 67] # keep only code points above 67 (ord('C')), i.e. 'D', 'E', 'F'
ascii_map = list(filter(lambda c: c > 67, map(ord, symbols))) # map converts every symbol first, then filter drops the small ones

print(ascii_comp, ascii_map)

[68, 69, 70] [68, 69, 70]


In [None]:
# using list comprehensions for cartesian products
colors = ['red', 'white', 'blue']
sizes = ['S', 'M', 'L']

tshirts = [(color, size) for color in colors for size in sizes]
print(tshirts)

tshirts_2 = [(color, size) for size in sizes for color in colors]
print(tshirts_2)

# note the difference in ordering based on how we write the for loops

[('red', 'S'), ('red', 'M'), ('red', 'L'), ('white', 'S'), ('white', 'M'), ('white', 'L'), ('blue', 'S'), ('blue', 'M'), ('blue', 'L')]
[('red', 'S'), ('white', 'S'), ('blue', 'S'), ('red', 'M'), ('white', 'M'), ('blue', 'M'), ('red', 'L'), ('white', 'L'), ('blue', 'L')]


In [None]:
# if we want to generate data for other sequence types, we use a generator expression (genexp)
tuple(ord(symbol) for symbol in symbols)

# more on generators in chapter 17

(65, 66, 67, 68, 69, 70)

## Tuples

In [11]:
for tshirt in (f'{c} {s}' for c in colors for s in sizes):
    print(tshirt)

red S
red M
red L
white S
white M
white L
blue S
blue M
blue L


In [13]:
# tuples are not just immutable lists. they can also be used as records, where the number of items and the position of each item give it its meaning
traveler_ids = [('USA', '31195855'), ('BRA', 'CE342567'), ('ESP', 'XDA205856')]

for country, _ in traveler_ids: # _ as a dummy variable is a convention. do not confuse it with the _ wildcard in a match statement
    print(f'{country}')

USA
BRA
ESP


In [14]:
# using tuples as immutable lists has two benefits: clarity (you know the length will never change) and performance (a tuple uses less memory than a list of the same length)
# however, note that only the references held by a tuple cannot be deleted or replaced
# if a reference points to a mutable object and that object changes, then the value of the tuple changes too

a = (10, 'alpaca', [1, 2])
b = (10, 'alpaca', [1, 2])

a == b

True

In [15]:
b[-1].append(1000)
a == b

False

In [17]:
a, b

((10, 'alpaca', [1, 2]), (10, 'alpaca', [1, 2, 1000]))

In [18]:
# if you want to determine if a tuple has a fixed value
# then you can use hash() to see if the value is fixed
def fixed(obj):
    """Return True if *obj* is hashable, False otherwise.

    hash() raises TypeError when the object (or, for a tuple, any item it
    holds) is unhashable, so a successful hash() call means the value
    cannot change behind the reference.
    """
    try:
        hash(obj)
        return True
    except TypeError:
        # an unhashable object was found somewhere inside obj
        return False
    
a = (10, 'alpaca', (1, 2))
fixed(a), fixed(b)

(True, False)

In [None]:
# these are the only methods which both lists and tuples allow:

t = ('a', 3, {'bla': 1})
t + ('new',) # concatenation | note: trailing comma required to differentiate a single-element tuple and a parenthesized expr
3 in t # __contains__
t.count('a') # count
# also shared: __getitem__, index(), __iter__, __len__, __mul__ (t * n, repeated concatenation), __rmul__ (n * t, reversed repeated concatenation)

True

## Unpacking

In [None]:
coords = (33.9425, -118.4081)
lat, lon = coords
lat

33.9425

In [24]:
# swapping is an example of unpacking
a, b = 1, 2
a, b = b, a
a, b

(2, 1)

In [25]:
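# prefixing an argument with * in a function call unpacks an iterable into separate positional arguments: divmod(*(20, 8)) is the same call as the one below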
divmod(20, 8)


(2, 4)

In [26]:
t = (20, 8)
divmod(*t)

(2, 4)

In [27]:
# excess items
a, b, *rest = range(5)
a, b, rest

(0, 1, [2, 3, 4])

In [28]:
a, *rest, b = range(4)
a, b, rest

(0, 3, [1, 2])

In [None]:
# nested unpacking
metro_areas = [
 ('Tokyo', 'JP', 36.933, (35.689722, 139.691667)),
 ('Delhi NCR', 'IN', 21.935, (28.613889, 77.208889)),
 ('Mexico City', 'MX', 20.142, (19.433333, -99.133333)),
 ('New York-Newark', 'US', 20.104, (40.808611, -74.020386)),
 ('São Paulo', 'BR', 19.649, (-23.547778, -46.635833)),
]

def print_city(areas=None):
    """Print a latitude/longitude table of the areas with non-positive longitude.

    areas: iterable of 4-item records shaped like the metro_areas entries,
        i.e. (name, <ignored>, <ignored>, (latitude, longitude)).
        Defaults to the module-level metro_areas list, so print_city()
        behaves as before.

    Format specs used below:
        {x:15}    pad to a minimum width of 15 (strings are left-aligned by default)
        {x:>9}    right-align within a width of 9
        {x:9.4f}  fixed-point with 4 decimals, width 9 (numbers are right-aligned by default)
    """
    if areas is None:
        areas = metro_areas
    # header row: an empty 15-wide name column followed by the two right-aligned titles
    print(f'{"":15} | {"latitude":>9} | {"longitude":>9}')
    # nested unpacking: the trailing (lat, lon) pattern takes the coordinate pair apart in the loop header
    for name, _, _, (lat, lon) in areas:
        if lon <= 0:  # only longitudes at or west of the prime meridian
            print(f'{name:15} | {lat:9.4f} | {lon:9.4f}')
    
print_city()

                |  latitude | longitude
Mexico City     |   19.4333 |  -99.1333
New York-Newark |   40.8086 |  -74.0204
São Paulo       |  -23.5478 |  -46.6358


In [None]:
# pattern matching with match and case
# PEP 634
phone_1 = '19148271002'
phone_2 = '34919932188'
phone_3 = '55555555555'

def check_phone(phone):
    match tuple(phone):
        case ['1', *rest]:
            print(''.join(rest), ' America')
        case ['3' | '4', *rest]:
            print(''.join(rest), ' Europe')
        case _: # note that here, "_" matches anything, but it is never bound to whatever the actual value is. i.e. there is no _ variable that you can use later
            print(phone, 'Other')
            print(_) # this actually prints the passport id for ESP from earlier LOL what an odd side effect
        
for p in (phone_1, phone_2, phone_3):
    check_phone(p)

9148271002  America
4919932188  Europe
55555555555 Other
XDA205856


In [None]:
# there's a whole section about a Lisp interpreter in python using match case statements, but I don't think I'll be using that a lot
# p. 43-46

## Slicing

In [None]:
# to recap, s[a:b:c] gives a start, stop, and stride (i.e. skip). stride can be negative to return items in reverse.
# the stop is not inclusive.
s = 'bicycle'
s[::3], s[::-1]

('bye', 'elcycib')

In [None]:
# one interesting syntax feature is the Ellipsis ... which is mainly used by NumPy

In [None]:
# you can assign to slices
l = list(range(10))

l[2:5] = [100] # plain slice: the replacement may have a different length, so here three items are replaced by one and the list shrinks
l[3::3] = [11, 22] # extended slice (with a stride): the replacement must have exactly as many items as the slice selects
l

[0, 1, 100, 11, 6, 7, 22, 9]

In [42]:
# sequences with + and *
[1, 2, 3] * 5

[1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3]

In [None]:
# nested lists with a list comprehension, the wrong way and the right way (Be careful with references!)

bad_board = [['_'] * 3] * 3
bad_board[1][0] = 'a'
bad_board

# this is equivalent to:
row = ['_'] * 3
board = []
for i in range(3):
    board.append(row)

# essentially, the same row is being referenced three times, so a modification to one of them modifies all of them

[['a', '_', '_'], ['a', '_', '_'], ['a', '_', '_']]

In [None]:
# compare to:
good_board = [['_'] * 3 for i in range(3)]
good_board[1][0] = 'oo'
good_board

# equivalent to
board = []
for i in range(3):
    row = ['_'] * 3
    board.append(row)

[['_', '_', '_'], ['oo', '_', '_'], ['_', '_', '_']]

In [48]:
# riddle
t = (1, 2, [30, 40])
t[2] += [50, 60]

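# i thought that since you can't assign to a tuple item this would simply raise an error.
# but actually += on a list first extends the list in place and only then tries to store the result back into t[2], which is the step that fails...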
t

TypeError: 'tuple' object does not support item assignment

In [None]:
print(t) # an error is raised AND the list is modified! lol

(1, 2, [30, 40, 50, 60])


In [None]:
# .sort() vs sorted

items = [5, 3, 1, 4, 2]

items.sort() # .sort() sorts in place.

items_2 = [5, 3, 1, 4, 2]
sorted(items_2) # sorted() returns a new sorted list and takes any iterable, not just lists

items, items_2

([1, 2, 3, 4, 5], [5, 3, 1, 4, 2])

## When is a list not the best option?

In [None]:
# sometimes, a list is not the best structure to use.

# for example, an array.array saves a lot of memory when handling lots of floating point values
from array import array
from random import random
floats = array('d', (random() for _ in range(10**7)))
floats[-1]

# there's also memoryview, which allows you to access the memory of other binary objects without copying
octets = array('B', range(6))
m1 = memoryview(octets)
print(m1.tolist())

m2 = m1.cast('B', [2, 3])
print(m2.tolist())

m3 = m1.cast('B', [3, 2])
print(m3.tolist())
# both m2 and m3 are also memoryviews, allowing you to work on the same memory without copying over

m2[1, 1] = 22
m3[1, 1] = 33

print(octets) # as we can see, the original array is modified through the memoryview

# look into numpy for ml and data purposes

[0, 1, 2, 3, 4, 5]
[[0, 1, 2], [3, 4, 5]]
[[0, 1], [2, 3], [4, 5]]
array('B', [0, 1, 2, 33, 22, 5])


In [None]:
# a deque is useful for additions and removals from both ends
from collections import deque

dq = deque(range(10), maxlen=10)
dq.rotate(3)
print(dq)
dq.extendleft([10, 20, 30, 40])
print(dq)

# there are also single-ended (thread-safe) queues (queue library),
# multiprocessing queues
# asyncio queues for async programming
# heapq for priority queues

deque([7, 8, 9, 0, 1, 2, 3, 4, 5, 6], maxlen=10)
deque([40, 30, 20, 10, 7, 8, 9, 0, 1, 2], maxlen=10)


In [60]:
# and a set is great for membership ('in'): lookups are hash-based instead of scanning every item
s = set(range(10))  # set() accepts any iterable, so no intermediate list is needed

if 5 in s:
    print("yay 5")

yay 5
