<a href="https://colab.research.google.com/github/present42/PyTorchPractice/blob/main/Fluent_Python_ch2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## First way of grouping sequence types
* Container Sequences
 - e.g. `list`, `tuple`, `collections.deque`
 - can hold heterogeneous items

* Flat Sequences
 - e.g. `str`, `bytes`, `array.array`
 - hold items of one simple type

## Second way of grouping sequence types
* Mutable Sequences
 - e.g. `list`, `bytearray`, `array.array`, `collections.deque`
* Immutable Sequences
 - e.g. `tuple`, `str`, `bytes`

In [None]:
from collections import abc

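Built-in concrete sequence types do not inherit from these ABCs; they are registered as *virtual subclasses* of `Sequence` (e.g. `tuple`) or `MutableSequence` (e.g. `list`), so `issubclass` and `isinstance` checks still succeed.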

In [None]:
issubclass(tuple, abc.Sequence)
# issubclass(list, abc.Sequence)

True

In [None]:
x = 'ABC'
# The comprehension's loop variable `x` is local to the comprehension, so the
# outer `x` is still 'ABC' afterwards; only the iterable expression reads it.
codes = [ord(x) for x in x]

In [None]:
# `last` is assigned with := so it stays bound in the enclosing scope after the
# comprehension finishes; the loop variable `c` does not.
codes = [last := ord(c) for c in x]

In [None]:
last

67

In [None]:
codes

[65, 66, 67]

In [None]:
symbols = "$¢£¤¥¦§"
beyond_ascii = [ord(s) for s in symbols if ord(s) > 127]
beyond_ascii

[162, 163, 164, 165, 166, 167]

In [None]:
beyond_ascii = list(filter(lambda c: c > 127, map(ord, symbols)))

In [None]:
beyond_ascii

[162, 163, 164, 165, 166, 167]

In [None]:
%%timeit
beyond_ascii = [ord(s) for s in symbols if ord(s) > 127]

1.14 µs ± 105 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [None]:
%%timeit
beyond_ascii = list(filter(lambda c: c > 127, map(ord, symbols)))

1.37 µs ± 312 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [None]:
colors = ['black', 'white']
sizes = ['S', 'M', 'L']
tshirts = [(color, size) for color in colors for size in sizes]

In [None]:
tshirts

[('black', 'S'),
 ('black', 'M'),
 ('black', 'L'),
 ('white', 'S'),
 ('white', 'M'),
 ('white', 'L')]

In [None]:
tshirts = [(color, size) for size in sizes
                          for color in colors]

In [None]:
tshirts

[('black', 'S'),
 ('white', 'S'),
 ('black', 'M'),
 ('white', 'M'),
 ('black', 'L'),
 ('white', 'L')]

## Generator Expressions
 - A generator expression saves memory because it yields items one by one using the iterator protocol instead of building a whole list just to feed another constructor

In [None]:
symbols = "$¢£¤¥¦§"
tuple(ord(symbol) for symbol in symbols)

import array
array.array('I', (ord(symbol) for symbol in symbols))

array('I', [36, 162, 163, 164, 165, 166, 167])

In [None]:
for tshirt in (f'{c} {s}' for c in colors for s in sizes):
  print(tshirt)

black S
black M
black L
white S
white M
white L


In [None]:
type(f'{c} {s}' for c in colors for s in sizes)

generator

## Tuples Are Not Just Immutable Lists
 - also used as records with no field names

In [None]:
lax_coordinates = (33.9425, -118.408056)
city, year, pop, chg, area = ('Tokyo', 2003, 32_450, 0.66, 8014)
traveler_ids = [('USA', '3192319'), ('BRA', 'CE230124'), ('ESP', 'XDA12031')]
for passport in sorted(traveler_ids):
  print('%s/%s' % passport)

BRA/CE230124
ESP/XDA12031
USA/3192319


In [None]:
for country, _ in traveler_ids: # unpacking
  print(country)

USA
BRA
ESP


### Tuples as Immutable Lists
 - Clarity
 - Performance: uses less memory than a list of the same length, allows Python to do some optimization

In [None]:
a = (10, 'alpha', [1, 2])
b = (10, 'alpha', [1, 2])

In [None]:
a == b

True

### Caveat
- Tuples with mutable items can be a source of bugs!!!

In [None]:
b[-1].append(99)

In [None]:
a == b

False

In [None]:
b

(10, 'alpha', [1, 2, 99])

In [None]:
def fixed(o):
  """Return True if `o` is hashable, False if `hash(o)` raises TypeError."""
  try:
    hash(o)
  except TypeError:
    hashable = False
  else:
    hashable = True
  return hashable

#### `hash()`
 - An object is hashable only if its value can never change; a tuple is therefore hashable only when every item it holds is hashable too

In [None]:
tf = (10, 'alpha', (1, 2))
tm = (10, 'alpha', [1, 2])

In [None]:
fixed(tf)

True

In [None]:
fixed(tm)

False

In [None]:
lax_coordinates = (33.9425, -118.408056)

In [None]:
latitude, longitude = lax_coordinates

In [None]:
a = 3
b = 5
b, a = a, b
print(a, b)

5 3


In [None]:
divmod(20, 8)

(2, 4)

In [None]:
t = (20, 8)

In [None]:
divmod(*t)

(2, 4)

In [None]:
quotient, remainder = divmod(*t)

In [None]:
quotient, remainder

(2, 4)

In [None]:
import os

In [None]:
_, filename = os.path.split('/home/george/.ssh/id_rsa.pub')
filename

'id_rsa.pub'

In [None]:
a, b, *rest = range(5)

In [None]:
a, b, *rest = range(3)
a, b, rest

(0, 1, [2])

In [None]:
a, b, *c = range(2)

In [None]:
a, *b, c = range(2)

In [None]:
*head, b, c, d = range(5)

In [None]:
def fun(a, b, c, d, *rest):
  """Return the four required arguments followed by a tuple of the extras."""
  required = (a, b, c, d)
  return (*required, rest)

In [None]:
fun(*[1, 2], 3, *range(4, 7)) # expect (1, 2, 3, 4, (5, 6))

(1, 2, 3, 4, (5, 6))

In [None]:
*range(4), 4

(0, 1, 2, 3, 4)

In [None]:
[*range(4), 4]

[0, 1, 2, 3, 4]

In [None]:
{*range(4), 4, *(5, 6, 7)}

{0, 1, 2, 3, 4, 5, 6, 7}

### Nested Unpacking

In [None]:
# Each record: (name, two-letter code, a number unused below, (latitude, longitude))
metro_areas = [
    ('Tokyo', 'JP', 36.933, (35.689722, 139.4214)),
    ('Delhi NCR', 'IN', 21.942, (28.613412, 77.2130)),
    ('Mexico City', 'MX', 20.942, (19.4342, -99.13333)),
    ('New York-Newark', 'US', 20.104, (40.805325, -74.020386)),
    ('Sao Paulo', 'BR', 19.649, (-23.529, -46.1421)),
]

print(f'{"":15} | {"latitude":>9} | {"longitude":>9}')
# Nested unpacking: the last item of each record is itself unpacked into
# `lat` and `lon` by the parenthesized target; the two `_` items are ignored.
for name, _, _, (lat, lon) in metro_areas:
  if lon <= 0:  # keep only longitudes at or west of the prime meridian
    print(f'{name:15} | {lat:>9} | {lon:>9}')

                |  latitude | longitude
Mexico City     |   19.4342 | -99.13333
New York-Newark | 40.805325 | -74.020386
Sao Paulo       |   -23.529 |  -46.1421


#### Caveat
- single element tuple must be written with a trailing comma

In [None]:
(record, ) = (3, )

#### Note
- Sequence pattern can match instances of most actual or virtual subclasses of `collections.abc.Sequence`, with the exception of `str`, `bytes`, and `bytearray`
- `str(name)` and `float(lat)` look like constructor calls, but in the context of a pattern that syntax performs a runtime type check: the item must be an instance of the class, and it is then bound to the variable

In [None]:
# Each record: (name, two-letter code, a number unused below, (latitude, longitude))
metro_areas = [
    ('Tokyo', 'JP', 36.933, (35.689722, 139.4214)),
    ('Delhi NCR', 'IN', 21.942, (28.613412, 77.2130)),
    ('Mexico City', 'MX', 20.942, (19.4342, -99.13333)),
    ('New York-Newark', 'US', 20.104, (40.805325, -74.020386)),
    ('Sao Paulo', 'BR', 19.649, (-23.529, -46.1421)),
]

print(f'{"":15} | {"latitude":>9} | {"longitude":>9}')
for record in metro_areas:
  match record:
    # The optional guard starting with "if" is evaluated only if the pattern matches.
    # `str(name)` / `float(lat)` / `float(lon)` are type checks that also bind the
    # value, `*extra` absorbs the items in between, and `as coord` binds the whole
    # (lat, lon) pair (neither `extra` nor `coord` is used below).
    case [str(name), *extra, (float(lat), float(lon)) as coord] if lon <= 0:
      print(f'{name:15} | {lat:>9} | {lon:>9}')

                |  latitude | longitude
Mexico City     |   19.4342 | -99.13333
New York-Newark | 40.805325 | -74.020386
Sao Paulo       |   -23.529 |  -46.1421


#### Note
- To treat a `str`, `bytes`, or `bytearray` object as a sequence subject, convert it in the `match` clause (e.g. `match tuple(phone):`)

In [None]:
phone = '42345'

match tuple(phone):
  case ['1', *rest]:
    print("North America and Carribean")
  case ['2', *rest]:
    print("Africa and some territories")
  case ['3' | '4', *rest]:
    print("Europe")
  case _:
    print("IDK")

Europe


#### Lisp

In [None]:
################ Lispy: Scheme Interpreter in Python 3.3+

## (c) Peter Norvig, 2010-18; See http://norvig.com/lispy.html

################ Imports and Types

import math
import operator as op
from collections import ChainMap as Environment

Symbol = str          # A Lisp Symbol is implemented as a Python str
List   = list         # A Lisp List   is implemented as a Python list
Number = (int, float) # A Lisp Number is implemented as a Python int or float

class Procedure:
    """A user-defined Scheme procedure: parameter names, a body and the
    environment in which the procedure was created."""

    def __init__(self, parms, body, env):
        self.parms = parms
        self.body = body
        self.env = env

    def __call__(self, *args):
        # Pair parameter names with the supplied values in a new innermost
        # scope layered over the defining environment, then evaluate the body.
        # `eval` here is the evaluator defined later in this cell, which
        # shadows the built-in of the same name.
        local_scope = dict(zip(self.parms, args))
        return eval(self.body, Environment(local_scope, self.env))

################ Global Environment

def standard_env():
    """Return a new dict with the names every Scheme program starts with.

    The dict contains everything in the `math` module's namespace (sin, cos,
    sqrt, pi, ...) followed by the operators and procedures listed below;
    on a name clash the entries listed below take precedence.
    """
    operators = {
        '+': op.add,
        '-': op.sub,
        '*': op.mul,
        '/': op.truediv,
        '>': op.gt,
        '<': op.lt,
        '>=': op.ge,
        '<=': op.le,
        '=': op.eq,
    }
    procedures = {
        'abs': abs,
        'append': op.add,
        'apply': lambda proc, args: proc(*args),
        'begin': lambda *x: x[-1],
        'car': lambda x: x[0],
        'cdr': lambda x: x[1:],
        'cons': lambda x, y: [x] + y,
        'eq?': op.is_,
        'equal?': op.eq,
        'length': len,
        'list': lambda *x: list(x),
        'list?': lambda x: isinstance(x, list),
        'map': lambda *args: list(map(*args)),
        'max': max,
        'min': min,
        'not': op.not_,
        'null?': lambda x: x == [],
        'number?': lambda x: isinstance(x, Number),
        'procedure?': callable,
        'round': round,
        'symbol?': lambda x: isinstance(x, Symbol),
    }
    # Merge in the same order as the original updates so key order is kept.
    return {**vars(math), **operators, **procedures}

global_env = standard_env()

################ Parsing: parse, tokenize, and read_from_tokens

def parse(program):
    """Turn Scheme source text into a nested Python structure.

    The text is tokenized and a single expression is read from the front of
    the token list; this function does not inspect any tokens left over.
    """
    tokens = tokenize(program)
    return read_from_tokens(tokens)

def tokenize(s):
    """Split Scheme source text into a list of token strings.

    Each parenthesis is padded with spaces so that it becomes a token of its
    own when the text is split on whitespace.
    """
    for paren in '()':
        s = s.replace(paren, f' {paren} ')
    return s.split()

def read_from_tokens(tokens):
    """Read one expression from the front of `tokens`, consuming them in place.

    Args:
        tokens: a list of token strings; the tokens that make up the
            expression are popped off the front.

    Returns:
        A list (possibly nested) for a parenthesized expression, otherwise
        the result of `atom(token)`.

    Raises:
        SyntaxError: if `tokens` is empty, if the first token is a stray
            ')', or if a '(' is never closed.
    """
    if not tokens:
        raise SyntaxError('unexpected EOF while reading')
    token = tokens.pop(0)
    if token == '(':
        L = []
        # Check that a token is available before peeking at it: running out
        # here means the closing ')' is missing, which is a syntax error
        # rather than an IndexError.
        while tokens and tokens[0] != ')':
            L.append(read_from_tokens(tokens))
        if not tokens:
            raise SyntaxError('unexpected EOF while reading')
        tokens.pop(0)  # pop off ')'
        return L
    if token == ')':
        raise SyntaxError('unexpected )')
    return atom(token)

def atom(token):
    """Convert a token to an int if possible, else a float, else a Symbol."""
    for to_number in (int, float):
        try:
            return to_number(token)
        except ValueError:
            continue  # not this kind of number; try the next conversion
    return Symbol(token)

################ Interaction: A REPL

def repl(prompt='lis.py> '):
    """Run an interactive prompt: read a line, evaluate it, print the result.

    A result of None is not printed. The loop has no exit condition of its
    own; it ends only when an exception propagates out of it.
    """
    while True:
        source = input(prompt)
        result = eval(parse(source))
        if result is None:
            continue
        print(lispstr(result))

def lispstr(exp):
    """Render a Python value as Lisp text: lists become parenthesized,
    space-separated forms (recursively); anything else goes through str()."""
    if not isinstance(exp, List):
        return str(exp)
    rendered = [lispstr(item) for item in exp]
    return '(' + ' '.join(rendered) + ')'

################ eval

def eval(x, env=global_env):
    """Evaluate the parsed expression `x` in the environment `env`.

    A Symbol is looked up in `env`; any other non-list value is returned
    unchanged. A list is treated as a special form when its first item is
    'quote', 'if', 'define' or 'lambda', and as a procedure call otherwise.
    A 'define' form stores the value in `env` and returns None.
    """
    if isinstance(x, Symbol):           # variable reference
        return env[x]
    if not isinstance(x, List):         # constant literal
        return x

    head = x[0]
    if head == 'quote':                 # (quote exp)
        _, quoted = x
        return quoted
    if head == 'if':                    # (if test conseq alt)
        _, test, conseq, alt = x
        branch = conseq if eval(test, env) else alt
        return eval(branch, env)
    if head == 'define':                # (define var exp)
        _, var, exp = x
        env[var] = eval(exp, env)
        return None
    if head == 'lambda':                # (lambda (var...) body)
        _, parms, body = x
        return Procedure(parms, body, env)

    # (proc arg...): evaluate the operator, then each operand left to right.
    proc = eval(head, env)
    args = [eval(arg, env) for arg in x[1:]]
    return proc(*args)

In [None]:
eval(parse('(gcd 18 45)'))

9

In [None]:
parse('''
(define double
  (lambda (n)
     (* n 2)))
''')

['define', 'double', ['lambda', ['n'], ['*', 'n', 2]]]

### Slicing

In [None]:
l = [10, 20, 30, 40, 50, 60]
print(l[:2])
print(l[2:])
print(l[:3])
print(l[3:])

[10, 20]
[30, 40, 50, 60]
[10, 20, 30]
[40, 50, 60]


In [None]:
s = 'bicycle'
s[::3]
s[::-1]
s[::-2]

'eccb'

The notation `a:b:c` is only valid within `[]` when used as the indexing or subscript operator, and it produces a slice object: `slice(a, b, c)`.

To evaluate `seq[start:stop:step]`, Python calls `seq.__getitem__(slice(start, stop, step))`.

In [None]:
invoice = """
0.....6.................................40........52...55........
1909  Pimoroni PiBrella                     $17.50    3    $52.50
1489  6mm Tactile Switch x20                 $4.95    2     $9.90
1510  Panavise Jr. - PV-201                 $28.00    1    $28.00
1601  PiTFT Mini Kit 320x240                $34.95    1    $34.95
"""

# Named slice objects give each fixed-width column a readable name; the bounds
# follow the ruler line at the top of `invoice`.
SKU = slice(0, 6)
DESCRIPTION = slice(6, 40)
UNIT_PRICE = slice(40, 52)
QUANTITY = slice(52, 55)
ITEM_TOTAL = slice(55, None)  # None as stop means "to the end of the line"
# split('\n') yields a leading '' (the text starts with a newline) and then the
# ruler line; [2:] drops both. The '' after the final newline is kept, so the
# last element of `line_items` is an empty string.
line_items = invoice.split('\n')[2:]

In [None]:
for item in line_items:
  print(item[UNIT_PRICE], item[DESCRIPTION])

    $17.50   Pimoroni PiBrella                 
     $4.95   6mm Tactile Switch x20            
    $28.00   Panavise Jr. - PV-201             
    $34.95   PiTFT Mini Kit 320x240            
 


#### Multidimensional Slicing and Ellipsis

In [None]:
import numpy as np
hi = np.random.rand(3, 4, 2)

In [None]:
hi[2, ..., 1] == hi[2, :, 1]

array([ True,  True,  True,  True])

In [None]:
l = list(range(10))
l

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [None]:
l[2:5] = [20, 30]

In [None]:
l

[0, 1, 20, 30, 5, 6, 7, 8, 9]

In [None]:
del l[5:7]

In [None]:
l

[0, 1, 20, 30, 5, 8, 9]

In [None]:
l[3::2]

[30, 8]

In [None]:
l[2:5] = 100

TypeError: can only assign an iterable

#### Using + and * with Sequences

In [1]:
l = [1, 2, 3]
l * 5

[1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3]

In [2]:
5 * 'abcd'

'abcdabcdabcdabcdabcd'

Warning! `my_list` in the code below is a list with three references to the same inner list, so a change made through one of them shows up in all three.

In [3]:
my_list = [[]] * 3

In [4]:
my_list[0].append(1)

In [5]:
my_list

[[1], [1], [1]]

In [6]:
board = [['_'] * 3 for i in range(3)] # create a list of three lists of three items each

In [7]:
board

[['_', '_', '_'], ['_', '_', '_'], ['_', '_', '_']]

In [8]:
board[1][2] = 'X'

In [9]:
board

[['_', '_', '_'], ['_', '_', 'X'], ['_', '_', '_']]

In [14]:
## step-by-step equivalent of `[['_'] * 3 for i in range(3)]`
board = []
for i in range(3):
  row = ['_'] * 3
  board.append(row) # a new row is built on every iteration, so the three rows are independent

In [15]:
board[1][2] = 'X'
board

[['_', '_', '_'], ['_', '_', 'X'], ['_', '_', '_']]

Warning! The code below does not work as intended: the outer list holds three references to the same row.

In [10]:
weird_board = [['_'] * 3] * 3 # outer list is made of three references to the same list
print(weird_board)
weird_board[1][2] = 'X'
weird_board

[['_', '_', '_'], ['_', '_', '_'], ['_', '_', '_']]


[['_', '_', 'X'], ['_', '_', 'X'], ['_', '_', 'X']]

In [11]:
## step-by-step equivalent of `[['_'] * 3] * 3`
row = ['_'] * 3  # the row is created only once, outside the loop
board = []
for i in range(3):
  board.append(row) # the same row is appended three times to `board`

In [12]:
board

[['_', '_', '_'], ['_', '_', '_'], ['_', '_', '_']]

In [13]:
board[1][2] = 'O'
board

[['_', '_', 'O'], ['_', '_', 'O'], ['_', '_', 'O']]

##### Note
How is `a += b` executed?
 1. If `a` implements `__iadd__`, that method is called (for mutable sequences such as `list`, `a` is changed in place)
 2. If not, Python falls back to `a = a + b`: a new object is created and `a` is rebound to it.

In [22]:
l = [1, 2, 3]
print(l)
id(l)

[1, 2, 3]


138574041463872

In [23]:
l *= 2
print(l)
id(l)

[1, 2, 3, 1, 2, 3]


138574041463872

In [30]:
t = (1, 2, 3)
id(t)

138574043252928

In [31]:
t *= 2
id(t)

138574280535744

In [32]:
t = (1, 2, [30, 40])
t[2] += [50, 60]

TypeError: 'tuple' object does not support item assignment

In [33]:
t

(1, 2, [30, 40, 50, 60])

In [35]:
import dis
dis.dis('s[a] += b')

  1           0 LOAD_NAME                0 (s)
              2 LOAD_NAME                1 (a)
              4 DUP_TOP_TWO
              6 BINARY_SUBSCR
              8 LOAD_NAME                2 (b)
             10 INPLACE_ADD
             12 ROT_THREE
             14 STORE_SUBSCR
             16 LOAD_CONST               0 (None)
             18 RETURN_VALUE


3 Lessons
 - Avoid putting mutable items in tuples
 - Augmented assignment is not an atomic operation
 - Inspecting Python bytecode is not too difficult, and it can be helpful to see what is going on under the hood

### `list.sort` vs the `sorted` built-in

In [36]:
fruits = ['grape', 'raspberry', 'apple', 'banana']
sorted(fruits)

['apple', 'banana', 'grape', 'raspberry']

In [37]:
fruits

['grape', 'raspberry', 'apple', 'banana']

In [38]:
sorted(fruits, reverse=True)

['raspberry', 'grape', 'banana', 'apple']

In [39]:
sorted(fruits, key=len)

['grape', 'apple', 'banana', 'raspberry']

In [40]:
sorted(fruits, key=len, reverse=True)

['raspberry', 'banana', 'grape', 'apple']

In [41]:
fruits

['grape', 'raspberry', 'apple', 'banana']

In [42]:
fruits.sort()

In [43]:
fruits

['apple', 'banana', 'grape', 'raspberry']

#### managing ordered sequences with bisect

In [None]:
## bisect_demo.py
##
# import bisect
# import sys

# HAYSTACK = [1, 4, 5, 6, 8, 12, 15, 20, 21, 23, 23, 26, 29, 30]
# NEEDLES = [0, 1, 2, 5, 8, 10, 22, 23, 29, 30, 31]

# ROW_FMT = '{0:2d} @ {1:2d}    {2}{0:<2d}'

# def demo(bisect_fn):
#   for needle in reversed(NEEDLES):
#     position = bisect_fn(HAYSTACK, needle)
#     offset = position * '  |'
#     print(ROW_FMT.format(needle, position, offset))

# if __name__ == '__main__':
#   if sys.argv[-1] == 'left':
#     bisect_fn = bisect.bisect_left
#   else:
#     bisect_fn = bisect.bisect

#   print('DEMO:', bisect_fn.__name__)
#   print('haystack->', ' '.join(f'{n:2}' for n in HAYSTACK))
#   demo(bisect_fn)

In [45]:
!python bisect_demo.py left

DEMO: bisect_left
haystack->  1  4  5  6  8 12 15 20 21 23 23 26 29 30
31 @ 14      |  |  |  |  |  |  |  |  |  |  |  |  |  |31
30 @ 13      |  |  |  |  |  |  |  |  |  |  |  |  |30
29 @ 12      |  |  |  |  |  |  |  |  |  |  |  |29
23 @  9      |  |  |  |  |  |  |  |  |23
22 @  9      |  |  |  |  |  |  |  |  |22
10 @  5      |  |  |  |  |10
 8 @  4      |  |  |  |8 
 5 @  2      |  |5 
 2 @  1      |2 
 1 @  0    1 
 0 @  0    0 
