## Tuples are not just immutable lists

Tuple : immutable list + records with no field names

### Tuples as records

In [30]:
lax_coordinates = (33.9425, -118.408056)
city, year, pop, chg, area = ('Tokyo', 2003, 32450, 0.66, 8014)

In [31]:
traveler_ids = [('USA', '31195855'), ('BRA', 'CE342567'), ('ESP', 'XDA205856')]

In [32]:
for passport in sorted(traveler_ids):
    print('%s/%s' % passport)

BRA/CE342567
ESP/XDA205856
USA/31195855


In [33]:
for country,_ in traveler_ids:
    print(country)

USA
BRA
ESP


### Tuple unpacking

In [34]:
lax_coorinates = (33.9425, -118.408056)
lattitude, longitude = lax_coordinates

In [35]:
lattitude

33.9425

In [36]:
longitude

-118.408056

swapping the values of variables without using a temporary variable

In [37]:
a, b = (10,20)
b,a = a,b

In [38]:
a

20

In [39]:
b

10

prefixing an argument with a star when calling a function

In [40]:
divmod(20, 8)

(2, 4)

In [41]:
t = (20, 8)
divmod(*t)

(2, 4)

In [42]:
quotient, remainder = divmod(*t)

In [43]:
quotient, remainder

(2, 4)

os.path.split() function builds a tuple(path, last_part) from  a filesystem path

In [44]:
import os
_, filename = os.path.split('/home/luciano/..sh/idras.pub')
filename

'idras.pub'

### using * to grab excess items

In [45]:
a, b, *rest = range(5)
a,b,rest

(0, 1, [2, 3, 4])

In [47]:
a,b,*rest = range(3)
a,b,rest

(0, 1, [2])

In [48]:
a,b,*rest = range(2)
a,b,rest

(0, 1, [])

In [49]:
a,*body,c,d = range(5)
a,body,c,d

(0, [1, 2], 3, 4)

In [50]:
*head, b, c, d = range(5)
head, b , c, d

([0, 1], 2, 3, 4)

## Nested tuple unpacking

In [55]:
metro_areas = [
    ('Tokyo', 'JP', 36.933, (35.689722, 139.691667)),
    ('Delhi NCR', 'IN', 21.935, (28.613889, 77.208889)),
    ('Mexico City', 'MX', 20.142, (19.433333, -99.133333)),
    ('New York-Newark', 'US', 20.104, (40.808611, -74.020386)),
    ('Sao Paulo', 'BR', 19.649, (-23.547778, -46.635833)),
]

print('{:15} | {:^9} | {:^9}'.format('', 'lat.', 'long.'))
fmt = '{:15} | {:9.4} | {:9.4f}'
for name, cc, pop, (latitude, longitude) in metro_areas:
    if longitude <= 0:
        print(fmt.format(name, latitude, longitude))

                |   lat.    |   long.  
Mexico City     |     19.43 |  -99.1333
New York-Newark |     40.81 |  -74.0204
Sao Paulo       |    -23.55 |  -46.6358


## Named tuples

In [56]:
from collections import namedtuple
City = namedtuple('City', 'name country population coordinates')
tokyo = City('Tokyo', 'JP', 36.933, (35.689722, 139.691667))
tokyo

City(name='Tokyo', country='JP', population=36.933, coordinates=(35.689722, 139.691667))

In [57]:
tokyo.population

36.933

In [58]:
tokyo.coordinates

(35.689722, 139.691667)

In [59]:
tokyo[1]

'JP'

named tuple type has a few attributes in addition to those inherited from tuple.

_fields class attribute, _make(iterable) class method, _asdict() instance method


In [60]:
City._fields

('name', 'country', 'population', 'coordinates')

In [61]:
LatLong = namedtuple('LatLong', 'lat long')
delhi_data = ('Delhi NCR', 'IN', 21.935, LatLong(28.613889, 77.208889))
# instantiate a named uple from iterable
delhi = City._make(delhi_data)
delhi._asdict()

OrderedDict([('name', 'Delhi NCR'),
             ('country', 'IN'),
             ('population', 21.935),
             ('coordinates', LatLong(lat=28.613889, long=77.208889))])

## Slicing

In [62]:
l = [10,20,30,40,50,60]
l[:2]

[10, 20]

In [63]:
l[2:]

[30, 40, 50, 60]

In [64]:
l[:3]

[10, 20, 30]

In [65]:
l[3:]

[40, 50, 60]

### Slice objects

s[a:b:c] -> c : step

In [66]:
s = 'bicycle'
s[::3]

'bye'

In [67]:
s[::-1]

'elcycib'

In [68]:
s[::-2]

'eccb'

### Assignin g to slices

In [74]:
l = list(range(10))
l

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [75]:
l[2:5] = [20, 30]
l

[0, 1, 20, 30, 5, 6, 7, 8, 9]

In [76]:
del l[5:7]
l

[0, 1, 20, 30, 5, 8, 9]

In [77]:
l[3::2] = [11,22]

In [78]:
l

[0, 1, 20, 11, 5, 22, 9]

In [79]:
l[2:5] = 100

TypeError: can only assign an iterable

In [80]:
l[2:5] = [100]
l

[0, 1, 100, 22, 9]

## Using + and * with sequences

In [1]:
l = [1,2,3]
l*5

[1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3]

In [2]:
5*'abcd'

'abcdabcdabcdabcdabcd'

### building lists of lists

In [5]:
# Create a list of with 3 lists of 3 items each
board = [["-"] * 3 for i in range(3)]

print(board)

[['-', '-', '-'], ['-', '-', '-'], ['-', '-', '-']]


In [6]:
board[1][2] = 'X'
board

[['-', '-', '-'], ['-', '-', 'X'], ['-', '-', '-']]

### wrong shortcut

In [4]:
# outer list is made of three references to the same inner list
weird_board = [['_'] *3]*3
weird_board

[['_', '_', '_'], ['_', '_', '_'], ['_', '_', '_']]

In [7]:
weird_board[1][2] = '0'
weird_board

[['_', '_', '0'], ['_', '_', '0'], ['_', '_', '0']]

### equivalent to the wrong shorcut

In [8]:
row = ['_'] *3
board = []
for i in range(3):
    board.append(row)

In [9]:
board

[['_', '_', '_'], ['_', '_', '_'], ['_', '_', '_']]

In [11]:
board[1][2] = 'X'
board

[['_', '_', 'X'], ['_', '_', 'X'], ['_', '_', 'X']]

In [12]:
board = []
for i in range(3):
    row = ['_'] * 3
    board.append(row)
    
board

[['_', '_', '_'], ['_', '_', '_'], ['_', '_', '_']]

In [13]:
board[2][0] = 'X'
board

[['_', '_', '_'], ['_', '_', '_'], ['X', '_', '_']]

## Augmented assignment with sequences

In [14]:
l =[1,2,3]
id(l)

2035481993544

In [15]:
l *= 2
l

[1, 2, 3, 1, 2, 3]

In [16]:
id(l)

2035481993544

In [17]:
t = (1,2,3)
id(t)

2035481325144

In [18]:
t *= 2
id(t)

2035481443880

## A+= assignemnt puzzler

In [19]:
t = (1,2, [30,40])
t[2] += [50,60]

TypeError: 'tuple' object does not support item assignment

In [20]:
t

(1, 2, [30, 40, 50, 60])

## list.sort and the sorted built-in function

In [22]:
fruits = ['grape', 'raspberry', 'apple', 'banana']
sorted(fruits)

['apple', 'banana', 'grape', 'raspberry']

In [23]:
fruits

['grape', 'raspberry', 'apple', 'banana']

In [24]:
sorted(fruits, reverse=True)

['raspberry', 'grape', 'banana', 'apple']

In [25]:
# Since the sorting algorithm is stable, "grape" and "apple" both of length5, are in the original oerder
sorted(fruits, key=len)

['grape', 'apple', 'banana', 'raspberry']

In [26]:
sorted(fruits, key=len, reverse=True)

['raspberry', 'banana', 'grape', 'apple']

In [27]:
fruits

['grape', 'raspberry', 'apple', 'banana']

In [28]:
fruits.sort()

In [29]:
fruits

['apple', 'banana', 'grape', 'raspberry']

## Managing ordered sequences with bisect

bisect finds insertion points for items in a sorted sequence

bisect module offers two main functions : use the binary search algorithm to quickly find and insert itmes in any sorted sequence.

__bisect__

__insort__

In [30]:
import bisect
import sys

HAYSTACK = [1,4,5,6,8,12,15,20,21,23,23,26,29,30]
NEEDLES = [0,1,2,5,8,10,22,23,29,30,31]

ROW_FMT = '{0:2d} @ {1:2d}   {2}{0:<2d}'

def demo(bisect_fn):
    for needle in reversed(NEEDLES):
        position = bisect_fn(HAYSTACK, needle)
        offset = position * '  |'
        print(ROW_FMT.format(needle, position, offset))

In [31]:
if __name__=='__main__':
    
    bisect_fn = bisect.bisect
    print('DEMO:', bisect_fn.__name__)
    print('haystakc ->', ' '.join('%2d' % n for n in HAYSTACK))
    demo(bisect_fn)

DEMO: bisect
haystakc ->  1  4  5  6  8 12 15 20 21 23 23 26 29 30
31 @ 14     |  |  |  |  |  |  |  |  |  |  |  |  |  |31
30 @ 14     |  |  |  |  |  |  |  |  |  |  |  |  |  |30
29 @ 13     |  |  |  |  |  |  |  |  |  |  |  |  |29
23 @ 11     |  |  |  |  |  |  |  |  |  |  |23
22 @  9     |  |  |  |  |  |  |  |  |22
10 @  5     |  |  |  |  |10
 8 @  5     |  |  |  |  |8 
 5 @  3     |  |  |5 
 2 @  1     |2 
 1 @  1     |1 
 0 @  0   0 


In [32]:
if __name__=='__main__':
    
    bisect_fn = bisect.bisect_left
    print('DEMO:', bisect_fn.__name__)
    print('haystakc ->', ' '.join('%2d' % n for n in HAYSTACK))
    demo(bisect_fn)

DEMO: bisect_left
haystakc ->  1  4  5  6  8 12 15 20 21 23 23 26 29 30
31 @ 14     |  |  |  |  |  |  |  |  |  |  |  |  |  |31
30 @ 13     |  |  |  |  |  |  |  |  |  |  |  |  |30
29 @ 12     |  |  |  |  |  |  |  |  |  |  |  |29
23 @  9     |  |  |  |  |  |  |  |  |23
22 @  9     |  |  |  |  |  |  |  |  |22
10 @  5     |  |  |  |  |10
 8 @  4     |  |  |  |8 
 5 @  2     |  |5 
 2 @  1     |2 
 1 @  0   1 
 0 @  0   0 


### perform table lookups by numeric values

In [34]:
def grade(score, breakpoints=[60, 70, 80, 90], grades='FDCBA'):
    i = bisect.bisect(breakpoints, score)
    print(i)
    return grades[i]

[grade(score) for score in [33, 99, 77, 70, 89, 90, 100]]

0
4
2
2
3
4
4


['F', 'A', 'C', 'C', 'B', 'A', 'A']

### Inserting with bisect.insort

insort(seq, item) inserts item into seq so as to keep seq in ascending order.

In [36]:
import bisect
import random

SIZE=7
random.seed(1729)

my_list = []
for i in range(SIZE):
    new_item = random.randrange(SIZE*2)
    bisect.insort(my_list, new_item)
    print('%2d ->' % new_item, my_list)

10 -> [10]
 0 -> [0, 10]
 6 -> [0, 6, 10]
 8 -> [0, 6, 8, 10]
 7 -> [0, 6, 7, 8, 10]
 2 -> [0, 2, 6, 7, 8, 10]
10 -> [0, 2, 6, 7, 8, 10, 10]


## When a list is not the answer

list는 사용하기 쉽지만 특정 요구조건에선 더 좋은 옵션이 있다. 예를 들어 10M개의 실수형 값을 저장한다면 array가 더 효율적이다. 왜냐하면 array는 완전한 float 객체를 저장하는게 아니라 기계어 값을 나타내는 bytes로 구성되기 때문이다.(C 언어의 array와 같다)

반면에 list의 끝에서 아이템을 FIFO나 LIFO처럼 더하거나 뺀다면 deque가 더 빨리 동작한다.

만약 containtment check를 한다면 Set을 쓰는것을 고려한다. memebership checking에 더 최적화되어 있는데 sequence는 아니다(unorderd)

array는 mutalbe sequence type

### Array

typecode

'b' -> singed char, single byte, -128~127

'd' -> double

In [1]:
from array import array
from random import random

floats = array('d', (random() for i in range(10**7)))
floats[-1]

0.5114222367261633

In [2]:
fp = open('floats.bin', 'wb')
floats.tofile(fp)
fp.close()

In [4]:
floats2 = array('d')
fp = open('floats.bin', 'rb')
floats2.fromfile(fp, 10**7)
fp.close()
floats2[-1]

0.5114222367261633

In [5]:
floats2 == floats

True

binary file을 읽고 쓰는게 빠르다

numeric data를 저장하는 또 다른 방법은 pickle module을 통해 object serialization을 한다.
pickle은 거의 모든 built-in type을 다룬다

array는 sort method가 없다. sorted 함수를 써서 sorted로 동작하게 만든다

In [14]:
a = array('d', (random() for i in range(10)))
print(a)
a = array(a.typecode, sorted(a))

array('d', [0.575844286641216, 0.8733035665137253, 0.4644505691423464, 0.8523097707431884, 0.34101621236417623, 0.7126468800022254, 0.08768827393958034, 0.7444110370203408, 0.7207097284354549, 0.3561205456050647])


In [15]:
print(a)

array('d', [0.08768827393958034, 0.34101621236417623, 0.3561205456050647, 0.4644505691423464, 0.575844286641216, 0.7126468800022254, 0.7207097284354549, 0.7444110370203408, 0.8523097707431884, 0.8733035665137253])


## Memory Views

shared-memory seqeunce type that lets you handle slices of arrays without copying bytes.

A memoryview is essentially a generalized Numpy array

It allows you to share memory between data-structures without first copying

In [17]:
# typecode h -> short signed integers
numbers = array('h', [-2,-1,0,1,2])
memv = memoryview(numbers)
len(memv)

5

In [18]:
memv[0]

-2

In [19]:
# typecode 'B' -> unsigned char
memv_oct = memv.cast('B')
memv_oct.tolist()

[254, 255, 255, 255, 0, 0, 1, 0, 2, 0]

In [21]:
memv_oct[5] = 4

In [22]:
numbers

array('h', [-2, -1, 1024, 1, 2])

## Numpy and SciPy

In [23]:
import numpy as np
a = np.arange(12)
a

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [24]:
type(a)

numpy.ndarray

In [25]:
a.shape

(12,)

In [26]:
a.shape = 3,4
a

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [27]:
a[2]

array([ 8,  9, 10, 11])

In [28]:
a[2,1]

9

In [30]:
a[:,1]

array([1, 5, 9])

In [31]:
a.transpose()

array([[ 0,  4,  8],
       [ 1,  5,  9],
       [ 2,  6, 10],
       [ 3,  7, 11]])

## loading, saving and operating on all elements

In [13]:
import numpy as np
floats = np.array([random() for i in range(10**7)])

In [14]:
from time import perf_counter as pc
t0 = pc(); floats /=3; pc() - t0

0.011660057971027982

In [16]:
np.save('floats-10M', floats)

In [18]:
# load the data as a memory-mapped file into another array
floats2 = np.load('floats-10M.npy', 'r+')
floats2

memmap([ 0.09022956,  0.22038166,  0.03885401, ...,  0.27605282,
         0.07845377,  0.02580212])

In [20]:
floats2 *= 6
floats2[-3:]

memmap([ 9.93790167,  2.82433565,  0.92887619])

## Deques and other queues

append와 pop(0)을 쓰면 LIFO처럼 동작하게 할 수 있다. 하지만 list의 왼쪽에 inserting과 removing을 하려면 전체 리스트를 shift해야 한다.

deque는 양 끝에서 inserting과 removing을 할 수 있도록 해준다.

bounded되어 있다-> full인 상태에서 추가하면 반대편 item이 사라진다.

In [21]:
from collections import deque
dq = deque(range(10), maxlen=10)
dq

deque([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [22]:
dq.rotate(3)

In [23]:
dq

deque([7, 8, 9, 0, 1, 2, 3, 4, 5, 6])

In [24]:
dq.rotate(-4)
dq

deque([1, 2, 3, 4, 5, 6, 7, 8, 9, 0])

In [26]:
dq.appendleft(-1)
dq

deque([-1, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [27]:
dq.extend([11,22,33])
dq

deque([3, 4, 5, 6, 7, 8, 9, 11, 22, 33])

In [28]:
dq.extendleft([10,20,30,40])
dq

deque([40, 30, 20, 10, 3, 4, 5, 6, 7, 8])

In [29]:
len(dq) == dq.maxlen

True