# 5 Common data structures in Python

## 5.1 Dictionaries, Maps, and Hashtables

In [4]:
# A dict literal maps each (hashable) key to its value.
cars = {'vw': 1, 'ford': 2}

# Dict comprehension: every n in 0..4 is mapped to its successor.
plus_one = {n: n + 1 for n in range(5)}

- keys must be hashable (a `__hash__` value that never changes during the object's lifetime, and comparable via `__eq__`)
- based on a hash table $\rightarrow$ $\mathcal{O}(1)$ average time for insertion, deletion, update, and lookup

### `collections.OrderedDict` remembers insertion order

In [8]:
from collections import OrderedDict

# Keys come back in the order they were inserted ('B' before 'A'), not sorted.
d = OrderedDict([('B', 1)])
d['A'] = 2
d.keys()

odict_keys(['B', 'A'])

- CPython 3.6 dicts also preserve insertion order, but only as an implementation detail; since Python 3.7 this is guaranteed by the language spec

### `collections.defaultdict` have default values for missing keys

In [11]:
from collections import defaultdict

# `list` is the default factory: reading a missing key first stores a new
# empty list under it, so values can be added without a prior key check.
d = defaultdict(list)
d['A'] += [2]
d['B'] += [1]

d

defaultdict(list, {'A': [2], 'B': [1]})

### `collections.ChainMap` searches multiple dicts as a single mapping and returns the left-most hit

In [14]:
from collections import ChainMap

# Three independent dicts; 'two' is present in both d2 and d3.
d1 = dict(one=1)
d2 = dict(two=2)
d3 = dict(two=-2, three=3)

# Lookups walk the mappings left to right and stop at the first one that
# contains the key.
chain = ChainMap(d1, d2, d3)
chain

ChainMap({'one': 1}, {'two': 2}, {'two': -2, 'three': 3})

In [15]:
chain['two']

2

### `types.MappingProxyType` for read-only dicts

In [17]:
from types import MappingProxyType

writable = {'one': 1}
# The proxy is a read-only view onto `writable`; it does not copy the dict.
read_only = MappingProxyType(writable)

# The failure is the point of this cell, so catch and display it instead of
# letting the exception abort a "Restart & Run All".
try:
    read_only['one'] = 2
except TypeError as exc:
    print(f'{type(exc).__name__}: {exc}')

TypeError: 'mappingproxy' object does not support item assignment

In [18]:
writable['two'] = 2
read_only

mappingproxy({'one': 1, 'two': 2})

## 5.2 Array data structures

### `list` - mutable, dynamic arrays

In [1]:
l = [1, 2] + [3]
l

[1, 2, 3]

In [21]:
import dis

dis.dis(compile("[1, 2, '3']", '', 'eval'))

  1           0 LOAD_CONST               0 (1)
              2 LOAD_CONST               1 (2)
              4 LOAD_CONST               2 ('3')
              6 BUILD_LIST               3
              8 RETURN_VALUE


### `tuple` - immutable container

In [2]:
t = 1, 2, 3
t

(1, 2, 3)

In [22]:
dis.dis(compile("(1, 2, '3')", '', 'eval'))

  1           0 LOAD_CONST               3 ((1, 2, '3'))
              2 RETURN_VALUE


### `array.array` - basic typed arrays

In [6]:
import array

a = array.array('i', (1, 2))
a

array('i', [1, 2])

In [9]:
a.append(3)
# A typed array only accepts values matching its type code. The rejection is
# the point of this cell, so catch and display it instead of letting the
# exception abort a "Restart & Run All".
try:
    a.append('string')
except TypeError as exc:
    print(f'{type(exc).__name__}: {exc}')

TypeError: an integer is required (got type str)

### `str` - immutable unicode arrays

In [10]:
s = 'abc'
s

'abc'

In [11]:
# Strings are immutable, so item assignment fails. The failure is the point of
# this cell: catch and display it instead of aborting a "Restart & Run All".
try:
    s[1] = 'D'
except TypeError as exc:
    print(f'{type(exc).__name__}: {exc}')

TypeError: 'str' object does not support item assignment

### `bytes` - immutable arrays of bytes

In [12]:
b = bytes((1, 2, 3))
b

b'\x01\x02\x03'

In [13]:
# bytes objects are immutable, so item assignment fails. The failure is the
# point of this cell: catch and display it instead of aborting a
# "Restart & Run All".
try:
    b[0] = 1
except TypeError as exc:
    print(f'{type(exc).__name__}: {exc}')

TypeError: 'bytes' object does not support item assignment

In [15]:
# Each element must fit in a single byte (0..255), so 256 is rejected. The
# failure is the point of this cell: catch and display it instead of aborting
# a "Restart & Run All".
try:
    bytes((256,))
except ValueError as exc:
    print(f'{type(exc).__name__}: {exc}')

ValueError: bytes must be in range(0, 256)

### `bytearray` - mutable arrays of single bytes

In [16]:
ba = bytearray((0, 2, 4))
ba

bytearray(b'\x00\x02\x04')

In [18]:
ba[2] = 5
ba

bytearray(b'\x00\x02\x05')

## 5.3 Records, Structs, and Data Transfer Objects

Plain `tuple` can be used for small use cases with 2-3 fields. Custom classes with the `@property` decorator give full control.

### `collections.namedtuple` - convenient data objects

Uses as little memory as a regular tuple, while the field names hint at what each value means.

In [28]:
from collections import namedtuple
from sys import getsizeof

# Factory call: creates a tuple subclass named 'Animal' with the fields
# `legs` and `name`; instances print with their field names.
p = namedtuple('Animal', ['legs', 'name'])
p(name='Rex', legs=4)

Animal(legs=4, name='Rex')

### `typing.NamedTuple` - improved namedtuples with (unenforced by default) type hints

In [29]:
from typing import NamedTuple

class Animal(NamedTuple):
    """Tuple-based record describing an animal, with the fields `legs` and `name`."""
    # The annotations are type hints only; they are not checked at runtime.
    legs: int
    name: str

In [30]:
Animal(4, 'Rex')

Animal(legs=4, name='Rex')

In [31]:
Animal('a', 1)

Animal(legs='a', name=1)

### `struct.Struct` - for tight packaging and serialization or data exchange

In [45]:
from struct import Struct

s = Struct('ci')

In [46]:
d = s.pack(b'f', 2)
d

b'f\x00\x00\x00\x02\x00\x00\x00'

In [47]:
s.unpack(d)

(b'f', 2)

## 5.4 Sets and multisets

### `set` - mutable set

In [2]:
numbers = {1, 2, 3}
numbers.add('4')
numbers

{'4', 1, 2, 3}

### `frozenset` - immutable sets (they are hashable)

In [3]:
num = frozenset(numbers)
# A frozenset has no `add` method. The failure is the point of this cell:
# catch and display it instead of aborting a "Restart & Run All".
try:
    num.add(5)
except AttributeError as exc:
    print(f'{type(exc).__name__}: {exc}')

AttributeError: 'frozenset' object has no attribute 'add'

### `collections.Counter` - Multisets

In [5]:
from collections import Counter

# A Counter is a multiset: each key maps to the number of times it occurs.
numbers = Counter({1: 1, 2: 2})
numbers

Counter({1: 1, 2: 2})

In [7]:
numbers.update({1: 1, 2: 2, 3: 3})
numbers

Counter({1: 2, 2: 4, 3: 3})

In [8]:
len(numbers)

3

In [9]:
sum(numbers.values())

9

## 5.5 Stacks (LIFOs)

### Lists as stacks

- lists are based on arrays
- $O(1)$ for index access; amortized $O(1)$ for add/remove at the end
- always remove from and add to end, otherwise this costs $O(n)$

In [10]:
stack = []
stack.append(1)
stack.append(2)
stack

[1, 2]

In [11]:
stack.pop()

2

### `collections.deque`

- based on doubly linked list
- adding and removing from either end in $O(1)$
- $O(n)$ for access to elements in the middle

In [13]:
from collections import deque

stack = deque()
stack.append(1)
stack.append(2)
stack

deque([1, 2])

In [14]:
stack.pop()

2

### `queue.LifoQueue` - concurrent queue

In [15]:
from queue import LifoQueue

stack = LifoQueue()
stack.put(1)
stack.put(2)
stack

<queue.LifoQueue at 0x1039da4a8>

In [16]:
stack.get()

2

## 5.6 Queues (FIFOs)

### `list` is a slow queue due to $O(n)$ cost for inserting/removing at the left

### `collections.deque` make good queues with $O(1)$ insertion/removal

In [17]:
from collections import deque

q = deque()
q.append(1)
q.append(2)
q

deque([1, 2])

In [18]:
q.popleft()

1

### `queue.Queue` for concurrency 

In [19]:
from queue import Queue

q = Queue()
q.put(1)
q.put(2)
q

<queue.Queue at 0x1039da898>

In [20]:
q.get()

1

### `multiprocessing.Queue` shared job queues

In [21]:
from multiprocessing import Queue

q = Queue()
q.put(1)
q.put(2)
q

<multiprocessing.queues.Queue at 0x1039dab00>

In [22]:
q.get()

1

## 5.7 Priority Queues

### `queue.PriorityQueue`

- uses `heapq` internally $\rightarrow$ insertion and extraction in $O(\log n)$
- is synchronized $\rightarrow$ a little overhead over `heapq`

In [1]:
from queue import PriorityQueue

# Entries are (priority, payload) tuples; the smallest tuple is retrieved first.
q = PriorityQueue()

# Deliberately inserted out of priority order.
q.put((3, '3'))
q.put((1, '1'))
q.put((2, '2'))

# Drain the queue: entries come out sorted by priority, lowest first.
while q.qsize() > 0:
    item = q.get()
    print(item)

(1, '1')
(2, '2')
(3, '3')
