# Common Data Structures in Python

## 1 Dictionaries, Maps, and Hashtables

### dict – Your Go-To Dictionary

In [1]:
phonebook = {
'bob': 7387,
'alice': 3719,
'jack': 7052,
}


In [2]:
phonebook['alice']

3719

In [3]:
squares = {x: x * x for x in range(6)}

In [4]:
squares

{0: 0, 1: 1, 2: 4, 3: 9, 4: 16, 5: 25}

`There are some restrictions on which objects can be used as valid keys.
Python’s dictionaries are indexed by keys that can be of any hashable
type: A hashable object has a hash value which never changes during
its lifetime (see __hash__), and it can be compared to other objects
(see __eq__). In addition, hashable objects which compare as equal
must have the same hash value.
Immutable types like strings and numbers are hashable and work well
as dictionary keys. You can also use tuple objects as dictionary keys,
as long as they contain only hashable types themselves.`


### collections.OrderedDict – Remember the Insertion Order of Keys

In [5]:
import collections
d = collections.OrderedDict(one=1, two=2, three=3)

In [8]:
d

OrderedDict([('one', 1), ('two', 2), ('three', 3)])

In [9]:
d['four'] = 4
d.keys()

odict_keys(['one', 'two', 'three', 'four'])

### collections.defaultdict – Return Default Values for Missing Keys

In [10]:
from collections import defaultdict
dd = defaultdict(list)

In [11]:
dd['dogs'].append('Rufus')
dd['dogs'].append('Kathrin')
dd['dogs'].append('Mr Sniffles')

In [12]:
dd['dogs']

['Rufus', 'Kathrin', 'Mr Sniffles']

### collections.ChainMap – Search Multiple Dictionaries as a Single Mapping

In [13]:
from collections import ChainMap
dict1 = {'one': 1, 'two': 2}
dict2 = {'three': 3, 'four': 4}
chain = ChainMap(dict1, dict2)

In [14]:
chain

ChainMap({'one': 1, 'two': 2}, {'three': 3, 'four': 4})

In [15]:
# ChainMap searches each collection in the chain
# from left to right until it finds the key (or fails):
chain['three']

3

In [16]:
chain['missing']

KeyError: 'missing'

### types.MappingProxyType – A Wrapper for Making Read-Only Dictionaries

In [17]:
from types import MappingProxyType
writable = {'one': 1, 'two': 2}
read_only = MappingProxyType(writable)

In [18]:
read_only['one']

1

In [19]:
# The proxy is read-only:
read_only['one'] = 23

TypeError: 'mappingproxy' object does not support item assignment

In [20]:
# Updates to the original are reflected in the proxy:
writable['one'] = 42

In [21]:
read_only

mappingproxy({'one': 42, 'two': 2})

#### Key Takeaways
* Dictionaries are the central data structure in Python.
* The built-in dict type will be “good enough” most of the time.
* Specialized implementations, like read-only or ordered dicts,
are available in the Python standard library.

## 2 Array Data Structures

### list – Mutable Dynamic Arrays

In [22]:
arr = ['one', 'two', 'three']

In [23]:
arr[0]

'one'

In [24]:
# Lists have a nice repr:
arr

['one', 'two', 'three']

In [25]:
# Lists are mutable:
arr[1] = 'hello'
arr

['one', 'hello', 'three']

In [26]:
del arr[1]
arr

['one', 'three']

In [27]:
# Lists can hold arbitrary data types:
arr.append(23)
arr

['one', 'three', 23]

### tuple – Immutable Containers

In [28]:
arr = 'one', 'two', 'three'
arr[0]

'one'

In [29]:
# Tuples have a nice repr:
arr

('one', 'two', 'three')

In [30]:
# Tuples are immutable:
arr[1] = 'hello'


TypeError: 'tuple' object does not support item assignment

In [31]:
del arr[1]

TypeError: 'tuple' object doesn't support item deletion

In [32]:
# Tuples can hold arbitrary data types:
# (Adding elements creates a copy of the tuple)
arr + (23,)

('one', 'two', 'three', 23)

### array.array – Basic Typed Arrays

In [33]:
import array
arr = array.array('f', (1.0, 1.5, 2.0, 2.5))


In [34]:
arr[1]

1.5

In [35]:
# Arrays have a nice repr:
arr

array('f', [1.0, 1.5, 2.0, 2.5])

In [36]:
# Arrays are mutable:
arr[1] = 23.0
arr

array('f', [1.0, 23.0, 2.0, 2.5])

In [37]:
del arr[1]
arr

array('f', [1.0, 2.0, 2.5])

In [38]:
arr.append(42.0)
arr

array('f', [1.0, 2.0, 2.5, 42.0])

In [39]:
# Arrays are "typed":
arr[1] = 'hello'


TypeError: must be real number, not str

### str – Immutable Arrays of Unicode Characters

In [40]:
arr = 'abcd'
arr[1]

'b'

In [41]:
arr

'abcd'

In [42]:
arr[1] = 'e'

TypeError: 'str' object does not support item assignment

In [43]:
del arr[1]

TypeError: 'str' object doesn't support item deletion

In [44]:
# Strings can be unpacked into a list to
# get a mutable representation:
list('abcd')

['a', 'b', 'c', 'd']

In [45]:
''.join(list('abcd'))

'abcd'

In [46]:
# Strings are recursive data structures:
type('abc')

str

In [47]:
type('abc'[0])


str

### bytes – Immutable Arrays of Single Bytes

In [48]:
arr = bytes((0, 1, 2, 3))
arr[1]

1

In [49]:
arr

b'\x00\x01\x02\x03'

In [50]:
bytes((0, 300))

ValueError: bytes must be in range(0, 256)

In [51]:
# Bytes are immutable:
arr[1] = 23

TypeError: 'bytes' object does not support item assignment

In [52]:
del arr[1]

TypeError: 'bytes' object doesn't support item deletion

### bytearray – Mutable Arrays of Single Bytes

In [53]:
arr = bytearray((0, 1, 2, 3))
arr[1]

1

In [54]:
# The bytearray repr:
arr

bytearray(b'\x00\x01\x02\x03')

In [55]:
arr[1] = 23
arr

bytearray(b'\x00\x17\x02\x03')

In [56]:
# Bytearrays can grow and shrink in size:
del arr[1]
arr

bytearray(b'\x00\x02\x03')

In [57]:
bytes(arr)

b'\x00\x02\x03'

#### Key Takeaways
* You need to store arbitrary objects, potentially with mixed
data types? Use a list or a tuple, depending on whether you want
an immutable data structure or not.
* You have numeric (integer or floating point) data and tight
packing and performance are important? Try out array.array
and see if it does everything you need. Also, consider going beyond
the standard library and try out packages like NumPy or Pandas.
* You have textual data represented as Unicode characters?
Use Python’s built-in str. If you need a “mutable string,” use a list
of characters.
* You want to store a contiguous block of bytes? Use the immutable bytes type, or bytearray if you need a mutable data structure.

## 3 Records, Structs, and Data Transfer Objects

### dict – Simple Data Objects

`Using dictionaries as a record data type or data object in Python is
possible. Dictionaries are easy to create in Python, as they have their
own syntactic sugar built into the language in the form of dictionary
literals. The dictionary syntax is concise and quite convenient to type.`

`Data objects created using dictionaries are mutable, and there’s little
protection against misspelled field names, as fields can be added and
removed freely at any time. Both of these properties can introduce
surprising bugs, and there’s always a trade-off to be made between
convenience and error resilience.`

In [58]:
car1 = {
'color': 'red',
'mileage': 3812.4,
'automatic': True,
}
car2 = {
'color': 'blue',
'mileage': 40231,
'automatic': False,
}

In [59]:
# Dicts have a nice repr:
car2

{'color': 'blue', 'mileage': 40231, 'automatic': False}

In [60]:
# Get mileage:
car2['mileage']

40231

In [61]:
# Dicts are mutable:
car2['mileage'] = 12
car2['windshield'] = 'broken'
car2

{'color': 'blue', 'mileage': 12, 'automatic': False, 'windshield': 'broken'}

In [62]:
# No protection against wrong field names,
# or missing/extra fields:
car3 = {
'colr': 'green',
'automatic': False,
'windshield': 'broken',
}

### tuple – Immutable Groups of Objects

`Performance-wise, tuples take up slightly less memory than lists in
CPython, and they’re also faster to construct.`

In [63]:
# Fields: color, mileage, automatic
car1 = ('red', 3812.4, True)
car2 = ('blue', 40231.0, False)

In [64]:
# Tuple instances have a nice repr:
car1


('red', 3812.4, True)

In [65]:
# Get mileage:
car2[1]

40231.0

In [66]:
# Tuples are immutable:
car2[1] = 12

TypeError: 'tuple' object does not support item assignment

In [67]:
# No protection against missing/extra fields
# or a wrong order:
car3 = (3431.5, 'green', True, 'silver')


### Writing a Custom Class – More Work, More Control

In [1]:
class Car:
    """Plain mutable record describing a car.

    Stores the three constructor arguments as same-named instance
    attributes. Instances stay fully mutable: attributes can be
    reassigned, and new ones can be attached after construction.
    """

    def __init__(self, color, mileage, automatic):
        # Bind all three fields in one step; targets are assigned
        # left to right, so attribute creation order is unchanged.
        self.color, self.mileage, self.automatic = color, mileage, automatic

In [2]:
car1 = Car('red', 3812.4, True)

In [3]:
car2 = Car('blue', 40231.0, False)

In [4]:
# Get the mileage:
car2.mileage

40231.0

In [5]:
# Classes are mutable:
car2.mileage = 12
car2.windshield = 'broken'

In [None]:
car2.windshield

'broken'

In [8]:
# String representation is not very useful
# (must add a manually written __repr__ method):
car1

<__main__.Car at 0x1faf128d8b0>

### collections.namedtuple – Convenient Data Objects

In [9]:
from collections import namedtuple
from sys import getsizeof
p1 = namedtuple('Point', 'x y z')(1, 2, 3)
p2 = (1, 2, 3)
getsizeof(p1)

64

In [10]:
getsizeof(p2)

64

In [11]:
from collections import namedtuple
Car = namedtuple('Car' , 'color mileage automatic')
car1 = Car('red', 3812.4, True)


In [12]:
# Instances have a nice repr:
car1


Car(color='red', mileage=3812.4, automatic=True)

In [13]:
# Accessing fields:
car1.mileage

3812.4

In [14]:
# Fields are immutable:
car1.mileage = 12

AttributeError: can't set attribute

In [15]:
car1.windshield = 'broken'

AttributeError: 'Car' object has no attribute 'windshield'

In [18]:
# Plain namedtuple fields have no type annotations, and even
# typing.NamedTuple annotations are not enforced without a
# separate type checking tool like mypy:
Car('red', 'NOT_A_FLOAT', 99)

Car(color='red', mileage='NOT_A_FLOAT', automatic=99)

### struct.Struct – Serialized C Structs