# collections

This module implements specialized container datatypes providing alternatives to Python’s general purpose built-in containers, dict, list, set, and tuple.

For more information see the documentation: https://docs.python.org/3/library/collections.html

A brief overview:

| Name         | Description                                                          |
| ------------ | -------------------------------------------------------------------- |
| namedtuple() | factory function for creating tuple subclasses with named fields     |
| deque        | list-like container with fast appends and pops on either end         |
| ChainMap     | dict-like class for creating a single view of multiple mappings      |
| Counter      | dict subclass for counting hashable objects                          |
| OrderedDict  | dict subclass that remembers the order entries were added            |
| defaultdict  | dict subclass that calls a factory function to supply missing values |
| UserDict     | wrapper around dictionary objects for easier dict subclassing        |
| UserList     | wrapper around list objects for easier list subclassing              |
| UserString   | wrapper around string objects for easier string subclassing          |


In [54]:
from collections import namedtuple
from collections import deque
from collections import ChainMap
from collections import Counter
from collections import OrderedDict
from collections import defaultdict
from collections import UserDict
from collections import UserList
from collections import UserString

### ChainMap

A ChainMap is a dict-like class that groups multiple mappings together to create a single, updateable view. Lookups search the underlying mappings in order until a key is found, while writes, updates, and deletions only operate on the first mapping.

In [55]:
# source https://en.wikipedia.org/wiki/List_of_heaviest_land_mammals

heavy_anmials = {
    'African elephant': 6600,
    'Asian elephant': 3700,
    'White rhinoceros': 3300,
    'Hippopotamus': 2105,
    'Black rhinoceros': 1873
}

lighter_animals = {
    'Giraffe': 1050,
    'Gaur': 975,
    'Bison': 970,
    'Wild water buffalo': 900,
    'Wild yak': 850
}

In [56]:
animals = ChainMap(heavy_anmials, lighter_animals, {})
# The chain map searches in each map in order.

In [57]:
animals['African elephant']

6600

In [58]:
animals['Giraffe']

1050

In [59]:
# Writes and deletes all occur on the first map.
animals['Human'] = 75

In [60]:
heavy_anmials

{'African elephant': 6600,
 'Asian elephant': 3700,
 'White rhinoceros': 3300,
 'Hippopotamus': 2105,
 'Black rhinoceros': 1873,
 'Human': 75}

In [61]:
animals.maps

[{'African elephant': 6600,
  'Asian elephant': 3700,
  'White rhinoceros': 3300,
  'Hippopotamus': 2105,
  'Black rhinoceros': 1873,
  'Human': 75},
 {'Giraffe': 1050,
  'Gaur': 975,
  'Bison': 970,
  'Wild water buffalo': 900,
  'Wild yak': 850},
 {}]

In [62]:
for x, y in animals.items():
    print(x, y)

Giraffe 1050
Gaur 975
Bison 970
Wild water buffalo 900
Wild yak 850
African elephant 6600
Asian elephant 3700
White rhinoceros 3300
Hippopotamus 2105
Black rhinoceros 1873
Human 75


In [63]:
# You can think of the ChainMap as similar to merging the dicts, but without
# copying the data into a new dict.
animals = lighter_animals.copy()
animals.update(heavy_anmials)
animals

{'Giraffe': 1050,
 'Gaur': 975,
 'Bison': 970,
 'Wild water buffalo': 900,
 'Wild yak': 850,
 'African elephant': 6600,
 'Asian elephant': 3700,
 'White rhinoceros': 3300,
 'Hippopotamus': 2105,
 'Black rhinoceros': 1873,
 'Human': 75}

In [64]:
# Similar to:
animals = {**lighter_animals, **heavy_anmials}

### Counter

A Counter is a dict subclass for counting hashable objects. It is a collection where elements are stored as dictionary keys and their counts are stored as dictionary values.

In [65]:
# From a string (iterable)
counter = Counter('misissippi')
counter

Counter({'m': 1, 'i': 4, 's': 3, 'p': 2})

In [66]:
# From an iterable
counter = Counter(["cat", "cat", "dog", "dog", "cat", "gold fish"])
counter

Counter({'cat': 3, 'dog': 2, 'gold fish': 1})

In [67]:
# From dict
counter = Counter({'cat': 3, 'dog': 2, 'gold fish': 1})
counter

Counter({'cat': 3, 'dog': 2, 'gold fish': 1})

In [68]:
# Missing elements have a count of 0
counter['shark']

0

In [69]:
list(counter.elements())

['cat', 'cat', 'cat', 'dog', 'dog', 'gold fish']

In [70]:
counter.most_common(2)

[('cat', 3), ('dog', 2)]

In [71]:
# find the two least common animals
counter.most_common()[:-3:-1]

[('gold fish', 1), ('dog', 2)]

In [72]:
counter.total()

6

In [73]:
another_counter = Counter({'cat': -3, 'dog': 10})

In [74]:
counter + another_counter

Counter({'dog': 12, 'gold fish': 1})

In [75]:
# You can increment the values like so:
counter['dog'] += 1

### deque

Deques support thread-safe, memory-efficient appends and pops from either side of the deque with approximately the same O(1) performance in either direction.

In [76]:
example_list = list(range(50_000))

In [77]:
%%timeit
while example_list:
    example_list.pop(-1)
    example_list.pop(0)

132 ns ± 65.3 ns per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [78]:
example_deque = deque(range(50_000))

In [79]:
%%timeit
while example_deque:
    example_deque.pop()
    example_deque.popleft()

20.5 ns ± 0.227 ns per loop (mean ± std. dev. of 7 runs, 10,000,000 loops each)


In [80]:
example_deque = deque(range(5))

In [81]:
example_deque

deque([0, 1, 2, 3, 4])

In [82]:
example_deque.append(5)
example_deque.appendleft(-1)

In [83]:
example_deque

deque([-1, 0, 1, 2, 3, 4, 5])

In [84]:
example_deque.extend([6, 7, 8])
example_deque.extendleft([-2, -3, -4]) # Note the ordering.
example_deque

deque([-4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8])

In [85]:
example_deque.index(3)

7

In [86]:
example_deque.insert(7, 5)
example_deque

deque([-4, -3, -2, -1, 0, 1, 2, 5, 3, 4, 5, 6, 7, 8])

In [87]:
# remove the first occurrence
example_deque.remove(5)
example_deque

deque([-4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8])

In [88]:
# This is inplace.
example_deque.reverse()

In [89]:
example_deque

deque([8, 7, 6, 5, 4, 3, 2, 1, 0, -1, -2, -3, -4])

deque with maxlen

In [90]:
limited_deque = deque(range(5), maxlen=5)
limited_deque

deque([0, 1, 2, 3, 4])

In [91]:
# note this pops off the 0
limited_deque.append(5)
limited_deque

deque([1, 2, 3, 4, 5])

In [92]:
# this pushes the 5 off the other end
limited_deque.appendleft(0)
limited_deque

deque([0, 1, 2, 3, 4])

In [93]:
# this sequentially pushes the 5, 6, 7
# onto the right end forcing out the 0, 1, 2
limited_deque.extend([5, 6, 7])
limited_deque

deque([3, 4, 5, 6, 7])

In [94]:
# this sequentially pushes the 0, 1, 2
# onto the left end forcing out the 5, 6, 7
limited_deque.extendleft([0, 1, 2])
limited_deque

deque([2, 1, 0, 3, 4])

In [95]:
# this rotates the elements n steps to the right (here n=2).
limited_deque.rotate(2)
limited_deque

deque([3, 4, 2, 1, 0])

In [96]:
# we can access the maxlen
limited_deque.maxlen

5

In [97]:
limited_deque.clear()
limited_deque

deque([])

In [98]:
# note if the initial iterable is longer than
# the maxlen then we only get the last n elements
deque([1, 2, 3, 4, 5], maxlen=3)

deque([3, 4, 5])

### defaultdict

In [99]:
sentence = """imagine we want to take a sentence and store words in lists 
in a dictionary keyed on the letter that each word starts with"""

In [100]:
words_by_starting_letter = {}
for word in sentence.split(' '):
    if word.isalpha():
        if word[0] in words_by_starting_letter:
            words_by_starting_letter[word[0]].append(word)
        else:
            words_by_starting_letter[word[0]] = [word]
words_by_starting_letter

{'i': ['imagine', 'in'],
 'w': ['we', 'want', 'words', 'word', 'with'],
 't': ['to', 'take', 'the', 'that'],
 'a': ['a', 'and', 'a'],
 's': ['sentence', 'store', 'starts'],
 'l': ['lists', 'letter'],
 'd': ['dictionary'],
 'k': ['keyed'],
 'o': ['on'],
 'e': ['each']}

In [101]:
words_by_starting_letter = defaultdict(list)
for word in sentence.split(' '):
    if word.isalpha():
        words_by_starting_letter[word[0]].append(word)
words_by_starting_letter

defaultdict(list,
            {'i': ['imagine', 'in'],
             'w': ['we', 'want', 'words', 'word', 'with'],
             't': ['to', 'take', 'the', 'that'],
             'a': ['a', 'and', 'a'],
             's': ['sentence', 'store', 'starts'],
             'l': ['lists', 'letter'],
             'd': ['dictionary'],
             'k': ['keyed'],
             'o': ['on'],
             'e': ['each']})

In [102]:
# Note you can also use this formulation in some circumstances.
words_by_starting_letter = {}
for word in sentence.split(' '):
    if word.isalpha():
        words_by_starting_letter.setdefault(word[0], []).append(word)
words_by_starting_letter

{'i': ['imagine', 'in'],
 'w': ['we', 'want', 'words', 'word', 'with'],
 't': ['to', 'take', 'the', 'that'],
 'a': ['a', 'and', 'a'],
 's': ['sentence', 'store', 'starts'],
 'l': ['lists', 'letter'],
 'd': ['dictionary'],
 'k': ['keyed'],
 'o': ['on'],
 'e': ['each']}

In [104]:
# An example from the official docs:
s = 'mississippi'
d = defaultdict(int)
for k in s:
    d[k] += 1

sorted(d.items())


[('i', 4), ('m', 1), ('p', 2), ('s', 4)]

### named tuple

Named tuples assign meaning to each position in a tuple and allow for more readable, self-documenting code. 

In [123]:
simon = ('Simon Ward-Jones', 'Data Scientist', 180)

In [124]:
def display_employee(employee):
    """Print a one-line summary of an employee stored as a plain tuple.

    The record is read purely by position: index 0 is the name, index 1
    the job title and index 2 the height in centimetres. Nothing is
    returned; the summary is written to standard output.
    """
    # Positional lookups: the reader has to know what each index means.
    name = employee[0]
    job_title = employee[1]
    height = employee[2]
    print(f"Employee {name} is a {job_title} and is {height} cm tall")

In [125]:
display_employee(simon)

Employee Simon Ward-Jones is a Data Scientist and is 180 cm tall


In [138]:
Employee = namedtuple("Employee", "name job_title height")

In [139]:
Employee = namedtuple("Employee",["name", "job_title", "height"])

In [140]:
simon = Employee(name='Simon Ward-Jones', 
                 job_title='Data Scientist',
                 height=180)

Employee(name='Simon Ward-Jones', job_title='Data Scientist', height=180)

In [152]:
def display_employee(employee):
    """Print a one-line summary of an employee record.

    ``employee`` must expose ``name``, ``job_title`` and ``height``
    attributes (for example an ``Employee`` named tuple). Nothing is
    returned; the summary is written to standard output.
    """
    # Build the message first so the attribute lookups read on their own.
    summary = (
        f"Employee {employee.name} is a {employee.job_title}"
        f" and is {employee.height} cm tall"
    )
    print(summary)

In [153]:
display_employee(simon)

Employee Simon Ward-Jones is a Data Scientist and is 180 cm tall


In [154]:
simon._asdict()

{'name': 'Simon Ward-Jones', 'job_title': 'Data Scientist', 'height': 180}

In [155]:
taller_simon = simon._replace(height=183)
taller_simon

Employee(name='Simon Ward-Jones', job_title='Data Scientist', height=183)

In [156]:
simon._fields

('name', 'job_title', 'height')

In [157]:
# We can also do the same thing using the typing.NamedTuple

In [158]:
from typing import NamedTuple

In [159]:
class Employee(NamedTuple):
    """Typed named tuple describing an employee."""

    # Field order defines the tuple's positional order.
    name: str
    job_title: str
    # Height in centimetres (per the "cm tall" message used elsewhere).
    height: float

In [160]:
simon = Employee(name='Simon Ward-Jones', 
                 job_title='Data Scientist',
                 height=180)

In [161]:
simon

Employee(name='Simon Ward-Jones', job_title='Data Scientist', height=180)

### OrderedDict objects

Ordered dictionaries are just like regular dictionaries but have some extra capabilities relating to ordering operations. They have become less important now that the built-in dict class gained the ability to remember insertion order (this new behavior became guaranteed in Python 3.7).

# Fin.