# collections

This module implements specialized container datatypes providing alternatives to Python’s general purpose built-in containers, dict, list, set, and tuple.

For more information see the documentation: https://docs.python.org/3/library/collections.html

A brief overview:

| Name         | Description                                                          |
| ------------ | -------------------------------------------------------------------- |
| namedtuple() | factory function for creating tuple subclasses with named fields     |
| deque        | list-like container with fast appends and pops on either end         |
| ChainMap     | dict-like class for creating a single view of multiple mappings      |
| Counter      | dict subclass for counting hashable objects                          |
| OrderedDict  | dict subclass that remembers the order entries were added            |
| defaultdict  | dict subclass that calls a factory function to supply missing values |
| UserDict     | wrapper around dictionary objects for easier dict subclassing        |
| UserList     | wrapper around list objects for easier list subclassing              |
| UserString   | wrapper around string objects for easier string subclassing          |


In [None]:
from collections import namedtuple
from collections import deque
from collections import ChainMap
from collections import Counter
from collections import OrderedDict
from collections import defaultdict
from collections import UserDict
from collections import UserList
from collections import UserString

### ChainMap

A ChainMap groups multiple dicts (or other mappings) together to create a single, updateable view. Lookups search the underlying mappings in order until a key is found, while writes, updates and deletions only affect the first mapping.

In [None]:
# source https://en.wikipedia.org/wiki/List_of_heaviest_land_mammals

heavy_anmials = {
    'African elephant': 6600,
    'Asian elephant': 3700,
    'White rhinoceros': 3300,
    'Hippopotamus': 2105,
    'Black rhinoceros': 1873
}

lighter_animals = {
    'Giraffe': 1050,
    'Gaur': 975,
    'Bison': 970,
    'Wild water buffalo': 900,
    'Wild yak': 850
}

In [None]:
animals = ChainMap(heavy_anmials, lighter_animals, {})
# The chain map searches in each map in order.

In [None]:
animals['African elephant']

In [None]:
animals['Giraffe']

In [None]:
# Writes and deletes all occur on the first map.
animals['Human'] = 75

In [None]:
heavy_anmials

In [None]:
animals.maps

In [None]:
for x, y in animals.items():
    print(x, y)

In [None]:
# You can think of the ChainMap as similar to merging the dicts, but without
# copying any data into a new dict. The merged-dict equivalent is:
animals = lighter_animals.copy()
animals.update(heavy_anmials)
animals

In [None]:
# Similar to:
animals = {**lighter_animals, **heavy_anmials}

### Counter

A Counter is a dict subclass for counting hashable objects. It is a collection where elements are stored as dictionary keys and their counts are stored as dictionary values.

In [None]:
# From a string (an iterable of characters): each character becomes a key
# and its number of occurrences becomes the value.
counter = Counter('mississippi')
counter

In [None]:
# From an iterable
counter = Counter(["cat", "cat", "dog", "dog", "cat", "gold fish"])
counter

In [None]:
# From dict
counter = Counter({'cat': 3, 'dog': 2, 'gold fish': 1})
counter

In [None]:
# Missing elements have 0 
counter['shark']

In [None]:
list(counter.elements())

In [None]:
counter.most_common(2)

In [None]:
# find the two least common animals
counter.most_common()[:-3:-1]

In [None]:
counter.total()

In [None]:
another_counter = Counter({'cat': -3, 'dog': 10})

In [None]:
counter + another_counter

In [None]:
# You can increment the values like so:
counter['dog'] += 1

### deque

Deques support thread-safe, memory efficient appends and pops from either side of the deque with approximately the same O(1) performance in either direction

In [None]:
example_list = list(range(50_000))

In [None]:
%%timeit
while example_list:
    example_list.pop(-1)
    example_list.pop(0)

In [None]:
example_deque = deque(range(50_000))

In [None]:
%%timeit
while example_deque:
    example_deque.pop()
    example_deque.popleft()

In [None]:
example_deque = deque(range(5))

In [None]:
example_deque

In [None]:
example_deque.append(5)
example_deque.appendleft(-1)

In [None]:
example_deque

In [None]:
example_deque.extend([6, 7, 8])
example_deque.extendleft([-2, -3, -4]) # Note the ordering.
example_deque

In [None]:
example_deque.index(3)

In [None]:
example_deque.insert(7, 5)
example_deque

In [None]:
# remove first occurrence
example_deque.remove(5)
example_deque

In [None]:
# This is inplace.
example_deque.reverse()

In [None]:
example_deque

deque with maxlen

In [None]:
limited_deque = deque(range(5), maxlen=5)
limited_deque

In [None]:
# note this pops off the 0
limited_deque.append(5)
limited_deque

In [None]:
# this pushes the 5 off the other end
limited_deque.appendleft(0)
limited_deque

In [None]:
# this sequentially pushes the 5, 6, 7
# onto the right end forcing out the 0, 1, 2
limited_deque.extend([5, 6, 7])
limited_deque

In [None]:
# this sequentially pushes the 0, 1, 2
# onto the left end forcing out the 7, 6, 5
limited_deque.extendleft([0, 1, 2])
limited_deque

In [None]:
# this rotates elements n steps. 
limited_deque.rotate(2)
limited_deque

In [None]:
# we can access the maxlen
limited_deque.maxlen

In [None]:
limited_deque.clear()
limited_deque

In [None]:
# note if the initial iterable is longer than
# the maxlen then we only get the last n elements
deque([1, 2, 3, 4, 5], maxlen=3)

### defaultdict

In [None]:
sentence = """imagine we want to take a sentence and store words in lists 
in a dictionary keyed on the letter that each word starts with"""

In [None]:
# Plain-dict version: the first word for each letter has to create the list,
# later words append to it.
words_by_starting_letter = {}
# split() with no argument splits on any run of whitespace (spaces, newlines,
# tabs). split(' ') would leave a newline attached to the following word,
# which then fails isalpha() and is silently skipped.
for word in sentence.split():
    if word.isalpha():
        if word[0] in words_by_starting_letter:
            words_by_starting_letter[word[0]].append(word)
        else:
            words_by_starting_letter[word[0]] = [word]
words_by_starting_letter

In [None]:
# defaultdict version: a missing key is filled with list() on first access,
# so no membership check is needed before appending.
words_by_starting_letter = defaultdict(list)
# split() with no argument splits on any run of whitespace (spaces, newlines,
# tabs). split(' ') would leave a newline attached to the following word,
# which then fails isalpha() and is silently skipped.
for word in sentence.split():
    if word.isalpha():
        words_by_starting_letter[word[0]].append(word)
words_by_starting_letter

In [None]:
# Note you can also use this formulation in some circumstances.
words_by_starting_letter = {}
for word in sentence.split(' '):
    if word.isalpha():
        words_by_starting_letter.setdefault(word[0], []).append(word)
words_by_starting_letter

In [None]:
# An example from the official docs:
s = 'mississippi'
d = defaultdict(int)
for k in s:
    d[k] += 1

sorted(d.items())


### named tuple

Named tuples assign meaning to each position in a tuple and allow for more readable, self-documenting code. 

In [None]:
simon = ('Simon Ward-Jones', 'Data Scientist', 180)

In [None]:
def display_employee(employee):
    """Print a one-line summary of an employee stored as a plain sequence.

    Positions 0, 1 and 2 of ``employee`` are read as the name, the job title
    and the height in cm respectively.
    """
    name = employee[0]
    job_title = employee[1]
    height = employee[2]
    print(f"Employee {name} is a {job_title} and is {height} cm tall")

In [None]:
display_employee(simon)

In [None]:
Employee = namedtuple("Employee", "name job_title height")

In [None]:
Employee = namedtuple("Employee",["name", "job_title", "height"])

In [None]:
simon = Employee(name='Simon Ward-Jones', 
                 job_title='Data Scientist',
                 height=180)

In [None]:
def display_employee(employee):
    """Print a one-line summary of an employee, reading fields by name.

    ``employee`` must expose ``name``, ``job_title`` and ``height``
    attributes (for example, an ``Employee`` named tuple).
    """
    summary = (
        f"Employee {employee.name} is a {employee.job_title}"
        f" and is {employee.height} cm tall"
    )
    print(summary)

In [None]:
display_employee(simon)

In [None]:
simon._asdict()

In [None]:
taller_simon = simon._replace(height=183)
taller_simon

In [None]:
simon._fields

In [None]:
# We can also do the same thing using the typing.NamedTuple

In [None]:
from typing import NamedTuple

In [None]:
class Employee(NamedTuple):
    # Typed equivalent of namedtuple("Employee", "name job_title height"):
    # field order below is the positional order of the tuple.
    name: str
    job_title: str
    height: float  # printed as "cm tall" by display_employee

In [None]:
simon = Employee(name='Simon Ward-Jones', 
                 job_title='Data Scientist',
                 height=180)

In [None]:
simon

### OrderedDict objects

Ordered dictionaries are just like regular dictionaries but have some extra capabilities relating to ordering operations. They have become less important now that the built-in dict class gained the ability to remember insertion order (this new behavior became guaranteed in Python 3.7).

# Fin.