# Collections module

https://docs.python.org/3/library/collections.html

In [1]:
from collections import namedtuple, deque, ChainMap, Counter, defaultdict 
import csv
import random

## namedtuple

In [2]:
Comment = namedtuple("Comment", "text polarity")

In [3]:
comment = Comment("Nice Bite! Learned (once again) to always, always, always proof-read my code.", 0.75)

In [4]:
comment[0], comment[1]

('Nice Bite! Learned (once again) to always, always, always proof-read my code.',
 0.75)

In [5]:
comment.text, comment.polarity

('Nice Bite! Learned (once again) to always, always, always proof-read my code.',
 0.75)

## deque

In [6]:
lst = list(range(10_000_000))
deq = deque(range(10_000_000))

def insert_and_delete(ds):
    for _ in range(10):
        index = random.choice(range(100))
        ds.remove(index)
        ds.insert(index, index)

In [7]:
%timeit insert_and_delete(lst)

157 ms ± 4.59 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [8]:
%timeit insert_and_delete(deq)

18.5 µs ± 404 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [9]:
deq = deque([4, 5, 6], maxlen=3)

In [10]:
for i in range(5):
    deq.append(i)
    print(i, deq)

0 deque([5, 6, 0], maxlen=3)
1 deque([6, 0, 1], maxlen=3)
2 deque([0, 1, 2], maxlen=3)
3 deque([1, 2, 3], maxlen=3)
4 deque([2, 3, 4], maxlen=3)


## ChainMap

In [13]:
cli_args = {}

In [14]:
import os 
try:
    del os.environ["color"]
except KeyError:
    pass

In [15]:
defaults = {'color': 'red'}

In [16]:
combined = ChainMap(cli_args, os.environ, defaults)

In [17]:
combined['color']

'red'

In [18]:
os.environ["color"] = "blue"

In [19]:
combined = ChainMap(cli_args, os.environ, defaults)

In [20]:
combined['color']

'blue'

In [21]:
cli_args = {"color": "green"}

In [22]:
combined = ChainMap(cli_args, os.environ, defaults)

In [23]:
combined['color']

'green'

## Counter

In [24]:
text = """
Once a bounded length deque is full, when new items are added, a corresponding number of items are discarded from the opposite end. Bounded length deques provide functionality similar to the tail filter in Unix. They are also useful for tracking transactions and other pools of data where only the most recent activity is of interest.
"""

In [25]:
cnt = Counter(text.strip().split())

In [26]:
cnt.most_common(10)

[('are', 3),
 ('of', 3),
 ('the', 3),
 ('a', 2),
 ('length', 2),
 ('is', 2),
 ('items', 2),
 ('Once', 1),
 ('bounded', 1),
 ('deque', 1)]

In [27]:
more_text = "Several mathematical operations are provided for combination of numbers"

In [28]:
cnt2 = Counter(more_text.split())

In [29]:
cnt2.most_common()

[('Several', 1),
 ('mathematical', 1),
 ('operations', 1),
 ('are', 1),
 ('provided', 1),
 ('for', 1),
 ('combination', 1),
 ('of', 1),
 ('numbers', 1)]

In [30]:
(cnt + cnt2).most_common(10)

[('are', 4),
 ('of', 4),
 ('the', 3),
 ('a', 2),
 ('length', 2),
 ('is', 2),
 ('items', 2),
 ('for', 2),
 ('Once', 1),
 ('bounded', 1)]

## defaultdict

In [31]:
# docs example
s = [('yellow', 1), ('blue', 2), ('yellow', 3), ('blue', 4), ('red', 1)]

In [37]:
d = defaultdict(list)

In [38]:
for k, v in s:
    d[k].append(v)

In [39]:
d

defaultdict(list, {'yellow': [1, 3], 'blue': [2, 4], 'red': [1]})

In [40]:
# nested example
def get_num_words_spoken_by_character_per_episode(content):
    """Receives loaded csv content (str) and returns a dict of
       keys=characters and values=Counter object,
       which is a mapping of episode=>words spoken"""
    reader = csv.DictReader(content.splitlines(), delimiter=',')

    # nested collections: https://stackoverflow.com/a/5029958
    words_spoken = defaultdict(lambda: Counter())

    for row in reader:
        episode = row['Episode']
        character = row['Character']
        words = row['Line'].rstrip().split()
        words_spoken[character][episode] += len(words)

    return words_spoken

In [41]:
with open("southpark") as f:
    content = f.read()

In [42]:
len(content)

333506

In [43]:
words = get_num_words_spoken_by_character_per_episode(content)

In [44]:
len(words)

379

In [45]:
from pprint import pprint as pp

pp(words)

defaultdict(<function get_num_words_spoken_by_character_per_episode.<locals>.<lambda> at 0x7f836e59bca0>,
            {'A Boy': Counter({'12': 1}),
             'A Politician': Counter({'14': 2}),
             'A few Congressmen': Counter({'13': 5}),
             'A man': Counter({'4': 7}),
             'A voice': Counter({'14': 5}),
             'Adults': Counter({'7': 7}),
             'Afghan Boys': Counter({'9': 39}),
             'Agent': Counter({'6': 30}),
             'Agent 1': Counter({'11': 6}),
             'Agent 2': Counter({'11': 5}),
             'Agent 3': Counter({'11': 14}),
             'Agent 4': Counter({'11': 18}),
             'Agent 5': Counter({'11': 14}),
             'Agent 6': Counter({'11': 23}),
             'Aide': Counter({'3': 51}),
             'Akmarh': Counter({'9': 157}),
             'All': Counter({'5': 2, '1': 1}),
             'All Three': Counter({'11': 3}),
             'Anchor': Counter({'11': 57}),
             'Anchor Tom': Counter({'1': 8