# collections module


In [None]:
# some utilities

import pprint

from data_utils import get_article_items

# Shared pretty-printer configured with a 4-space indent per nesting level.
pp = pprint.PrettyPrinter(indent=4)

# OrderedDict

In [None]:
# OrderedDict is a dictionary that remembers the order in which its keys were added.

# A plain dict did not preserve key order up to Python 3.5. CPython 3.6 keeps
# insertion order as an implementation detail, and from Python 3.7 on the
# language guarantees it for a regular `dict`.

data = dict(b=2, a=1, c=3)
for key in data:
    print(key)

In [None]:
from collections import OrderedDict

# Same data in an OrderedDict, with the keys inserted in sorted order.
o_dict = OrderedDict((key, data[key]) for key in sorted(data))

for key in o_dict:
    print(key)

# defaultdict

In [None]:
from collections import defaultdict

# The defaultdict is a subclass of Python’s dict that accepts a default_factory as its primary argument.

# Sample input for the word counts below: the text of the first `content`
# entry of the first article's abstract.
abstract = get_article_items()[0]['abstract']['content'][0]['text']
# split() with no argument splits on any run of whitespace, so repeated spaces
# or newlines in the text cannot produce empty-string "words".
words = abstract.split()

In [None]:
# Counting with a plain dict: a missing key has to be handled by hand,
# here by asking get() for a default of 0 before adding one.

reg_dict = {}
for word in words:
    reg_dict[word] = reg_dict.get(word, 0) + 1

print(reg_dict)

In [None]:
# now with a defaultdict

# defaultdict(int) calls int() to create a value of 0 the first time a key is
# accessed, so no membership check is needed before incrementing.
d_dict = defaultdict(int)
for word in words:
    d_dict[word] += 1

print(d_dict)

# namedtuple

In [34]:
from collections import namedtuple

# namedtuple() is a factory: it returns a new tuple subclass whose positions
# can also be read as named attributes.

data = get_article_items()[0]

Article = namedtuple('Article', ['title', 'doi'])

# Fill the record by looking up each declared field name in the article dict.
article = Article._make(data[field] for field in Article._fields)

print(Article)
print('title: ', article.title)
print('doi: ', article.doi)

<class '__main__.Article'>
title:  Activation of Toll-like receptors nucleates assembly of the MyDDosome signaling hub
doi:  10.7554/eLife.31377


# ChainMap

In [None]:
from collections import ChainMap

# ChainMap groups several mappings into a single view without copying them.
# A lookup walks the maps in the order given and returns the first match.

base_config = {
    'ip': '127.0.0.1',
    'port': 8080,
    'proc_name': 'base',
    'threading': False,
}
os_config = {'threading': True}
custom_config = {'proc_name': 'custom', 'port': 8081}

# Priority runs left to right: custom_config wins over os_config,
# which in turn wins over base_config.
CONFIG = ChainMap(custom_config, os_config, base_config)

for k, v in CONFIG.items():
    print('{0}: {1}'.format(k, v))

# Counter

In [None]:
# convenient and fast tallies

from collections import Counter

"""
Small task to grab some article abstracts, find the 20 most common words used.
"""

# Get a list of abstract texts: the text of the first `content` entry of each
# article's abstract.
abstracts = [a['abstract']['content'][0]['text'] for a in get_article_items()]

# Join them all into one string, then separate the words into a flat list,
# e.g. ['The', 'brain', 'needs', 'to', 'predict', 'how', 'the', 'body', 'reacts', ...]
# split() with no argument splits on any run of whitespace, so repeated spaces
# or newlines cannot show up as empty-string "words" in the tallies.
all_words = ' '.join(abstracts).split()


# One way to tally the words with nothing but a plain dict:
# make sure the key exists with a count of 0, then increment it.
result = {}
for word in all_words:
    result.setdefault(word, 0)
    result[word] += 1

print(result)
# ...and the 20 most common words still have to be picked out by hand
# ??...

In [None]:
# an easier way using Counter()
# Counter tallies every word in one call; most_common(20) returns the 20
# (word, count) pairs with the highest counts, most frequent first.
result = Counter(all_words)
print(result.most_common(20))