In [2]:
# Building a multidict (a dict that maps each key to several values) by hand is fiddly,
# mostly because the first value for a key needs special handling. Hand-written code
# typically looks like this:
pairs = [('a', 1), ('a', 2), ('b', 3)]  # sample (key, value) input
d = {}
for key, value in pairs:
    if key not in d:
        # First time this key is seen: create its container.
        d[key] = []
    # Append unconditionally so the first value for each key is stored as well.
    d[key].append(value)


# defaultdict creates the initial container automatically the first time a key is accessed,
# so the code only has to add items -- which leads to much cleaner code.
from collections import defaultdict

# Each missing key starts out as an empty list; duplicates are kept.
d = defaultdict(list)
d['a'].extend([1, 1])
d['b'].extend([2])
print(f'multidict dict[str->list]: {d}')

# Each missing key starts out as an empty set; duplicates collapse.
s = defaultdict(set)
s['a'].update([1, 1])
s['b'].update([2])
print(f'multidict dict[str->set]: {s}')



multidict dict[str->list]: defaultdict(<class 'list'>, {'a': [1, 1], 'b': [2]})
multidict dict[str->set]: defaultdict(<class 'set'>, {'a': {1}, 'b': {2}})


In [3]:
# To control the order of items in a dictionary, use OrderedDict from the collections module.
from collections import OrderedDict

# Items are supplied as (key, value) pairs; iteration follows this insertion order.
d = OrderedDict([('foo', 1), ('abb', 2), ('zoo', -1)])
for key in d:
    print(f'{key} {d[key]}')

# Internally, an OrderedDict maintains a doubly linked list that orders the keys by insertion order.
# Be aware that an OrderedDict is more than twice the size of a normal dictionary
# because of the extra linked list it creates.

foo 1
abb 2
zoo -1


In [4]:
# Performing calculations (min, max, sorting, ...) on a dictionary of data
prices = {
    'ACME': 45.23,
    'AAPL': 612.78,
    'IBM': 205.55,
    'HPQ': 37.20,
    'FB': 10.75
}
# zip() inverts the dictionary into a sequence of (value, key) pairs.
# NOTE(review): json is not used in this cell; the import is kept in case other cells rely on it.
import json
# zip() returns a one-shot iterator: once min() has walked it, it is empty and a later
# max()/sorted() call would fail. Materialise the pairs as a list so they can be reused.
dict_name_to_price = list(zip(prices.values(), prices.keys()))

# Tuples compare element by element, so for (value, key) pairs the value is compared first
# and the key only breaks ties.
min_price = min(dict_name_to_price)
min_price


(10.75, 'FB')

In [1]:
# Finding what two dictionaries have in common
a = dict(x=1, y=2, z=3, zz=3)

b = dict(w=10, x=11, y=2)
# Keys present in both dictionaries
print(a.keys() & b.keys())

# Keys in a that are missing from b
print(a.keys() - b.keys())

# (key, value) pairs present in both dictionaries
print(a.items() & b.items())

# The values() view of a dictionary does not support set operations.
# In part this is because, unlike keys, values are not guaranteed to be unique.
# If such a calculation is really needed, convert the values to sets first.
print(set(a.values()) & set(b.values()))

{'y', 'x'}
{'z', 'zz'}
{('y', 2)}
{2}


In [33]:
# Sorting a list of dictionaries by a common key
rows = [
    dict(fname='Big', lname='Jones', uid=1004),
    dict(fname='Big', lname='Big', uid=1005),
    dict(fname='Brian', lname='Jones', uid=1005),
    dict(fname='David', lname='Beazley', uid=1002),
    dict(fname='John', lname='Cleese', uid=1001),
]

from operator import itemgetter

# sorted() accepts a keyword argument `key`: a callable that receives a single item from
# rows and returns the value used as the basis for sorting.
# itemgetter() creates exactly such a callable.
sort_argument = ['fname']
sorted_rows = sorted(rows, key=itemgetter(*sort_argument))
print(f'rows_by_fname: {list(map(itemgetter(*sort_argument), sorted_rows))}')

sort_argument = ['uid']
sorted_rows = sorted(rows, key=itemgetter(*sort_argument))
print(f'rows_by_uid: {list(map(itemgetter(*sort_argument), sorted_rows))}')

# When itemgetter() is given several keys, the callable it produces returns a tuple,
# so rows are ordered by the first key and ties are broken by the next one.
sort_argument = ['fname', 'lname']
sorted_rows = sorted(rows, key=itemgetter(*sort_argument))
print(f'rows_by_fname_lname: {list(map(itemgetter(*sort_argument), sorted_rows))}')

# A lambda expression can sometimes stand in for itemgetter().
sorted_rows = sorted(rows, key=lambda row: row['fname'])
print(f'rows_by_fname_lambda: {sorted_rows}')

# The same technique works with functions such as min() and max().
print(f'min: {min(rows, key=itemgetter("uid"))}')


rows_by_fname: ['Big', 'Big', 'Brian', 'David', 'John']
rows_by_uid: [1001, 1002, 1004, 1005, 1005]
rows_by_fname_lname: [('Big', 'Big'), ('Big', 'Jones'), ('Brian', 'Jones'), ('David', 'Beazley'), ('John', 'Cleese')]
rows_by_fname_lambda: [{'fname': 'Big', 'lname': 'Jones', 'uid': 1004}, {'fname': 'Big', 'lname': 'Big', 'uid': 1005}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1005}, {'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}]
min: {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}


In [37]:
# Sorting objects by one of their attributes
class User:
    """Minimal record type that stores a user id."""

    def __init__(self, user_id):
        self.user_id = user_id

    def __repr__(self):
        return f'User({self.user_id})'


users = [User(23), User(3), User(99)]
sorted_user = sorted(users, key=lambda user: user.user_id)
print(sorted_user)

# Instead of a lambda, operator.attrgetter() can supply the key function.
from operator import attrgetter
sorted_user_2 = sorted(users, key=attrgetter('user_id'), reverse=True)
print(sorted_user_2)


[User(3), User(23), User(99)]
[User(99), User(23), User(3)]


In [50]:
# Grouping records together based on a field
rows = [
    {'address': address, 'date': date_str}
    for address, date_str in [
        ('5412 N CLARK', '07/01/2012'),
        ('5148 N CLARK', '07/04/2012'),
        ('5800 E 58TH', '07/02/2012'),
        ('2122 N CLARK', '07/03/2012'),
        ('5645 N RAVENSWOOD', '07/02/2012'),
        ('1060 W ADDISON', '07/02/2012'),
        ('4801 N BROADWAY', '07/01/2012'),
        ('1039 W GRANVILLE', '07/04/2012'),
    ]
]

from operator import itemgetter
from itertools import groupby

# groupby() only groups consecutive items with equal keys, so sort by the desired field first.
rows.sort(key=itemgetter('date'))

# Iterate over the groups; `items` is an iterator over the rows that share `date`.
for date, items in groupby(rows, key=itemgetter('date')):
    print(date)
    print(list(map(str, items)))

07/01/2012
["{'address': '5412 N CLARK', 'date': '07/01/2012'}", "{'address': '4801 N BROADWAY', 'date': '07/01/2012'}"]
07/02/2012
["{'address': '5800 E 58TH', 'date': '07/02/2012'}", "{'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'}", "{'address': '1060 W ADDISON', 'date': '07/02/2012'}"]
07/03/2012
["{'address': '2122 N CLARK', 'date': '07/03/2012'}"]
07/04/2012
["{'address': '5148 N CLARK', 'date': '07/04/2012'}", "{'address': '1039 W GRANVILLE', 'date': '07/04/2012'}"]


In [58]:
# Filtering sequence elements
mylist = [1, 4, -5, 10, -7, 2, 3, -1]
greater_zero = [value for value in mylist if value > 0]
print(greater_zero)

# One potential downside of a list comprehension is that it may produce a large result
# when the original input is large. If that is a concern, use a generator expression,
# which produces the filtered values one at a time.
pos = (value for value in mylist if value > 0)
for x in pos:
    print(x)

[1, 4, 10, 2, 3]
1
4
10
2
3


In [68]:
# Accessing the elements of a sequence by name
from collections import namedtuple

# namedtuple() returns a subclass of the standard Python tuple type.
Subscriber = namedtuple('Subscriber', ['addr', 'joined'])
sub = Subscriber('sfx', '2021')

# Unpacking by index: the targets must follow the field order (addr, joined),
# so the address goes to `add` and the join date to `join`.
add, join = sub

# Unpacking by name: each field is looked up explicitly, so target order is free.
join, add = sub.joined, sub.addr

# A namedtuple is immutable. If an attribute has to change, use the _replace() method,
# which builds an entirely new namedtuple with the given fields substituted.
# Calling _replace() on a prototype instance also converts a dict into a namedtuple.
Stock = namedtuple('Stock', ['name', 'shares'])
stock_prototype = Stock(name="", shares=0)


def dict_to_stock(s):
    """Return a new Stock: the prototype with the fields named in mapping `s` replaced."""
    return stock_prototype._replace(**s)


a = {"name": 'sfx', "shares": 888}
dict_to_stock(a)

Stock(name='sfx', shares=888)