In [1]:
names = ['Socrates', 'Archimedes', 'Plato', 'Aristotle']
names.sort(key=len)
print(names)

['Plato', 'Socrates', 'Aristotle', 'Archimedes']


In [2]:
def log_missing():
    print('Key added')
    return 0

In [3]:
from collections import defaultdict
current = {'green': 12, 'blue': 3}
increments = [('red', 5),
 ('blue', 17),
 ('orange', 9),
]
result = defaultdict(log_missing, current)
print('Before:', dict(result))
for key, amount in increments:
    result[key] += amount
print('After: ', dict(result))

Before: {'green': 12, 'blue': 3}
Key added
Key added
After:  {'green': 12, 'blue': 20, 'red': 5, 'orange': 9}


In [4]:
result['a']

Key added


0

In [5]:
print('After: ', dict(result))

After:  {'green': 12, 'blue': 20, 'red': 5, 'orange': 9, 'a': 0}


In [7]:
class CountMissing:
    def __init__(self):
        self.added = 0
    def missing(self):
        self.added += 1
        return 0

In [8]:
counter = CountMissing()
result = defaultdict(counter.missing, current) # Method ref
for key, amount in increments:
    result[key] += amount

In [9]:
counter.added

2

In [10]:
class BetterCountMissing:
    def __init__(self):
        self.added = 0
    def __call__(self):
        self.added += 1
        return 0

In [11]:
counter = BetterCountMissing()

In [13]:
counter() == 0

True

In [16]:
counter = BetterCountMissing()
result = defaultdict(counter, current) # Method ref
for key, amount in increments:
    result[key] += amount
counter.added

2

In [19]:
class InputData:
    def read(self):
        raise NotImplementedError

In [20]:
class PathInputData(InputData):
    def __init__(self, path):
        super().__init__()
        self.path = path
    def read(self):
        with open(self.path) as f:
            return f.read()

In [21]:
class Worker:
    def __init__(self, input_data):
        self.input_data = input_data
        self.result = None
    def map(self):
        raise NotImplementedError
    def reduce(self, other):
        raise NotImplementedError

In [22]:
class LineCountWorker(Worker):
    def map(self):
        data = self.input_data.read()
        self.result = data.count('\n')
    def reduce(self, other):
        self.result += other.result

In [23]:
import os
def generate_inputs(data_dir):
    for name in os.listdir(data_dir):
        yield PathInputData(os.path.join(data_dir, name))

In [24]:
def create_workers(input_list):
    workers = []
    for input_data in input_list:
        workers.append(LineCountWorker(input_data))
    return workers

In [25]:
from threading import Thread


In [26]:
def execute(workers):
    threads = [Thread(target=w.map) for w in workers]
    for thread in threads: thread.start()
    for thread in threads: thread.join()
    first, *rest = workers
    for worker in rest:
        first.reduce(worker)
    return first.result

In [27]:
def mapreduce(data_dir):
    inputs = generate_inputs(data_dir)
    workers = create_workers(inputs)
    return execute(workers)

In [32]:
import os
import random

In [33]:
def write_test_files(tmpdir):
    os.makedirs(tmpdir)
    for i in range(100):
        with open(os.path.join(tmpdir, str(i)), 'w') as f:
            f.write('\n' * random.randint(0, 100))
tmpdir = 'test_inputs'
write_test_files(tmpdir)
result = mapreduce(tmpdir)
print(f'There are {result} lines')

FileExistsError: [WinError 183] Cannot create a file when that file already exists: 'test_inputs'

In [None]:
class GenericInputData:
    def read(self):
        raise NotImplementedError
    @classmethod
    def generate_inputs(cls, config):
        raise NotImplementedError

In [36]:
class MyBaseClass:
    def __init__(self, value):
        self.value = value

In [37]:
class TimesTwo:
    def __init__(self):
        self.value *= 2
class PlusFive:
    def __init__(self):
        self.value += 5

In [38]:
class OneWay(MyBaseClass, TimesTwo, PlusFive):
    def __init__(self, value):
        MyBaseClass.__init__(self, value)
        TimesTwo.__init__(self)
        PlusFive.__init__(self)

In [39]:
foo = OneWay(5)
print('First ordering value is (5 * 2) + 5 =', foo.value)

First ordering value is (5 * 2) + 5 = 15


In [40]:
class TimesSevenCorrect(MyBaseClass):
    def __init__(self, value):
        super().__init__(value)
        self.value *= 7
class PlusNineCorrect(MyBaseClass):
    def __init__(self, value):
        super().__init__(value)
        self.value += 9

In [41]:
class GoodWay(TimesSevenCorrect, PlusNineCorrect):
    def __init__(self, value):
        super().__init__(value)
foo = GoodWay(5)
print('Should be 7 * (5 + 9) = 98 and is', foo.value)

Should be 7 * (5 + 9) = 98 and is 98


In [42]:
mro_str = '\n'.join(repr(cls) for cls in GoodWay.mro())
print(mro_str)

<class '__main__.GoodWay'>
<class '__main__.TimesSevenCorrect'>
<class '__main__.PlusNineCorrect'>
<class '__main__.MyBaseClass'>
<class 'object'>


In [46]:
class GoodWay2(PlusNineCorrect, TimesSevenCorrect):
    def __init__(self, value):
        super().__init__(value)
foo2 = GoodWay2(5)
print('Should be 7 * 5 + 9 = 44 and is', foo2.value)

Should be 7 * 5 + 9 = 44 and is 44


In [47]:
mro_str = '\n'.join(repr(cls) for cls in GoodWay2.mro())
print(mro_str)

<class '__main__.GoodWay2'>
<class '__main__.PlusNineCorrect'>
<class '__main__.TimesSevenCorrect'>
<class '__main__.MyBaseClass'>
<class 'object'>


In [None]:
✦ Python’s standard method resolution order (MRO) solves the problems of superclass initialization order and diamond inheritance.
✦ Use the super built-in function with zero arguments to initialize 
parent classes.

In [None]:
The super function can also be called with two parameters: first the 
type of the class whose MRO parent view you’re trying to access, and 
then the instance on which to access that view. Using these optional 
parameters within the constructor looks like this:
class ExplicitTrisect(MyBaseClass):
    def __init__(self, value):
        super(ExplicitTrisect, self).__init__(value)
        self.value /= 3
However, these parameters are not required for object instance initialization. Python’s compiler automatically provides the correct 
parameters (__class__ and self) for you when super is called with 
zero arguments within a class definition. This means all three of 
these usages are equivalent:
class AutomaticTrisect(MyBaseClass):
 def __init__(self, value):
 super(__class__, self).__init__(value)
 self.value /= 3
class ImplicitTrisect(MyBaseClass):
 def __init__(self, value):
 super().__init__(value)
 self.value /= 3

In [48]:
class ToDictMixin:
    def to_dict(self):
        return self._traverse_dict(self.__dict__)
    def _traverse_dict(self, instance_dict):
        output = {}
        for key, value in instance_dict.items():
            output[key] = self._traverse(key, value)
        return output
    def _traverse(self, key, value):
        if isinstance(value, ToDictMixin):
            return value.to_dict()
        elif isinstance(value, dict):
            return self._traverse_dict(value)
        elif isinstance(value, list):
            return [self._traverse(key, i) for i in value]
        elif hasattr(value, '__dict__'):
            return self._traverse_dict(value.__dict__)
        else:
            return value

In [49]:
class BinaryTree(ToDictMixin):
    def __init__(self, value, left=None, right=None):
        self.value = value
        self.left = left
        self.right = right

In [50]:
tree = BinaryTree(10,
 left=BinaryTree(7, right=BinaryTree(9)),
 right=BinaryTree(13, left=BinaryTree(11)))

In [51]:
print(tree.to_dict())

{'value': 10, 'left': {'value': 7, 'left': None, 'right': {'value': 9, 'left': None, 'right': None}}, 'right': {'value': 13, 'left': {'value': 11, 'left': None, 'right': None}, 'right': None}}


In [52]:
class BinaryTreeWithParent(BinaryTree):
    def __init__(self, value, left=None, right=None, parent=None):
        super().__init__(value, left=left, right=right)
        self.parent = parent
    def _traverse(self, key, value):
        if (isinstance(value, BinaryTreeWithParent) and key == 'parent'):
            return value.value # Prevent cycles
        else:
            return super()._traverse(key, value)

In [53]:
root = BinaryTreeWithParent(10)
root.left = BinaryTreeWithParent(7, parent=root)
root.left.right = BinaryTreeWithParent(9, parent=root.left)
print(root.to_dict())

{'value': 10, 'left': {'value': 7, 'left': None, 'right': {'value': 9, 'left': None, 'right': None, 'parent': 7}, 'parent': 10}, 'right': None, 'parent': None}


In [3]:
class MyParentObject:
    def __init__(self):
        self.__private_field = 71
class MyChildObject(MyParentObject):
    def get_private_field(self):
        return self.__private_field
baz = MyChildObject()
baz._MyParentObject__private_field

71

In [4]:
print(baz.__dict__)

{'_MyParentObject__private_field': 71}


In [5]:
dir(baz)

['_MyParentObject__private_field',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 'get_private_field']

In [6]:
class FrequencyList(list):
    def __init__(self, members):
        super().__init__(members)
    def frequency(self):
        counts = {}
        for item in self:
            counts[item] = counts.get(item, 0) + 1
        return counts

In [7]:
foo = FrequencyList(['a', 'b', 'a', 'c', 'b', 'a', 'd'])
print('Length is', len(foo))

Length is 7


In [8]:
foo.pop()
print('After pop:', repr(foo))
print('Frequency:', foo.frequency())

After pop: ['a', 'b', 'a', 'c', 'b', 'a']
Frequency: {'a': 3, 'b': 2, 'c': 1}


In [9]:
from collections.abc import Sequence

In [10]:
class BadType(Sequence):
    pass

In [11]:
class BinaryNode:
    def __init__(self, value, left=None, right=None):
        self.value = value
        self.left = left
        self.right = right

In [13]:
class IndexableNode(BinaryNode):
    def _traverse(self):
        if self.left is not None:
            yield from self.left._traverse()
        yield self
        if self.right is not None:
            yield from self.right._traverse()
    def __getitem__(self, index):
        for i, item in enumerate(self._traverse()):
            if i == index:
                return item.value
        raise IndexError(f'Index {index} is out of range')

In [14]:
class SequenceNode(IndexableNode):
    def __len__(self):
        for count, _ in enumerate(self._traverse(), 1):
            pass
        return count

In [15]:
class BadType(Sequence):
    pass
foo = BadType()

TypeError: Can't instantiate abstract class BadType with abstract methods __getitem__, __len__

In [16]:
class BetterNode(SequenceNode, Sequence):
    pass

In [17]:
tree = BetterNode(
 10,
 left=BetterNode(
 5,
 left=BetterNode(2),
 right=BetterNode(
 6,
 right=BetterNode(7))),
 right=BetterNode(
 15,
 left=BetterNode(11))
)
print('Index of 7 is', tree.index(7))
print('Count of 10 is', tree.count(10))

Index of 7 is 3
Count of 10 is 1
