# Chapter 17. Iterators, Generators, and Classic Coroutines

## A Sequence of Words

You give its constructor a string with some text, and then you can iterate word by word.

In [None]:
# sentence.py

import re
import reprlib

RE_WORD = re.compile(r'\w+')


class Sentence:
  """Sequence of the words found in a text.

  The class defines no `__iter__`; it exposes the word list through
  `__getitem__` and `__len__`.
  """

  def __init__(self, text):
    self.text = text
    # Eagerly collect every regex match into a list of words.
    self.words = RE_WORD.findall(text)

  def __len__(self):
    """Return the number of words."""
    return len(self.words)

  def __getitem__(self, index):
    """Return whatever the underlying word list returns for `index`."""
    return self.words[index]

  def __repr__(self):
    # reprlib.repr produces an abbreviated repr of the text.
    shortened = reprlib.repr(self.text)
    return 'Sentence(%s)' % shortened



In [None]:
s = Sentence('"The time has come," the Walrus said,')
s

Sentence('"The time ha... Walrus said,')

In [None]:
for word in s:
  print(word)

The
time
has
come
the
Walrus
said


In [None]:
list(s)

['The', 'time', 'has', 'come', 'the', 'Walrus', 'said']

In the following sections, we'll develop other `Sentence` classes that pass the tests in the example above.

In [None]:
s[0]

'The'

In [None]:
s[5]

'Walrus'

In [None]:
s[-1]

'said'

## Why Sequences Are Iterable: The `iter` function

Whenever Python needs to iterate over an object `x`, it automatically calls `iter(x)`

The `iter` built-in fcn:
 1. Checks whether the object implements `__iter__` and calls that to obtain an iterator
 2. If `__iter__` is not implemented but `__getitem__` is, `iter()` creates an iterator that tries to fetch items by index, starting from 0.
 3. If that fails, Python raises `TypeError`, usually saying 'C' object is not iterable, where C is the class of the target obj.

In [None]:
class Spam:
  """Class whose only method is a `__getitem__` that always raises IndexError."""

  def __getitem__(self, i):
    # Show which index was requested, then report that no item exists.
    print('->', i)
    raise IndexError

In [None]:
spam_can = Spam()
iter(spam_can)

<iterator at 0x7efb25334850>

In [None]:
list(spam_can)

-> 0


[]

In the goose-typing approach, the definition of an iterable is simpler but not as flexible: an obj is considered iterable if it implements the `__iter__` method.

In [None]:
from collections import abc
isinstance(spam_can, abc.Iterable)

False

### Using `iter` with Callable

We can call `iter` with two arguments to create an iterator from a fcn or any callable obj. In this usage, the first argument must be a callable to be invoked repeatedly to produce values, and the second argument is a `sentinel`

In [None]:
from random import randint

def d6():
  """Roll a six-sided die: return a random integer from 1 to 6, inclusive."""
  outcome = randint(1, 6)
  return outcome

In [None]:
d6_iter = iter(d6, 1) #

In [None]:
d6_iter

<callable_iterator at 0x7efb25334160>

In [None]:
for roll in d6_iter:
  print(roll)

3
2
2
2
2
2
2
5
3
6
5
4
4


Note that the `iter` function here returns a `callable_iterator`. As usual with iterators, the `d6_iter` object becomes useless once exhausted.

In [None]:
from functools import partial

# Process a binary file in fixed-size blocks using the two-argument form of iter().
with open('mydata.db', 'rb') as f:
  # read64() is f.read(64): each call returns the next block of up to 64 bytes.
  read64 = partial(f.read, 64)
  # iter(callable, sentinel) keeps calling read64 until it returns b'' (end of file).
  for block in iter(read64, b''):
    # NOTE(review): process_block is not defined in the cells shown here.
    process_block(block)

## Iterables Versus Iterators

> *iterable* \
Any object from which the `iter` built-in fcn can obtain an iterator. Objects implementing an `__iter__` method returning an iterator are iterable. Sequences are always iterable, as are objects implementing a `__getitem__` method that accepts 0-based indexes


Python obtains iterators from iterables.

In [None]:
s = 'ABC' # iterable
for char in s: # iterator behind the scene
  print(char)

A
B
C


In [None]:
s = 'ABC'
it = iter(s)
while True:
  try:
    print(next(it))
  except StopIteration:
    del it # release ref to `it`
    break

A
B
C


Python's standard interface for an iterator has two methods:
 - `__next__`: returns the next item in the series, raising `StopIteration` if there are no more.
 - `__iter__`: returns `self`; this allows iterators to be used where an iterable is expected, for example in a `for` loop

In [None]:
s = "ABC"
for char in s:
  print(char)

A
B
C


In [None]:
s = iter("ABC")
for char in s: # iter(s) == s
  print(char)

A
B
C


In [None]:
iter(s) is s

True

In [None]:
s3 = Sentence('Life of Brain')
it = iter(s3)

In [None]:
it

<iterator at 0x7efb53f98ee0>

In [None]:
next(it)

'Life'

In [None]:
next(it)

'of'

In [None]:
next(it)

'Brain'

In [None]:
next(it)

StopIteration: 

In [None]:
list(it) # Once exhausted, an iterator keeps raising `StopIteration`, so this builds an empty list

[]

In [None]:
list(iter(s3))

['Life', 'of', 'Brain']

`Sentence` implemented using the Iterator pattern

In [None]:
# sentence_iter.py


import re
import reprlib

RE_WORD = re.compile(r'\w+')


class Sentence:
  """Iterable of words that hands out a new SentenceIterator on every `iter()` call."""

  def __init__(self, text):
    self.text = text
    # Eagerly collect every regex match into a list of words.
    self.words = RE_WORD.findall(text)

  def __iter__(self):
    # A fresh iterator object per call, built over the same word list.
    word_iterator = SentenceIterator(self.words)
    return word_iterator

  def __repr__(self):
    shortened = reprlib.repr(self.text)
    return 'Sentence(%s)' % shortened

class SentenceIterator:
  """Iterator over a sequence of words, tracking its position by index."""

  def __init__(self, words):
    self.words = words
    self.index = 0  # position of the next word to return

  def __iter__(self):
    """Return self, so the iterator can be used wherever an iterable is expected."""
    return self

  def __next__(self):
    """Return the next word, or raise StopIteration when none are left."""
    try:
      current = self.words[self.index]
    except IndexError:
      # Running past the end of the words means the iteration is over.
      raise StopIteration()
    else:
      self.index += 1
      return current

In [None]:
s = Sentence('"The time has come," the Walrus said,')

In [None]:
s

Sentence('"The time ha... Walrus said,')

In [None]:
for word in s:
  print(word)

The
time
has
come
the
Walrus
said


In [None]:
list(s)

['The', 'time', 'has', 'come', 'the', 'Walrus', 'said']

In [None]:
isinstance(s, abc.Iterable)

True

### **Don't Make the Iterable an Iterator for Itself**

i.e., Don't implement `__next__` in addition to `__iter__` in the `Sentence` class.

Use the Iterator pattern
 - to access an aggregate obj's contents w/o exposing its internal repr.
 - to support multiple traversals of aggregated objs.
 - to provide a uniform interface for traversing different aggregate structures

To "support multiple traversals", it must be possible to obtain multiple independent iterators from the same iterable instance.

In [None]:
# sentence_gen.py


import re
import reprlib

RE_WORD = re.compile(r'\w+')


class Sentence:
  """Iterable of words whose `__iter__` is a generator function."""

  def __init__(self, text):
    self.text = text
    # Eagerly collect every regex match into a list of words.
    self.words = RE_WORD.findall(text)

  def __repr__(self):
    shortened = reprlib.repr(self.text)
    return 'Sentence(%s)' % shortened

  def __iter__(self):
    # The `yield` in this body makes each call return a new generator object.
    for current_word in self.words:
      yield current_word
    # No explicit return is needed: falling off the end finishes the generator.

Now the iterator in the above example is in fact a generator object, built automatically when the `__iter__` method is called, because `__iter__` here is a generator function.

## How a Generator Works

Any Python fcn that has the `yield` keyword in its body is a generator function: a function which, when called, returns a generator obj (i.e. generator factory)

In [None]:
def gen_123():
  """Generator function: yield 1, 2 and 3, one value per `next()` call."""
  for number in (1, 2, 3):
    yield number

In [None]:
gen_123

Generator objects implement the `Iterator` interface, so they are also iterable.

In [None]:
gen_123()

<generator object gen_123 at 0x7efb25289c40>

In [None]:
for i in gen_123():
  print(i)

1
2
3


In [None]:
# Check a freshly built generator object: `g` is only assigned in a later cell,
# so referring to it here fails when the cells are run top to bottom.
isinstance(gen_123(), abc.Iterator)

True

In [None]:
g = gen_123()
next(g)

1

In [None]:
next(g)

2

In [None]:
next(g)

3

In [None]:
next(g)

StopIteration: 

In [None]:
def gen_AB():
  """Yield 'A' then 'B', printing a trace message before each value and at the end."""
  for trace, letter in (('start', 'A'), ('continue', 'B')):
    # The message is printed only when the consumer asks for the next value.
    print(trace)
    yield letter
  print('end.')


To iterate, `for` machinery does the equivalent of `g = iter(gen_AB())` to get a generator object, and then `next(g)` at each iteration

In [None]:
# Each loop iteration resumes the generator, so its trace messages
# interleave with the loop's own output.
for c in gen_AB():
  print('-->', c)

# expected:
# start
# --> A
# continue
# --> B
# end.


start
--> A
continue
--> B
end.


## Lazy Sentences

`Iterator` interface is designed to be lazy: `next(my_iterator)` yields one item at a time. The opposite of lazy is eager.

Our `Sentence` implementation has not been lazy because the `__init__` eagerly builds a list of all words in the text, binding it to the `self.words` attribute.

In [None]:
# sentence_gen2.py


import re
import reprlib

RE_WORD = re.compile(r'\w+')


class Sentence:
  """Lazy iterable of words: no word list is built in `__init__`."""

  def __init__(self, text):
    # Only the text is stored. The eager alternative,
    # `RE_WORD.findall(text)`, would need to process all words up front.
    self.text = text

  def __repr__(self):
    shortened = reprlib.repr(self.text)
    return 'Sentence(%s)' % shortened

  def __iter__(self):
    # finditer returns an iterator over the matches of RE_WORD in
    # self.text; each item is a match object.
    for word_match in RE_WORD.finditer(self.text):
      # group() extracts the matched text from the match object.
      yield word_match.group()

### Lazy Generator Expression

In [None]:
# generator function
def gen_AB():
  """Yield 'A' then 'B', printing a trace message before each value and at the end."""
  for trace, letter in (('start', 'A'), ('continue', 'B')):
    # The message is printed only when the consumer asks for the next value.
    print(trace)
    yield letter
  print('end')

In [None]:
res1 = [x * 3 for x in gen_AB()]

start
continue
end


In [None]:
for i in res1:
  print('-->', i)

--> AAA
--> BBB


In [None]:
res2 = (x*3 for x in gen_AB())
res2

<generator object <genexpr> at 0x78fb866f3680>

In [None]:
res2

<generator object <genexpr> at 0x78fb866f3680>

In [None]:
for i in res2:
  print('-->', i)

start
--> AAA
continue
--> BBB
end


In [None]:
# sentence_genexp.py

import re
import reprlib

RE_WORD = re.compile(r'\w+')


class Sentence:
  """Lazy iterable of words whose `__iter__` returns a generator expression."""

  def __init__(self, text):
    # Only the text is stored. The eager alternative,
    # `RE_WORD.findall(text)`, would need to process all words up front.
    self.text = text

  def __repr__(self):
    shortened = reprlib.repr(self.text)
    return 'Sentence(%s)' % shortened

  def __iter__(self):
    # finditer returns an iterator over the matches of RE_WORD in self.text.
    matches = RE_WORD.finditer(self.text)
    # Not a generator function (no `yield`): it builds and returns a generator object.
    return (word_match.group() for word_match in matches)

# When to Use Generator Expressions

*iterator*
 - General term for any object that implements the `__next__` method. Iterators are designed to produce data that is consumed by the client code, i.e., the code that drives the iterator via a `for` loop or other iterative feature, or by explicitly calling `next(it)` on the iterator. In practice, most iterators we use in Python are *generators*.

*generator*
 - An iterator built by the Python compiler. To create a generator, we don't implement the `__next__` method. Instead, we use the `yield` keyword to make a *generator function*, which is a factory of *generator objects*. A *generator expression* is another way to build a generator object. Generator objects provide `__next__`, so they are iterators.

In [None]:
def g():
  """Generator function: each call returns a new generator that yields 0 once."""
  only_value = 0
  yield only_value

In [None]:
g() # generator obj (iterator) created by generator fcn

<generator object g at 0x78fb865a5770>

In [None]:
ge = (c for c in 'XYZ') # generator exp builds a generator obj

In [None]:
ge

<generator object <genexpr> at 0x78fb865a6ea0>

In [None]:
type(g()), type(ge)

(generator, generator)

## An Arithmetic Progression Generator

In [None]:
class ArithmeticProgression:
  """Iterable arithmetic progression: begin, begin + step, begin + 2*step, ...

  Items are produced while they are less than `end`; with `end=None`
  the series never stops.
  """

  def __init__(self, begin, step, end=None):
    self.begin, self.step, self.end = begin, step, end

  def __iter__(self):
    # Coerce the first item to the type that `begin + step` produces.
    coerce = type(self.begin + self.step)
    current = coerce(self.begin)
    unbounded = self.end is None
    n = 0
    while True:
      if not (unbounded or current < self.end):
        return
      yield current
      n += 1
      # Computed from the index rather than by repeated addition,
      # for numerical stability.
      current = self.begin + self.step * n

In [None]:
ap = ArithmeticProgression(0, 1, 3)

In [None]:
list(ap)

[0, 1, 2]

In [None]:
ap = ArithmeticProgression(0, 0.5, 3)
list(ap)

[0.0, 0.5, 1.0, 1.5, 2.0, 2.5]

In [None]:
ap = ArithmeticProgression(0, 1/3, 1)
list(ap)

[0.0, 0.3333333333333333, 0.6666666666666666]

In [None]:
from fractions import Fraction
ap = ArithmeticProgression(0, Fraction(1,3), 1)
list(ap)

[Fraction(0, 1), Fraction(1, 3), Fraction(2, 3)]

In [None]:
from decimal import Decimal
ap = ArithmeticProgression(0, Decimal('.1'), .3)
list(ap)

[Decimal('0'), Decimal('0.1'), Decimal('0.2')]

In [None]:
100 * 1.1

110.00000000000001

In [None]:
sum(1.1 for _ in range(100))

109.99999999999982

In [None]:
1000 * 1.1

1100.0

In [None]:
sum(1.1 for _ in range(1000))

1100.0000000000086

If the whole point of a class is to build a generator by implementing `__iter__`, we can replace the class with a generator function. A generator function is, after all, a generator factory.

In [None]:
def aritporg_gen(begin, step, end=None):
  """Generate begin, begin + step, begin + 2*step, ... while items are < end.

  With `end=None` the series never stops.
  NOTE(review): the name looks like a typo of `aritprog_gen`; kept unchanged.
  """
  # Coerce the first item to the type that `begin + step` produces.
  coerce = type(begin + step)
  current = coerce(begin)
  unbounded = end is None
  n = 0
  while True:
    if not (unbounded or current < end):
      return
    yield current
    n += 1
    # Computed from the index rather than by repeated addition,
    # for numerical stability.
    current = begin + step * n

But remember! There are plenty of ready-to-use generators in the standard library

### Arithmetic Progression with itertools

In [None]:
import itertools

gen = itertools.count(1, .5) # return a generater that yields numbers

In [None]:
next(gen)

1

In [None]:
next(gen)

1.5

`itertools.takewhile` returns a generator that consumes another generator and stops when a given predicate evaluates to `False`

In [None]:
gen = itertools.takewhile(lambda n: n < 3, itertools.count(1, .5))

In [None]:
list(gen)

[1, 1.5, 2.0, 2.5]

In [None]:
import itertools

def aritprog_gen(begin, step, end=None):
  """Return an iterator over begin, begin + step, ... built from itertools.

  With `end=None` the unbounded `itertools.count` is returned as is;
  otherwise it is wrapped so that it stops at the first item not < end.
  """
  # Coerce the first item to the type that `begin + step` produces.
  start = type(begin + step)(begin)
  progression = itertools.count(start, step)
  if end is not None:
    progression = itertools.takewhile(lambda n: n < end, progression)
  return progression

Note that aritprog_gen is not a generator function: it has no `yield` in its body. But it returns a generator, just as a generator function does.

## Generator Functions in the Standard Library

### 1. Filtering Generator Functions
They yield a subset of items produced by the input iterable, without changing the items themselves.

In [None]:
def vowel(c):
  """Return True if `c` is a single vowel letter (a, e, i, o, u), ignoring case.

  Membership is tested against a set of single characters rather than
  the string 'aeiou': a substring test would wrongly accept the empty
  string and multi-character runs such as 'ae'.
  """
  return c.lower() in {'a', 'e', 'i', 'o', 'u'}

In [None]:
list(filter(vowel, 'Suwon'))

['u', 'o']

In [None]:
import itertools
list(itertools.filterfalse(vowel, 'Suwon'))

['S', 'w', 'n']

In [None]:
# 'A' successful
# 'a' successful
# 'r' fails -> so yields starting from 'r'
list(itertools.dropwhile(vowel, 'Aardvark'))

['r', 'd', 'v', 'a', 'r', 'k']

In [None]:
# 'A' successful -> yield
# 'a' successful -> yield
# 'r' fails -> terminated
list(itertools.takewhile(vowel, 'Aardvark'))

['A', 'a']

In [None]:
# Expected: Arda
list(itertools.compress('Aardvark', (1, 0, 1, 1, 0, 1)))

['A', 'r', 'd', 'a']

In [None]:
list(itertools.islice('Aardvark', 4))

['A', 'a', 'r', 'd']

In [None]:
list(itertools.islice('Aardvark', 4, 7))

['v', 'a', 'r']

In [None]:
list(itertools.islice('Aardvark', 1, 7, 2))

['a', 'd', 'a']

### 2. Mapping Generators

They yield items computed from each individual item in the input iterable in the case of `map` and `starmap`.

In [None]:
# itertools.accumulate
sample = [5, 4, 2, 8, 7, 6, 3, 0, 9, 1]
import itertools

list(itertools.accumulate(sample))

[5, 9, 11, 19, 26, 32, 35, 35, 44, 45]

In [None]:
list(itertools.accumulate(sample, min))

[5, 4, 2, 2, 2, 2, 2, 0, 0, 0]

In [None]:
list(itertools.accumulate(sample, max))

[5, 5, 5, 8, 8, 8, 8, 8, 9, 9]

In [None]:
import operator

In [None]:
list(itertools.accumulate(sample, operator.mul))

[5, 20, 40, 320, 2240, 13440, 40320, 0, 0, 0]

In [None]:
list(itertools.accumulate(range(1, 11), operator.mul))

[1, 2, 6, 24, 120, 720, 5040, 40320, 362880, 3628800]

In [None]:
# mapping generator fcn examples

list(enumerate('albatroz', 1))

[(1, 'a'),
 (2, 'l'),
 (3, 'b'),
 (4, 'a'),
 (5, 't'),
 (6, 'r'),
 (7, 'o'),
 (8, 'z')]

In [None]:
import operator
list(map(operator.mul, range(11), range(11)))

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81, 100]

In [None]:
list(map(operator.mul, range(11), [2, 4, 8]))

[0, 4, 16]

In [None]:
list(map(lambda a, b: (a, b), range(11), [2, 4, 8]))

[(0, 2), (1, 4), (2, 8)]

In [None]:
import itertools

In [None]:

list(itertools.starmap(operator.mul, enumerate('albatroz', 1)))

['a', 'll', 'bbb', 'aaaa', 'ttttt', 'rrrrrr', 'ooooooo', 'zzzzzzzz']

In [None]:
sample = [5, 4, 2, 8, 7, 6, 3, 0, 9, 1]
# 5 / 1, 9 / 2, 11 / 3, ...
# Running averages - nice trick
list(itertools.starmap(lambda a, b: b / a,
                       enumerate(itertools.accumulate(sample), 1)))

[5.0,
 4.5,
 3.6666666666666665,
 4.75,
 5.2,
 5.333333333333333,
 5.0,
 4.375,
 4.888888888888889,
 4.5]

### 3. Merging Generators

All of these yield items from multiple input iterables. `chain` and `chain.from_iterable` consume the input iterables sequentially, while `product`, `zip`, and `zip_longest` consume the input iterables in parallel

In [None]:
list(itertools.chain('ABC', range(2)))

['A', 'B', 'C', 0, 1]

In [None]:
# `chain` does nothing useful when called with a single iterable
list(itertools.chain(enumerate('ABC')))

[(0, 'A'), (1, 'B'), (2, 'C')]

In [None]:
list(itertools.chain.from_iterable(enumerate('ABC')))

[0, 'A', 1, 'B', 2, 'C']

In [None]:
list(zip('ABC', range(5), [10, 20, 30, 40]))

[('A', 0, 10), ('B', 1, 20), ('C', 2, 30)]

In [None]:
list(itertools.zip_longest('ABC', range(5)))

[('A', 0), ('B', 1), ('C', 2), (None, 3), (None, 4)]

In [None]:
list(itertools.zip_longest('ABC', range(5), fillvalue='?'))

[('A', 0), ('B', 1), ('C', 2), ('?', 3), ('?', 4)]

The `itertools.product` generator is a lazy way of computing Cartesian products, which we built using list comprehensions with more than one `for` clause in "Cartesian Products"

In [None]:
list(itertools.product('ABC', range(2)))

[('A', 0), ('A', 1), ('B', 0), ('B', 1), ('C', 0), ('C', 1)]

In [None]:
suits = 'spades hearts diamonds clubs'.split()
list(itertools.product('AK', suits))

[('A', 'spades'),
 ('A', 'hearts'),
 ('A', 'diamonds'),
 ('A', 'clubs'),
 ('K', 'spades'),
 ('K', 'hearts'),
 ('K', 'diamonds'),
 ('K', 'clubs')]

In [None]:
list(itertools.product('ABC', repeat=2))

[('A', 'A'),
 ('A', 'B'),
 ('A', 'C'),
 ('B', 'A'),
 ('B', 'B'),
 ('B', 'C'),
 ('C', 'A'),
 ('C', 'B'),
 ('C', 'C')]

In [None]:
list(itertools.product(range(2), repeat=3))

[(0, 0, 0),
 (0, 0, 1),
 (0, 1, 0),
 (0, 1, 1),
 (1, 0, 0),
 (1, 0, 1),
 (1, 1, 0),
 (1, 1, 1)]

In [None]:
rows = itertools.product('AB', range(2), repeat=2)

In [None]:
for row in rows: print(row)

### 4. Expanding the input by yielding more than one value per input item

In [None]:
ct = itertools.count()

In [None]:
next(ct)

0

In [None]:
next(ct), next(ct), next(ct)

(1, 2, 3)

In [None]:
list(itertools.islice(itertools.count(1, .3), 3))

[1, 1.3, 1.6]

In [None]:
cy = itertools.cycle('ABC')
next(cy)

'A'

In [None]:
list(itertools.islice(cy, 7))

['B', 'C', 'A', 'B', 'C', 'A', 'B']

In [None]:
list(itertools.pairwise(range(7)))

[(0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6)]

In [None]:
rp = itertools.repeat(7)
next(rp), next(rp)

(7, 7)

In [None]:
list(itertools.repeat(8, 4))

[8, 8, 8, 8]

In [None]:
list(map(operator.mul, range(11), itertools.repeat(5)))

[0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50]

The `combinations`, `combinations_with_replacement` and `permutations` generator functions are called the combinatorics generators in the `itertools` documentation page.

In [None]:
list(itertools.combinations('ABC', 2))

[('A', 'B'), ('A', 'C'), ('B', 'C')]

In [None]:
list(itertools.combinations_with_replacement('ABC', 2))

[('A', 'A'), ('A', 'B'), ('A', 'C'), ('B', 'B'), ('B', 'C'), ('C', 'C')]

In [None]:
list(itertools.permutations('ABC', 2))

[('A', 'B'), ('A', 'C'), ('B', 'A'), ('B', 'C'), ('C', 'A'), ('C', 'B')]

In [None]:
list(itertools.product('ABC', repeat=2))

[('A', 'A'),
 ('A', 'B'),
 ('A', 'C'),
 ('B', 'A'),
 ('B', 'B'),
 ('B', 'C'),
 ('C', 'A'),
 ('C', 'B'),
 ('C', 'C')]

### 5. Generator Functions designed to yield all items in the input iterables but rearranged in some way

 1. `itertools.groupby`
 2. `itertools.tee`

In [None]:
# itertools.groupby
list(itertools.groupby('LLLAAGGG'))

[('L', <itertools._grouper at 0x78ed8b799e70>),
 ('A', <itertools._grouper at 0x78ed8b79ac80>),
 ('G', <itertools._grouper at 0x78ed8b79be80>)]

In [None]:
for char, group in itertools.groupby('LLLLAAAGG'):
  print(char, '->', list(group))

L -> ['L', 'L', 'L', 'L']
A -> ['A', 'A', 'A']
G -> ['G', 'G']


In [None]:
animals = ['duck', 'eagle', 'rat', 'giraffe', 'bear',
           'bat', 'dolphin', 'shark', 'lion']
animals.sort(key=len)

In [None]:
animals

['rat', 'bat', 'duck', 'bear', 'lion', 'eagle', 'shark', 'giraffe', 'dolphin']

In [None]:
for length, group in itertools.groupby(animals, len):
  print(length, '->', list(group))

3 -> ['rat', 'bat']
4 -> ['duck', 'bear', 'lion']
5 -> ['eagle', 'shark']
7 -> ['giraffe', 'dolphin']


In [None]:
for l, group in itertools.groupby(reversed(animals), len):
  print(l, '->', list(group))

7 -> ['dolphin', 'giraffe']
5 -> ['shark', 'eagle']
4 -> ['lion', 'bear', 'duck']
3 -> ['bat', 'rat']


`itertools.tee` - unique behavior: it yields multiple generators from a single input iterable, each yielding every item from the input

## Iterable Reducing Functions

In [None]:
all([1, 2, 3])

True

In [None]:
all([1, 0, 3])

False

In [None]:
all([])

True

In [None]:
any([1, 2, 3])

True

In [None]:
any([1, 0, 3])

True

In [None]:
any([0, 0, 0])

False

In [None]:
any([])

False

In [None]:
g = (n for n in [0, 0.0, 7, 8]) # generator exp
any(g) # any iterated over g until g yielded 7

True

In [None]:
next(g) # That's why 8 was still remaining

8

Another built-in that takes an iterable and returns something else is `sorted`. Unlike `reversed`, which is a generator function, `sorted` builds and returns a new `list`.

## Subgenerators with yield from

The `yield from` expression syntax was introduced to allow a generator to delegate work to a subgenerator.

Before `yield from` was introduced, we used a `for` loop when a generator needed to yield values produced from another generator

In [None]:
def sub_gen():
  """Subgenerator: yield 1.1, then 1.2."""
  for value in (1.1, 1.2):
    yield value


def gen():
  """Delegating generator written without `yield from`."""
  yield 1
  # Manual delegation: re-yield every item the subgenerator produces.
  for item in sub_gen():
    yield item
  yield 2

for x in gen():
  print(x)

1
1.1
1.2
2


In [None]:
def sub_gen():
  """Subgenerator: yield 1.1, then 1.2."""
  for value in (1.1, 1.2):
    yield value


def gen():
  """Delegating generator: yield 1, everything from sub_gen(), then 2."""
  yield 1
  # `yield from` hands control to the subgenerator until it is exhausted.
  yield from sub_gen()
  yield 2

In [None]:
for x in gen():
  print(x)

1
1.1
1.2
2


In [None]:
def sub_gen():
  """Subgenerator: yield 1.1 and 1.2, then return 'Done!'."""
  for value in (1.1, 1.2):
    yield value
  return 'Done!'


def gen():
  """Delegating generator that also prints the subgenerator's return value."""
  yield 1
  # The value of the `yield from` expression is what sub_gen() returned.
  outcome = yield from sub_gen()
  print('<--', outcome)
  yield 2

In [None]:
for x in gen():
  print(x)

1
1.1
1.2
<-- Done!
2


### Reinventing chain: A simple but practical example of `yield from`



In [None]:
def chain(*iterables):
  """Yield every item of each iterable in turn, using nested `for` loops."""
  for source in iterables:
    for element in source:
      yield element

s = 'ABC'
r = range(3)
list(chain(s, r))

['A', 'B', 'C', 0, 1, 2]

In [None]:
def chain(*iterables):
  """Yield every item of each iterable in turn, delegating with `yield from`."""
  for source in iterables:
    # `yield from` replaces the inner `for` loop over the items.
    yield from source

s = 'ABC'
r = range(3)
list(chain(s, r))

['A', 'B', 'C', 0, 1, 2]

### Traversing a Tree

a script to traverse a tree structure


In [None]:
def tree(cls):
  """Yield the name of `cls` and nothing else."""
  name = cls.__name__
  yield name

def display(cls):
  """Print each class name produced by tree(cls), one per line."""
  for name in tree(cls):
    print(name)

if __name__ == '__main__':
  display(BaseException)

BaseException


In [None]:
# tree/step1/tree.py
def tree(cls, level=0):
  """Yield (class name, level) pairs for `cls` and, recursively, its subclasses.

  `cls` itself is yielded at `level`; each subclass subtree follows at
  `level + 1`, before the next sibling subclass.
  """
  yield cls.__name__, level
  child_level = level + 1
  for subclass in cls.__subclasses__():
    # Delegate to a recursive call so the whole subtree is walked.
    yield from tree(subclass, child_level)

def display(cls):
  """Print the class tree of `cls`, indenting each name by 4 spaces per level."""
  for name, depth in tree(cls):
    padding = ' ' * (4 * depth)
    print(padding + name)

if __name__ == '__main__':
  display(BaseException)

BaseException
    Exception
        TypeError
            MultipartConversionError
            FloatOperation
            DTypePromotionError
            UFuncTypeError
                UFuncTypeError
                    UFuncTypeError
                UFuncTypeError
                    UFuncTypeError
                    UFuncTypeError
            ConversionError
            StreamConsumedError
            InvalidType
            ApplyTypeError
            ArrowTypeError
            TqdmTypeError
        StopAsyncIteration
        StopIteration
        ImportError
            ModuleNotFoundError
                PackageNotFoundError
            ZipImportError
        OSError
            ConnectionError
                BrokenPipeError
                ConnectionAbortedError
                ConnectionRefusedError
                ConnectionResetError
                    RemoteDisconnected
            BlockingIOError
            ChildProcessError
            FileExistsError
            FileNot

## Generic Iterable Types

In [1]:
# replacer.py: zip_replace takes an iterable of (from, to) string pairs

from collections.abc import Iterable
from typing import TypeAlias

FromTo : TypeAlias = tuple[str, str] # define type alias

def zip_replace(text: str, changes: Iterable[FromTo]) -> str:
  for from_, to in changes:
    text = text.replace(from_, to)
  return text

`Iterator` types don't appear as often as `Iterable` types, but they are also simple to write.

In [2]:
# fibo_gen.py

from collections.abc import Iterator

def fibonacci() -> Iterator[int]:
  """Yield the Fibonacci sequence indefinitely: 0, 1, 1, 2, 3, 5, 8, ..."""
  a, b = 0, 1
  while True:
    # Yield the current term before advancing; yielding `a + b` here
    # would skip the leading 0, 1, 1 of the sequence.
    yield a
    a, b = b, a + b

In [None]:
# itergentype.py

from collections.abc import Iterator
from keyword import kwlist
from typing import TYPE_CHECKING

short_kw = (k for k in kwlist if len(k) < 5)

if TYPE_CHECKING:
  reveal_type(short_kw)

long_kw: Iterator[str] = (k for k in kwlist if len(k) >= 4)

if TYPE_CHECKING:
  reveal_type(long_kw)


In [3]:
!pip install mypy

Collecting mypy
  Downloading mypy-1.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.7/12.7 MB[0m [31m47.4 MB/s[0m eta [36m0:00:00[0m
Collecting mypy-extensions>=1.0.0 (from mypy)
  Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)
Installing collected packages: mypy-extensions, mypy
Successfully installed mypy-1.10.0 mypy-extensions-1.0.0


In [4]:
!mypy itergentype.py

itergentype.py:10: [34mnote:[m Revealed type is [m[1m"typing.Generator[builtins.str, None, None]"[m[m
itergentype.py:15: [34mnote:[m Revealed type is [m[1m"typing.Iterator[builtins.str]"[m[m
[1m[32mSuccess: no issues found in 1 source file[m


`Iterator[T]` is a shortcut for `Generator[T, None, None]`. Both annotations mean "a generator that yields items of type `T`, but that does not consume or return values"

## Classic Coroutines

Generators are commonly used as iterators, but they can be also used as coroutines. A coroutine is really a generator function, created with the `yield` keyword in its body. And a coroutine object is physically a generator object.

`Generator[YieldType, SendType, ReturnType]`
 - `SendType` is only relevant when the generator is used as a coroutine. That type param is the type of `x` in the call `gen.send(x)`
 - Likewise, `ReturnType` is only meaningful to annotate a coroutine.

Note. Classic Coroutine vs Native Coroutine
 - `Generator` for Classic Coroutine
 - `Coroutine` for Native Coroutine

David Beazley - PyCon 2009 course handout
 - Generators produce data for iteration
 - Coroutines are consumers of Data
 - To keep your brain from exploding, don't mix the two concepts together
 - Coroutines are not related to iteration

### Example: Coroutine to Compute a Running Average

In [5]:
from collections.abc import Generator

# Returns a generator that yields `float` values, accepts `float`
# values via `.send()`, and does not return a useful value.
def averager() -> Generator[float, float, None]:
  """Coroutine that yields the running average of the values sent to it.

  The first value yielded is the initial average 0.0; each later
  `.send(term)` yields the average of all terms received so far.
  """
  running_total = 0.0
  n_terms = 0
  current_average = 0.0
  # Infinite loop: the coroutine keeps yielding averages for as long as
  # the client keeps sending values.
  while True:
    # Suspend here, handing the current average to the client; resume
    # when the client sends the next term.
    received = yield current_average
    running_total += received
    n_terms += 1
    current_average = running_total / n_terms

In [6]:
coro_avg = averager()
next(coro_avg) # start the coroutine

0.0

In [7]:
coro_avg.send(10)

10.0

In [8]:
coro_avg.send(30)

20.0

In [9]:
coro_avg.send(5)

15.0

In [10]:
coro_avg.close()

In [11]:
coro_avg.close()

In [12]:
coro_avg.send(5)

StopIteration: 

### Returning a Value from a Coroutine

In [15]:
# coroaverager2.py

from collections.abc import Generator
from typing import Union, NamedTuple, TypeAlias

# The averager2 coroutine returns an instance of Result
class Result(NamedTuple):
  """Final result of a running-average computation: a (count, averager) pair."""
  # Number of terms averaged.
  # NOTE(review): `# type: ignore` presumably silences the type checker's
  # complaint about shadowing the tuple `count` method — confirm.
  count: int # type: ignore
  # Average of the terms.
  # NOTE(review): the field name `averager` looks like a typo of `average`;
  # it appears in the recorded reprs, so renaming would change output.
  averager: float

# Class for building a sentinel value that has a readable __repr__
class Sentinel:
  """Marker type; every instance is displayed as `<Sentinel>`."""

  def __repr__(self):
    return '<Sentinel>'

STOP = Sentinel()

SendType: TypeAlias = Union[float, Sentinel]

In [17]:
def averager2(verbose: bool = False) -> Generator[None, SendType, Result]:
  """Coroutine that averages the values sent to it and returns a Result.

  It yields nothing useful (always None). Sending a `Sentinel` instance
  ends the coroutine; the `Result(count, average)` is its return value.
  With `verbose=True`, every received value is printed.
  """
  running_total = 0.0
  n_terms = 0
  while True:
    # A bare `yield` only makes sense in a coroutine designed to
    # consume data: it produces None and waits for the next value.
    received = yield
    if verbose:
      print('received: ', received)
    if isinstance(received, Sentinel):
      # Finish: the average is 0.0 when no terms were received.
      mean = running_total / n_terms if n_terms else 0.0
      return Result(n_terms, mean)
    running_total += received
    n_terms += 1

In [30]:
coro_avg = averager2()

In [31]:
next(coro_avg)

In [32]:
coro_avg.send(10)

In [33]:
coro_avg.send(30)

In [34]:
coro_avg.send(6.5)

In [35]:
try:
  coro_avg.send(STOP)
except StopIteration as exc:
  result = exc.value

In [36]:
result

Result(count=3, averager=15.5)

In [37]:
def compute():
  """Delegate to a verbose averager2 coroutine, then print and return its result."""
  # The value of the `yield from` expression is what averager2 returned.
  outcome = yield from averager2(True)
  print('computed:', outcome)
  return outcome

In [38]:
comp = compute()

In [39]:
for v in [None, 10, 20, 30, STOP]:
  try:
    comp.send(v)
  except StopIteration as exc:
    result = exc.value

received:  10
received:  20
received:  30
received:  <Sentinel>
computed: Result(count=3, averager=20.0)


In [40]:
result

Result(count=3, averager=20.0)

### Generic Type Hints for Classic Coroutines

`typing.Generator` is one of the few standard library types with a contravariant type parameter.

In [None]:
from typing import TypeVar, Generic, Iterator
T_co = TypeVar('T_co', covariant=True)
V_co = TypeVar('V_co', covariant=True)
T_contra = TypeVar('T_contra', contravariant=True)

class Generator(Iterator[T_co], Generic[T_co, T_contra, V_co],
                extra=_G_base):

