# Generators and Iterators

![Iterables and Iterators](./data/img/Iterable.png)

In [1]:
lst = [3,2,1]

In [2]:
lst_iter = iter(lst)

In [3]:
next(lst_iter)

3

In [6]:
next(lst_iter)

StopIteration: 

## Building your own generators with `yield`

In [7]:
def counter(start, end):
    """Generate ``start``, ``start + 1``, ... stopping before ``end``.

    Nothing is produced when ``start`` is not less than ``end``.
    """
    value = start
    while True:
        if not value < end:
            return
        yield value
        value += 1

In [8]:
counter(1, 10)

<generator object counter at 0x10b3b6ad0>

In [9]:
x = counter(1,10)
next(x)

1

In [10]:
next(x)

2

In [11]:
next(x)

3

In [12]:
x = counter(1,10)
list(x)

[1, 2, 3, 4, 5, 6, 7, 8, 9]

In [13]:
for item in counter(1, 10):
    print(item, end=' ')

1 2 3 4 5 6 7 8 9 

In [14]:
next(x)

StopIteration: 

`yield` can also be used as an expression: its value is whatever the caller passes in with the generator's `send()` method.

In [15]:
def accumulator(start=0):
    """Generator that keeps a running total, fed through ``send()``.

    Prime it with ``next()`` to receive ``start``; every later ``send(v)``
    adds ``v`` to the total and returns the updated total.
    """
    total = start
    while True:
        # One round trip: hand the total to the caller, suspend, then
        # resume with whatever value the caller sent in.
        received = yield total
        total += received

In [16]:
x = accumulator()
next(x)

0

In [17]:
x.send(1)

1

In [18]:
x.send(1)

2

In [19]:
x.send(10)

12

### Composition of generators

In [22]:
def gen1(n):
    """Yield the integers ``0, 1, ..., n - 1`` in ascending order."""
    numbers = range(n)
    for value in numbers:
        yield value
        
def gen2(m, n):
    """Yield everything from ``gen1(m)``, then everything from ``gen1(n)``."""
    for limit in (m, n):
        # Delegate to the sub-generator until it is exhausted.
        yield from gen1(limit)
    
for item in gen2(5, 3):
    print(item)

0
1
2
3
4
0
1
2


## The iterator protocol

What does `for x in sequence:` *really* do?

In [23]:
seq = range(4)
for x in seq: 
    print(x)

0
1
2
3


In [24]:
iter_seq = iter(seq)
print(iter_seq)

<range_iterator object at 0x10b30db10>


In [25]:
iter_seq = iter(seq)
try:
    while True:
        x = next(iter_seq)
        print(x)
except StopIteration:
    pass

0
1
2
3


In [26]:
lst = [1,2,3]
next(iter(lst))

1

In [27]:
li = iter([1,2,3])

In [28]:
li

<list_iterator at 0x10b3f8290>

In [29]:
next(li)

1

In [30]:
reversed([1,2,3])

<list_reverseiterator at 0x10b3fd110>

Generators are their own iterators:

In [31]:
x = counter(0, 4)
print(x)
print(iter(x))
x is iter(x)  #

<generator object counter at 0x10b301ad0>
<generator object counter at 0x10b301ad0>


True

In [32]:
def isiterator(x):
    """Return True if ``x`` is an iterator, i.e. ``iter(x)`` returns ``x`` itself.

    Objects that cannot be iterated at all (``iter()`` raises ``TypeError``)
    are reported as ``False``.
    """
    try:
        return x is iter(x)
    except TypeError:
        # Only "not iterable" means "not an iterator". A bare ``except`` would
        # also swallow KeyboardInterrupt/SystemExit and hide unrelated bugs.
        return False

In [33]:
for item in counter(0, 4): 
    print(item)

0
1
2
3


In [34]:
x = counter(0, 4)
while True:
    next(x)

StopIteration: 

We can also define our own iterator classes (though generators are usually more readable):

In [None]:
class Counter(object):
    """Iterable over the integers from ``start`` up to, but excluding, ``end``.

    Every call to ``iter()`` builds a new ``CounterIterator`` from the stored
    bounds, so one ``Counter`` can be looped over more than once.
    """

    def __init__(self, start, end):
        self._start, self._end = start, end

    def __iter__(self):
        bounds = (self._start, self._end)
        return CounterIterator(*bounds)
    
class CounterIterator(object):
    """Iterator producing the integers from ``start`` up to, but excluding, ``end``.

    Implements the full iterator protocol: ``__next__`` returns the next value
    or raises ``StopIteration``, and ``__iter__`` returns the iterator itself.
    """

    def __init__(self, start, end):
        self._cur = start  # next value to hand out
        self._end = end    # exclusive upper bound

    def __iter__(self):
        # Iterators must themselves be iterable, otherwise ``iter(it)`` and
        # ``for x in it`` raise TypeError.
        return self

    def __next__(self):
        if not self._cur < self._end:
            # Exhausted: stop without advancing the cursor any further.
            raise StopIteration
        result = self._cur
        self._cur += 1
        return result

In [35]:
ctr = Counter(0, 5)
print(list(ctr))

[0, 1, 2, 3, 4]


In [38]:
class Counter(object):
    """Iterable over the integers from ``start`` up to, but excluding, ``end``.

    ``__iter__`` is a generator function, so each ``iter()`` call starts a
    fresh pass from ``start``.
    """

    def __init__(self, start, end):
        self._start, self._end = start, end

    def __iter__(self):
        value = self._start
        while True:
            if not value < self._end:
                return
            yield value
            value += 1

In [39]:
ctr = Counter(0, 5)
print(list(ctr))

[0, 1, 2, 3, 4]


# Set and dict comprehensions

In [40]:
[2*x for x in range(4)]

[0, 2, 4, 6]

In [41]:
{2*x for x in range(4)}

{0, 2, 4, 6}

In [42]:
{2*x:'y' for x in range(4)}

{0: 'y', 2: 'y', 4: 'y', 6: 'y'}

## Generator expressions

In [44]:
( x for x in range(10) if x % 2 == 0 )

<generator object <genexpr> at 0x10b3da2d0>

In [45]:
gen = ( x for x in range(10) if x % 2 == 0 )

In [46]:
next(gen)

0

In [47]:
next(gen)

2

In [48]:
list(gen)

[4, 6, 8]

In [52]:
'-'.join(str(x) for x in range(1, 20, 3))

'1-4-7-10-13-16-19'

In [53]:
gen = ( x for x in range(10) if x % 2 == 0 )
gen = ( x * 2 for x in gen )
list(gen)

[0, 4, 8, 12, 16]

## The `itertools` module

`itertools` provides a number of "higher-order iterators" that allow you to combine iterators in interesting ways.

In [54]:
from itertools import chain, count, groupby

In [55]:
# chain links multiple iterators end-to-end
xs = range(10)
ys = 'abcdef'
list(chain(xs, ys))


[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 'a', 'b', 'c', 'd', 'e', 'f']

In [56]:
# The Python 3 built-in "zip" lets us iteratively zip multiple iterators. 
#  Useful when building a giant dictionary:
import string
dict(zip(string.ascii_lowercase, string.ascii_uppercase[:10]))

{'a': 'A',
 'b': 'B',
 'c': 'C',
 'd': 'D',
 'e': 'E',
 'f': 'F',
 'g': 'G',
 'h': 'H',
 'i': 'I',
 'j': 'J'}

In [58]:
from itertools import zip_longest

In [59]:
dict(zip_longest(
    string.ascii_lowercase, 
    string.ascii_uppercase[:10]
))

{'a': 'A',
 'b': 'B',
 'c': 'C',
 'd': 'D',
 'e': 'E',
 'f': 'F',
 'g': 'G',
 'h': 'H',
 'i': 'I',
 'j': 'J',
 'k': None,
 'l': None,
 'm': None,
 'n': None,
 'o': None,
 'p': None,
 'q': None,
 'r': None,
 's': None,
 't': None,
 'u': None,
 'v': None,
 'w': None,
 'x': None,
 'y': None,
 'z': None}

In [60]:
dict(zip_longest(
    string.ascii_lowercase, 
    string.ascii_uppercase[:10],
    fillvalue='---'
))

{'a': 'A',
 'b': 'B',
 'c': 'C',
 'd': 'D',
 'e': 'E',
 'f': 'F',
 'g': 'G',
 'h': 'H',
 'i': 'I',
 'j': 'J',
 'k': '---',
 'l': '---',
 'm': '---',
 'n': '---',
 'o': '---',
 'p': '---',
 'q': '---',
 'r': '---',
 's': '---',
 't': '---',
 'u': '---',
 'v': '---',
 'w': '---',
 'x': '---',
 'y': '---',
 'z': '---'}

In [61]:
# count() gives us a simple iterator of consecutive values

for i, letter in zip(count(), string.ascii_letters[:10]):
    print(i, letter)

0 a
1 b
2 c
3 d
4 e
5 f
6 g
7 h
8 i
9 j


In [62]:
for i, letter in enumerate(string.ascii_letters[:10]):
    print(i, letter)

0 a
1 b
2 c
3 d
4 e
5 f
6 g
7 h
8 i
9 j


In [63]:
# Python anti-pattern
for i in range(len(string.ascii_letters[:10])):
    print(i, string.ascii_letters[i])

0 a
1 b
2 c
3 d
4 e
5 f
6 g
7 h
8 i
9 j


In [64]:
# also an anti-pattern
d = dict(zip(string.ascii_lowercase, string.ascii_uppercase[:10]))
for key in d.keys():
    print(key, d[key])

a A
b B
c C
d D
e E
f F
g G
h H
i I
j J


In [65]:
for key, value in d.items():
    print(key, value)

a A
b B
c C
d D
e E
f F
g G
h H
i I
j J


`groupby()` allows us to efficiently group values from an iterator into sub-iterators. For instance, we might have 
some datetime-based data that we wish to convert to date-based data:

In [66]:
from random import random
from datetime import datetime, timedelta

trades = []
dt = datetime(2016, 4, 24)
while dt < datetime(2016,4,27):
    trades.append((dt, random()))
    dt += timedelta(hours=1)
    
print(len(trades))

72


In [67]:
trades[:10]

[(datetime.datetime(2016, 4, 24, 0, 0), 0.712605860577327),
 (datetime.datetime(2016, 4, 24, 1, 0), 0.3142319590716284),
 (datetime.datetime(2016, 4, 24, 2, 0), 0.10977545415739876),
 (datetime.datetime(2016, 4, 24, 3, 0), 0.6055446602210595),
 (datetime.datetime(2016, 4, 24, 4, 0), 0.07201795624646012),
 (datetime.datetime(2016, 4, 24, 5, 0), 0.292449193880956),
 (datetime.datetime(2016, 4, 24, 6, 0), 0.6524101809447218),
 (datetime.datetime(2016, 4, 24, 7, 0), 0.6718237415207235),
 (datetime.datetime(2016, 4, 24, 8, 0), 0.6710779369986538),
 (datetime.datetime(2016, 4, 24, 9, 0), 0.575878034538624)]

In [68]:
def day_of_trade(val):
    """Return the calendar date of a ``(datetime, value)`` trade pair."""
    timestamp, _ = val
    return timestamp.date()

for date, date_trades in groupby(trades, key=day_of_trade):
    print(date, len(list(date_trades)))


2016-04-24 24
2016-04-25 24
2016-04-26 24


In [69]:
for date, date_trades in groupby(trades, key=day_of_trade):
    date_trades = list(date_trades)
    print(date, sum(v for dt, v in date_trades) / len(date_trades))


2016-04-24 0.5405557851270745
2016-04-25 0.5060172910712806
2016-04-26 0.5354313239432843


In [72]:
cat data/Hamlet.txt | sort | uniq | wc -l

    2746


In [73]:
# Shuffle the trades to show what groupby() does with unsorted input: it only
# merges *consecutive* items that share a key, so each date appears many times.
# Import just ``shuffle`` so the ``random`` function imported earlier is not
# rebound to the module (which would break re-running the cell that calls it).
from random import shuffle
shuffle(trades)

for date, date_trades in groupby(trades, key=day_of_trade):
    # Materialize the group once; it is needed for both the sum and the count.
    date_trades = list(date_trades)
    print(date, sum(v for _, v in date_trades) / len(date_trades))


2016-04-26 0.648007118862586
2016-04-25 0.9282062578407122
2016-04-24 0.8157383470007334
2016-04-25 0.5091742497986625
2016-04-24 0.3426601462146175
2016-04-26 0.7740482546935521
2016-04-25 0.35342254199676215
2016-04-24 0.2113552453834281
2016-04-25 0.4159434454903568
2016-04-26 0.43882751936099407
2016-04-25 0.3421737321039665
2016-04-26 0.459745466927788
2016-04-24 0.5922526168335008
2016-04-26 0.35746605511601376
2016-04-24 0.6710779369986538
2016-04-26 0.7111958829595859
2016-04-24 0.7509316368272133
2016-04-26 0.12503505851675195
2016-04-25 0.6710271144018972
2016-04-26 0.5387492009616593
2016-04-25 0.16724916083397512
2016-04-26 0.24915285885571303
2016-04-24 0.6976466413608643
2016-04-25 0.490422995590842
2016-04-26 0.47714732722831976
2016-04-24 0.10977545415739876
2016-04-26 0.6969301252905717
2016-04-24 0.6055446602210595
2016-04-25 0.5487342756294784
2016-04-24 0.7317017679153627
2016-04-26 0.8479656842330379
2016-04-25 0.7796848137552005
2016-04-26 0.8855498692914121
2016-

### Note that your data *must* already be sorted in a "grouped" order if you use `groupby`. If you wish to group *unsorted* data, you should use a `defaultdict` instead.

In [74]:
from collections import defaultdict

date_trades = defaultdict(list)
for dt, value in trades:
    day = dt.date()
    date_trades[day].append(value)

In [75]:
{day: len(values) for day, values in date_trades.items()}

{datetime.date(2016, 4, 26): 24,
 datetime.date(2016, 4, 25): 24,
 datetime.date(2016, 4, 24): 24}

In [76]:
import itertools
itertools?

# Lab

Open [Generators and Iterators Lab][iteration-lab]

[iteration-lab]: ./iteration-lab.ipynb

In [None]:
def gen(n):
    # Skeleton only: each ``...`` is a placeholder statement, presumably for
    # code to be filled in during the lab (as written there is no base case).
    ... 
    # Re-yield every item produced by the recursive call, one at a time.
    for item in gen(n-1):
        yield item
    ...

In [None]:
def gen(n):
    # Skeleton only: each ``...`` is a placeholder statement, presumably for
    # code to be filled in during the lab (as written there is no base case).
    ... 
    # Delegate to the recursive generator, yielding each of its items in turn.
    yield from gen(n-1)
    ...

In [77]:
ranks = '2 3 4 5 6 7 8 9 10 J Q K A'.split()
suits = 'clubs spades hearts diamonds'.split()

In [78]:
ranks

['2', '3', '4', '5', '6', '7', '8', '9', '10', 'J', 'Q', 'K', 'A']

In [79]:
suits

['clubs', 'spades', 'hearts', 'diamonds']

In [80]:
list(itertools.product(ranks, suits))

[('2', 'clubs'),
 ('2', 'spades'),
 ('2', 'hearts'),
 ('2', 'diamonds'),
 ('3', 'clubs'),
 ('3', 'spades'),
 ('3', 'hearts'),
 ('3', 'diamonds'),
 ('4', 'clubs'),
 ('4', 'spades'),
 ('4', 'hearts'),
 ('4', 'diamonds'),
 ('5', 'clubs'),
 ('5', 'spades'),
 ('5', 'hearts'),
 ('5', 'diamonds'),
 ('6', 'clubs'),
 ('6', 'spades'),
 ('6', 'hearts'),
 ('6', 'diamonds'),
 ('7', 'clubs'),
 ('7', 'spades'),
 ('7', 'hearts'),
 ('7', 'diamonds'),
 ('8', 'clubs'),
 ('8', 'spades'),
 ('8', 'hearts'),
 ('8', 'diamonds'),
 ('9', 'clubs'),
 ('9', 'spades'),
 ('9', 'hearts'),
 ('9', 'diamonds'),
 ('10', 'clubs'),
 ('10', 'spades'),
 ('10', 'hearts'),
 ('10', 'diamonds'),
 ('J', 'clubs'),
 ('J', 'spades'),
 ('J', 'hearts'),
 ('J', 'diamonds'),
 ('Q', 'clubs'),
 ('Q', 'spades'),
 ('Q', 'hearts'),
 ('Q', 'diamonds'),
 ('K', 'clubs'),
 ('K', 'spades'),
 ('K', 'hearts'),
 ('K', 'diamonds'),
 ('A', 'clubs'),
 ('A', 'spades'),
 ('A', 'hearts'),
 ('A', 'diamonds')]