# Generators and Iterators

![Iterables and Iterators](./data/img/Iterable.png)

In [1]:
lst = [3,2,1]

In [2]:
lst_iter = iter(lst)

In [3]:
next(lst_iter)

3

In [4]:
next(lst_iter)

2

## Building your own generators with `yield`

In [5]:
def counter(start, end):
    """Generate start, start + 1, ... and stop before reaching end."""
    value = start
    while True:
        # Finishing the generator function is what raises StopIteration
        # for the caller.
        if not (value < end):
            return
        yield value
        value += 1

In [6]:
counter(1, 10)

<generator object counter at 0x104137b50>

In [7]:
x = counter(1,10)
next(x)

1

In [8]:
next(x)

2

In [9]:
next(x)

3

In [10]:
x = counter(1,10)
list(x)

[1, 2, 3, 4, 5, 6, 7, 8, 9]

In [11]:
for item in counter(1, 10):
    print(item, end=' ')

1 2 3 4 5 6 7 8 9 

In [12]:
next(x)

StopIteration: 

`yield` can also be used as an expression, together with the `send()` method:

In [13]:
def accumulator(start=0):
    """Running-total generator driven by ``send()``.

    Prime it with ``next()``, which yields ``start``; every later
    ``send(n)`` adds ``n`` to the total and yields the new total.
    """
    total = start
    while True:
        # Output the current total and suspend here; the argument of the
        # caller's next send() becomes the value of the yield expression.
        received = yield total
        total += received

In [14]:
x = accumulator()
next(x)

0

In [15]:
x.send(1)

1

In [16]:
x.send(1)

2

In [17]:
x.send(10)

12

## The iterator protocol

What does `for x in sequence:` *really* do?

In [18]:
seq = range(4)
for x in seq: 
    print(x)

0
1
2
3


In [19]:
iter_seq = iter(seq)
print(iter_seq)

<range_iterator object at 0x104143e70>


In [20]:
# Hand-written equivalent of `for x in seq: print(x)`.
iter_seq = iter(seq)
while True:
    try:
        x = next(iter_seq)
    except StopIteration:
        # Exhausted iterator: this is the signal a for loop uses to stop.
        break
    print(x)

0
1
2
3


In [21]:
lst = [1,2,3]
next(iter(lst))

1

In [22]:
li = iter([1,2,3])

In [23]:
li

<list_iterator at 0x10421f910>

In [24]:
next(li)

1

In [25]:
reversed([1,2,3])

<list_reverseiterator at 0x104225d90>

Generators are their own iterators:

In [26]:
x = counter(0, 4)
print(x)
print(iter(x))
x is iter(x)  # True: iter() on a generator hands back the very same object

<generator object counter at 0x104137cd0>
<generator object counter at 0x104137cd0>


True

In [27]:
def isiterator(x):
    """Return True if ``x`` is an iterator, i.e. ``iter(x)`` is ``x`` itself.

    Iterables that are not iterators (lists, ranges, ...) get a fresh
    iterator object back from ``iter()``, so the result is False.  Objects
    that are not iterable at all make ``iter()`` raise TypeError, which is
    also reported as False.
    """
    try:
        return x is iter(x)
    except TypeError:
        # iter() signals "not iterable" with TypeError.  Catching only that,
        # instead of a bare except, lets KeyboardInterrupt, SystemExit and
        # real bugs inside a custom __iter__ propagate.
        return False

In [28]:
for item in counter(0, 4): 
    print(item)

0
1
2
3


In [29]:
x = counter(0, 4)
while True:
    next(x)

StopIteration: 

We can also define our own iterator classes (though generators are usually more readable):

In [30]:
class Counter(object):
    """Re-iterable container of the values ``start``, ``start + 1``, ... below ``end``."""

    def __init__(self, start, end):
        self._start = start
        self._end = end

    def __iter__(self):
        '''This is often implemented as a generator function'''
        # A fresh iterator per call lets one Counter be looped over
        # any number of times.
        return CounterIterator(self._start, self._end)


class CounterIterator(object):
    """Single-pass iterator over the values of a ``Counter``."""

    def __init__(self, start, end):
        self._cur = start
        self._end = end

    def __iter__(self):
        # The iterator protocol requires iterators to be iterable themselves
        # and to return themselves here; without this method
        # `for x in iter(ctr)` raises TypeError.
        return self

    def __next__(self):
        # Check before advancing so that repeated calls after exhaustion
        # keep raising StopIteration without moving _cur any further.
        if self._cur >= self._end:
            raise StopIteration
        result = self._cur
        self._cur += 1
        return result


ctr = Counter(0, 5)
print(list(ctr))

[0, 1, 2, 3, 4]


# Set and dict comprehensions

In [31]:
{x for x in range(4)}

{0, 1, 2, 3}

In [32]:
{x:'y' for x in range(4)}

{0: 'y', 1: 'y', 2: 'y', 3: 'y'}

## Generator expressions

In [33]:
[ x for x in range(10) if x % 2 == 0 ]

[0, 2, 4, 6, 8]

In [34]:
( x for x in range(10) if x % 2 == 0 )

<generator object <genexpr> at 0x1041379d0>

In [35]:
gen = ( x for x in range(10) if x % 2 == 0 )

In [36]:
next(gen)

0

In [37]:
next(gen)

2

In [38]:
list(gen)

[4, 6, 8]

In [43]:
'-'.join(str(x) for x in range(1, 20, 3))

'1-4-7-10-13-16-19'

In [46]:
gen = ( x for x in range(10) if x % 2 == 0 )
gen = ( x * 2 for x in gen )
list(gen)

[0, 4, 8, 12, 16]

## The `itertools` module

`itertools` provides a number of "higher-order iterators" that allow you to combine iterators in interesting ways.

In [47]:
from itertools import chain, count, groupby

In [48]:
# chain links multiple iterators end-to-end
xs = range(10)
ys = 'abcdef'
list(chain(xs, ys))


[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 'a', 'b', 'c', 'd', 'e', 'f']

In [49]:
# The Python 3 built-in "zip" lets us iteratively zip multiple iterators. 
#  Useful when building a giant dictionary:
import string
dict(zip(string.ascii_lowercase, string.ascii_uppercase[:10]))

{'a': 'A',
 'b': 'B',
 'c': 'C',
 'd': 'D',
 'e': 'E',
 'f': 'F',
 'g': 'G',
 'h': 'H',
 'i': 'I',
 'j': 'J'}

In [51]:
from itertools import zip_longest

In [52]:
dict(zip_longest(
    string.ascii_lowercase, 
    string.ascii_uppercase[:10]
))

{'a': 'A',
 'b': 'B',
 'c': 'C',
 'd': 'D',
 'e': 'E',
 'f': 'F',
 'g': 'G',
 'h': 'H',
 'i': 'I',
 'j': 'J',
 'k': None,
 'l': None,
 'm': None,
 'n': None,
 'o': None,
 'p': None,
 'q': None,
 'r': None,
 's': None,
 't': None,
 'u': None,
 'v': None,
 'w': None,
 'x': None,
 'y': None,
 'z': None}

In [53]:
dict(zip_longest(
    string.ascii_lowercase, 
    string.ascii_uppercase[:10],
    fillvalue='---'
))

{'a': 'A',
 'b': 'B',
 'c': 'C',
 'd': 'D',
 'e': 'E',
 'f': 'F',
 'g': 'G',
 'h': 'H',
 'i': 'I',
 'j': 'J',
 'k': '---',
 'l': '---',
 'm': '---',
 'n': '---',
 'o': '---',
 'p': '---',
 'q': '---',
 'r': '---',
 's': '---',
 't': '---',
 'u': '---',
 'v': '---',
 'w': '---',
 'x': '---',
 'y': '---',
 'z': '---'}

In [55]:
# count() gives us a simple iterator of consecutive values

for i, letter in zip(count(), string.ascii_letters[:10]):
    print(i, letter)

0 a
1 b
2 c
3 d
4 e
5 f
6 g
7 h
8 i
9 j


In [56]:
for i, letter in enumerate(string.ascii_letters[:10]):
    print(i, letter)

0 a
1 b
2 c
3 d
4 e
5 f
6 g
7 h
8 i
9 j


In [57]:
# Python anti-pattern
# The index from range(len(...)) is only used to look the item up again;
# enumerate() yields the index and the item together.
for i in range(len(string.ascii_letters[:10])):
    print(i, string.ascii_letters[i])

0 a
1 b
2 c
3 d
4 e
5 f
6 g
7 h
8 i
9 j


In [58]:
# also an anti-pattern
# Iterating over d.keys() and then indexing d[key] does a second lookup for
# every entry; d.items() yields each key together with its value.
d = dict(zip(string.ascii_lowercase, string.ascii_uppercase[:10]))
for key in d.keys():
    print(key, d[key])

a A
b B
c C
d D
e E
f F
g G
h H
i I
j J


In [59]:
for key, value in d.items():
    print(key, value)

a A
b B
c C
d D
e E
f F
g G
h H
i I
j J


`groupby()` allows us to efficiently group *consecutive* values from an iterator into sub-iterators that share a key. For instance, we might have 
some datetime-based data that we wish to convert to date-based data:

In [60]:
from random import random
from datetime import datetime, timedelta

# One synthetic trade per hour over three full days; each trade is a
# (timestamp, random value) pair, in chronological order.
trades = []
dt = datetime(2016, 4, 24)
end = datetime(2016, 4, 27)
one_hour = timedelta(hours=1)
while dt < end:
    trades.append((dt, random()))
    dt = dt + one_hour

print(len(trades))

72


In [61]:
trades[:10]

[(datetime.datetime(2016, 4, 24, 0, 0), 0.5237924928840589),
 (datetime.datetime(2016, 4, 24, 1, 0), 0.970748036876779),
 (datetime.datetime(2016, 4, 24, 2, 0), 0.6400556403377038),
 (datetime.datetime(2016, 4, 24, 3, 0), 0.9141630496565304),
 (datetime.datetime(2016, 4, 24, 4, 0), 0.9933350361144487),
 (datetime.datetime(2016, 4, 24, 5, 0), 0.4637221279023578),
 (datetime.datetime(2016, 4, 24, 6, 0), 0.5598122911995024),
 (datetime.datetime(2016, 4, 24, 7, 0), 0.06781033075989773),
 (datetime.datetime(2016, 4, 24, 8, 0), 0.49241557909081957),
 (datetime.datetime(2016, 4, 24, 9, 0), 0.9981452576162229)]

In [62]:
def day_of_trade(val):
    """Key function: map a (datetime, value) pair to the datetime's date."""
    timestamp, _ = val
    return timestamp.date()

# Consecutive trades that share a calendar day form one group; count them.
for day, day_trades in groupby(trades, key=day_of_trade):
    print(day, sum(1 for _ in day_trades))


2016-04-24 24
2016-04-25 24
2016-04-26 24


In [63]:
# Average trade value per day.  Each group is a one-shot iterator, so its
# values are collected into a list before being summed and counted.
for day, group in groupby(trades, key=day_of_trade):
    values = [v for _, v in group]
    print(day, sum(values) / len(values))


2016-04-24 0.5533715884417097
2016-04-25 0.49616703572663284
2016-04-26 0.4829418108456363


In [64]:
cat data/Hamlet.txt | sort | uniq | wc -l

    2746


In [65]:
# Import only shuffle: a plain `import random` would rebind the name
# `random`, which an earlier cell binds to the random() function.
from random import shuffle

# Shuffling destroys the chronological order, so groupby -- which only
# merges *adjacent* items with equal keys -- now emits many small groups
# instead of one group per day.
shuffle(trades)

for date, date_trades in groupby(trades, key=day_of_trade):
    # The group is a one-shot iterator; materialize it once so it can be
    # both summed and counted.
    date_trades = list(date_trades)
    print(date, sum(v for dt, v in date_trades) / len(date_trades))


2016-04-25 0.5018071048588116
2016-04-26 0.30154951738523866
2016-04-24 0.08032654571960685
2016-04-25 0.21257245248320333
2016-04-24 0.48506987889039277
2016-04-25 0.9738821846694379
2016-04-24 0.5872942515125029
2016-04-26 0.7148841243588979
2016-04-25 0.22896630510649296
2016-04-24 0.78295817746488
2016-04-25 0.8435073682771228
2016-04-26 0.15141912005281832
2016-04-24 0.4168565620631537
2016-04-26 0.2590486483436623
2016-04-25 0.9114069021867834
2016-04-24 0.5237924928840589
2016-04-25 0.3908904914421134
2016-04-24 0.9141630496565304
2016-04-25 0.9776328616897122
2016-04-24 0.06781033075989773
2016-04-26 0.4065087469537381
2016-04-25 0.20360305868215367
2016-04-26 0.49677644124823894
2016-04-24 0.021829635083841503
2016-04-25 0.02344191214759217
2016-04-26 0.9031107515715845
2016-04-24 0.9933350361144487
2016-04-26 0.9372779694918775
2016-04-25 0.75527820059377
2016-04-26 0.559998997574382
2016-04-24 0.24482719199899783
2016-04-26 0.6117850405802503
2016-04-25 0.639268727609306
201

### Note that your data *must* already be sorted in a "grouped" order if you use `groupby`. If you wish to group *unsorted* data, you should use a `defaultdict` instead.

In [66]:
from collections import defaultdict

# Bucket the values by calendar day.  Unlike groupby, this does not depend
# on the order of the input.
date_trades = defaultdict(list)
for timestamp, val in trades:
    date_trades[timestamp.date()].append(val)

In [67]:
{day: len(values) for day, values in date_trades.items()}

{datetime.date(2016, 4, 25): 24,
 datetime.date(2016, 4, 26): 24,
 datetime.date(2016, 4, 24): 24}

In [68]:
import itertools
itertools?

# Lab

Open [Generators and Iterators Lab][iteration-lab]

[iteration-lab]: ./iteration-lab.ipynb

In [None]:
def gen(n):
    # Sketch only: each `...` stands for code that is left out here.
    ... 
    # Re-yield every item produced by the recursive call, one at a time.
    for item in gen(n-1):
        yield item
    ...

In [None]:
def gen(n):
    # Sketch only: each `...` stands for code that is left out here.
    ... 
    # `yield from` delegates to the sub-generator, yielding each of its
    # items without an explicit loop.
    yield from gen(n-1)
    ...

In [69]:
ranks = '2 3 4 5 6 7 8 9 10 J Q K A'.split()
suits = 'clubs spades hearts diamonds'.split()

In [70]:
list(itertools.product(ranks, suits))

[('2', 'clubs'),
 ('2', 'spades'),
 ('2', 'hearts'),
 ('2', 'diamonds'),
 ('3', 'clubs'),
 ('3', 'spades'),
 ('3', 'hearts'),
 ('3', 'diamonds'),
 ('4', 'clubs'),
 ('4', 'spades'),
 ('4', 'hearts'),
 ('4', 'diamonds'),
 ('5', 'clubs'),
 ('5', 'spades'),
 ('5', 'hearts'),
 ('5', 'diamonds'),
 ('6', 'clubs'),
 ('6', 'spades'),
 ('6', 'hearts'),
 ('6', 'diamonds'),
 ('7', 'clubs'),
 ('7', 'spades'),
 ('7', 'hearts'),
 ('7', 'diamonds'),
 ('8', 'clubs'),
 ('8', 'spades'),
 ('8', 'hearts'),
 ('8', 'diamonds'),
 ('9', 'clubs'),
 ('9', 'spades'),
 ('9', 'hearts'),
 ('9', 'diamonds'),
 ('10', 'clubs'),
 ('10', 'spades'),
 ('10', 'hearts'),
 ('10', 'diamonds'),
 ('J', 'clubs'),
 ('J', 'spades'),
 ('J', 'hearts'),
 ('J', 'diamonds'),
 ('Q', 'clubs'),
 ('Q', 'spades'),
 ('Q', 'hearts'),
 ('Q', 'diamonds'),
 ('K', 'clubs'),
 ('K', 'spades'),
 ('K', 'hearts'),
 ('K', 'diamonds'),
 ('A', 'clubs'),
 ('A', 'spades'),
 ('A', 'hearts'),
 ('A', 'diamonds')]