# Generators and Iterators

![Iterables and Iterators](./data/img/Iterable.png)

In [1]:
lst = [3,2,1]

In [7]:
lst_iter = iter(lst)
lst_iter


<list_iterator at 0x1086ed000>

In [8]:
next(lst_iter)

3

In [9]:
next(lst_iter)

2

## Building your own iterators with `yield`

In [6]:
def counter(start, end):
    """Generate start, start + 1, ... for as long as the value is below end.

    This is a generator function: calling it only creates a generator
    object.  Nothing in the body runs (not even the print) until the first
    value is requested with next() or by a for loop.
    """
    print('Entering counter generator')
    value = start
    while True:
        if not value < end:
            # Falling off the end of a generator raises StopIteration for us.
            return
        yield value
        value += 1

In [7]:
counter(1, 10)

<generator object counter at 0x7f85d0150890>

In [8]:
x = counter(1,10)
next(x)

Entering counter generator


1

In [9]:
next(x)

2

In [10]:
next(x)

3

Equivalent list code:

In [11]:
def counter_list(start, end):
    """Return the list [start, start + 1, ...] of values below end.

    Eager counterpart of the `counter` generator: every value is computed
    and stored before the caller sees the first one.
    """
    print('Entering counter function')
    values = []
    n = start
    while n < end:
        # The generator version has `yield n` at this point instead.
        values.append(n)
        n += 1
    return values

In [12]:
counter_list(1, 10)

Entering counter function


[1, 2, 3, 4, 5, 6, 7, 8, 9]

In [13]:
def counter(start, end):
    """Generate the integers from start up to (not including) end, with tracing.

    A line is printed just before each value is produced, which makes it
    visible that values are computed one at a time, on demand.
    """
    print('Entering counter generator')
    n = start
    while n < end:
        print('Generate the value', n)
        yield n
        n += 1

In [14]:
for x in counter(1, 10_000):
    print('Use the value', x)
    if x > 3:
        break

Entering counter generator
Generate the value 1
Use the value 1
Generate the value 2
Use the value 2
Generate the value 3
Use the value 3
Generate the value 4
Use the value 4


In [15]:
def counter_list(start, end):
    """Return the list of values from start up to (not including) end, with tracing.

    Unlike the generator version, every 'Generate the value' line is printed
    before the caller receives anything, because the whole list is built
    up front.
    """
    print('Entering counter function')
    produced = []
    n = start
    while n < end:
        print('Generate the value', n)
        # The generator version has `yield n` at this point instead.
        produced.append(n)
        n += 1
    return produced

In [16]:
for x in counter_list(1, 10):
    print('Use the value', x)
    if x > 3:
        break

Entering counter function
Generate the value 1
Generate the value 2
Generate the value 3
Generate the value 4
Generate the value 5
Generate the value 6
Generate the value 7
Generate the value 8
Generate the value 9
Use the value 1
Use the value 2
Use the value 3
Use the value 4


(Back to the generator version)

In [17]:
def counter(start, end):
    """Yield start, start + 1, ... stopping before end is reached.

    The body (including the print) only starts running when the first value
    is requested from the generator.
    """
    print('Entering counter generator')
    upcoming = start
    while upcoming < end:
        yield upcoming
        upcoming += 1

In [18]:
x = counter(1,10)
list(x)

Entering counter generator


[1, 2, 3, 4, 5, 6, 7, 8, 9]

In [19]:
type(counter)

function

In [20]:
type(counter(1, 10))

generator

In [21]:
next(x)

StopIteration: 

In [22]:
for item in counter(1, 10):
    print(item, end=' ')

Entering counter generator
1 2 3 4 5 6 7 8 9 

In [23]:
def short_gen():
    """Generator that yields nothing and finishes with a return value.

    The return value is delivered to the caller as StopIteration.value.
    """
    return 'The return value'
    # Never reached, but the mere presence of `yield` in the body is what
    # makes this a generator function.
    yield

In [24]:
g = short_gen()

In [25]:
g

<generator object short_gen at 0x7f85d00dbe40>

In [26]:
next(g)

StopIteration: The return value

```python
def build_my_list():
    lst = []
    for something in something_else:
        lst.append(something)
    return lst
 
def build_my_gen():
    for something in something_else:
        yield something
```
        
`list(build_my_gen())`  is equivalent to  `build_my_list()`

Generators only use enough memory to produce a single value at a time

Lists have all the values present in memory at once

`yield` can also be used as an expression, along with the `send()` method

In [36]:
def accumulator(start=0):
    """Running-total generator driven by send().

    Prime it with next() (equivalent to send(None)) to receive `start`; each
    subsequent send(n) adds n to the total and returns the new total.
    """
    total = start
    while True:
        # `yield total` outputs the current total and suspends the generator;
        # the value passed to send() becomes the result of the yield
        # expression when the generator resumes.
        received = yield total
        total += received

In [44]:
x = accumulator()

In [45]:
next(x)  # equivalent to x.send(None)

0

In [46]:
x.send(1)

1

In [47]:
x.send(1)

2

In [48]:
x.send(10)

12

Illustration of memory advantage of using iterators/generators

In [49]:
import sys

# Iterating over the file object reads one line at a time, so only a single
# line has to be held in memory at any moment.  The with-block guarantees
# the file is closed when we are done.
with open('./data/hamlet.txt') as f:
    # default=0 keeps the result at 0 for an empty file.
    max_mem_usage = max((sys.getsizeof(line) for line in f), default=0)
print(max_mem_usage)

124


In [50]:
!ls -lh ./data/hamlet.txt

-rw-r--r-- 1 rick446 rick446 173K Sep 11  2020 ./data/hamlet.txt


In [51]:
# readlines() loads every line of the file into one list; the with-block
# closes the file as soon as the list has been built.
with open('./data/hamlet.txt') as f:
    hamlet_lines = f.readlines()

In [52]:
# sys.getsizeof() is shallow: the list's own size does not include the
# strings it refers to, so add the size of every line to the size of the list.
total_mem_usage = sum(
    sys.getsizeof(line) for line in hamlet_lines
) + sys.getsizeof(hamlet_lines)

In [53]:
total_mem_usage

347341

### Composition of generators

In [57]:
def gen1(prefix, n):
    """Yield the tuples (prefix, 0), (prefix, 1), ..., (prefix, n - 1)."""
    yield from ((prefix, index) for index in range(n))
        
def gen2(a, b):
    """Yield everything from gen1('a-prefix', a), then from gen1('b-prefix', b).

    Ways to delegate to a sub-generator, from worst to best:

    * Very wrong -- bare calls such as ``gen1('a-prefix', a)``: the
      generator objects are created and discarded without being iterated.
    * Wrong, but better -- ``yield gen1('a-prefix', a)``: this yields the
      generator object itself rather than the items it produces.
    * Right, but ugly -- ``for item in gen1('a-prefix', a): yield item``.
    * Preferred -- ``yield from``, as used below.
    """
    for prefix, n in (('a-prefix', a), ('b-prefix', b)):
        # Preferred way to delegate to sub-generators
        yield from gen1(prefix, n)
    
for item in gen2(5, 3):
    print(item)

('a-prefix', 0)
('a-prefix', 1)
('a-prefix', 2)
('a-prefix', 3)
('a-prefix', 4)
('b-prefix', 0)
('b-prefix', 1)
('b-prefix', 2)


You can `yield from` any iterable object

In [58]:
def mycat(filename1, filename2):
    """Yield the lines of filename1 followed by the lines of filename2.

    Each file is opened only when its turn comes and is closed once it has
    been read to the end.
    """
    for path in (filename1, filename2):
        with open(path) as stream:
            # A file object is iterable, so we can delegate to it directly.
            yield from stream
        

In [59]:
lines = list(mycat('./data/hamlet.txt', './data/poem.txt'))

In [60]:
lines[-2:]

['I took the one less traveled by,\t\n',
 'And that has made all the difference.\n']

What about return?

In [61]:
def producing_return_value():
    """Yield 1 and 2, then finish with a return value.

    Plain iteration ignores the return value; it is available as
    StopIteration.value or as the result of a `yield from` expression.
    """
    for number in (1, 2):
        yield number
    return 'return value'

In [62]:
list(producing_return_value())

[1, 2]

In [63]:
def using_return_value():
    """Re-yield everything from producing_return_value(), then print its return value.

    The value of a `yield from` expression is whatever the sub-generator
    returned when it finished.
    """
    returned = yield from producing_return_value()
    print('I got the return value', returned)

In [64]:
list(using_return_value())

I got the return value return value


[1, 2]

In [65]:
x = producing_return_value()
next(x)
next(x)
try:
    next(x)
except StopIteration as si:
    print(f'StopIteration.value = {si.value!r}')

StopIteration.value = 'return value'


```python
data = yield from socket.recv_async_data()
```

## The iterator protocol

What does `for x in sequence:` *really* do?

In [67]:
seq = range(4)
for x in seq: 
    print(x)

0
1
2
3


In [68]:
seq

range(0, 4)

In [69]:
iter_seq = iter(seq)
print(iter_seq)

<range_iterator object at 0x7f85d010a240>


In [70]:
iter_seq = iter(seq)         # __iter__
while True:
    try:
        x = next(iter_seq)   # __next__
    except StopIteration:
        break
    print(x)  # loop body

0
1
2
3


In [72]:
x = reversed([1,2,3])  # __reversed__ (?)
x

<list_reverseiterator at 0x7f85d01747f0>

In [75]:
x.__next__()   # this is what Python calls when you say next(x)

3

In [76]:
next(x)

2

Generators are their own iterators (which are also iterable):

In [77]:
x = counter(0, 4)
print(x)

<generator object counter at 0x7f85c97f2430>


In [78]:
print(iter(x))

<generator object counter at 0x7f85c97f2430>


In [79]:
x is iter(x)  #

True

In [80]:
def isiterator(x):
    """Return True if x is an iterator, i.e. iter(x) hands back x itself.

    Objects that are not iterable at all make iter() raise TypeError; those
    are reported as False.  Only TypeError is caught, so unrelated problems
    (including KeyboardInterrupt and SystemExit) are not silently swallowed.
    """
    try:
        return x is iter(x)
    except TypeError:
        return False

In [81]:
for item in counter(0, 4): 
    print(item)

Entering counter generator
0
1
2
3


In [82]:
x = counter(0, 4)
while True:
    next(x)

Entering counter generator


StopIteration: 

We can also define our own iterator classes (though generators are usually more readable):

In [83]:
class Counter:
    """The *iterable*: it stores the bounds and hands out fresh iterators.

    Every call to iter() on a Counter returns a new CounterIterator, so the
    same Counter can be looped over any number of times.
    """

    def __init__(self, start, end):
        self._start, self._end = start, end

    def __iter__(self):
        """Return a new, independent iterator over the configured bounds."""
        start, end = self._start, self._end
        return CounterIterator(start, end)
    
class CounterIterator:
    """The *iterator*: it remembers the current position between next() calls."""

    def __init__(self, start, end):
        self._cur, self._end = start, end

    def __iter__(self):
        """An iterator is its own iterator."""
        return self

    def __next__(self):
        """Return the current value and advance; raise StopIteration at the end."""
        current = self._cur
        if current >= self._end:
            raise StopIteration
        self._cur += 1
        return current


In [84]:
ctr = Counter(0, 5)
print(list(ctr))

[0, 1, 2, 3, 4]


In [85]:
class Counter:
    """Iterable over start, start + 1, ... up to (not including) end.

    No separate iterator class is needed: __iter__ is written as a generator
    function, so each call produces a fresh generator.
    """

    def __init__(self, start, end):
        self._start, self._end = start, end

    def __iter__(self):
        """iter(Counter(...)) returns a generator (which is also an iterator)"""
        value = self._start
        while value < self._end:
            yield value
            value += 1

In [86]:
ctr = Counter(0, 5)
print(list(ctr))

[0, 1, 2, 3, 4]


In [87]:
class TreeNode:
    """Binary-tree node whose traversal methods are generators of nodes.

    A missing child is represented by None (any falsy child is skipped).
    """

    def __init__(self, value, left=None, right=None):
        self.value, self.left, self.right = value, left, right

    def __repr__(self):
        return '<TreeNode {}>'.format(self.value)

    def preOrder(self):
        """Yield this node, then the left subtree, then the right subtree."""
        yield self
        yield from self.left.preOrder() if self.left else ()
        yield from self.right.preOrder() if self.right else ()

    def inOrder(self):
        """Yield the left subtree, then this node, then the right subtree."""
        yield from self.left.inOrder() if self.left else ()
        yield self
        yield from self.right.inOrder() if self.right else ()

    def postOrder(self):
        """Yield the left subtree, then the right subtree, then this node."""
        yield from self.left.postOrder() if self.left else ()
        yield from self.right.postOrder() if self.right else ()
        yield self


In [88]:
tree = TreeNode('root', 
                TreeNode('left',
                         TreeNode('left-1'),
                        ), 
                TreeNode('right')
               )
print(list(tree.preOrder()))
print(list(tree.inOrder()))
print(list(tree.postOrder()))

[<TreeNode root>, <TreeNode left>, <TreeNode left-1>, <TreeNode right>]
[<TreeNode left-1>, <TreeNode left>, <TreeNode root>, <TreeNode right>]
[<TreeNode left-1>, <TreeNode left>, <TreeNode right>, <TreeNode root>]


# List, set and dict comprehensions

In [89]:
[2*x for x in range(4)]

[0, 2, 4, 6]

In [90]:
{2*x for x in range(4)}

{0, 2, 4, 6}

In [91]:
{2*x:'y' for x in range(4)}

{0: 'y', 2: 'y', 4: 'y', 6: 'y'}

## Generator expressions

In [92]:
(x for x in range(10) if x % 2 == 0)

<generator object <genexpr> at 0x7f85d01842e0>

In [93]:
gen = ( x 
       for x in range(10) 
       if x % 2 == 0 
      )

In [94]:
next(gen)

0

In [95]:
next(gen)

2

In [96]:
list(gen)

[4, 6, 8]

In [99]:
'-'.join(str(x) for x in range(1, 20, 3))

'1-4-7-10-13-16-19'

In [100]:
gen = range(10)
gen = ( x for x in gen if x % 2 == 0 )  # filter
gen = ( x * 2 for x in gen )            # map
list(gen)

[0, 4, 8, 12, 16]

In [101]:
pow2 = [2 ** i for i in range(10)]
pow2

[1, 2, 4, 8, 16, 32, 64, 128, 256, 512]

In [102]:
pow2 = list(2 ** i for i in range(10))
pow2

[1, 2, 4, 8, 16, 32, 64, 128, 256, 512]

In [103]:
pow2 = tuple(2 ** i for i in range(10))
pow2


(1, 2, 4, 8, 16, 32, 64, 128, 256, 512)

## Builtin iterator functions

In [104]:
lst = list('abcdefghijklmnopqrstuvwxyz')
lst[:10]

['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']

In [105]:
for position, value in enumerate(lst[:10]):
    print(position, value)

0 a
1 b
2 c
3 d
4 e
5 f
6 g
7 h
8 i
9 j


In [106]:
stooges = 'Larry Moe Curley'.split()

In [107]:
for i in range(len(stooges)):
    print(i, stooges[i])

0 Larry
1 Moe
2 Curley


In [108]:
for i, stooge in enumerate(stooges):
    print(i, stooge)

0 Larry
1 Moe
2 Curley


In [109]:
lst1 = lst[5:]
for x, y in zip(lst, lst1):
    print(x, y, end=' - ')

a f - b g - c h - d i - e j - f k - g l - h m - i n - j o - k p - l q - m r - n s - o t - p u - q v - r w - s x - t y - u z - 

In [110]:
lst1 = lst[5:]
for i, (x, y) in enumerate(zip(lst, lst1)):
    print(i, x, y, end=' - ')

0 a f - 1 b g - 2 c h - 3 d i - 4 e j - 5 f k - 6 g l - 7 h m - 8 i n - 9 j o - 10 k p - 11 l q - 12 m r - 13 n s - 14 o t - 15 p u - 16 q v - 17 r w - 18 s x - 19 t y - 20 u z - 

In [111]:
lst1 = lst[5:]
for x, y, z in zip(lst, lst1, lst1):
    print(x, y, z, end=' - ')

a f f - b g g - c h h - d i i - e j j - f k k - g l l - h m m - i n n - j o o - k p p - l q q - m r r - n s s - o t t - p u u - q v v - r w w - s x x - t y y - u z z - 

## The `itertools` module

`itertools` provides a number of "higher-order iterators" that allow you to combine iterators in interesting ways.

In [112]:
import itertools
itertools?

In [113]:
ranks = '2 3 4 5 6 7 8 9 10 J Q K A'.split()
suits = 'diamonds hearts spades clubs'.split()

In [114]:
ranks

['2', '3', '4', '5', '6', '7', '8', '9', '10', 'J', 'Q', 'K', 'A']

In [115]:
list(itertools.product(suits, ranks))

[('diamonds', '2'),
 ('diamonds', '3'),
 ('diamonds', '4'),
 ('diamonds', '5'),
 ('diamonds', '6'),
 ('diamonds', '7'),
 ('diamonds', '8'),
 ('diamonds', '9'),
 ('diamonds', '10'),
 ('diamonds', 'J'),
 ('diamonds', 'Q'),
 ('diamonds', 'K'),
 ('diamonds', 'A'),
 ('hearts', '2'),
 ('hearts', '3'),
 ('hearts', '4'),
 ('hearts', '5'),
 ('hearts', '6'),
 ('hearts', '7'),
 ('hearts', '8'),
 ('hearts', '9'),
 ('hearts', '10'),
 ('hearts', 'J'),
 ('hearts', 'Q'),
 ('hearts', 'K'),
 ('hearts', 'A'),
 ('spades', '2'),
 ('spades', '3'),
 ('spades', '4'),
 ('spades', '5'),
 ('spades', '6'),
 ('spades', '7'),
 ('spades', '8'),
 ('spades', '9'),
 ('spades', '10'),
 ('spades', 'J'),
 ('spades', 'Q'),
 ('spades', 'K'),
 ('spades', 'A'),
 ('clubs', '2'),
 ('clubs', '3'),
 ('clubs', '4'),
 ('clubs', '5'),
 ('clubs', '6'),
 ('clubs', '7'),
 ('clubs', '8'),
 ('clubs', '9'),
 ('clubs', '10'),
 ('clubs', 'J'),
 ('clubs', 'Q'),
 ('clubs', 'K'),
 ('clubs', 'A')]

In [116]:
dimensions = [suits, ranks]
list(itertools.product(*dimensions))

[('diamonds', '2'),
 ('diamonds', '3'),
 ('diamonds', '4'),
 ('diamonds', '5'),
 ('diamonds', '6'),
 ('diamonds', '7'),
 ('diamonds', '8'),
 ('diamonds', '9'),
 ('diamonds', '10'),
 ('diamonds', 'J'),
 ('diamonds', 'Q'),
 ('diamonds', 'K'),
 ('diamonds', 'A'),
 ('hearts', '2'),
 ('hearts', '3'),
 ('hearts', '4'),
 ('hearts', '5'),
 ('hearts', '6'),
 ('hearts', '7'),
 ('hearts', '8'),
 ('hearts', '9'),
 ('hearts', '10'),
 ('hearts', 'J'),
 ('hearts', 'Q'),
 ('hearts', 'K'),
 ('hearts', 'A'),
 ('spades', '2'),
 ('spades', '3'),
 ('spades', '4'),
 ('spades', '5'),
 ('spades', '6'),
 ('spades', '7'),
 ('spades', '8'),
 ('spades', '9'),
 ('spades', '10'),
 ('spades', 'J'),
 ('spades', 'Q'),
 ('spades', 'K'),
 ('spades', 'A'),
 ('clubs', '2'),
 ('clubs', '3'),
 ('clubs', '4'),
 ('clubs', '5'),
 ('clubs', '6'),
 ('clubs', '7'),
 ('clubs', '8'),
 ('clubs', '9'),
 ('clubs', '10'),
 ('clubs', 'J'),
 ('clubs', 'Q'),
 ('clubs', 'K'),
 ('clubs', 'A')]

In [117]:
from itertools import chain, count, groupby

In [118]:
# chain links multiple iterators end-to-end
xs = range(10)
ys = 'abcdef'

list(chain(xs, ys))

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 'a', 'b', 'c', 'd', 'e', 'f']

In [119]:
def mycat(filenames):
    """Return an iterator over the lines of every file in filenames, in order.

    Files are opened lazily, one at a time, when iteration reaches them, and
    each one is closed once it has been read to the end (previously the
    files were opened but never explicitly closed).
    """
    def lines_of(filename):
        # The with-block closes the file when its lines are exhausted.
        with open(filename) as f:
            yield from f

    # chain.from_iterable consumes the generator expression lazily, so a
    # file is not touched until the previous one is finished.
    return chain.from_iterable(lines_of(fn) for fn in filenames)


In [120]:
for line in mycat(['./data/hamlet.txt', './data/poem.txt']):
    print(line, end='')



The Tragedy of Hamlet, Prince of Denmark Shakespeare homepage |
Hamlet | Entire play ACT I

SCENE I. Elsinore. A platform before the castle.

FRANCISCO at his post. Enter to him BERNARDO BERNARDO Who's there?
FRANCISCO Nay, answer me: stand, and unfold yourself.  BERNARDO
Long live the king!  FRANCISCO Bernardo?  BERNARDO He.  FRANCISCO
You come most carefully upon your hour.  BERNARDO 'Tis now struck
twelve; get thee to bed, Francisco.  FRANCISCO For this relief much
thanks: 'tis bitter cold, And I am sick at heart.  BERNARDO Have
you had quiet guard?  FRANCISCO Not a mouse stirring.  BERNARDO
Well, good night.  If you do meet Horatio and Marcellus, The rivals
of my watch, bid them make haste.  FRANCISCO I think I hear them.
Stand, ho! Who's there?  Enter HORATIO and MARCELLUS

HORATIO Friends to this ground.  MARCELLUS And liegemen to the Dane.
FRANCISCO Give you good night.  MARCELLUS O, farewell, honest
soldier: Who hath relieved you?  FRANCISCO Bernardo has my place.
Give you go

In [121]:
# The Python 3 built-in "zip" lets us iteratively zip multiple iterators. 
#  Useful when building a giant dictionary:
import string
dict(zip(string.ascii_lowercase, string.ascii_uppercase[:10]))

{'a': 'A',
 'b': 'B',
 'c': 'C',
 'd': 'D',
 'e': 'E',
 'f': 'F',
 'g': 'G',
 'h': 'H',
 'i': 'I',
 'j': 'J'}

In [122]:
z = zip(string.ascii_lowercase, string.ascii_uppercase)
next(z)

('a', 'A')

In [123]:
next(z)

('b', 'B')

In [125]:
from itertools import zip_longest

In [126]:
dict(zip_longest(
    string.ascii_lowercase, 
    string.ascii_uppercase[:10]
))

{'a': 'A',
 'b': 'B',
 'c': 'C',
 'd': 'D',
 'e': 'E',
 'f': 'F',
 'g': 'G',
 'h': 'H',
 'i': 'I',
 'j': 'J',
 'k': None,
 'l': None,
 'm': None,
 'n': None,
 'o': None,
 'p': None,
 'q': None,
 'r': None,
 's': None,
 't': None,
 'u': None,
 'v': None,
 'w': None,
 'x': None,
 'y': None,
 'z': None}

In [127]:
dict(zip_longest(
    string.ascii_lowercase, 
    string.ascii_uppercase[:10],
    fillvalue='---'
))

{'a': 'A',
 'b': 'B',
 'c': 'C',
 'd': 'D',
 'e': 'E',
 'f': 'F',
 'g': 'G',
 'h': 'H',
 'i': 'I',
 'j': 'J',
 'k': '---',
 'l': '---',
 'm': '---',
 'n': '---',
 'o': '---',
 'p': '---',
 'q': '---',
 'r': '---',
 's': '---',
 't': '---',
 'u': '---',
 'v': '---',
 'w': '---',
 'x': '---',
 'y': '---',
 'z': '---'}

In [128]:
# count() gives us a simple iterator of consecutive values

for i, letter in zip(count(), string.ascii_letters[:10]):
    print(i, letter)

0 a
1 b
2 c
3 d
4 e
5 f
6 g
7 h
8 i
9 j


In [129]:
for i, letter in enumerate(string.ascii_letters[:10]):
    print(i, letter)

0 a
1 b
2 c
3 d
4 e
5 f
6 g
7 h
8 i
9 j


`groupby()` allows us to efficiently group *consecutive* values from an iterator that share the same key into sub-iterators, so the input should already be sorted by that key. For instance, we might have 
some datetime-based data that we wish to convert to date-based data:

In [130]:
from random import random
from datetime import datetime, timedelta

trades = []
dt = datetime(2016, 4, 24)
while dt < datetime(2016,4,27):
    trades.append((dt, random()))
    dt += timedelta(hours=1)
    
print(len(trades))

72


In [131]:
trades[:10]

[(datetime.datetime(2016, 4, 24, 0, 0), 0.4965480338504109),
 (datetime.datetime(2016, 4, 24, 1, 0), 0.5082207971758307),
 (datetime.datetime(2016, 4, 24, 2, 0), 0.2450234498883369),
 (datetime.datetime(2016, 4, 24, 3, 0), 0.838297546710035),
 (datetime.datetime(2016, 4, 24, 4, 0), 0.6786330193227134),
 (datetime.datetime(2016, 4, 24, 5, 0), 0.25430592781025063),
 (datetime.datetime(2016, 4, 24, 6, 0), 0.9676748178083329),
 (datetime.datetime(2016, 4, 24, 7, 0), 0.9713942585612562),
 (datetime.datetime(2016, 4, 24, 8, 0), 0.816414189896801),
 (datetime.datetime(2016, 4, 24, 9, 0), 0.5462580824553875)]

In [132]:
def day_of_trade(item):
    """Key function for groupby: map a (datetime, value) trade to its calendar date."""
    timestamp, _ = item
    return timestamp.date()

In [133]:
day_of_trade(trades[12])

datetime.date(2016, 4, 24)

In [134]:
for date, date_trades in groupby(trades, key=day_of_trade):
    print(date, date_trades)

2016-04-24 <itertools._grouper object at 0x7f85d019e5b0>
2016-04-25 <itertools._grouper object at 0x7f85c97b7460>
2016-04-26 <itertools._grouper object at 0x7f85d019e6d0>


In [135]:
for date, date_trades in groupby(trades, key=day_of_trade):
    print(date, len(list(date_trades)))

2016-04-24 24
2016-04-25 24
2016-04-26 24


In [136]:
for date, date_trades in groupby(trades, key=day_of_trade):
    date_trades = list(date_trades)
    #print(date, sum(v for dt, v in date_trades) / len(date_trades))
    print(date, sum(t[1] for t in date_trades) / len(date_trades))


2016-04-24 0.5415627081912705
2016-04-25 0.41307765853079337
2016-04-26 0.449290561822655


In [137]:
!cat data/hamlet.txt | uniq | wc -l

2969


In [138]:
!cat data/hamlet.txt | sort | uniq | wc -l

2746


In [139]:
import random
random.shuffle(trades)

# groupby only merges *adjacent* items with equal keys, so once the trades
# are shuffled each day is split into many small runs instead of one group.
for date, date_trades in groupby(trades, key=day_of_trade):
    # Materialize the group once so it can be both counted and summed.
    date_trades = list(date_trades)
    average = sum(value for _, value in date_trades) / len(date_trades)
    print(date, len(date_trades), average)


2016-04-24 2 0.5920590226694341
2016-04-26 1 0.7135468530186728
2016-04-25 1 0.038578336192201634
2016-04-24 1 0.5082207971758307
2016-04-25 3 0.26502214631305093
2016-04-26 2 0.8049065528203363
2016-04-24 1 0.651734644583197
2016-04-25 1 0.6491482368586244
2016-04-26 1 0.5229336201489253
2016-04-25 2 0.48180896683541335
2016-04-26 2 0.13796322529305965
2016-04-25 2 0.6048998846618946
2016-04-24 1 0.739350066725017
2016-04-26 2 0.3234764396724436
2016-04-24 2 0.16972029857938653
2016-04-26 2 0.06094892609449376
2016-04-24 1 0.9713942585612562
2016-04-26 1 0.35410235582699223
2016-04-25 1 0.02034726730174863
2016-04-24 1 0.08309889934144354
2016-04-25 2 0.4226954160951453
2016-04-24 1 0.4524470824604008
2016-04-25 2 0.6664468376898474
2016-04-24 2 0.31326126321810815
2016-04-25 1 0.14119039322046545
2016-04-26 1 0.8086068040674569
2016-04-25 1 0.14593175504784228
2016-04-26 1 0.6407205093040631
2016-04-24 1 0.9038686056972837
2016-04-25 1 0.038303183838007904
2016-04-24 1 0.596508336540

### If you wish to group *unsorted* data, you should use a `defaultdict` instead.

In [140]:
from collections import defaultdict

date_trades = defaultdict(list)
for dt, value in trades:
    day = dt.date()
    date_trades[day].append(value)

In [141]:
len(date_trades)

3

In [142]:
{day: len(values) for day, values in date_trades.items()}

{datetime.date(2016, 4, 24): 24,
 datetime.date(2016, 4, 26): 24,
 datetime.date(2016, 4, 25): 24}

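`itertools.tee` is the one place where itertools can use *lots* of memory: it has to buffer every item that one of the returned iterators has consumed but the other has not yet reached: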

In [143]:
import itertools 
itertools.tee?

In [144]:
with open('./data/hamlet.txt') as f:
    (it1, it2) = itertools.tee(f)
    for line in it2:
        pass
    print(next(it1), end='')    
    print(next(it1), end='')    
    print(next(it1), end='')



The Tragedy of Hamlet, Prince of Denmark Shakespeare homepage |


In [145]:
def uniq_line_count(filename):
    """Count the lines of a file the way `uniq | wc -l` does.

    Runs of identical adjacent lines are counted once.  Returns 0 for an
    empty file (previously this raised StopIteration).
    """
    with open(filename) as f:
        # Two independent iterators over the same file; `cur` is advanced
        # one line ahead so zip() pairs every line with its predecessor.
        cur, prev = itertools.tee(f)
        if next(cur, None) is None:
            return 0  # empty file: there is no first line to count
        # The first line always counts; after that, each line that differs
        # from the one before it starts a new run.
        return 1 + sum(1 for c, p in zip(cur, prev) if c != p)
                

In [146]:
uniq_line_count('./data/hamlet.txt')

2969

# Lab

Open [Generators and Iterators Lab][iteration-lab]

[iteration-lab]: ./iteration-lab.ipynb