#Itertools

Exploring the tools in the itertools library

    1. count(start[,step]): generate an iterator that counts up from start in increments of step, indefinitely. 
    Because count() doesn't terminate on its own, we need a break condition. 
    

In [20]:
import itertools
for i in itertools.count(3,4):
    print i
    if i > 10:break

3
7
11


    2. cycle(iterable): return the elements of the iterable one by one, caching a copy of each. When the iterable is exhausted, cycle() repeats the cached elements. cycle() doesn't terminate on its own and will run indefinitely, so we need a break condition.

In [5]:
a = ['a','b','c']
count = 0
for i in itertools.cycle(a):
    print i
    count +=1
    if count > 2*len(a): break

a
b
c
a
b
c
a


    3. repeat(object[,ntimes]): return the object over and over again, either indefinitely or, when ntimes is given, exactly that many times. 

In [7]:
# return 10 three times. 
for i in itertools.repeat(10,3):
    print i

10
10
10


    4. chain(*iterables): take any number of iterables; chain returns the elements of the first iterable until it is exhausted, then the elements of the next one, and so on until all the iterables are exhausted

In [17]:
for i in itertools.chain([1,2,3,4,5],('a','c','d'),{'this':'that'},'qwertewr'):
    print i

1
2
3
4
5
a
c
d
this
q
w
e
r
t
e
w
r


    5. compress(data,selector): return only the elements of data whose corresponding element in selector evaluates to true. The iterator terminates when either data or selector is exhausted. 
    NOTE: data and selector must both be iterable. In the example below, the string 'asdfdsf' is iterable: iterating over it returns the individual characters of the string.  

In [5]:
from itertools import compress

for i in compress('asdfdsf',[True,True,0,False,False,True]):
    print i

def foo():
    """Yield True, False, True, False, ... without ever stopping."""
    flag = True
    # the endless loop is what keeps the True/False stream going ad infinitum
    while True:
        yield flag
        flag = not flag
# foo() never ends, so compress stops when the ten-character string runs out;
# the alternating True/False selectors keep every other character.
list(compress('1234567890', foo()))

a
s
s


['1', '3', '5', '7', '9']

    6. dropwhile(predicate,iterable): drop the elements of iterable for as long as predicate(elem)==true; starting with the first element for which predicate(elem)==false, return that element and everything after it (the predicate is not checked again). 
    
    To remember: alternative name: dropwhile_pred_is_true

In [39]:
# while 1 & 2 are less than 3, the iterator dropwhile will suppress them. 
# When number 3 is reached, the foo(3)==false, so everything from 3 to -1 is 
# returned even though 0 & -1 are less than 3 
def foo(myint):
    """Return True when myint is below 3, otherwise False."""
    return 3 > myint

for i in itertools.dropwhile(lambda x:x<3,[1,2,3,4,5,0,-1]):
    print i

3
4
5
0
-1


    7. takewhile(predicate,iterable): return the elements of iterable for as long as predicate(element)==true. The moment predicate(element)==false, the iterator terminates. 
    To remember: alternative name: take_while_pred_is_true
    Note: takewhile and dropwhile are polar-opposite iterators. 
    Note: an alternative way to think about dropwhile and takewhile: both iterators wait for a trigger, the first element for which the predicate is false. When that happens, dropwhile starts returning everything from that element on, while takewhile terminates and discards the rest.

In [40]:
# because 0,1 & 2 are less than 3, the iterator takewhile return them. 
# When the number 3 is reached, pred(3)==false, the iterator takewhile terminates everything 1 & 2 in the end of 
# the list is less than 3. 
 
for i in itertools.takewhile(lambda x:x<3,[0,1,2,3,1,2,3,1,2]):
    print i

0
1
2


    8. ifilter(predicate,iterable): return each elem of iterable for which predicate(elem)==true. If predicate is None, ifilter returns the elements for which bool(element) is true. 
    Note: unlike takewhile, this iterator doesn't terminate until all the elements of the iterable have been evaluated.  

In [42]:
for i in itertools.ifilter(lambda x:x>3,[0,1,2,3,4,5,0,1,2,10,9,1,2,3,4,5,11]):
    print i

4
5
10
9
4
5
11


    9. ifilterfalse(pred,iterable): return each elem of iterable if pred(elem)==false. If pred is None, then pred=bool and ifilterfalse return elem if bool(elem)==false.

In [44]:
for i in itertools.ifilterfalse(lambda x:x>10,range(1,100)):
    print i

1
2
3
4
5
6
7
8
9
10


    10. groupby(iterable[,key]): consecutive elements of the iterable are put into the same group when key(elem) is the same for all of them. The iterator returns tuples of (groupkey, iterator over the members of that group).
   ***Important: the iterable must already be sorted by the same key function: groupby(sorted(iterable,key=key),key).***
    
    In the example below, our sort-key function mysortkey() returns the group name that is used as the groupkey. Then we use the same function to sort our item list. 
    In the loop 'for groupname, groupitems in gb:', the groupname is the return value of the function mysortkey(), so our groupname is one of 'div3', 'div2' or 'notdiv'. The second element 'groupitems' is an iterator that lists the members that belong to that groupname. 

In [7]:
# in this script, we group numbers by whether they are divisible by three, divisible by two, or neither.
from itertools import groupby
def mysortkey(x):
    """Return the name of the group that x belongs to.

    Divisibility by three is tested first, so a multiple of six lands in
    'div3' rather than 'div2'.
    """
    if not x % 3:
        return 'div3'
    if not x % 2:
        return 'div2'
    return 'notdiv'


def groupby_even_odd(items, sortkey):
    gb = groupby(sorted(items, key=sortkey), sortkey)
    for groupname, groupitems in gb:
        grouplist = [i for i in groupitems]
        print groupname, grouplist

# Put the sample numbers in ascending order, then print them grouped by the
# name that mysortkey gives each one.
mylist = [1, 3, 4, 5, 3, 2, 1, 3, 4, 5, 9, 10, 11, 12, 13, 14]
mylist.sort()

groupby_even_odd(mylist, mysortkey)


div2 [2, 4, 4, 10, 14]
div3 [3, 3, 3, 9, 12]
notdiv [1, 1, 5, 5, 11, 13]


    11. tee(iterable[,n=2]): the doc describes this as: take a single iterable and return n independent iterators. 
        In other words, itertools.tee takes the iterable and returns n separate iterators that each yield the same data. 

In [17]:
# the 'elem' is an iterator; it has the same data as myiterable
myiterable = [1,2,3,4,5]
for elem in itertools.tee(myiterable,5):
    if elem is myiterable:print 'true'
    else:print 'false'
    print tuple(e for e in elem)

false
(1, 2, 3, 4, 5)
false
(1, 2, 3, 4, 5)
false
(1, 2, 3, 4, 5)
false
(1, 2, 3, 4, 5)
false
(1, 2, 3, 4, 5)


    12. imap(func,*iterables): apply func to the elements of the iterables taken in parallel. If func is None, imap returns the arguments as a tuple. Terminate when the shortest iterable is exhausted. 
    imap(func,iter_1,iter_2,...,iter_n) = 
      iter(func(iter_1[0],iter_2[0],...,iter_n[0]), func(iter_1[1],iter_2[1],...,iter_n[1]), ...)
    
    Unlike map(), imap terminates when the shortest iterable is exhausted instead of filling in None for the shorter iterables. 
    
    imap(func,*iterables) is equivalent to this list comprehension, except that imap produces its results lazily
    [func(x1,x2,...xn) for x1,x2,...xn in zip(iter_1,iter_2,...,iter_n)]

In [51]:
# Our function takes two arguments and returns a string that joins them.
# Two iterables are passed into imap; because the shorter one has only two
# elements, we only see two results.
myiter1 = [1, 2, 3]
myiter2 = list('ab')
def myfunc(x, y):
    """Return str(x) and y joined by '->', e.g. myfunc(1, 'a') gives '1->a'.

    Only x is converted with str(); y is passed to join unchanged.
    """
    left = str(x)
    return '->'.join((left, y))

for elem in itertools.imap(myfunc,[1,2,3],['a','b']):
    print elem

# this list comprehension is equilvalent to imap
print [myfunc(x1,x2) for x1,x2 in zip(myiter1,myiter2)]

1->a
2->b
['1->a', '2->b']


    13. starmap(func,iterable_of_iterables): for each element in iterable_of_iterables, return func(*element). 

In [44]:
# we have a list of tuple (2,5) and tuple (3,6)
# when starmap is called, we take each elemtn of the tuple 
myiterables = [(2,5),(3,6)]
print list (itertools.starmap(pow,myiterables))

# this list comprehension is equilvalent to starmap
print [pow(*element) for element in myiterables]

[32, 729]
[32, 729]


    14. izip(*iterables): return an iterator instead of a list like zip() does. The iterator aggregates the elements of the iterables into tuples. izip terminates when the shortest iterable is exhausted. 

In [4]:
import itertools
for i in itertools.izip('this is a string','that is not a string',[1,2,3,4,5,6,7]):
    print i

('t', 't', 1)
('h', 'h', 2)
('i', 'a', 3)
('s', 't', 4)
(' ', ' ', 5)
('i', 'i', 6)
('s', 's', 7)


    15. izip_longest(*iterables[,fillvalue]): return an iterator that aggregates the elements of *iterables. For shorter iterable in *iterables, the trailing elements are filled with fillvalue argument; if fillvalue is not defined, None is used. Continue until the longest iterable is exhausted.

In [7]:
for i in itertools.izip_longest([1,2,3,4,5],'zsdf'):
    print i
    
for j in itertools.izip_longest('go to hell','no',fillvalue='hehe'):
    print j

(1, 'z')
(2, 's')
(3, 'd')
(4, 'f')
(5, None)
('g', 'n')
('o', 'o')
(' ', 'hehe')
('t', 'hehe')
('o', 'hehe')
(' ', 'hehe')
('h', 'hehe')
('e', 'hehe')
('l', 'hehe')
('l', 'hehe')


    16. product(*iterables): the same as nesting multiple for loops, one per iterable in *iterables. Returns tuples

In [7]:
from itertools import product,izip,count
myproduct = product('99,111,90',(1,2,3),{'this':'that'})
myzip = izip(count(1),myproduct)
for pos,elem in myzip:
    print '%d) %s'%(pos,elem)

1) ('9', 1, 'this')
2) ('9', 2, 'this')
3) ('9', 3, 'this')
4) ('9', 1, 'this')
5) ('9', 2, 'this')
6) ('9', 3, 'this')
7) (',', 1, 'this')
8) (',', 2, 'this')
9) (',', 3, 'this')
10) ('1', 1, 'this')
11) ('1', 2, 'this')
12) ('1', 3, 'this')
13) ('1', 1, 'this')
14) ('1', 2, 'this')
15) ('1', 3, 'this')
16) ('1', 1, 'this')
17) ('1', 2, 'this')
18) ('1', 3, 'this')
19) (',', 1, 'this')
20) (',', 2, 'this')
21) (',', 3, 'this')
22) ('9', 1, 'this')
23) ('9', 2, 'this')
24) ('9', 3, 'this')
25) ('0', 1, 'this')
26) ('0', 2, 'this')
27) ('0', 3, 'this')


    17. combinations(iterable,r): return tuples of length 'r' whose elements are taken from iterable
  * tuples are returned in lexicographic order of the input positions, so a sorted input gives sorted output. The argument 'r' is NOT optional
  * only return unique combinations, where order does NOT matter: ('a','b') and ('b','a') count as the same combination. If ordering matters, use permutations()

In [8]:
from itertools import combinations,permutations
a = [1,2,3,4,5]
for elem in combinations('abcd',2):
    print elem
print ''
for elem in permutations('abcd',2):
    print elem

('a', 'b')
('a', 'c')
('a', 'd')
('b', 'c')
('b', 'd')
('c', 'd')

('a', 'b')
('a', 'c')
('a', 'd')
('b', 'a')
('b', 'c')
('b', 'd')
('c', 'a')
('c', 'b')
('c', 'd')
('d', 'a')
('d', 'b')
('d', 'c')
