# Itertools #

+ Iterator: object that can be iterated upon + implements the iterator protocol
    + Iterator Protocol = methods __iter__() and __next__(), which are invoked through the built-in functions iter() and next()

In [26]:
import itertools
import operator

## Iterator Protocol ##

In [20]:
## Drive the iterator by hand: every next() call advances it by one element.
newList = [3, 5, 6, 7, 8]
iter1 = iter(newList)
## The tuple is evaluated left to right, so the three calls yield 3, 5, 6.
first, second, third = next(iter1), next(iter1), next(iter1)

5


In [22]:
## Walk the range by hand with next(); a sentinel default signals exhaustion
## in place of catching StopIteration.
rangeList = range(1, 11)
rangeIter = iter(rangeList)
_exhausted = object()
while (value := next(rangeIter, _exhausted)) is not _exhausted:
    print(value)

1
2
3
4
5
6
7
8
9
10


## Count, ZipLongest Methods ##

+ itertools.count() ==> returns evenly spaced values starting with number start and incrementing by step (infinite)
+ Built-in zip() function ==> aggregates values from iterables until SHORTEST iterable is exhausted
+ itertools.zip_longest() ==> aggregates values from iterables until LONGEST iterable is exhausted, fills in values for shorter iterable

In [41]:
## count(10, 2) never terminates on its own, so the loop has to break explicitly
## once the value passes 20.
counter = itertools.count(10, 2)
for currNum in counter:
    if currNum > 20:
        break
    print(currNum)

10
12
14
16
18
20


In [47]:
## zip() stops at the shorter input, which bounds the endless count() stream.
newData = [100, 200, 300, 400, 500]
descendingTens = itertools.count(10, -10)  ## 10, 0, -10, -20, ...
pairedData = list(zip(descendingTens, newData))

[(10, 100), (0, 200), (-10, 300), (-20, 400), (-30, 500)]


In [50]:
## zip_longest() keeps going until range(10) is exhausted; the slots where
## newData has run out are padded with the fill value (None).
paddedPairs = itertools.zip_longest(newData, range(10), fillvalue=None)
zipLongestData = [*paddedPairs]
print(zipLongestData)

[(100, 0), (200, 1), (300, 2), (400, 3), (500, 4), (None, 5), (None, 6), (None, 7), (None, 8), (None, 9)]


## Cycle, Repeat, Starmap Method ##

+ itertools.cycle() ==> cycles through iterable infinitely
+ itertools.repeat() ==> repeats input value infinitely or n times ==> supply a stream of constant values to map or zip
+ itertools.starmap() ==> similar to map, used when argument parameters are already grouped in tuples

In [71]:
## Endless A, B, A, B, ... stream; nothing is consumed from it in this cell.
newCycle = itertools.cycle(["A", "B"])
## repeat(2) supplies the constant exponent; map() stops when range(1, 6) runs out.
squaresViaMap = map(pow, range(1, 6), itertools.repeat(2))
newMap = list(squaresViaMap)
## starmap() unpacks pre-grouped argument tuples: (1,2), (2,2), (3,2), (4,2), (5,2)
baseExponentPairs = zip(range(1, 6), itertools.repeat(2))
newStarmap = itertools.starmap(pow, baseExponentPairs)
list(newStarmap)

[1, 4, 9, 16, 25]

##  Combinations, Permutations, Cartesian Product Methods ##

+ itertools.combinations() ==> r-length tuples emitted in input-position order (output is sorted only if the input is sorted) + order within a tuple does not matter
+ itertools.permutations() ==> r-length tuples + order does matter (all possible orderings eg. "AB", "BA")
+ itertools.product() ==> cartesian product between two sets (repeat=n for cartesian product with set itself with n instances of set)
+ itertools.combinations_with_replacement() ==> r-length tuples emitted in input-position order, where an element may be repeated within a tuple (eg, "AA", "BB")

In [62]:
data = list(range(3))  ## 0, 1, 2
letters = ["A", "B", "C", "D", "E"]
## Pairs without reusing an element, in input order: BG, BF, GF
newCombinations = itertools.combinations(["B", "G", "F"], 2)
## Pairs where an element may be paired with itself: (0, 0), (0, 1), ...
newCombinationsRep = itertools.combinations_with_replacement(data, 2)
## Every ordering of all three elements
newPermutations = itertools.permutations(data, len(data))
## Cartesian product of two different collections
newProduct = itertools.product(data, letters)
## Cartesian product of the letters with themselves
newSelfProduct = itertools.product(letters, repeat=2)

[('A', 'A'), ('A', 'B'), ('A', 'C'), ('A', 'D'), ('A', 'E'), ('B', 'A'), ('B', 'B'), ('B', 'C'), ('B', 'D'), ('B', 'E'), ('C', 'A'), ('C', 'B'), ('C', 'C'), ('C', 'D'), ('C', 'E'), ('D', 'A'), ('D', 'B'), ('D', 'C'), ('D', 'D'), ('D', 'E'), ('E', 'A'), ('E', 'B'), ('E', 'C'), ('E', 'D'), ('E', 'E')]


## Chain Method ##

+ itertools.chain() ==> chains together iterables and loops through each iterable when previous has been exhausted

In [4]:
iterable1 = range(4)
iterable2 = ["A", "B", "C", "D"]
iterable3 = ["ABC", "DEF"]

## chain() drains each source in turn; the strings in iterable3 stay whole
## because only the outer list is iterated.
chainSources = (iterable1, iterable2, iterable3)
chainIterable = itertools.chain(*chainSources)
chainedItems = list(chainIterable)
print(chainedItems)

[0, 1, 2, 3, 'A', 'B', 'C', 'D', 'ABC', 'DEF']


## iSlice Method ##

+ itertools.islice() ==> slices an iterable from start to stop index (if stop = None, iterates until iterable is exhausted)
    + useful for extracting related fields from data where internal structure is flattened (eg, multi-line report may contain a name field on every third line)
    + step ==> returns every step-th value between the start and stop index (defaults to 1, i.e. every value)

In [10]:
dataRange = range(20)
sliceStart, sliceStop = 6, 12
## start = 6, stop = 12 (exclusive); no step is passed, so every value in between is kept
iSliceIterable = itertools.islice(dataRange, sliceStart, sliceStop)
print(list(iSliceIterable))

[6, 7, 8, 9, 10, 11]


## Compress, FilterFalse Methods ##

+ itertools.compress() ==> returns an iterable that filters elements from iterable that have corresponding element in selectors that evaluates to TRUE
    + stops when either iterable or selectors has been exhausted

+ filter() built-in function ==> returns an iterable containing all values in data that return TRUE when passed into function
+ itertools.filterfalse() ==> returns an iterable containing all values in data that return FALSE when passed into function

In [14]:
dataToCompress = ["A", "B", "C", "D"]
selectors = [True, False, False, True]


def isAOrC(letter):
    """Return True when letter is "A" or "C"."""
    return letter in ["A", "C"]


## Keeps the elements whose selector is truthy: A, D
newCompress = itertools.compress(dataToCompress, selectors)
## Keeps the elements for which the predicate is true: A, C
newFilter = filter(isAOrC, dataToCompress)
## Keeps the elements for which the predicate is false: B, D
newFilterFalse = itertools.filterfalse(isAOrC, dataToCompress)

['B', 'D']


## DropWhile, TakeWhile Methods ##

+ itertools.dropwhile() ==> returns an iterator that drops values from iterable that return TRUE until function returns FALSE, then returns every value after
+ itertools.takewhile() ==> returns an iterator with values from iterable that evaluate to TRUE in function, then drops every value after a FALSE value 

In [37]:
newNumRange = [0, 1, 2, 3, 4, 5, 6, 3, 4]


def atMostFour(number):
    """Return True when number is less than or equal to 4."""
    return number <= 4


## Skips the leading 0..4, then yields everything after it (including the late 3, 4)
newDropwhile = itertools.dropwhile(atMostFour, newNumRange)
## Yields the leading 0..4 and stops at the first value above 4
newTakewhile = itertools.takewhile(atMostFour, newNumRange)
newstring = "helloworld"
## Only the leading "h" is dropped: "e" fails the test, so the rest passes through
newdropstr = itertools.dropwhile(lambda char: char in ("h", "l", "o"), newstring)

elloworld


## Accumulate Method ##

+ itertools.accumulate() ==> returns an iterable of accumulated sums
    + func argument ==> defaults to addition; can also be used to return accumulated results of other binary functions (multiplication, max, min)
    + initial argument ==> specifies an initial value to start at, then starts accumulating (adds an extra initial element to output iterable)

In [35]:
accData = [3, 5, 6, 4, 2]
## Running sum seeded with 50: 50, 53, 58, 64, 68, 70
newSum = itertools.accumulate(accData, operator.add, initial=50)
## Running product seeded with 2: 2, 6, 30, 180, 720, 1440
newAccProduct = itertools.accumulate(accData, operator.mul, initial=2)
## Running maximum; with no initial value the output has no extra leading element: 3, 5, 6, 6, 6
newAccMax = itertools.accumulate(accData, max)

[3, 5, 6, 6, 6]


## GroupBy Method ##

+ itertools.groupby() ==> returns consecutive keys and groups from an iterable (sort the input by the same key function first to get exactly one group per key)
    + key = Function computing key value for each element
    + returns an iterator of tuples as (key, group); where group is an iterator of all items belonging to key
    + generates a break/new group every time value of key function changes
    + key == None ==> uses element as key

In [54]:
newStr = "AAAAABBBBCCCCAA"
## Sorting first merges the trailing "AA" into the leading run of "A"s; with
## no key function each character acts as its own key.
newGroup = itertools.groupby(sorted(newStr))
groupList = [(char, sum(1 for _ in run)) for char, run in newGroup]

## Group all tuples that share the same first value (the key)
newTuples = [('A', 30), ('A', 40), ('B', 30), ('C', 24), ('A', 50)]
firstField = operator.itemgetter(0)
tuplesByFirstField = sorted(newTuples, key=firstField)
tupleGroupby = itertools.groupby(tuplesByFirstField, key=firstField)
tupleGroupsKeys = [(label, list(members)) for label, members in tupleGroupby]

## Group all student records that share the same grade, then count each group
students = [
    {"mark": 90, "grade": 'A'},
    {"mark": 80, "grade": 'B'},
    {"mark": 64, "grade": 'C'},
    {"mark": 94, "grade": 'A'},
    {"mark": 55, "grade": 'C'},
    {"mark": 82, "grade": 'B'},
]

byGrade = operator.itemgetter("grade")
studentsByGrade = sorted(students, key=byGrade)
dictGroupby = itertools.groupby(studentsByGrade, key=byGrade)
dictGroupsList = [(grade, sum(1 for _ in records)) for grade, records in dictGroupby]
print(dictGroupsList)

[('A', 2), ('B', 2), ('C', 2)]


## Tee Method ##

+ itertools.tee() ==> returns n independent iterators from a single iterable (copies of original iterable) (default = 2 copies)
    + original iterator should no longer be used (use copies)


In [59]:
teeIter = range(15)
## Ask for two independent iterators over the same source.
copy1, copy2 = itertools.tee(teeIter, 2)
## copy1 is drained into a list; copy2 is printed untouched, so it shows up as
## a tee object and still holds all fifteen values.
firstCopyItems = list(copy1)
print(firstCopyItems, copy2)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] <itertools._tee object at 0x7f94da1abc00>
