### Aggregators

In [1]:
def squares(n):
    """Lazily produce the squares of 0, 1, ..., n - 1.

    The result is a generator, so it can be consumed only once.
    """
    yield from (base ** 2 for base in range(n))

In [5]:
sq = squares(5)
min(sq), max(sq)  # generator is exhausted by min

ValueError: max() arg is an empty sequence

In [7]:
class Person:
    """Empty class: defines neither __bool__ nor __len__."""
p = Person()
bool(p)  # default bool of python objects is True

True

In [None]:
class Person:
    """Demo class whose instances are always falsy."""
    def __bool__(self):
        # bool() and truth tests use __bool__ when the class defines it
        return False
bool(Person())  # now the __bool__ method is referenced for the truth of Person

False

In [11]:
class Person:
    """Demo class with a __len__ but no __bool__; its length is always 0."""
    def __len__(self):
        # a zero length makes the instance falsy when no __bool__ is defined
        return 0
bool(Person())  # if no __bool__ method exists, the __len__ method is checked for truth of class

False

In [13]:
class MySeq:
    """Minimal sequence-like demo holding ``n`` items.

    The item stored at each position is simply that position (0 .. n-1), which
    keeps the class small while still making indexing and iteration work.
    Truthiness comes from ``__len__``: an instance is falsy exactly when n == 0.
    """
    def __init__(self, n):
        self.n = n

    def __len__(self):
        return self.n

    def __getitem__(self, index):
        """Return the item at ``index``; negative indexes count from the end.

        Raises:
            IndexError: if ``index`` is outside ``range(-n, n)``. Raising here
                is also what terminates iteration over the instance.
        """
        if not -self.n <= index < self.n:
            raise IndexError('MySeq index out of range')
        # normalise negative indexes to the equivalent non-negative position
        return index % self.n

In [14]:
bool(MySeq(0)), bool(MySeq(10))  # while not required, a __len__ method on custom sequence types enforces compliance with Python "truth" standards

(False, True)

In [15]:
any([0, '', None])  # 0, None, empty iterables are falsey

False

In [16]:
all(['hello', 1])

True

In [19]:
from numbers import Number
from decimal import Decimal
isinstance(10, Number)

True

In [21]:
isinstance(Decimal('10.5'), Number)

True

In [26]:
l = [10, 20, 30, 40]

def is_numeric(v):  # define a predicate - takes in a single value and returns T/F
    """Return True when ``v`` is an instance of ``numbers.Number``."""
    return isinstance(v, Number)

pred_l = map(is_numeric, l)
list(pred_l)

[True, True, True, True]

In [29]:
list(is_numeric(item) for item in l)

[True, True, True, True]

In [30]:
list(map(lambda x: isinstance(x, Number), l))

[True, True, True, True]

In [31]:
all(map(is_numeric, l))  # build a fresh map: pred_l was already consumed by list(pred_l), so all(pred_l) would be vacuously True

True

In [32]:
with open('car-brands.txt') as f:
    for row in f:
        print(len(row), row, end='')

11 Alfa Romeo
13 Aston Martin
5 Audi
8 Bentley
5 Benz
4 BMW
8 Bugatti
9 Cadillac
10 Chevrolet
9 Chrysler
8 Citroen
9 Corvette
4 DAF
6 Dacia
7 Daewoo
9 Daihatsu
7 Datsun
10 De Lorean
5 Dino
5 Dodge

In [35]:
# Is every brand at least 3 chars long?
with open('car-brands.txt') as f:
    # Strip the line terminator before measuring: the last row of the file has
    # no trailing newline, so comparing the raw length against 4 would miscount it.
    res = all(map(lambda row: len(row.rstrip('\n')) >= 3, f))
res
# f is an iterator, so we can use the map fn directly on it to work with all rows rather than iterate one by one

True

In [36]:
with open('car-brands.txt') as f:
    res = any(map(lambda row: len(row) > 10, f))
res

True

In [37]:
# Same predicate with a generator expression instead of map -- note this cell uses all() rather than any(), hence False
with open('car-brands.txt') as f:
    res = all(len(row) > 10 for row in f)
res

False

### Slicing Iterators

In [38]:
import math

def factorials(n):
    """Lazily yield 0!, 1!, ..., (n - 1)!."""
    yield from map(math.factorial, range(n))

In [39]:
facts = factorials(100)
facts[0:2]

TypeError: 'generator' object is not subscriptable

In [40]:
def slice_(iterable, start, stop):
    """Lazily yield the items of ``iterable`` at positions ``start`` .. ``stop - 1``.

    Args:
        iterable: any iterable; an iterator is advanced in place.
        start: number of leading items to skip.
        stop: position (exclusive) at which to stop.

    If the input runs out before ``stop`` is reached the generator simply ends
    early, mirroring how list slicing tolerates out-of-range bounds.
    """
    # Accept plain iterables (lists, ranges, ...) as well as iterators.
    iterator = iter(iterable)
    try:
        for _ in range(start):
            next(iterator)
    except StopIteration:
        # Fewer than `start` items: nothing to yield. Returning here avoids a
        # StopIteration escaping the generator body (a RuntimeError per PEP 479).
        return
    for _ in range(start, stop):
        try:
            item = next(iterator)
        except StopIteration:
            return
        yield item

In [41]:
list(slice_(factorials(100), 0, 10))

[1, 1, 2, 6, 24, 120, 720, 5040, 40320, 362880]

In [42]:
list(slice_(factorials(100), 3, 10))

[6, 24, 120, 720, 5040, 40320, 362880]

In [43]:
from itertools import islice
list(islice(factorials(100), 3, 10))  # the islice fn handles the start/stop/step behavior we expect for iterables, but on iterators instead. And it evaluates lazily

[6, 24, 120, 720, 5040, 40320, 362880]

In [45]:
list(islice(factorials(100), 3, 10, 2))

[6, 120, 5040, 362880]

### Selecting and Filtering Iterators

In [49]:
def gen_cubes(n):
    """Yield i ** 3 for each i in range(n), printing a trace line before each value."""
    for i in range(n):
        print(f'Yielding {i}')  # the trace shows when the generator body actually runs
        yield i ** 3

In [52]:
# Retain only odd cubes
filtered = filter(lambda x: x%2, gen_cubes(10))
# Nothing gets printed since the filter fn is lazy

In [54]:
list(filtered)

Yielding 0
Yielding 1
Yielding 2
Yielding 3
Yielding 4
Yielding 5
Yielding 6
Yielding 7
Yielding 8
Yielding 9


[1, 27, 125, 343, 729]

In [59]:
from itertools import filterfalse
evens = filterfalse(lambda x: x%2, gen_cubes(10))  # filter fn keeps true vals, filterfalse keeps false vals
list(evens)  # lazy evaluation

Yielding 0
Yielding 1
Yielding 2
Yielding 3
Yielding 4
Yielding 5
Yielding 6
Yielding 7
Yielding 8
Yielding 9


[0, 8, 64, 216, 512]

In [60]:
from itertools import dropwhile, takewhile
from math import sin, pi

In [66]:
def sine_wave(n):
    """Yield ``n`` samples of sin(x), rounded to 2 decimals, for x evenly spaced over [0, 2*pi].

    Both endpoints are included when n >= 2. With n == 1 only the sample at
    x = 0 is produced, and n <= 0 produces nothing.
    """
    if n <= 0:
        return
    start = 0
    max_ = 2 * pi
    # A single sample has no spacing; guard against dividing by n - 1 == 0.
    step = (max_ - start) / (n - 1) if n > 1 else 0
    for _ in range(n):
        yield round(sin(start), 2)
        start += step

In [67]:
list(sine_wave(15))

[0.0,
 0.43,
 0.78,
 0.97,
 0.97,
 0.78,
 0.43,
 0.0,
 -0.43,
 -0.78,
 -0.97,
 -0.97,
 -0.78,
 -0.43,
 -0.0]

In [69]:
subset = takewhile(lambda x: 0 <= x <= 0.9, sine_wave(15))
list(subset)  # iteration stops the first time we see a false val (i.e. 0.97)

[0.0, 0.43, 0.78]

In [72]:
l = [1,3,5,2,1]
sub = dropwhile(lambda x: x < 5, l)  # drop vals from l until val >= 5
list(sub)

[5, 2, 1]

In [73]:
from itertools import compress
data = ['a', 'b', 'c', 'd', 'e']
selectors = [True, False, 1, 0]  # one selector short: zip stops at the shortest input, so 'e' gets no pair
list(zip(data, selectors))

[('a', True), ('b', False), ('c', 1), ('d', 0)]

In [74]:
[el for el,truth in zip(data, selectors) if truth]

['a', 'c']

In [75]:
list(compress(data, selectors))

['a', 'c']

### Infinite Iterators

In [76]:
from itertools import count, cycle, repeat, islice

In [77]:
g = count(10)
list(islice(g, 5))

[10, 11, 12, 13, 14]

In [78]:
g = count(1, 0.5)
list(islice(g, 5))

[1, 1.5, 2.0, 2.5, 3.0]

In [79]:
g = count(1+1j, 1+2j)
list(islice(g, 5))

[(1+1j), (2+3j), (3+5j), (4+7j), (5+9j)]

In [81]:
g = cycle(('red', 'green', 'blue'))
list(islice(g, 6))

['red', 'green', 'blue', 'red', 'green', 'blue']

In [82]:
def colors():
    """Yield the three color names once each, in a fixed order."""
    yield from ('red', 'green', 'blue')

In [84]:
cols = colors()
list(cols), list(cols)  # iterator is exhausted

(['red', 'green', 'blue'], [])

In [86]:
cols = colors()
g = cycle(cols)
list(islice(g, 10))  # cycle does not get "exhausted" when passed an iterator

['red', 'green', 'blue', 'red', 'green', 'blue', 'red', 'green', 'blue', 'red']

In [89]:
from collections import namedtuple

Card = namedtuple('Card', 'rank suit')

def card_deck():
    """Lazily yield the 52 cards, one suit at a time.

    Within each suit the ranks run '2' .. '10', then 'J', 'Q', 'K', 'A'.
    """
    ranks = [str(pip) for pip in range(2, 11)] + list('JQKA')
    for suit in ('Spades', 'Hearts', 'Diamonds', 'Clubs'):
        yield from (Card(rank, suit) for rank in ranks)

In [90]:
list(islice(card_deck(), 10))

[Card(rank='2', suit='Spades'),
 Card(rank='3', suit='Spades'),
 Card(rank='4', suit='Spades'),
 Card(rank='5', suit='Spades'),
 Card(rank='6', suit='Spades'),
 Card(rank='7', suit='Spades'),
 Card(rank='8', suit='Spades'),
 Card(rank='9', suit='Spades'),
 Card(rank='10', suit='Spades'),
 Card(rank='J', suit='Spades')]

In [91]:
# Deal out 4 hands of cards
hands = [list() for _ in range(4)]
ix = 0
for card in card_deck():
    ix = ix % 4  # to get 0, 1, 2, 3, 0, 1, 2, 3, ...
    hands[ix].append(card)
    ix += 1

hands

[[Card(rank='2', suit='Spades'),
  Card(rank='6', suit='Spades'),
  Card(rank='10', suit='Spades'),
  Card(rank='A', suit='Spades'),
  Card(rank='5', suit='Hearts'),
  Card(rank='9', suit='Hearts'),
  Card(rank='K', suit='Hearts'),
  Card(rank='4', suit='Diamonds'),
  Card(rank='8', suit='Diamonds'),
  Card(rank='Q', suit='Diamonds'),
  Card(rank='3', suit='Clubs'),
  Card(rank='7', suit='Clubs'),
  Card(rank='J', suit='Clubs')],
 [Card(rank='3', suit='Spades'),
  Card(rank='7', suit='Spades'),
  Card(rank='J', suit='Spades'),
  Card(rank='2', suit='Hearts'),
  Card(rank='6', suit='Hearts'),
  Card(rank='10', suit='Hearts'),
  Card(rank='A', suit='Hearts'),
  Card(rank='5', suit='Diamonds'),
  Card(rank='9', suit='Diamonds'),
  Card(rank='K', suit='Diamonds'),
  Card(rank='4', suit='Clubs'),
  Card(rank='8', suit='Clubs'),
  Card(rank='Q', suit='Clubs')],
 [Card(rank='4', suit='Spades'),
  Card(rank='8', suit='Spades'),
  Card(rank='Q', suit='Spades'),
  Card(rank='3', suit='Hearts'),


In [94]:
# repeat the above using the cycle fn
hands = [list() for _ in range(4)]
ix_cycle = cycle([0, 1, 2, 3])

for card in card_deck():
    hands[next(ix_cycle)].append(card)  # much simpler

hands

[[Card(rank='2', suit='Spades'),
  Card(rank='6', suit='Spades'),
  Card(rank='10', suit='Spades'),
  Card(rank='A', suit='Spades'),
  Card(rank='5', suit='Hearts'),
  Card(rank='9', suit='Hearts'),
  Card(rank='K', suit='Hearts'),
  Card(rank='4', suit='Diamonds'),
  Card(rank='8', suit='Diamonds'),
  Card(rank='Q', suit='Diamonds'),
  Card(rank='3', suit='Clubs'),
  Card(rank='7', suit='Clubs'),
  Card(rank='J', suit='Clubs')],
 [Card(rank='3', suit='Spades'),
  Card(rank='7', suit='Spades'),
  Card(rank='J', suit='Spades'),
  Card(rank='2', suit='Hearts'),
  Card(rank='6', suit='Hearts'),
  Card(rank='10', suit='Hearts'),
  Card(rank='A', suit='Hearts'),
  Card(rank='5', suit='Diamonds'),
  Card(rank='9', suit='Diamonds'),
  Card(rank='K', suit='Diamonds'),
  Card(rank='4', suit='Clubs'),
  Card(rank='8', suit='Clubs'),
  Card(rank='Q', suit='Clubs')],
 [Card(rank='4', suit='Spades'),
  Card(rank='8', suit='Spades'),
  Card(rank='Q', suit='Spades'),
  Card(rank='3', suit='Hearts'),


In [96]:
# simplify even further
hands = [list() for _ in range(4)]
hands_cycle = cycle(hands)

for card in card_deck():
    next(hands_cycle).append(card)

hands

[[Card(rank='2', suit='Spades'),
  Card(rank='6', suit='Spades'),
  Card(rank='10', suit='Spades'),
  Card(rank='A', suit='Spades'),
  Card(rank='5', suit='Hearts'),
  Card(rank='9', suit='Hearts'),
  Card(rank='K', suit='Hearts'),
  Card(rank='4', suit='Diamonds'),
  Card(rank='8', suit='Diamonds'),
  Card(rank='Q', suit='Diamonds'),
  Card(rank='3', suit='Clubs'),
  Card(rank='7', suit='Clubs'),
  Card(rank='J', suit='Clubs')],
 [Card(rank='3', suit='Spades'),
  Card(rank='7', suit='Spades'),
  Card(rank='J', suit='Spades'),
  Card(rank='2', suit='Hearts'),
  Card(rank='6', suit='Hearts'),
  Card(rank='10', suit='Hearts'),
  Card(rank='A', suit='Hearts'),
  Card(rank='5', suit='Diamonds'),
  Card(rank='9', suit='Diamonds'),
  Card(rank='K', suit='Diamonds'),
  Card(rank='4', suit='Clubs'),
  Card(rank='8', suit='Clubs'),
  Card(rank='Q', suit='Clubs')],
 [Card(rank='4', suit='Spades'),
  Card(rank='8', suit='Spades'),
  Card(rank='Q', suit='Spades'),
  Card(rank='3', suit='Hearts'),


In [97]:
g = repeat('Python')
list(islice(g, 5))

['Python', 'Python', 'Python', 'Python', 'Python']

In [99]:
el = 'Python'
g = repeat(el)
id(el), [id(i) for i in list(islice(g, 5))]  # repeat gives the same object each time

(2216861860848,
 [2216861860848, 2216861860848, 2216861860848, 2216861860848, 2216861860848])

### Chaining and Teeing
Concatenate multiple iterables together

In [103]:
l1 = (i**2 for i in range(4))
l2 = (i**2 for i in range(4, 8))
l3 = (i**2 for i in range(8, 12))

for gen in l1, l2, l3:
    for item in gen:
        print(item, end=' ')

0 1 4 9 16 25 36 49 64 81 100 121 

In [107]:
def chain_iterables(*iterables):
    """Yield every item of the first iterable, then the second, and so on."""
    for source in iterables:
        for element in source:
            yield element

In [109]:
l1 = (i**2 for i in range(4))
l2 = (i**2 for i in range(4, 8))
l3 = (i**2 for i in range(8, 12))

for item in chain_iterables(l1, l2, l3):
    print(item, end=' ')

0 1 4 9 16 25 36 49 64 81 100 121 

In [111]:
# This can be done simply by using the chain fn
from itertools import chain

l1 = (i**2 for i in range(4))
l2 = (i**2 for i in range(4, 8))
l3 = (i**2 for i in range(8, 12))

for item in chain(l1, l2, l3):
    print(item, end=' ')

0 1 4 9 16 25 36 49 64 81 100 121 

In [114]:
l1 = (i**2 for i in range(4))
l2 = (i**2 for i in range(4, 8))
l3 = (i**2 for i in range(8, 12))

lists = [l1, l2, l3]
for item in chain(lists):
    print(item, end=' ')  # need to pass each iterable as separate args rather than an iterable containing the iterators

<generator object <genexpr> at 0x000002042B7DB4A0> <generator object <genexpr> at 0x000002042B7DB900> <generator object <genexpr> at 0x000002042B7DB9E0> 

In [116]:
l1 = (i**2 for i in range(4))
l2 = (i**2 for i in range(4, 8))
l3 = (i**2 for i in range(8, 12))

lists = [l1, l2, l3]
for item in chain(*lists):
    print(item, end=' ')  # unpacking the generators in lists gets the job done

0 1 4 9 16 25 36 49 64 81 100 121 

In [117]:
def squares():
    """Yield three generators covering the squares of 0-3, 4-7 and 8-11."""
    for lower, upper in ((0, 4), (4, 8), (8, 12)):
        # range(lower, upper) is evaluated as soon as the generator expression
        # is created, so each yielded generator keeps its own bounds
        yield (i**2 for i in range(lower, upper))

In [118]:
for item in chain(*squares()):
    print(item, end=' ')

0 1 4 9 16 25 36 49 64 81 100 121 

In [119]:
def squares():
    """Yield three generators of squares, printing a trace line before each one."""
    stages = (
        ('yielding first item', range(4)),
        ('yielding second item', range(4, 8)),
        ('yielding third item', range(8, 12)),
    )
    for message, span in stages:
        print(message)
        yield (i**2 for i in span)

def read_values(*args):
    """Accept any positional arguments, ignore them, and print a completion message."""
    print('finished reading values')

In [120]:
read_values(squares())

finished reading values


In [122]:
read_values(*squares())  # unpacking from the squares fn is NOT lazy

yielding first item
yielding second item
yielding third item
finished reading values


In [123]:
# Solve this by using the from_iterable fn
c = chain.from_iterable(squares())

In [124]:
for item in c:
    print(item, end=' ')

yielding first item
0 1 4 9 yielding second item
16 25 36 49 yielding third item
64 81 100 121 

In [125]:
# We can emulate this behavior in our own fn as follows
def chain_iterables(*iterables):
    """Yield every item of each positional iterable in turn.

    The iterables are received as separate positional arguments.
    """
    for iterable in iterables:
        yield from iterable

def chain_from_iter(iterable):
    """Flatten one level: yield every item of each inner iterable produced by ``iterable``.

    A single iterable of iterables is taken and walked lazily, so the next
    inner iterable is only requested once the previous one is finished.
    """
    for item in iterable:
        yield from item  # note the subtle difference!

In [127]:
for item in chain_from_iter(squares()):
    print(item, end=' ')

yielding first item
0 1 4 9 yielding second item
16 25 36 49 yielding third item
64 81 100 121 

In [128]:
from itertools import tee

In [129]:
def squares(n):
    """Lazily yield i**2 for i = 0 .. n - 1."""
    yield from map(lambda i: i**2, range(n))

gen = squares(10)

In [132]:
iters = tee(gen, 3)
iters  # each iterator is a different object

(<itertools._tee at 0x2042b7c7740>,
 <itertools._tee at 0x2042b7c7700>,
 <itertools._tee at 0x2042b7c7fc0>)

In [134]:
iter1, iter2, iter3 = iters
iter1 is iter2

False

In [135]:
next(iter1), next(iter1), next(iter1)

(0, 1, 4)

In [137]:
next(iter2), next(iter2)  # each iterable is an independent copy

(0, 1)

In [141]:
l = [1,2,3,4]
lists = tee(l, 2)
lists[0]

<itertools._tee at 0x2042b79e500>

In [142]:
list(lists[0]), list(lists[0])  # even though l is an iterable, tee creates a tuple of iterators. After iterating through the first copy, it's exhausted

([1, 2, 3, 4], [])

In [144]:
lists[0] is lists[0].__iter__(), '__next__' in dir(lists[0])  # confirms we have an iterator

(True, True)

### Mapping and Reducing

In [1]:
maps = map(lambda x: x**2, range(5))
type(maps)

map

In [5]:
iter(maps) is maps, '__next__' in dir(maps)  # we have an iterator

(True, True)

In [6]:
list(maps)

[0, 1, 4, 9, 16]

In [7]:
def add(t):
    """Return the sum of the first two items of the indexable ``t``."""
    return t[0] + t[1]

list(map(add, [(0,0), [1,1], range(2,4)]))

[0, 2, 5]

In [9]:
def add(x, y):
    """Return ``x + y``."""
    return x + y

t = (2,3)
add(*t)

5

In [12]:
list(add(*t) for t in [(0,0), [1,1], range(2,4)])  # unpacking

[0, 2, 5]

In [14]:
# starmap fn can make this unpacking easier
from itertools import starmap

list(starmap(add, [(0,0), [1,1], range(2,4)]))  # apply a fn to an unpacked iterable

[0, 2, 5]

In [15]:
from functools import reduce

In [16]:
reduce(lambda x, y: x*y, [1,2,3,4])  # x=1, y=2 -> x=2, y=3 -> x=6, y=4 -> return 24

24

In [18]:
reduce(lambda x, y: x*y, [1,2,3,4], 10)  # initial value = 10 -> x=10, y=1 -> x=10, y=2 -> ... -> return 240

240

In [19]:
# Create fn that shows intermediate calculations done by reduce fn
def sum_(iterable):
    """Yield the running totals of ``iterable``: first item, first two summed, ...

    An empty iterable produces no values.
    """
    it = iter(iterable)
    try:
        acc = next(it)
    except StopIteration:
        # Nothing to accumulate; end cleanly instead of letting StopIteration
        # escape the generator body (which Python turns into RuntimeError).
        return
    yield acc
    for item in it:
        # Build a new object rather than using +=, which would mutate the
        # caller's first element in place when the items are mutable (e.g. lists).
        acc = acc + item
        yield acc

In [21]:
for i in sum_([10, 20, 30]):
    print(i, end=' ')

10 30 60 

In [22]:
def running_reduce(fn, iterable, start=None):
    """Yield every intermediate result of reducing ``iterable`` with ``fn``.

    Args:
        fn: two-argument callable invoked as ``fn(accumulated, item)``.
        iterable: the items to fold in, left to right.
        start: optional initial accumulator. When it is None the first item of
            ``iterable`` seeds the accumulator instead.

    Yields:
        The seed value first, then the accumulator after each further item.
        An empty iterable with no ``start`` yields nothing.
    """
    it = iter(iterable)
    if start is None:
        try:
            acc = next(it)
        except StopIteration:
            # No seed and no items: end cleanly rather than leaking
            # StopIteration out of the generator (a RuntimeError per PEP 479).
            return
    else:
        acc = start
    yield acc
    for item in it:
        acc = fn(acc, item)
        yield acc

In [24]:
list(running_reduce(lambda x,y: x+y, [10, 20, 30]))  # test with addition, we get same result as before

[10, 30, 60]

In [26]:
import operator  # use operator.add to replace the lambda fn
list(running_reduce(operator.add, [10,20,30]))

[10, 30, 60]

In [27]:
list(running_reduce(operator.mul, [10,20,30]))  # try with multiplication

[10, 200, 6000]

In [28]:
list(running_reduce(operator.add, [10,20,30], 10))  # add in a start value

[10, 20, 40, 70]

In [29]:
from itertools import accumulate

list(accumulate([10, 20, 30]))  # default operation is addition

[10, 30, 60]

In [30]:
list(accumulate([10, 20, 30], operator.mul))

[10, 200, 6000]

In [31]:
# The accumulate fn has no positional start parameter (Python 3.8+ adds the keyword-only `initial`), but we can emulate the behavior using chaining
from itertools import chain

list(chain([10], [1,2,3,4]))

[10, 1, 2, 3, 4]

In [32]:
list(accumulate(chain((10,), [1,2,3,4]), operator.mul))  # and now we've simulated a start value = 10

[10, 10, 20, 60, 240]

### Zipping

In [6]:
l1 = [1,2,3,4,5]
l2 = [1,2,3,4]
l3 = [1,2,3]

results = zip(l1, l2, l3)

In [7]:
iter(results) is results, '__next__' in dir(results)  # returns an iterator

(True, True)

In [8]:
list(results)  # zips on the shortest iterator

[(1, 1, 1), (2, 2, 2), (3, 3, 3)]

In [10]:
from itertools import zip_longest

res = zip_longest(l1, l2, l3)
list(res)  # fills missing vals with None

[(1, 1, 1), (2, 2, 2), (3, 3, 3), (4, 4, None), (5, None, None)]

In [12]:
res2 = zip_longest(l1, l2, l3, fillvalue='N/A')
list(res2)  # can specify a fill value for missing vals

[(1, 1, 1), (2, 2, 2), (3, 3, 3), (4, 4, 'N/A'), (5, 'N/A', 'N/A')]

### Grouping

In [13]:
import itertools

with open('cars_2014.csv') as f:
    for row in itertools.islice(f, 0, 20):
        print(row, end='')


make,model
ACURA,ILX
ACURA,MDX
ACURA,RDX
ACURA,RLX
ACURA,TL
ACURA,TSX
ALFA ROMEO,4C
ALFA ROMEO,GIULIETTA
APRILIA,CAPONORD 1200
APRILIA,RSV4 FACTORY APRC ABS
APRILIA,RSV4 R APRC ABS
APRILIA,SHIVER 750
ARCTIC CAT,1000 XT
ARCTIC CAT,500 XT
ARCTIC CAT,550 XT
ARCTIC CAT,700 LTD
ARCTIC CAT,700 SUPER DUTY DIESEL
ARCTIC CAT,700 XT
ARCTIC CAT,90 2X4 4-STROKE


In [14]:
# How many models exist for each make?
from collections import defaultdict

In [15]:
makes = defaultdict(int)

with open('cars_2014.csv') as f:
    next(f)  # skip header row
    for row in f:
        make, _ = row.strip('\n').split(',')
        makes[make] += 1

for k,v in makes.items():
    print(k, v)

ACURA 6
ALFA ROMEO 2
APRILIA 4
ARCTIC CAT 96
ARGO 4
ASTON MARTIN 5
AUDI 27
BENTLEY 2
BLUE BIRD 1
BMW 86
BUGATTI 1
BUICK 5
CADILLAC 7
CAN-AM 61
CHEVROLET 33
CHRYSLER 2
DODGE 7
DUCATI 4
FERRARI 6
FIAT 2
FORD 34
FREIGHTLINER 7
GMC 12
HARLEY DAVIDSON 29
HINO 7
HONDA 91
HUSABERG 4
HUSQVARNA 9
HYUNDAI 13
INDIAN 3
INFINITI 8
JAGUAR 9
JEEP 5
JOHN DEERE 19
KAWASAKI 59
KENWORTH 11
KIA 10
KTM 13
KUBOTA 4
KYMCO 28
LAMBORGHINI 2
LAND ROVER 6
LEXUS 14
LINCOLN 6
LOTUS 1
MACK 9
MASERATI 3
MAZDA 5
MCLAREN 2
MERCEDES-BENZ 60
MINI 3
MITSUBISHI 8
NISSAN 24
PEUGEOT 3
POLARIS 101
PORSCHE 4
RAM 6
RENAULT 4
ROLLS ROYCE 3
SCION 5
SEAT 3
SKI-DOO 67
SMART 1
SRT 1
SUBARU 10
SUZUKI 48
TESLA 2
TOYOTA 19
TRIUMPH 10
VESPA 4
VICTORY 14
VOLKSWAGEN 16
VOLVO 8
YAMAHA 110


In [17]:
# Simplify using groupby
data = (1,2,2,2,3)
list(itertools.groupby(data))  # if you don't specify a key, the data itself is used
# we get (key, group-iterator) pairs; note that list() advances groupby to the end, so the group iterators shown here have already been consumed

[(1, <itertools._grouper at 0x247a258e5e0>),
 (2, <itertools._grouper at 0x247a258e820>),
 (3, <itertools._grouper at 0x247a258eee0>)]

In [18]:
it = itertools.groupby(data)
for group_key, sub_iter in it:
    print(group_key, list(sub_iter))

1 [1]
2 [2, 2, 2]
3 [3]


In [19]:
data = (
    (1, 'abc'),
    (1, 'bcd'),

    (2, 'pyt'),
    (2, 'yth'),
    (2, 'tho'),

    (3, 'hon')
)

In [21]:
groups = itertools.groupby(data, key=lambda x: x[0])
for k,sub_iter in groups:
    print(k, list(sub_iter))

1 [(1, 'abc'), (1, 'bcd')]
2 [(2, 'pyt'), (2, 'yth'), (2, 'tho')]
3 [(3, 'hon')]


In [25]:
with open('cars_2014.csv') as f:
    next(f)
    make_groups = itertools.groupby(f, lambda x: x.split(',')[0])

In [26]:
list(make_groups)  # groupby is a lazy iterator, so make_groups doesn't store actual data since no processing has occurred

ValueError: I/O operation on closed file.

In [27]:
with open('cars_2014.csv') as f:
    next(f)
    make_groups = itertools.groupby(f, lambda x: x.split(',')[0])
    print(list(itertools.islice(make_groups, 1, 5)))

[('ALFA ROMEO', <itertools._grouper object at 0x00000247A24AF700>), ('APRILIA', <itertools._grouper object at 0x00000247A24A4E20>), ('ARCTIC CAT', <itertools._grouper object at 0x00000247A24A4A00>), ('ARGO', <itertools._grouper object at 0x00000247A258EF40>)]


In [29]:
with open('cars_2014.csv') as f:
    next(f)
    make_groups = itertools.groupby(f, lambda x: x.split(',')[0])
    make_counts = ((key, len(models)) for key, models in make_groups)
    print(list(itertools.islice(make_counts, 1, 5)))  # iterators don't have a len method

TypeError: object of type 'itertools._grouper' has no len()

In [31]:
def len_iter(iterator):
    """Count the items of ``iterator`` by consuming it completely."""
    return sum(1 for _ in iterator)

with open('cars_2014.csv') as f:
    next(f)
    make_groups = itertools.groupby(f, lambda x: x.split(',')[0])
    make_counts = ((key, len_iter(models)) for key, models in make_groups)
    print(list(itertools.islice(make_counts, 1, 5)))

[('ALFA ROMEO', 2), ('APRILIA', 4), ('ARCTIC CAT', 96), ('ARGO', 4)]


### Combinatorics

In [37]:
# Cartesian product
from itertools import product, islice

In [38]:
def matrix(n):
    """Yield the lines of an n x n multiplication table, row by row, as strings."""
    factors = range(1, n+1)
    yield from (f'{i} x {j} = {i * j}' for i in factors for j in factors)

In [39]:
list(islice(matrix(10), 10, 20))

['2 x 1 = 2',
 '2 x 2 = 4',
 '2 x 3 = 6',
 '2 x 4 = 8',
 '2 x 5 = 10',
 '2 x 6 = 12',
 '2 x 7 = 14',
 '2 x 8 = 16',
 '2 x 9 = 18',
 '2 x 10 = 20']

In [41]:
l1 = ['x1', 'x2', 'x3', 'x4']
l2 = ['y1', 'y2', 'y3']

list(product(l1, l2))  # cartesian product finds all combinations of n iterables

[('x1', 'y1'),
 ('x1', 'y2'),
 ('x1', 'y3'),
 ('x2', 'y1'),
 ('x2', 'y2'),
 ('x2', 'y3'),
 ('x3', 'y1'),
 ('x3', 'y2'),
 ('x3', 'y3'),
 ('x4', 'y1'),
 ('x4', 'y2'),
 ('x4', 'y3')]

In [46]:
# rewrite using the product fn
def matrix(n):
    """Yield (i, j, i*j) for every pair drawn from 1..n, with i varying slowest."""
    for row, col in product(range(1, n + 1), repeat=2):
        yield row, col, row * col

In [49]:
list(islice(matrix(10), 5, 15))

[(1, 6, 6),
 (1, 7, 7),
 (1, 8, 8),
 (1, 9, 9),
 (1, 10, 10),
 (2, 1, 2),
 (2, 2, 4),
 (2, 3, 6),
 (2, 4, 8),
 (2, 5, 10)]

In [54]:
from itertools import tee
# rewrite using the product fn with tee
def matrix(n):
    # NOTE: the tuple returned by tee() is handed to product() as ONE argument,
    # so product() yields 1-tuples and unpacking each into i, j fails with
    # ValueError when the generator is consumed. Unpack the tuple with * to fix.
    for i,j in product(tee(range(1, n+1), 2)):
        yield(i, j, i*j)

In [55]:
list(matrix(3))  # tee returns a single tuple of iterators, so product receives just one iterable. We need to unpack that tuple into separate arguments

ValueError: not enough values to unpack (expected 2, got 1)

In [56]:
from itertools import tee
# rewrite using the product fn with tee
def matrix(n):
    """Yield (i, j, i*j) for every pair from 1..n, using two tee'd copies of the range."""
    rows, cols = tee(range(1, n+1), 2)
    for i, j in product(rows, cols):
        yield (i, j, i*j)

In [57]:
list(matrix(3))  # after unpacking, we have the expected behavior

[(1, 1, 1),
 (1, 2, 2),
 (1, 3, 3),
 (2, 1, 2),
 (2, 2, 4),
 (2, 3, 6),
 (3, 1, 3),
 (3, 2, 6),
 (3, 3, 9)]

In [61]:
# Create a grid in n-D
from itertools import count, takewhile

def grid(min_val, max_val, step, *, num_dimensions=2):
    """Return an iterator over every point of a regular n-dimensional grid.

    Args:
        min_val: first coordinate value along each axis.
        max_val: largest coordinate value allowed (inclusive).
        step: spacing between consecutive coordinate values.
        num_dimensions: number of axes (keyword-only).

    Returns:
        An ``itertools.product`` iterator of ``num_dimensions``-tuples.

    Raises:
        ValueError: if ``step`` is not positive while ``min_val <= max_val``;
            the axis would then never pass ``max_val`` and building the
            product would not terminate.
    """
    if step <= 0 and min_val <= max_val:
        raise ValueError('step must be positive')
    axis = takewhile(lambda x: x <= max_val, count(min_val, step))
    # one independent copy of the axis per dimension
    axes = tee(axis, num_dimensions)
    return product(*axes)

In [60]:
list(grid(-1, 1, 0.5))

[(-1, -1),
 (-1, -0.5),
 (-1, 0.0),
 (-1, 0.5),
 (-1, 1.0),
 (-0.5, -1),
 (-0.5, -0.5),
 (-0.5, 0.0),
 (-0.5, 0.5),
 (-0.5, 1.0),
 (0.0, -1),
 (0.0, -0.5),
 (0.0, 0.0),
 (0.0, 0.5),
 (0.0, 1.0),
 (0.5, -1),
 (0.5, -0.5),
 (0.5, 0.0),
 (0.5, 0.5),
 (0.5, 1.0),
 (1.0, -1),
 (1.0, -0.5),
 (1.0, 0.0),
 (1.0, 0.5),
 (1.0, 1.0)]

In [67]:
# What are the odds of rolling an 8 with 2 dice?

# Brute force solution
sample_space = list(product(*tee(range(1, 7), 2)))
sample_space[:10]

[(1, 1),
 (1, 2),
 (1, 3),
 (1, 4),
 (1, 5),
 (1, 6),
 (2, 1),
 (2, 2),
 (2, 3),
 (2, 4)]

In [70]:
outcomes = list(filter(lambda x: x[0] + x[1] == 8, sample_space))
outcomes

[(2, 6), (3, 5), (4, 4), (5, 3), (6, 2)]

In [71]:
odds = len(outcomes) / len(sample_space)
odds

0.1388888888888889

In [72]:
from fractions import Fraction
odds = Fraction(len(outcomes), len(sample_space))
odds

Fraction(5, 36)

#### Permutations and Combinations

In [74]:
from itertools import permutations  # order is important for permutations

l1 = 'abc'
list(permutations(l1))

[('a', 'b', 'c'),
 ('a', 'c', 'b'),
 ('b', 'a', 'c'),
 ('b', 'c', 'a'),
 ('c', 'a', 'b'),
 ('c', 'b', 'a')]

In [76]:
list(permutations(l1, 2))  # change the # of els in the permutation -> ('a', 'b') and ('b', 'a') are unique

[('a', 'b'), ('a', 'c'), ('b', 'a'), ('b', 'c'), ('c', 'a'), ('c', 'b')]

In [77]:
# Combinations
from itertools import combinations

list(combinations([1,2,3,4], 2))

[(1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4)]

In [79]:
list(combinations([4,3,2,1], 2))  # output follows the input order: (4,3) appears here where the previous call gave (3,4) -- it is the same pair, and a pair never appears in both orders

[(4, 3), (4, 2), (4, 1), (3, 2), (3, 1), (2, 1)]

In [82]:
from itertools import combinations_with_replacement
list(combinations_with_replacement([1,2,3,4], 2))  # now we can have element "self"-pairs

[(1, 1),
 (1, 2),
 (1, 3),
 (1, 4),
 (2, 2),
 (2, 3),
 (2, 4),
 (3, 3),
 (3, 4),
 (4, 4)]

In [96]:
# Calculate the odds of pulling 4 consecutive aces from a deck of cards
from itertools import cycle, chain

SUITS = 'SHDC'
RANKS = tuple(map(str, range(2, 11))) + tuple('JQKA')

deck = [rank + suit for suit in SUITS for rank in RANKS]
deck

['2S',
 '3S',
 '4S',
 '5S',
 '6S',
 '7S',
 '8S',
 '9S',
 '10S',
 'JS',
 'QS',
 'KS',
 'AS',
 '2H',
 '3H',
 '4H',
 '5H',
 '6H',
 '7H',
 '8H',
 '9H',
 '10H',
 'JH',
 'QH',
 'KH',
 'AH',
 '2D',
 '3D',
 '4D',
 '5D',
 '6D',
 '7D',
 '8D',
 '9D',
 '10D',
 'JD',
 'QD',
 'KD',
 'AD',
 '2C',
 '3C',
 '4C',
 '5C',
 '6C',
 '7C',
 '8C',
 '9C',
 '10C',
 'JC',
 'QC',
 'KC',
 'AC']

In [102]:
# Create the deck using a cartesian product
deck = [rank + suit for suit, rank in product(SUITS, RANKS)]
deck[:8]

['2S', '3S', '4S', '5S', '6S', '7S', '8S', '9S']

In [104]:
from collections import namedtuple
Card = namedtuple('Card', 'rank suit')

deck = [Card(rank, suit) for suit,rank in product(SUITS, RANKS)]
deck[:8]

[Card(rank='2', suit='S'),
 Card(rank='3', suit='S'),
 Card(rank='4', suit='S'),
 Card(rank='5', suit='S'),
 Card(rank='6', suit='S'),
 Card(rank='7', suit='S'),
 Card(rank='8', suit='S'),
 Card(rank='9', suit='S')]

In [108]:
deck = (Card(rank, suit) for suit,rank in product(SUITS, RANKS))

sample_space = combinations(deck, 4)

total = 0
acceptable = 0
for outcome in sample_space:
    total += 1
    if all(card.rank == 'A' for card in outcome):
        acceptable += 1
print(f'total={total}, acceptable={acceptable}')
print('odds = {}'.format(Fraction(acceptable, total)))
print('odds = {:.10f}'.format(acceptable / total))

total=270725, acceptable=1
odds = 1/270725
odds = 0.0000036938


In [109]:
deck = (Card(rank, suit) for suit,rank in product(SUITS, RANKS))

sample_space = combinations(deck, 4)

total = 0
acceptable = 0
for outcome in sample_space:
    total += 1
    if all(map(lambda x: x.rank == 'A', outcome)):  # eliminate the loop inside all by using map
        acceptable += 1
print(f'total={total}, acceptable={acceptable}')
print('odds = {}'.format(Fraction(acceptable, total)))
print('odds = {:.10f}'.format(acceptable / total))

total=270725, acceptable=1
odds = 1/270725
odds = 0.0000036938
