Comprehensions are a way of creating iterable data structures.

Style: declarative, functional.

Types of comprehensions:
* list:    `[expr(item) for item in iterable]`
* set:    `{expr(item) for item in iterable}`
* dict:    `{key_expr:value_expr for item in iterable}`

In [17]:
words = "Why sometimes I have believed as many \
        as six impossible things before breakfast".split()
[len(word) for word in words]

# Similar to
lengths = []
for word in words:
    lengths.append(len(word))
print(lengths)

from math import factorial
f = [len(str(factorial(x))) for x in range(20)] # Find # of digits in fact
print(f, type(f))

[3, 9, 1, 4, 8, 2, 4, 2, 3, 10, 6, 6, 9]
[1, 1, 1, 1, 2, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 18] <class 'list'>


In [18]:
# Eliminating duplicates
{len(str(factorial(x))) for x in range(20)}

{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 18}

In [23]:
from pprint import pprint as pp
country_to_capital = {'UK': 'London',
                      'Brazil': 'Brazilia',
                      'Morocco': 'Rabat',
                      "Sweden": 'Stockholm'}
# Convert country-to-cap to cap-to-country
capital_to_country = {capital: country for 
                      country, capital in country_to_capital.items()}
pp(capital_to_country)

words = ["hi", "hello", "foxtrot", "hotel"]
{x[0]: x for x in words} # later keys overwrite earlier keys

{'Brazilia': 'Brazil',
 'London': 'UK',
 'Rabat': 'Morocco',
 'Stockholm': 'Sweden'}


{'f': 'foxtrot', 'h': 'hotel'}

In [25]:
# Do not cram too much complexity into comprehensions!
# Better create a separate function
import os
import glob

file_sizes = {os.path.realpath(p): os.stat(p).st_size
              for p in glob.glob('*.py')}
pp(file_sizes)

{'d:\\code\\sandbox\\anaconda\\[pluralsight] Python fundamentals\\exceptional.py': 1006,
 'd:\\code\\sandbox\\anaconda\\[pluralsight] Python fundamentals\\roots.py': 1162,
 'd:\\code\\sandbox\\anaconda\\[pluralsight] Python fundamentals\\word_list.py': 260,
 'd:\\code\\sandbox\\anaconda\\[pluralsight] Python fundamentals\\words.py': 990}


In [9]:
from math import sqrt

def is_prime(x):
    if x<2:
        return False
    for i in range(2, int(sqrt(x))+1):
        if x%i==0:
            return False
    return True

# Filtering clause if ...
primes=[x for x in range(101) if is_prime(x)]
print(primes)

prime_square_divisors = {x*x:(1,x,x*x) for x in range(101) if is_prime(x)}
print()
pp(prime_square_divisors)

[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97]



NameError: name 'pp' is not defined

# Iteration protocols
* Iterable protocol: `iterator = iter(iterable)` (can be passed to the built-in `iter()` function to get an iterator object)
* Iterator protocol: `item = next(iterator)` (can be passed to the built-in `next()` function to fetch the next item)

In [41]:
iterable = ['Spring', 'Summer', 'Autumn', 'Winter']
iterator=iter(iterable)
next(iterator) # Spring
next(iterator) # Summer
next(iterator)
next(iterator)
next(iterator) # Exception: StopIteration

StopIteration: 

In [45]:
def first(iterable):
    iterator = iter(iterable)
    try:
        return next(iterator)
    except StopIteration:
        raise ValueError("iterable is empty")
        
print(first(["1st","2nd"]))
first(set())

1st


ValueError: iterable is empty

# Generators
* Iterators specifying *iterable sequences*
* Lazily evaluated: the next value provided on demand
* Can model infinite sequences (such as data streams)
* are composable into pipelines (for natural stream processing)

In [44]:
# Define generator
def gen123():
    yield 1
    yield 2
    yield 3
    #return # implicit
    
g = gen123() # Each generator's call return separator iterator object 
print(g)
next(g)
next(g)
next(g)
#next(g) # StopIteration

for v in gen123(): # no StopIteration, because it is handled
    print(v)
    
h=gen123()
i=gen123()
print(h is i, h==i)
next(i)
next(i)
next(h)

<generator object gen123 at 0x0000007C800EC4C0>
1
2
3
False False


1

In [70]:
def gen246():
    print("About to yield 2")
    yield 2
    print("About to yield 4")
    yield 4
    print("About to yield 6")
    yield 6
    print("About to return")
    
g = gen246()
next(g)
next(g) #Conitnue running until next yield
next(g)

About to yield 2
About to yield 4
About to yield 6


6

## Stateful generators
* Maintain state in local variables
* Generators can resume execution
* Complex control flow
* Lazy evaluation

In [41]:
# Infinite sequence
def lucas():
    yield 2
    a = 2
    b = 1
    while True:
        yield b
        a, b = b, a + b # tuple unpacking
        
for x, i in zip(lucas(),range(10)):  # zip constructs a tuple generator
    print(x)
    if i==10:
        break

2
1
3
4
7
11
18
29
47
76


## Generator comprehensions
`(expr(item) for item in iterable)`
* Similar syntax to list comprehensions
* Create a generator object
* Lazy evaluation

In [11]:
million_squares = (x*x for x in range(1,100001)) # Lazy generation
list(million_squares) # 40 MB of memory for 1M
list(million_squares) # exhausted iterator

sum(x*x for x in range(1,10000001))
sum(x for x in range(1001) if is_prime(x)) # sum of prime integers <=1000

76127

## itertools module

In [21]:
from itertools import islice, count

# islice(all_primes, 1000)
thousand_primes=islice((x for x in count() if is_prime(x)),1000)
print(thousand_primes)
print(list(thousand_primes))
sum(thousand_primes) # 0 - iterator is exhausted
sum(islice((x for x in count() if is_prime(x)),1000))

<itertools.islice object at 0x0000007C800D6638>
[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283, 293, 307, 311, 313, 317, 331, 337, 347, 349, 353, 359, 367, 373, 379, 383, 389, 397, 401, 409, 419, 421, 431, 433, 439, 443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509, 521, 523, 541, 547, 557, 563, 569, 571, 577, 587, 593, 599, 601, 607, 613, 617, 619, 631, 641, 643, 647, 653, 659, 661, 673, 677, 683, 691, 701, 709, 719, 727, 733, 739, 743, 751, 757, 761, 769, 773, 787, 797, 809, 811, 821, 823, 827, 829, 839, 853, 857, 859, 863, 877, 881, 883, 887, 907, 911, 919, 929, 937, 941, 947, 953, 967, 971, 977, 983, 991, 997, 1009, 1013, 1019, 1021, 1031, 1033, 1039, 1049, 1051, 1061, 1063, 1069, 1087, 1091, 1093, 1097, 1103, 1109, 1117, 1123, 1129, 1151, 1153, 11

3682913

In [25]:
any([False, False, True]) # OR
all([False, False, True]) # AND

print(any(is_prime(x) for x in range(1328,1361)))
print(all(name==name.title() for name in ['London','New York']))

False
True


In [28]:
sunday = [12, 14, 15, 15, 17, 21, 22, 22, 23, 22, 20, 18]
monday = [13, 14, 14, 14, 16, 20, 21, 22, 23, 22, 19, 17]

# zip(sunday, monday) - generates tuples
for sun, mon in zip(sunday, monday):
    print("average=", "{:4.1f}".format((sun+mon)/2))

average= 12.5
average= 14.0
average= 14.5
average= 14.5
average= 16.5
average= 20.5
average= 21.5
average= 22.0
average= 23.0
average= 22.0
average= 19.5
average= 17.5


In [32]:
from itertools import chain
temperatures = chain(sunday, monday) # concatenate in a single list (gen)

all(t>0 for t in temperatures)

True

In [43]:
# Infinitely find prime numbers in lucas series
for x in (p for p in lucas() if is_prime(p)):
    print(x)

2
3
7
11
29
47
199
521
2207
3571
9349
3010349
54018521
370248451
6643838879
119218851371
5600748293801
688846502588399


KeyboardInterrupt: 