# Iterator / Iterable
Iterable objects in the Python ecosystem:
- builtin collections and strings: list, tuple, set, dict, str
- scientific data: numpy.ndarray, pandas.DataFrame|Series
- your data

Mechanism:
- builtin iter() => `__iter__()`  (abstract class Iterable)
- builtin next() => `__next__()`  (abstract class Iterator which inherits Iterable)

In [203]:
import numpy as np
import pandas as pd
from collections.abc import Iterable, Iterator, Sized

In [3]:
cities: list[str] = ["Toulouse", "Nice", "Pau"]
cities.append('Montpellier')
cities.extend(['Narbonne', 'Marseille'])
cities[2:2] = ['Lyon', 'Lille']
cities

['Toulouse',
 'Nice',
 'Lyon',
 'Lille',
 'Pau',
 'Montpellier',
 'Narbonne',
 'Marseille']

In [4]:
del cities[3:]
cities

['Toulouse', 'Nice', 'Lyon']

Updating a list:
- methods: append, pop, extend, insert, remove, clear
- statement: del
- operator [] with an index or a slice

In [6]:
for city in cities:
    print(city, city.upper(), len(city))

Toulouse TOULOUSE 8
Nice NICE 4
Lyon LYON 4


In [7]:
# not everything is iterable
# for x in 1234:
#     print(x)
# TypeError: 'int' object is not iterable

In [8]:
data = np.random.normal(10, 2.5, 1_000_000)
data

array([ 9.76734732,  9.91082219,  6.37252333, ...,  8.99853937,
       10.97762653,  8.6005564 ])

In [9]:
# NB: not the best way (prefer np.sum(data) or data.sum()), but it works
sum(data)

10002683.125113327

In [10]:
type(cities)

list

In [11]:
type(data)

numpy.ndarray

In [12]:
data.__iter__

<method-wrapper '__iter__' of numpy.ndarray object at 0x000001D0385F3030>

In [13]:
cities.__iter__

<method-wrapper '__iter__' of list object at 0x000001D03B03F880>

In [14]:
it = iter(cities) # call __iter__
it

<list_iterator at 0x1d03b027970>

In [15]:
assert isinstance(cities, list)
assert isinstance(cities, Iterable)
assert isinstance(data, np.ndarray)
assert isinstance(data, Iterable)
assert isinstance(it, Iterable)
assert isinstance(it, Iterator)

In [16]:
it = iter(cities)
next(it)
sum(len(city) for city in it) # an iterator is iterable

8

In [17]:
len

<function len(obj, /)>

In [18]:
for i, city in enumerate(cities, start=1):
    print(f"- {i:02d}: {city}")  # f-string

- 01: Toulouse
- 02: Nice
- 03: Lyon


In [19]:
# lazy object: wait for iteration
zip(cities, range(100), 'abcdefghijklmnopqrstuvwxyz')

<zip at 0x1d03b06af00>

In [20]:
list(zip(cities, range(100), 'abcdefghijklmnopqrstuvwxyz'))

[('Toulouse', 0, 'a'), ('Nice', 1, 'b'), ('Lyon', 2, 'c')]

In [21]:
for city, i, letter in zip(cities, range(100), 'abcdefghijklmnopqrstuvwxyz'):
    print(f"{letter} - {i} - {city}")

a - 0 - Toulouse
b - 1 - Nice
c - 2 - Lyon


In [22]:
it = iter(zip(cities, range(100), 'abcdefghijklmnopqrstuvwxyz'))
it

<zip at 0x1d03b064500>

In [23]:
# 1st step
next(it)

('Toulouse', 0, 'a')

In [24]:
# 2nd step
# focus on the unpack mechanism
city, i, letter = next(it)
print(f"{letter} - {i} - {city}")

b - 1 - Nice


In [25]:
# without unpack, less readable code
next_element = next(it)
next_element

('Lyon', 2, 'c')

In [26]:
print(f"{next_element[2]} - {next_element[1]} - {next_element[0]}")

c - 2 - Lyon


In [27]:
# NB: this is what the for loop and every low-level function that iterates do: they stop on StopIteration
try: 
    next(it)
except StopIteration:
    print("I stop with exception: StopIteration")

I stop with exception: StopIteration


In [28]:
z = zip(cities, range(100), 'abcdefghijklmnopqrstuvwxyz')
it = iter(z)
print(z)
print(it)
assert z is it
assert iter(iter(iter(it))) is z

<zip object at 0x000001D03B066FC0>
<zip object at 0x000001D03B066FC0>


In [29]:
print(min(data))
print(max(data))
print(sum(data))

-1.5914648517894996
21.704464082038854
10002683.125113327


In [30]:
print(np.min(data))
print(np.max(data))
print(np.sum(data))

-1.5914648517894996
21.704464082038854
10002683.12511321


In [31]:
# ndarray methods: same reductions as np.min / np.max / np.sum
print(data.min())
print(data.max())
print(data.sum())

-1.5914648517894996
21.704464082038854
10002683.12511321


In [32]:
t1 = 'Pau', '64000', 77_000
t2 = ('Toulouse', '31000', 500_000)
t3: tuple[str, str, int] = 'Nice', '06000', 200_000
for t in t1, t2, t3:
    print('tuple:', t)
    print('1st element:', t[0])
    print('last element:', t[-1])
    print('slice:', t[:2])
    print('type:', type(t))
    print('length:', len(t))
    print('is iterable:', isinstance(t, Iterable))
    print()

tuple: ('Pau', '64000', 77000)
1st element: Pau
last element: 77000
slice: ('Pau', '64000')
type: <class 'tuple'>
length: 3
is iterable: True

tuple: ('Toulouse', '31000', 500000)
1st element: Toulouse
last element: 500000
slice: ('Toulouse', '31000')
type: <class 'tuple'>
length: 3
is iterable: True

tuple: ('Nice', '06000', 200000)
1st element: Nice
last element: 200000
slice: ('Nice', '06000')
type: <class 'tuple'>
length: 3
is iterable: True



In [65]:
# t1[0] = None
# TypeError: 'tuple' object does not support item assignment

In [67]:
set_city = set(cities)
set_city

{'Lyon', 'Nice', 'Toulouse'}

In [73]:
assert isinstance(set_city, Iterable)
print('length:', len(set_city))
for city in set_city:
    print(city)

length: 3
Lyon
Nice
Toulouse


In [77]:
# TypeError: 'set' object is not subscriptable
# set_city[0]

In [81]:
# no duplicates: adding an element that is already present changes nothing
set_city.add('Lyon')
set_city.add('Pau')
set_city

{'Lyon', 'Nice', 'Pau', 'Toulouse'}

In [83]:
cities2 = sorted(set_city)
cities2

['Lyon', 'Nice', 'Pau', 'Toulouse']

## generators

### expression for, map/reduce

In [124]:
# a `for` expression between parentheses is a generator expression
g = (len(city) for city in set_city)
g

<generator object <genexpr> at 0x000001D03D5F20C0>

In [125]:
# the generator expression is the only argument: no extra parentheses needed
sum(len(city) for city in set_city)

19

In [126]:
# with other arguments, the generator expression must keep its own parentheses
sum((len(city) for city in set_city), 100)

119

In [127]:
assert isinstance(g, Iterable)
assert isinstance(g, Iterator)
assert not isinstance(g, Sized)

assert iter(g) is g

In [128]:
next(g)

4

In [136]:
# list comprehension
lengths = [len(city) for city in set_city]
lengths

[4, 4, 8, 3]

In [144]:
# dict comprehension
dict_city_length ={city: len(city) for city in set_city}
dict_city_length

{'Lyon': 4, 'Nice': 4, 'Toulouse': 8, 'Pau': 3}

In [158]:
# comprehension with a filter (`if` clause)
[x**2 for x in data if x > 21]

[442.0084319356163,
 470.8017009308806,
 441.69167166113687,
 444.6650120271503,
 471.0837610885147,
 441.77482278273726]

In [162]:
list(
    map(lambda x: x**2,
        filter(lambda x: x>21, data)
    )
)

[442.0084319356163,
 470.8017009308806,
 441.69167166113687,
 444.6650120271503,
 471.0837610885147,
 441.77482278273726]

In [164]:
def filter_greater20(x) -> bool:
    """Predicate for filter(): tell whether ``x`` is strictly greater than 20."""
    is_above_threshold = x > 20
    return is_above_threshold

def compute(x):
    """Mapping step for map(): return the square of ``x`` plus one."""
    squared = x**2
    return squared + 1

In [168]:
list(map(compute, filter(filter_greater20, data)))

[440.36539654634163,
 438.10204342427073,
 408.1677507150128,
 412.3029211950237,
 401.89084265646386,
 436.4509994862474,
 416.90129516910713,
 405.8441677846338,
 436.84545575858283,
 404.88445550291897,
 443.0084319356163,
 403.90904320096155,
 413.31610451716904,
 405.8465230171499,
 422.7798507653327,
 471.8017009308806,
 401.0666463907652,
 401.72584313116806,
 442.69167166113687,
 414.5458007819649,
 401.585711311246,
 403.272893949475,
 411.71487213310473,
 405.7471459058514,
 402.2906901479315,
 445.6650120271503,
 403.56934583691697,
 405.9129830031492,
 416.97664082729926,
 436.7789565864773,
 407.7258324142892,
 404.49392766621804,
 402.3074054678494,
 407.18963941256266,
 417.36534086906437,
 422.1072556564673,
 435.17971236861194,
 418.8244945004565,
 472.0837610885147,
 442.77482278273726,
 408.4269563015705]

### yield

In [173]:
def gen0():
    """Generator function yielding the three values 1, 11 and 111, then stopping."""
    for value in (1, 11, 111):
        yield value
    

In [175]:
for v in gen0():
    print(v)

1
11
111


In [177]:
g = gen0()
g

<generator object gen0 at 0x000001D03D616A30>

In [179]:
next(g)

1

In [181]:
next(g)

11

In [183]:
## infinite generator
def gen1():
    """Endless generator of the powers of 11: 1, 11, 121, 1331, ...

    It never stops by itself, so the caller has to bound the iteration
    (for instance by zipping it with a range).
    """
    power = 1
    while True:
        yield power
        power = power * 11
    

In [191]:
for v, _ in zip(gen1(), range(10)):
    print(v)

1
11
121
1331
14641
161051
1771561
19487171
214358881
2357947691


In [201]:
s = sum(v for v, _ in zip(gen1(), range(1000)))
s

2469932918005826334124088385085221477709733385238396234869182951830739390375433175367866116456946191973803561189036523363533798726571008961243792655536655282201820357872673322901148243453211756020067624545609411212063417307681204817377763465511222635167942816318177424600927358163388910854695041070577642045540560963004207926938348086979035423732739933235077042750354729095729602516751896320598857608367865475244863114521391548985943858154775884418927768284663678512441565517194156946312753546771163991252528017732162399536497445066348868438762510366191040118080751580689254476068034620047646422315123643119627205531371694188794408120267120500325775293645416335230014278578281272863450085145349124727476223298887655183167465713337723258182649072572861625150703747030550736347589416285606367521524529665763903537989935510874657420361426804068643262800901916285076966174176854351055183740078763891951775452021781225066361670593917001215032839838911476044840388663443684517735022039957481918726697789827

In [207]:
def gen2():
    """Yield 1, 11 and 111 one by one, then delegate to a list with ``yield from``."""
    for value in (1, 11, 111):
        yield value
    # yield from re-yields every element of the given iterable
    yield from [1111, 11111, 111111]

In [209]:
list(gen2())

[1, 11, 111, 1111, 11111, 111111]

### Exercise: Fibonacci
Yield the series 0 1 1 2 3 5 8 ... (either unbounded, or limited to a given number of terms)

In [239]:
def fibo_inf():
    """Generate the Fibonacci series 0, 1, 1, 2, 3, 5, 8, ... without end."""
    current, following = 0, 1
    while True:
        yield current
        # slide the window of two consecutive terms one step forward
        current, following = following, current + following

def fibo(n: int):
    """Generate the first ``n`` terms of the Fibonacci series.

    Args:
        n: number of terms to produce; nothing is produced when n <= 0.

    Yields:
        The terms 0, 1, 1, 2, 3, ... taken from ``fibo_inf()``.
    """
    # range(n) comes first in zip(): zip() polls its iterables left to right
    # and stops at the first exhausted one, so the infinite generator is never
    # asked for an (n+1)-th term that would be computed and thrown away.
    yield from (term for _, term in zip(range(n), fibo_inf()))

def fibo2(n: int):
    """Generate the first ``n`` Fibonacci terms by calling next() on fibo_inf()."""
    source = fibo_inf()
    # one explicit next() per requested term; fibo_inf() never runs out
    yield from (next(source) for _ in range(n))

def fibo3(n: int):
    """Generate the first ``n`` Fibonacci terms without relying on another generator.

    Nothing is produced when ``n`` is zero or negative.
    """
    previous, current = 0, 1
    for _ in range(n):
        yield previous
        # move on to the next pair of consecutive terms
        previous, current = current, previous + current

In [215]:
# call generator function => Generator
g = fibo_inf()

# consume generator (iterate)
for _ in range(10):
    print(next(g))

0
1
1
2
3
5
8
13
21
34


In [227]:
fibo(10)

<generator object fibo at 0x000001D03D450120>

In [233]:
list(fibo(10))

[0, 1, 1, 2, 3, 5, 8, 13, 21, 34]

In [235]:
fibo2(10)

<generator object fibo2 at 0x000001D03D891620>

In [241]:
list(fibo2(10))

[0, 1, 1, 2, 3, 5, 8, 13, 21, 34]

In [243]:
list(fibo3(10))

[0, 1, 1, 2, 3, 5, 8, 13, 21, 34]