# Generadores

Los generadores son funciones que devuelven un iterable "lazy".

`yield` vs `return`?

In [1]:
import sys

def s(obj):
    """Return the size of ``obj`` as a human-readable megabyte string.

    The size comes from ``sys.getsizeof``, which is shallow: it measures the
    object itself and does not follow references into contained objects.
    """
    megabytes = round(sys.getsizeof(obj) * 1e-6, ndigits=10)
    return f"{megabytes} MB"

In [2]:
from itertools import islice


def take(n, iterable):
    """Return the first ``n`` items of ``iterable`` as a list.

    Fewer than ``n`` items are returned when the iterable runs out first.
    When ``iterable`` is an iterator, the returned items are consumed from it.
    """
    head = islice(iterable, n)
    return list(head)

In [3]:
import time


def my_generator():
    """Yield the integers 0 through 4, sleeping one second before each one.

    The pause makes the laziness visible: a value is only produced at the
    moment the consumer asks for it.
    """
    for number in range(5):
        time.sleep(1)  # stand-in for slow work done before producing a value
        yield number

In [4]:
my_generator()

<generator object my_generator at 0x1090943d0>

In [5]:
generator = my_generator()

In [6]:
for generated in generator:
    print(generated)

0
1
2
3
4


In [121]:
n = 1_000

In [8]:
lista_n = []

for i in range(n):
    lista_n.append(i**2)

In [9]:
s(lista_n)

'0.009032 MB'

In [123]:
lista = [i**2 for i in range(n)]

In [12]:
for i in lista:
    print(i)
    break

0


In [124]:
generador = (i**2 for i in range(n))

In [73]:
def my_generator(n):
    """Lazily yield the squares of 0, 1, ..., ``n`` - 1, one per request."""
    yield from (base**2 for base in range(n))

In [115]:
take(2, my_generator(20))

[0, 1]

In [116]:
mygen = my_generator(20)

In [117]:
take(2, mygen)

[0, 1]

In [118]:
for i in generador:
    print(i)
    break

400


In [119]:
next(generador)

441

In [120]:
s(lista)

'8.697472 MB'

In [67]:
s(generador)

'0.000128 MB'

In [71]:
take(10, lista)

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

In [72]:
take(10, generador)

[100, 121, 144, 169, 196, 225, 256, 289, 324, 361]

# Corrutinas



**Nota:** es mejor estudiar las corrutinas en el contexto de `async` (ver el notebook siguiente).

In [6]:
def bare_bones():
    """Minimal coroutine: print a greeting once, then print every value sent in.

    The first ``next()`` call runs the body up to the first ``yield`` (which
    prints the greeting); from then on each ``send(value)`` prints ``value``.
    """
    print("Mi primera corutina!")
    while True:
        # ``yield`` used as an expression evaluates to whatever was sent in.
        print((yield))


coroutine = bare_bones()

In [7]:
next(coroutine)

Mi primera corutina!


In [8]:
coroutine.send("Toma otro!")

Toma otro!


In [9]:
coroutine.send("Y otro!")

Y otro!


In [10]:
coroutine.close()

In [11]:
coroutine.send("Y ahora?")

StopIteration: 

In [12]:
def bare_bones():
    """Coroutine that prints every value sent in and says goodbye on close.

    Calling ``close()`` on the suspended generator raises ``GeneratorExit`` at
    the ``yield``; catching it here lets the coroutine print a final message.
    """
    print("Mi primera corutina!")
    try:
        while True:
            # ``yield`` used as an expression evaluates to whatever was sent in.
            print((yield))
    except GeneratorExit:
        print("Fin!!")


coroutine = bare_bones()
next(coroutine)
coroutine.send("Toma otro!")
coroutine.close()

Mi primera corutina!
Toma otro!
Fin!!


In [13]:
def filter_line(cadena: str):
    """Coroutine that prints only the received lines containing ``cadena``.

    Prints a start-up message when primed; afterwards every line passed in
    with ``send()`` is printed if ``cadena`` occurs in it and ignored otherwise.
    """
    print("Encendido")
    while True:
        received = yield
        if cadena not in received:
            continue
        print(received)


cor = filter_line("33")
next(cor)

Encendido


In [16]:
cor.send("Marco, age:33")

Marco, age:33


In [None]:
#cor.send("Jessica, age:24")
cor.send("Marco, age:33") # 33
# cor.send("Filipe, age:55")

In [17]:
def coroutine(func):
    """Decorator that returns generator-based coroutines already primed.

    A freshly created generator has to be advanced to its first ``yield``
    before ``send()`` can deliver a value. The wrapper does that ``next()``
    call once, so callers can use ``send()`` immediately.

    Args:
        func: Callable that returns the generator to prime.

    Returns:
        A wrapper with the same call signature as ``func`` that returns the
        primed generator. The wrapper keeps ``func``'s name and docstring.
    """
    # Local import keeps this notebook cell self-contained.
    from functools import wraps

    @wraps(func)  # preserve __name__/__doc__ of the decorated function
    def start(*args, **kwargs):
        cr = func(*args, **kwargs)
        next(cr)  # run up to the first ``yield``
        return cr

    return start


@coroutine
def bare_bones():
    """Print every value sent in; ``@coroutine`` primes it on creation."""
    while True:
        # ``yield`` used as an expression evaluates to whatever was sent in.
        print((yield))


cor = bare_bones()
cor.send("Using a decorator!")

Using a decorator!


In [18]:
cor.close()

### Pipelines

![](https://stackabuse.s3.amazonaws.com/media/coroutines-in-python-1.png)

> source: https://stackabuse.com/coroutines-in-python/

Las corrutinas son candidatas naturales para realizar estas operaciones: pueden pasarse datos entre sí con el método `send()` y también pueden servir como "consumidores" finales.

In [19]:
def productor(cor):
    """Feed the pipeline: send 1, 2, 4, ... to ``cor`` while the value is below 100.

    Args:
        cor: Object with a ``send()`` method (an already primed coroutine)
            that receives each value.
    """
    value = 1
    while value < 100:
        cor.send(value)
        value *= 2


@coroutine
def filtro(num, cor):
    """Pipeline stage: forward to ``cor`` only the values smaller than ``num``.

    Args:
        num: Exclusive upper bound; a value is dropped when ``value < num``
            is false.
        cor: Downstream coroutine that receives the accepted values through
            its ``send()`` method.
    """
    while True:
        incoming = yield
        if not incoming < num:
            continue
        cor.send(incoming)


@coroutine
def printer():
    """Final consumer of the pipeline: print every value it receives."""
    while True:
        # ``yield`` used as an expression evaluates to whatever was sent in.
        print((yield))


prnt = printer()
filt = filtro(50, prnt)
productor(filt)

1
2
4
8
16
32


* No son "thread safe"
* Una corrutina que acepte corrutinas como argumentos NO puede enviarse datos a sí misma (coroutine looping)

### Example (beer)

> source:  
> https://www.pybloggers.com/2018/06/python-generators-tutorial/  
> https://www.dataquest.io/blog/python-generators-tutorial/

In [58]:
beer_data = "recipeData.csv"
# Both stages are generator expressions: the file is opened right away, but
# no line is read until a value is requested from the end of the chain.
# NOTE(review): the file object opened here is never closed explicitly.
lines = (line for line in open(beer_data, encoding="ISO-8859-1"))
# Naive CSV parsing: split on every comma; the trailing newline stays on the
# last field of each row.
lists = (l.split(",") for l in lines)

In [59]:
# Take the column names out of the generator and store them, leaving only data
columns = next(lists)

In [60]:
# Take these columns and use them to create an informative dictionary
beerdicts = (dict(zip(columns, data)) for data in lists)

### Current workflow  
  
  


![img](https://i.imgur.com/HV4koXz.jpg)

In [56]:
s(beerdicts)

'0.000128 MB'

In [57]:
next(beerdicts)

{'BeerID': '1',
 'Name': 'Vanilla Cream Ale',
 'URL': '/homebrew/recipe/view/1633/vanilla-cream-ale',
 'Style': 'Cream Ale',
 'StyleID': '45',
 'Size(L)': '21.77',
 'OG': '1.055',
 'FG': '1.013',
 'ABV': '5.48',
 'IBU': '17.65',
 'Color': '4.83',
 'BoilSize': '28.39',
 'BoilTime': '75',
 'BoilGravity': '1.038',
 'Efficiency': '70',
 'MashThickness': 'N/A',
 'SugarScale': 'Specific Gravity',
 'BrewMethod': 'All Grain',
 'PitchRate': 'N/A',
 'PrimaryTemp': '17.78',
 'PrimingMethod': 'corn sugar',
 'PrimingAmount': '4.5 oz',
 'UserId\n': '116\n'}

import itertools
# NOTE(review): the four iterators returned by tee() are not bound to any
# name here, so this call has no lasting effect -- presumably an experiment.
itertools.tee(beerdicts, 4)

In [61]:
# Tally the number of recipes per beer style. Looping over ``beerdicts``
# pulls every row through the generator pipeline and exhausts it.
beer_counts = {}
for bd in beerdicts:
    beer_counts[bd["Style"]] = beer_counts.get(bd["Style"], 0) + 1

In [None]:
beer_counts.items()

In [64]:
# Scan the tallies for the style with the highest count; on a tie the style
# seen first is kept because only a strictly larger count replaces it.
most_popular_type, most_popular = None, 0

for beer, count in beer_counts.items():
    if count <= most_popular:
        continue
    most_popular_type, most_popular = beer, count


print(most_popular_type)
print(most_popular)

American IPA
11938


We now know that American IPAs are the most popular homebrewed beer in the data set, and we know how many entries they have in the data. Next, we can try figuring out how strong our beer should be. This data is contained in the "ABV" (Alcohol By Volume) key. Since we are working with dictionaries as the output of our generator stream, why don't we add another generator to home in on the exact values we want to output?

In [91]:
global_var = []

def load():
    """Rebuild the global ``beerdicts`` generator from ``recipeData.csv``.

    Re-creates the lazy pipeline (lines -> comma-split rows -> dicts keyed by
    the global ``columns``) and rebinds the module-level ``beerdicts`` name
    to it, so the data can be iterated again.

    NOTE(review): no header row is skipped here, so the header line is
    presumably emitted as the first dict -- confirm consumers tolerate it.
    """
    global beerdicts
    raw_lines = (raw for raw in open("recipeData.csv", encoding="ISO-8859-1"))
    split_rows = (raw.split(",") for raw in raw_lines)
    beerdicts = (dict(zip(columns, fields)) for fields in split_rows)


load()

abv = (float(bd["ABV"]) for bd in beerdicts if bd["Style"] == "American IPA")

### Current workflow
![img](https://i.imgur.com/hPjtGB8.jpg)

In [92]:
average = sum(abv) / most_popular
average

6.444430390350145

![img](https://i.imgur.com/zDmeJgr.jpg)

### Generate infinite data

In [93]:
def alwaysdata():
    """Infinite generator: yield the string ``'result'`` on every request.

    Template for an endless data source. The loop never ends on its own, so
    the consumer decides when to stop asking for values.
    """
    while 1:
        # ... the work that produces the next value would go here ...
        yield 'result'

In [105]:
next(alwaysdata())

'result'

In [106]:
next(alwaysdata())
next(alwaysdata())
next(alwaysdata())
next(alwaysdata())

'result'

In [107]:
next(alwaysdata())

'result'

### "Non-consuming" generators (restarter)

In [None]:
beer_data = "generators/recipeData.csv"


def genfunc(path=None):
    """Yield each line of a CSV file split on commas.

    Every call returns a brand-new generator that reopens the file, so the
    data can be read again from the start simply by calling the function again.

    Args:
        path: File to read. Defaults to the module-level ``beer_data``.

    Yields:
        list[str]: The comma-separated fields of one line; the trailing
        newline is kept on the last field.
    """
    source = beer_data if path is None else path
    # ``with`` closes the file once the generator is exhausted or closed,
    # instead of leaving the handle open until garbage collection.
    with open(source, encoding="ISO-8859-1") as handle:
        for l in handle:
            yield l.split(",")

In [None]:
ahora_si_se_consume = genfunc()

In [None]:
next(genfunc())

### Exercise


* Descargar este archivo: https://raw.githubusercontent.com/realpython/materials/master/generators/techcrunch.csv
* Leer el archivo por líneas
* Separar cada línea en los valores
* Extraer los nombres de las columnas
* Usar estos nombres para crear un generador de diccionarios
* Calcular la suma total de dinero en todas las rondas "A"

* Filtrar donde `round = a` y hacer una suma del `raisedAmt` de todas esas filas.

In [108]:
import requests

url = "https://raw.githubusercontent.com/realpython/materials/master/generators/techcrunch.csv"
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
r = requests.get(url, timeout=30)
# Stop on an HTTP error instead of silently saving an error page as the CSV.
r.raise_for_status()

with open("techcrunch.csv", "wb") as f:
    f.write(r.content)

In [130]:
filename = "techcrunch.csv"
lines = (line for line in open(filename))
listas = (texto.split(",") for texto in lines)

In [135]:
next(listas)

['mycityfaces',
 'MyCityFaces',
 '7',
 'web',
 'Scottsdale',
 'AZ',
 '1-Jan-08',
 '50000',
 'USD',
 'seed\n']

In [155]:
filename = "techcrunch.csv"
lines = (line for line in open(filename))

############   vvvvvv

listas = (texto.rstrip().split(",") for texto in lines)

############   ^^^^^^
##### careful: a line may carry a character that causes trouble
##### (such as the trailing newline left on the last field);
##### look into .rstrip(), or even .replace() or .strip()


# The first row is taken as the column names; only later rows remain in ``listas``.
columnas = next(listas)

In [156]:
companies = (dict(zip(columnas, datos)) for datos in listas)

In [157]:
# next(companies)

💸💸💸💸

In [158]:
# Lazily pick the raised amount (converted to int) of every company whose
# funding round is "a", compared case-insensitively.
money = (
    int(diccionario["raisedAmt"])
    for diccionario in companies
    if diccionario["round"].lower() == "a"
)

In [160]:
money_total_a = sum(money)

In [161]:
money_total_a

4376015000

In [162]:
print(f"💸 El dinero total conseguido en rondas de financiación A = {money_total_a}")

💸 El dinero total conseguido en rondas de financiación A = 4376015000


In [167]:
# Whole exercise in one cell. ``ronda`` selects which funding round to total.
ronda = "a"
filename = "techcrunch.csv"
# Lazy pipeline: lines -> stripped, comma-split rows -> dicts -> amounts.
lines = (line for line in open(filename))
listas = (texto.rstrip().split(",") for texto in lines)
# The first row is taken as the column names; only later rows remain in ``listas``.
columnas = next(listas)
companies = (dict(zip(columnas, datos)) for datos in listas)
money = (
    int(diccionario["raisedAmt"])
    for diccionario in companies
    if diccionario["round"].lower() == ronda
)
# sum() is the step that finally pulls every row through the generators.
money_total_a = sum(money)

print(
    f"💸 El dinero total conseguido en rondas de financiación {ronda.upper()} = {money_total_a}"
)

💸 El dinero total conseguido en rondas de financiación A = 4376015000


💸💸💸💸

In [161]:
money_total_a

4376015000

### Más información y fuentes:
* https://stackabuse.com/coroutines-in-python/
* https://realpython.com/introduction-to-python-generators/

Ricardo Ander-Egg Aguilar

* 🖥: https://ricardoanderegg.com/
* 🐦: https://twitter.com/ricardoanderegg
* 👨🏻‍🎓: https://www.linkedin.com/in/ricardoanderegg/