# Generadores

Los generadores son funciones que devuelven un iterable "lazy".

`yield` vs `return`?

In [1]:
import sys

def s(obj):
    return f"{round((sys.getsizeof(obj) * 1e-6), ndigits=10)} MB"

In [2]:
from itertools import islice


def take(n, iterable):
    "Return first n items of the iterable as a list"
    return list(islice(iterable, n))

In [3]:
import time


def my_generator():
    for i in range(5):
        time.sleep(1)
        yield i

In [4]:
my_generator()

<generator object my_generator at 0x10c0dc4a0>

In [7]:
next(my_generator())

0

In [8]:
generator = my_generator()

In [9]:
for g in generator:
    print(g)

0
1
2
3
4


In [10]:
next(generator)

StopIteration: 

In [62]:
# lista_n = []

# for i in range(n):
#     lista_n.append(i**2)

In [18]:
# for i in lista:
#     print(i)
#     break

In [37]:
next(generador)

4

In [38]:
def my_generator(n):
    for i in range(n):
        yield i**2

In [42]:
take(2, my_generator(20))

[0, 1]

In [43]:
mygen = my_generator(20)

In [50]:
take(5, mygen)

[144, 169, 196, 225, 256]

In [51]:
# for i in generador:
#     print(i)
#     break

In [119]:
next(generador)

441

In [119]:
n = 10_000_000

In [120]:
generador = (i**2 for i in range(n))

In [121]:
lista = [i**2 for i in range(n)]

In [122]:
s(lista)

'81.528048 MB'

In [123]:
s(generador)

'0.000112 MB'

In [89]:
take(10, lista)

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

In [115]:
take(10, generador)

[62500, 63001, 63504, 64009, 64516, 65025, 65536, 66049, 66564, 67081]

# Corutinas



**Nota:** las corrutinas es mejor estudiarlas en el contexto de `async` (ver notebook siguiente)

In [124]:
def bare_bones():
    print("Mi primera corutina!")
    while True:
        value = yield
        print(value)


coroutine = bare_bones()

In [126]:
next(coroutine)

Mi primera corutina!


In [127]:
coroutine.send("hola")

hola


In [128]:
coroutine.send("Toma otro!")

Toma otro!


In [129]:
coroutine.send("Y otro!")

Y otro!


In [130]:
coroutine.close()

In [131]:
coroutine.send("Y ahora?")

StopIteration: 

In [157]:
def b2():
    print("Mi primera corutina!")
    while True:
        myc = yield
        print(myc.hola())


c2 = b2()

In [158]:
c2.send(None)

Mi primera corutina!


In [168]:
def f():
    print(1)

In [160]:
class C:
    def __init__(self, t):
        self.t = t
    def hola(self):
        print(self.t)

In [166]:
c = C("cascascas")

In [167]:
c2.send(c)

cascascas
None


In [169]:
def bare_bones():
    print("Mi primera corutina!")
    try:
        while True:
            value = yield
            print(value)
    except GeneratorExit:
        print("Fin!!")


coroutine = bare_bones()
# next(coroutine)
coroutine.send(None)
coroutine.send("Toma otro!")
coroutine.close()

Mi primera corutina!
Toma otro!
Fin!!


In [170]:
def filter_line(cadena: str):
    print("Encendido")
    while True:
        line = yield
        if cadena in line:
            print(line)


cor = filter_line("33")
next(cor)

Encendido


In [171]:
cor.send("Marco, age:33")

Marco, age:33


In [173]:
cor.send("Jessica, age:24")
cor.send("Marco, age:33") # 33
cor.send("Filipe, age:55")

Marco, age:33


In [174]:
def coroutine(func):
    def start(*args, **kwargs):
        cr = func(*args, **kwargs)
        next(cr)
        return cr

    return start


@coroutine
def bare_bones():
    while True:
        value = yield
        print(value)


cor = bare_bones()
cor.send("Using a decorator!")

Using a decorator!


In [18]:
cor.close()

### Pipelines

![](https://stackabuse.s3.amazonaws.com/media/coroutines-in-python-1.png)

> source: https://stackabuse.com/coroutines-in-python/

Las corutinas son candidatas naturales para realizar estas operaciones, pueden pasarse datos entre sí con los métodos `send()` y también pueden servir como "consumidores" finales.

In [175]:
def productor(cor):
    n = 1
    while n < 100:
        cor.send(n)
        n = n * 2


@coroutine
def filtro(num, cor):
    while True:
        n = yield
        if n < num:
            cor.send(n)


@coroutine
def printer():
    while True:
        n = yield
        print(n)


prnt = printer()
filt = filtro(50, prnt)
productor(filt)

1
2
4
8
16
32


* No son "thread safe"
* Una corutina que acepte corutinas como argumentos NO puede enviarse a si misma (coroutine looping)

## Ejercicio: crear varios generadores:

1. Cada linea del archivo
2. Cada elemento del generador `1`, convertido a numero entero
3. Cada elemento al cuadrado `**2` del generador `2`

  * ¿Cuánto suman todos esos números que hemos elevado al cuadrado?

4. Crear una función generadora que me devuelva todos los múltiplos (infinitos) de un número.

In [None]:
from base64 import b64decode, b64encode
import requests

u1 = "aHR0cHM6Ly9yYXcuZ2l0aHVidXNlcmNvbnRlbnQuY29tL3BvbHlyYW5kL2FkdmVudG9mY29kZV8yMDIwL21haW4vZDAxL2lucHV0X3AxLnR4dA=="


with open("input.txt", "w") as f:
    f.write(requests.get(b64decode(u1).decode()).text)


with open("input.txt") as f:
    data = f.read()

### Example (beer)

> source?  
> https://www.pybloggers.com/2018/06/python-generators-tutorial/  
> https://www.dataquest.io/blog/python-generators-tutorial/

In [181]:
import requests

response = requests.get("https://raw.githubusercontent.com/thecbp/blog_data/master/recipeData.csv")
response.raise_for_status()
text = response.text

with open("recipeData.csv", "w") as f:
    f.write(text)

In [182]:
beer_data = "recipeData.csv"
lines = (line for line in open(beer_data, encoding="ISO-8859-1"))
lists = (l.split(",") for l in lines)

In [183]:
# Take the column names out of the generator and store them, leaving only data
columns = next(lists)

In [190]:
dict(zip([1,2,3], [6,7,8]))

{1: 6, 2: 7, 3: 8}

In [185]:
# Take these columns and use them to create an informative dictionary
beerdicts = (dict(zip(columns, data)) for data in lists)

### Current workflow  
  
  


![img](https://i.imgur.com/HV4koXz.jpg)

In [191]:
s(beerdicts)

'0.000112 MB'

In [57]:
next(beerdicts)

{'BeerID': '1',
 'Name': 'Vanilla Cream Ale',
 'URL': '/homebrew/recipe/view/1633/vanilla-cream-ale',
 'Style': 'Cream Ale',
 'StyleID': '45',
 'Size(L)': '21.77',
 'OG': '1.055',
 'FG': '1.013',
 'ABV': '5.48',
 'IBU': '17.65',
 'Color': '4.83',
 'BoilSize': '28.39',
 'BoilTime': '75',
 'BoilGravity': '1.038',
 'Efficiency': '70',
 'MashThickness': 'N/A',
 'SugarScale': 'Specific Gravity',
 'BrewMethod': 'All Grain',
 'PitchRate': 'N/A',
 'PrimaryTemp': '17.78',
 'PrimingMethod': 'corn sugar',
 'PrimingAmount': '4.5 oz',
 'UserId\n': '116\n'}

import itertools
itertools.tee(beerdicts, 4)

In [192]:
beer_counts = {}
for bd in beerdicts:
    if bd["Style"] not in beer_counts:
        beer_counts[bd["Style"]] = 1
    else:
        beer_counts[bd["Style"]] += 1

In [194]:
beer_counts.items()

dict_items([('Holiday/Winter Special Spiced Beer', 331), ('American IPA', 11940), ('Belgian Blond Ale', 496), ('American Pale Ale', 7581), ('Imperial IPA', 1478), ('Cream Ale', 829), ('Robust Porter', 897), ('Bohemian Pilsener', 344), ('Saison', 2617), ('Northern English Brown', 284), ('English IPA', 784), ('Traditional Bock', 102), ('Premium American Lager', 108), ('Belgian Golden Strong Ale', 354), ('N/A', 596), ('Double IPA', 864), ('Blonde Ale', 1753), ('Light American Lager', 72), ('German Pilsner (Pils)', 466), ('American Brown Ale', 1152), ('Oatmeal Stout', 961), ('Specialty Beer', 430), ('American Amber Ale', 2038), ('Kï¿½lsch', 869), ('Witbier', 1072), ('Weizen/Weissbier', 919), ('Trappist Single', 123), ('Russian Imperial Stout', 929), ('Specialty IPA: Black IPA', 638), ('Sweet Stout', 919), ('Strong Scotch Ale', 230), ('Belgian Tripel', 563), ('American Stout', 1268), ('Belgian Pale Ale', 625), ('Dark American Lager', 58), ('Dry Stout', 484), ('Belgian Dark Strong Ale', 538)

In [195]:
most_popular = 0
most_popular_type = None

for beer, count in beer_counts.items():
    if count > most_popular:
        most_popular = count
        most_popular_type = beer
        
        
print(most_popular_type)
print(most_popular)

American IPA
11940


We now know that American IPAs are the most popular homebrewed beer in the data set, and we know how many entries they have in the data. We can try figuring out how strong our beer should be. This data is contained in the "ABV" (Alcohol By Volume) key. Since we are working with dictionaries as the output of our generator stream, why don't we add another generator to hone in on the exact values we want to output.

In [218]:
global_var = []

def load():
    lines = (line for line in open("recipeData.csv", encoding="ISO-8859-1"))
    lists = (l.split(",") for l in lines)
    global beerdicts
    beerdicts = (dict(zip(columns, data)) for data in lists)


load()

In [219]:
abv = (float(bd["ABV"]) for bd in beerdicts if bd["Style"] == "American IPA")

### Current workflow
![img](https://i.imgur.com/hPjtGB8.jpg)

In [220]:
average = sum(abv) / most_popular
average

6.44429396984925

![img](https://i.imgur.com/zDmeJgr.jpg)

### Generate infinite data

In [93]:
def alwaysdata():
    while True:
        # code
        # code
        yield 'result'

In [105]:
next(alwaysdata())

'result'

In [106]:
next(alwaysdata())
next(alwaysdata())
next(alwaysdata())
next(alwaysdata())

'result'

In [107]:
next(alwaysdata())

'result'

### "Non-consuming" generators (restarter)

In [None]:
beer_data = "generators/recipeData.csv"


def genfunc():
    for l in open(beer_data, encoding="ISO-8859-1"):
        yield l.split(",")

In [None]:
ahora_si_se_consume = genfunc()

In [None]:
next(genfunc())

### Exercise


* Descargar este archivo: https://raw.githubusercontent.com/realpython/materials/master/generators/techcrunch.csv
* Leer el archivo por lineas
* Separar cada linea en los valores
* Extraer los nombres de las columnas
* Usar estos nombres para crear un generador de diccionarios
* Calcular la suma total de dinero en todas las rondas "A"

* Filtrar donde `round = a` y hacer una suma del `raisedAmt` de todas esas columnas.

In [230]:
import requests

url = "https://raw.githubusercontent.com/realpython/materials/master/generators/techcrunch.csv"
r = requests.get(url)

with open("techcrunch.csv", "wb") as f:
    f.write(r.content)

In [233]:
filename = "techcrunch.csv"
lines = (line for line in open(filename))
listas = (texto.split(",") for texto in lines)

In [232]:
next(listas)

['permalink',
 'company',
 'numEmps',
 'category',
 'city',
 'state',
 'fundedDate',
 'raisedAmt',
 'raisedCurrency',
 'round\n']

In [270]:
filename = "techcrunch.csv"
lines = (line for line in open(filename))

############   vvvvvv

listas = (texto.rstrip().split(",") for texto in lines)

############   ^^^^^^
##### cuidado que puede haber por ahí
##### algun caracter que nos de problemas
##### revisad el método .rstrip() o incluso un .replace() o .strip()


columnas = next(listas)

In [271]:
companies = (dict(zip(columnas, datos)) for datos in listas)

In [272]:
dinero = []
for diccionario in companies:
    try:
        cantidad = int(diccionario["raisedAmt"])
        if diccionario["round"].lower() == "a":
            dinero.append(cantidad)
    except:
        continue

In [240]:
next(companies)

{'permalink': 'lifelock',
 'company': 'LifeLock',
 'numEmps': '',
 'category': 'web',
 'city': 'Tempe',
 'state': 'AZ',
 'fundedDate': '1-May-07',
 'raisedAmt': '6850000',
 'raisedCurrency': 'USD',
 'round': 'b'}

💸💸💸💸

In [243]:
money = (
    int(diccionario["raisedAmt"])
    for diccionario in companies
    if diccionario["round"].lower() == "a"
)

In [244]:
money_total_a = sum(money)

In [245]:
money_total_a

4376015000

In [246]:
print(f"💸 El dinero total conseguido en rondas de financiación A = {money_total_a}")

💸 El dinero total conseguido en rondas de financiación A = 4376015000


In [247]:
ronda = "b"
filename = "techcrunch.csv"
lines = (line for line in open(filename))
listas = (texto.rstrip().split(",") for texto in lines)
columnas = next(listas)
companies = (dict(zip(columnas, datos)) for datos in listas)
money = (
    int(diccionario["raisedAmt"])
    for diccionario in companies
    if diccionario["round"].lower() == ronda
)
money_total_a = sum(money)

print(
    f"💸 El dinero total conseguido en rondas de financiación {ronda.upper()} = {money_total_a}"
)

💸 El dinero total conseguido en rondas de financiación B = 4600210000


💸💸💸💸

In [161]:
money_total_a

4376015000

### Más información y fuentes:
* https://stackabuse.com/coroutines-in-python/
* https://realpython.com/introduction-to-python-generators/

Ricardo Ander-Egg Aguilar

* 🖥: https://ricardoanderegg.com/
* 🐦: https://twitter.com/ricardoanderegg
* 👨🏻‍🎓: https://www.linkedin.com/in/ricardoanderegg/