[View in Colaboratory](https://colab.research.google.com/github/rdenadai/jupyter-pool/blob/master/nomes.ipynb)

In [1]:
# Install the libxml2 / libxslt development packages with apt
# (presumably required to build or run lxml — TODO confirm).
!apt-get install libxml2-dev libxslt-dev
# Install the Python packages with pip. aiomultiprocess, aiohttp and numba are
# imported by this notebook and lxml is the parser name handed to BeautifulSoup;
# cchardet and aiodns are not imported directly in the visible cells
# (presumably optional aiohttp accelerators — TODO confirm).
!pip install aiomultiprocess aiohttp cchardet aiodns numba lxml

Reading package lists... Done
Building dependency tree       
Reading state information... Done
Note, selecting 'libxslt1-dev' instead of 'libxslt-dev'
libxslt1-dev is already the newest version (1.1.29-2.1ubuntu1).
libxml2-dev is already the newest version (2.9.4+dfsg1-4ubuntu1.2).
0 upgraded, 0 newly installed, 0 to remove and 0 not upgraded.


In [0]:
import asyncio
import functools
import concurrent.futures
from collections import namedtuple
from bs4 import BeautifulSoup as bs
import aiohttp
from aiomultiprocess import Pool
from numba import jit
import numpy as np
import pandas as pd

In [0]:
# Nome = namedtuple('Nome', ['href', 'nome'])

async def fetch(url):
    """Issue a GET request for ``url`` and return the response body as text.

    A new ``aiohttp.ClientSession`` is opened for the call and closed again
    before the function returns.
    """
    async with aiohttp.ClientSession() as client, client.get(url) as response:
        body = await response.text()
    return body


async def start(urls):
    """Run ``fetch`` over every entry of ``urls`` using an aiomultiprocess ``Pool``.

    Returns whatever ``Pool.map(fetch, urls)`` produces once awaited.
    """
    async with Pool() as workers:
        return await workers.map(fetch, urls)


def salvar_nomes(filename, nomes):
    """Write every item of ``nomes`` to ``filename``, one per line.

    The file is created or truncated and always encoded as UTF-8, so accented
    names are stored the same way regardless of the platform's default
    encoding.

    Args:
        filename: Path of the text file to write.
        nomes: Iterable of names; each one is formatted with ``str()`` rules
            and followed by a newline.
    """
    with open(filename, 'w', encoding='utf-8') as h:
        h.writelines(f'{nome}\n' for nome in nomes)


def parse_names(response, name_size=7):
    """Extract the names of an exact length from one HTML page.

    The markup is parsed with BeautifulSoup using the ``lxml`` parser. Every
    ``<a>`` element carrying the ``lista-nome`` class is inspected and its
    stripped text is kept when it is exactly ``name_size`` characters long.

    Args:
        response: HTML markup of a single page.
        name_size: Exact number of characters a name must have to be kept.

    Returns:
        str: The kept names joined with ``'|'``; an empty string when no
        name qualifies.
    """
    parser = bs(response, 'lxml')
    data = []
    for link in parser.find_all('a', class_='lista-nome'):
        texto = link.string
        # ``.string`` is None when the anchor does not wrap a single text
        # node (empty tag or several children); skip it instead of failing
        # on ``None.strip()``.
        if texto is None:
            continue
        nome = texto.strip()
        if len(nome) == name_size:
            data.append(nome)
    return '|'.join(data)


def name_split(stringNomes):
    """Split ``'|'``-joined name strings and yield the combined non-empty names.

    This is a generator that yields exactly one item: an iterable over every
    non-empty piece obtained by splitting each string of ``stringNomes`` on
    ``'|'``, in input order.

    Implemented in plain Python on purpose: the work is string splitting and
    list building, which is not numeric code a numba ``jit`` can compile to
    fast machine code, and extending one list avoids re-copying an array on
    every iteration.

    Args:
        stringNomes: Iterable of strings, each holding names separated by
            ``'|'`` (possibly empty).

    Yields:
        An iterator over the non-empty names.
    """
    print('Realizando split dos nomes...')
    nomes = []
    for lre in stringNomes:
        nomes.extend(lre.split('|'))
    # Empty pieces come from empty input strings; drop them.
    yield filter(None, nomes)
    

def retorna_nomes(sexo='M', name_size=7, pages=150):
    """Download the name listing pages for one gender and return the sorted names.

    The pages are fetched through ``start``, parsed in a process pool with
    ``parse_names`` and merged with ``name_split``.

    Args:
        sexo: ``'M'`` for the masculine listing or ``'F'`` for the feminine
            one (case-insensitive).
        name_size: Exact name length forwarded to ``parse_names``.
        pages: Exclusive upper bound of the page numbers; pages
            ``1 .. pages - 1`` are requested.

    Returns:
        list: The collected names in sorted order, or an empty list when the
        run is interrupted with Ctrl-C / kernel interrupt.

    Raises:
        ValueError: If ``sexo`` is neither ``'M'`` nor ``'F'``.
    """
    secoes = {'M': 'nomes-masculinos', 'F': 'nomes-femininos'}
    chave = sexo.upper()
    # Validate up front: an unknown value used to surface later as an
    # unrelated unbound-variable error.
    if chave not in secoes:
        raise ValueError(f"sexo inválido: {sexo!r} (use 'M' ou 'F')")
    base = f'https://www.dicionariodenomesproprios.com.br/{secoes[chave]}'
    urls = [f'{base}/{i}' for i in range(1, pages)]

    # Obtained before the try block so the interrupt handler can always use it.
    loop = asyncio.get_event_loop()
    try:
        responses = loop.run_until_complete(start(urls))
        print('Terminado o download das páginas...')
        with concurrent.futures.ProcessPoolExecutor(max_workers=4) as exc:
            print('Iniciando parser das páginas...')
            result = exc.map(functools.partial(parse_names, name_size=name_size), responses, chunksize=5)
            print('Extraindo nomes do resultado...')
            # name_split is a generator that yields a single iterable of names.
            nomes = list(name_split(result))[0]
            return sorted(nomes)
    except KeyboardInterrupt:
        loop.stop()
        # Nothing usable was collected; return an empty result instead of
        # indexing into an empty list.
        return []


def side_by_side(M, N):
    """Cut the sequence ``M`` into consecutive slices of at most ``N`` items.

    Returns a list of slices; the last one is shorter when ``len(M)`` is not
    a multiple of ``N``.
    """
    pedacos = []
    for inicio in range(0, len(M), N):
        fim = inicio + N
        pedacos.append(M[inicio:fim])
    return pedacos

In [78]:
# N is the chunk size later handed to side_by_side; pages is forwarded to
# retorna_nomes for both runs.
N, pages = 20, 100

# Masculine listing.
print('Executando rotina (sexo masculino)...')
M = retorna_nomes('M', pages=pages)

# Feminine listing.
print('Executando rotina (sexo feminino)...')
F = retorna_nomes('F', pages=pages)

Executando rotina (sexo masculino)...
Terminado o download das páginas...
Iniciando parser das páginas...
Extraindo nomes do resultado...
Realizando split dos nomes...
Executando rotina (sexo feminino)...
Terminado o download das páginas...
Iniciando parser das páginas...
Extraindo nomes do resultado...
Realizando split dos nomes...


In [79]:
print('Apresentando resultados...')
# Render both lists with the same steps instead of two copy-pasted blocks.
for indice, (titulo, nomes) in enumerate([('Nomes Masculinos:', M), ('Nomes Femininos:', F)]):
    if indice:
        # Blank separator between the two tables.
        print('\n')
    print(titulo)
    linhas = side_by_side(nomes, N)
    # Size the header from the widest row: with fewer than N names there is a
    # single short row, and passing N column labels for it makes the
    # DataFrame constructor raise. An empty list keeps the N-column header.
    largura = max(map(len, linhas), default=N)
    df = pd.DataFrame(linhas, columns=['' for _ in range(largura)])
    # Cells missing from a short final row are null; show them as empty.
    df[df.isnull()] = ''
    display(df)
# Optional: uncomment to persist both lists to text files.
# print('Salvar arquivos...')
# print('Salvando nomes masculinos...')
# salvar_nomes('nomes_masculinos.txt', M)
# print('Salvando nomes femininos...')
# salvar_nomes('nomes_femininos.txt', F)

Apresentando resultados...
Nomes Masculinos:


Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20
0,Absalom,Acelino,Adalton,Adelino,Adelson,Aderbal,Adonias,Adriano,Adílson,Afrânio,Agnaldo,Aguilar,Ajagunã,Alberto,Alcindo,Alcídes,Alencar,Alfonso,Alfredo,Allyson
1,Almeida,Aloísio,Aluísio,Amâncio,Amândio,Américo,Amílcar,Ananias,Andrade,Aniceto,Anselmo,Antenor,Anthony,Antoine,Antunes,Antônio,Aprígio,Aquiles,Arantes,Araquém
2,Araruna,Aristeu,Arlindo,Armando,Armindo,Armênio,Arnaldo,Arsênio,Artêmio,Asenate,Assuero,Augusto,Aurélio,Azarias,Azevedo,Balbino,Barnabé,Baruque,Basílio,Batista
3,Belmiro,Ben-Hur,Benigno,Benício,Birigui,Bismark,Bolívar,Brandon,Brandão,Brendon,Caetano,Calisto,Calixto,Cardoso,Carlito,Carmelo,Castiel,Cecílio,Chantal,Charles
4,Charlie,Chester,Cirineu,Ciríaco,Clayton,Cleiton,Cleyton,Cleófas,Clinton,Cláudio,Coimbra,Colombo,Conrado,Corinto,Crispim,Cândido,Damásio,Delfino,Delmiro,Deodato
5,Deodoro,Dickson,Dimitre,Dimitri,Dominic,Domício,Donaldo,Dorival,Douglas,Edilson,Edmundo,Ednaldo,Eduardo,Edvaldo,Eleazar,Eliézer,Elpídio,Emanuel,Emerson,Enrique
6,Ernesto,Escobar,Estevam,Estevan,Esteves,Estevão,Estácio,Estênio,Etienne,Eugênio,Eusébio,Euzébio,Evandro,Everson,Everton,Fabiano,Feitosa,Felício,Fidélis,Filippo
7,Filêmon,Firmino,Fonseca,Freitas,Galdino,Galileu,Genival,Genésio,Geovane,Geraldo,Gerardo,Germano,Getúlio,Giacomo,Glauber,Gleison,Gláucio,Gonçalo,Goulart,Gregory
8,Guaraci,Guarani,Gênesis,Hadrian,Harahel,Harriet,Havilah,Heather,Henrico,Henrike,Herbert,Hermano,Herodes,Hilbert,Hilário,Hiroshi,Honório,Horácio,Ibrahim,Isidoro
9,Jacinto,Jackson,Jacques,Jailson,Jailton,Jeffrey,Jesuíno,Joaquim,Jocelin,Jocelyn,Joelson,Johnson,Jordano,Josemar,Juliano,Justino,Juvenal,Jônatas,Júpiter,Kaluanã




Nomes Femininos:


Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20
0,Aaliyah,Aaminah,Abayomi,Abigail,Adelina,Adeline,Adelita,Adriana,Adriane,Adriele,Alberta,Alcione,Alegria,Alessia,Alethea,Alethia,Alisson,Allegra,Amapola,Amábile
1,América,Anabela,Analisa,Andresa,Andreza,Andréia,Anelise,Anninka,Anselma,Antônia,Arabela,Araceli,Aretusa,Ariadne,Arianna,Arianne,Arienne,Aritana,Arlinda,Arminda
2,Ashiley,Assunta,Augusta,Aurélia,Açucena,Balbina,Barbosa,Bartira,Belfort,Belinda,Belmira,Bendita,Benilda,Benícia,Bethany,Bettina,Betânia,Beverly,Bibiana,Blanche
3,Bridget,Brigite,Brunnah,Brígida,Bárbara,Cacilda,Caitlin,Calista,Calíope,Cambuci,Cameron,Camille,Camilly,Camocim,Camélia,Carlota,Carmela,Cassidy,Catrina,Cecília
4,Celeste,Chandra,Chelsea,Clarice,Clarita,Cleanto,Cláudia,Corália,Cristal,Cynthia,Cândida,Dakarai,Dalvina,Damiana,Dandara,Daniela,Daniele,Danúbia,Darlene,Deborah
5,Dejanir,Delfina,Demeter,Destiny,Diadema,Djamila,Dolores,Dominga,Dorothy,Drielle,Dâmaris,Désirée,Edeline,Edilene,Edinéia,Ednalva,Ednilda,Eduarda,Edwiges,Eleanor
6,Electra,Elenice,Eleonor,Elisama,Elizete,Emeline,Enedina,Eudóxia,Eugênia,Eulália,Eurides,Evelina,Eveline,Evelise,Evellyn,Eyshila,Fabiana,Fabiane,Fabíola,Felícia
7,Frances,Francis,Gadelha,Geisila,Genilda,Genésia,Geovana,Geralda,Gerlane,Germana,Geórgia,Gilmara,Giovana,Giselda,Giselle,Giullia,Giórgia,Glaucia,Guiomar,Géssica
8,Hadassa,Heloise,Heloísa,Hideaki,Hélcias,Iandara,Idalina,Iemanjá,Ingride,Iolanda,Ipanema,Iracema,Isabela,Isabele,Isadora,Isidora,Ismênia,Isolina,Ivanete,Ivanice
9,Ivonete,Izabele,Izolina,Jaciara,Jacimar,Jacinta,Jamilly,Janaina,Jandaia,Jandira,Jasmine,Jeanine,Jenifer,Jesebel,Jesuína,Jezabel,Joannes,Jocasta,Jordana,Joseane
