In [None]:
import warnings # type: ignore
warnings.filterwarnings('ignore') # type: ignore
import asyncio # type: ignore
from io import StringIO # type: ignore
import time # type: ignore

from bs4 import BeautifulSoup # type: ignore
import pandas as pd # type: ignore
from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeout # type: ignore

# Walkthrough to scrape; interpolated into the Bulbapedia "Appendix:{GAME}_walkthrough" URL.
GAME = 'Red_and_Blue'
# Name substituted for the 'First partner Pokémon' placeholder found in the scraped tables.
STARTER = 'Bulbasaur'
# Accumulates every Pokémon name scraped from the walkthrough pages (duplicates are not removed).
AVAILABLE_POKEMON = []

async def get_html(url, selector, sleep=5, retries=3):
    """Load ``url`` in a Playwright WebKit browser and return an element's inner HTML.

    Every attempt is preceded by a pause of ``sleep * attempt`` seconds, so the
    delay grows linearly with the attempt number (and the first attempt also
    waits). The page title is printed after a successful navigation.

    Args:
        url: Address of the page to open.
        selector: Playwright selector of the element whose inner HTML is wanted.
        sleep: Base delay in seconds, multiplied by the attempt number.
        retries: Maximum number of attempts.

    Returns:
        The inner HTML of the selected element, or ``None`` when every attempt
        ended in a Playwright timeout.
    """
    html = None
    for attempt in range(1, retries + 1):
        # asyncio.sleep yields to the event loop while waiting; time.sleep
        # would freeze every other task running on the loop.
        await asyncio.sleep(sleep * attempt)
        try:
            async with async_playwright() as p:
                browser = await p.webkit.launch()
                try:
                    page = await browser.new_page()
                    await page.goto(url)
                    print(await page.title())
                    html = await page.inner_html(selector)
                finally:
                    # Release the browser process whether or not the fetch worked.
                    await browser.close()
        except PlaywrightTimeout:
            print(f"Timeout error on {url}")
            continue
        else:
            break
    return html

def get_hrefs(html):
    """Return the ``href`` of the ``a.ent-name`` link in each ``div.infocard``.

    Cards that have no such link, or whose link lacks an ``href`` attribute,
    contribute nothing to the result. Order follows the document.
    """
    document = BeautifulSoup(html, 'html.parser')
    anchors = (
        card.find('a', class_='ent-name')
        for card in document.find_all('div', class_='infocard')
    )
    return [anchor['href'] for anchor in anchors if anchor and anchor.has_attr('href')]

# Fetch the walkthrough index page for GAME, keeping only the inner HTML of '#bodyContent'.
html = await get_html(f"https://bulbapedia.bulbagarden.net/wiki/Appendix:{GAME}_walkthrough", '#bodyContent')

In [2]:
# Collect the link found in the first <th> of each row of the first
# 'roundy' table on the walkthrough index page.
soup = BeautifulSoup(html)
parts = soup.find('table', {'class':'roundy'})
rows = parts.find_all('tr')
hrefs = []
for row in rows[1:]:  # the first row is skipped (presumably the table header)
    header_cells = row.find_all('th')
    if not header_cells:
        # Row without a <th>: nothing to link to.
        continue
    part = header_cells[0]
    link = part.find('a')
    if link is None:
        continue
    href = link.get('href')
    if href:
        # Only keep real targets; a missing href would later build a bogus URL.
        hrefs.append(href)

In [None]:
for href in hrefs[:5]:
    html = await get_html(f"https://bulbapedia.bulbagarden.net{href}", '.mw-parser-output')
    soup = BeautifulSoup(html)
    pokemon_tables = pd.read_html(str(soup), match='A colored background')
    for table in pokemon_tables:
        if len(table) > 0:
            for pokemon in table['Pokémon']:
                if str(pokemon) != 'nan' and not str(pokemon).startswith('A colored background'):
                    if str(pokemon) == 'First partner Pokémon':
                        pokemon = STARTER
                    if 'Pokémon' not in str(pokemon):
                        AVAILABLE_POKEMON.append(pokemon)

In [None]:
# Show the distinct Pokémon names collected so far.
set(AVAILABLE_POKEMON)

In [None]:
# Fetch the pokemondb stats page and parse the inner HTML of its '#main' element.
# get_html returns None when every attempt times out, so html may be None here.
html = await get_html('https://pokemondb.net/pokedex/stats/gen1', '#main')
soup = BeautifulSoup(html)

In [6]:
# Parse the table with id 'pokedex' into a DataFrame. The markup is wrapped in
# StringIO because passing literal HTML text to read_html is deprecated.
stats_table = pd.read_html(StringIO(str(soup)), attrs={'id':'pokedex'})[0]

In [7]:
# Order rows from highest to lowest 'Total' and renumber the index from 0.
stats_table = stats_table.sort_values(by='Total', ascending=False, ignore_index=True)

In [None]:
# Display the stats table.
stats_table

In [9]:
# Will hold stats-table names that also appear in AVAILABLE_POKEMON.
TEAM = []

In [10]:
# Append every stats-table name that was scraped from the walkthrough, keeping
# the table's row order. The set is built once so each lookup is O(1) instead
# of a scan of the whole AVAILABLE_POKEMON list.
available_names = set(AVAILABLE_POKEMON)
TEAM.extend(name for name in stats_table['Name'] if name in available_names)

In [None]:
# Display the matched names.
TEAM

In [None]:
# Number of distinct scraped names.
len(set(AVAILABLE_POKEMON))

In [None]:
# Number of entries in TEAM.
len(TEAM)

In [None]:
# Scraped names that are not in TEAM, i.e. had no exact 'Name' match in the stats table.
set(AVAILABLE_POKEMON) - set(TEAM)

In [26]:
# Gather the href of the 'ent-name' link from every row in the body of the
# table with id 'pokedex'.
table = soup.find('table', id='pokedex').find('tbody')
rows = table.find_all('tr')
name_hrefs = [entry.find('a', class_='ent-name').get('href') for entry in rows]

In [27]:
# Display the collected hrefs.
name_hrefs

['/pokedex/bulbasaur',
 '/pokedex/ivysaur',
 '/pokedex/venusaur',
 '/pokedex/charmander',
 '/pokedex/charmeleon',
 '/pokedex/charizard',
 '/pokedex/squirtle',
 '/pokedex/wartortle',
 '/pokedex/blastoise',
 '/pokedex/caterpie',
 '/pokedex/metapod',
 '/pokedex/butterfree',
 '/pokedex/weedle',
 '/pokedex/kakuna',
 '/pokedex/beedrill',
 '/pokedex/pidgey',
 '/pokedex/pidgeotto',
 '/pokedex/pidgeot',
 '/pokedex/rattata',
 '/pokedex/raticate',
 '/pokedex/spearow',
 '/pokedex/fearow',
 '/pokedex/ekans',
 '/pokedex/arbok',
 '/pokedex/pikachu',
 '/pokedex/raichu',
 '/pokedex/sandshrew',
 '/pokedex/sandslash',
 '/pokedex/nidoran-f',
 '/pokedex/nidorina',
 '/pokedex/nidoqueen',
 '/pokedex/nidoran-m',
 '/pokedex/nidorino',
 '/pokedex/nidoking',
 '/pokedex/clefairy',
 '/pokedex/clefable',
 '/pokedex/vulpix',
 '/pokedex/ninetales',
 '/pokedex/jigglypuff',
 '/pokedex/wigglytuff',
 '/pokedex/zubat',
 '/pokedex/golbat',
 '/pokedex/oddish',
 '/pokedex/gloom',
 '/pokedex/vileplume',
 '/pokedex/paras',
 '/