In [None]:
import warnings # type: ignore
warnings.filterwarnings('ignore') # type: ignore
from bs4 import BeautifulSoup # type: ignore
import pandas as pd # type: ignore
from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeout # type: ignore
import time # type: ignore

# Walkthrough identifier; interpolated into the Bulbapedia
# "Appendix:<GAME>_walkthrough" URL that is scraped below.
GAME = 'Red_and_Blue'
# Name substituted for any 'First partner Pokémon' entry found in the
# walkthrough tables.
STARTER = 'Bulbasaur'
# Filled in by the scraping loop below with the Pokémon names read from the
# walkthrough tables (duplicates are not removed).
AVAILABLE_POKEMON = []

async def get_html(url, selector, sleep=5, retries=3):
    """Load a page with Playwright and return the inner HTML of one element.

    Every attempt launches a fresh WebKit browser, navigates to ``url``,
    prints the page title and reads the inner HTML of the element matched
    by ``selector``.  Before attempt number ``i`` (1-based) the coroutine
    waits ``sleep * i`` seconds, so the first attempt is delayed as well and
    the delay grows linearly with each retry.

    Args:
        url: Address of the page to load.
        selector: Selector handed to ``page.inner_html``.
        sleep: Base delay in seconds, multiplied by the attempt number.
        retries: Maximum number of attempts.

    Returns:
        The element's inner HTML, or ``None`` when every attempt timed out
        (or when ``retries`` is smaller than 1).  Exceptions other than a
        Playwright timeout propagate to the caller.
    """
    # Local import keeps this block self-contained; the file's import cell
    # only brings in ``time``.
    import asyncio

    html = None
    for attempt in range(1, retries + 1):
        # Await the delay instead of calling time.sleep(): a blocking sleep
        # inside a coroutine stalls the whole event loop while it waits.
        await asyncio.sleep(sleep * attempt)
        try:
            async with async_playwright() as p:
                browser = await p.webkit.launch()
                try:
                    page = await browser.new_page()
                    await page.goto(url)
                    print(await page.title())
                    html = await page.inner_html(selector)
                finally:
                    # Close the browser explicitly on success and on failure
                    # so no browser process is left to implicit cleanup.
                    await browser.close()
        except PlaywrightTimeout:
            print(f"Timeout error on {url}")
            continue
        else:
            break
    return html

html = await get_html(f"https://bulbapedia.bulbagarden.net/wiki/Appendix:{GAME}_walkthrough", '#bodyContent')
soup = BeautifulSoup(html)
parts = soup.find('table', {'class':'roundy'})
rows = parts.find_all('tr')
hrefs = []
for row in rows[1:]:
    part = row.find_all('th')[0]
    try:
        link = part.find('a')
        href = link.get('href')
        hrefs.append(href)
    except:
        continue

for href in hrefs[:10]:
    html = await get_html(f"https://bulbapedia.bulbagarden.net{href}", '.mw-parser-output')
    soup = BeautifulSoup(html)
    pokemon_tables = pd.read_html(str(soup), match='A colored background')
    for table in pokemon_tables:
        if len(table) > 0:
            for pokemon in table['Pokémon']:
                if str(pokemon) != 'nan' and not str(pokemon).startswith('A colored background'):
                    if str(pokemon) == 'First partner Pokémon':
                        pokemon = STARTER
                    if 'Pokémon' not in str(pokemon):
                        AVAILABLE_POKEMON.append(pokemon)

html = await get_html('https://pokemondb.net/pokedex/stats/gen1', '#main')
soup = BeautifulSoup(html)
stats_table = pd.read_html(str(soup), attrs={'id':'pokedex'})[0].sort_values(by='Total', ascending=False, ignore_index=True)

# Walk the stats table in its sorted order and keep the first six names that
# also appear in AVAILABLE_POKEMON.
TEAM = [name for name in stats_table['Name'] if name in AVAILABLE_POKEMON][:6]
TEAM

In [None]:
html = await get_html('https://pokemondb.net/evolution#evo-g1', '#main')
soup = BeautifulSoup(html)
rows = soup.find_all('div', {'class':'infocard-filter-block'})
EVO_CHART = {}
for row in rows:
    names = row.find_all('a', {'class':'ent-name'})
    base = names[0].text.strip()
    for i, name in enumerate(names):
        if i == 0:
            EVO_CHART[base] = []
        else:
            EVO_CHART[base].append(name.text.strip())

In [None]:
def get_base(evolved_name, pokemon_dict):
    """Return the key whose list contains ``evolved_name``.

    ``pokemon_dict`` maps a base name to a list of evolved names.  The keys
    are checked in the dictionary's iteration order and the first key whose
    list contains ``evolved_name`` is returned.  If no list contains it,
    ``evolved_name`` itself is returned unchanged.
    """
    matching_bases = (
        base
        for base, evolved_list in pokemon_dict.items()
        if evolved_name in evolved_list
    )
    # next() with a default covers the "not found in any list" case.
    return next(matching_bases, evolved_name)

# Replace every team member with the base name that get_base finds for it in
# EVO_CHART; names not listed under any base are kept unchanged.
BASE_TEAM = [get_base(pokemon, EVO_CHART) for pokemon in TEAM]
BASE_TEAM