In [1]:
import asyncio # type: ignore
import time # type: ignore
import warnings # type: ignore
from io import StringIO # type: ignore
warnings.filterwarnings('ignore') # type: ignore
import pandas as pd # type: ignore
from bs4 import BeautifulSoup # type: ignore
from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeout # type: ignore

# Walkthrough to scrape; interpolated into the Bulbapedia appendix URL.
GAME = "Red_and_Blue"
# Name substituted for the walkthrough's 'First partner Pokémon' placeholder.
STARTER = "bulbasaur"
# Filled in later with every Pokémon name scraped from the walkthrough sections.
AVAILABLE_POKEMON = list()

async def get_html(url, selector, sleep=5, retries=3):
    """Load ``url`` in a WebKit browser and return the inner HTML of ``selector``.

    Before attempt number ``i`` (counting from 1) the coroutine waits
    ``sleep * i`` seconds, so the first attempt is delayed too and every
    retry waits longer than the previous one. Only Playwright timeouts are
    retried; any other exception propagates to the caller.

    Args:
        url: Address of the page to load.
        selector: Selector of the element whose inner HTML is returned.
        sleep: Base delay in seconds, multiplied by the attempt number.
        retries: Maximum number of attempts.

    Returns:
        The element's inner HTML, or ``None`` if every attempt timed out
        (or ``retries`` is less than 1).
    """
    html = None
    for attempt in range(1, retries + 1):
        # asyncio.sleep hands control back to the event loop while waiting;
        # time.sleep would block the loop for the whole delay.
        await asyncio.sleep(sleep * attempt)
        try:
            async with async_playwright() as p:
                browser = await p.webkit.launch()
                try:
                    page = await browser.new_page()
                    await page.goto(url)
                    print(await page.title())
                    html = await page.inner_html(selector)
                finally:
                    # Release the browser even when the page load fails.
                    await browser.close()
        except PlaywrightTimeout:
            print(f"Timeout error on {url}")
            continue
        else:
            break
    return html

def get_hrefs(html):
    """Collect the link targets of the named entries in an info-card listing.

    Every ``<div class="infocard">`` in ``html`` is searched for its first
    ``<a class="ent-name">``; cards without such a link, or whose link has
    no ``href`` attribute, are skipped.

    Args:
        html: Markup to parse (with Python's built-in ``html.parser``).

    Returns:
        A list of ``href`` values in document order.
    """
    document = BeautifulSoup(html, 'html.parser')
    name_links = (
        card.find('a', class_='ent-name')
        for card in document.find_all('div', class_='infocard')
    )
    return [
        anchor['href']
        for anchor in name_links
        if anchor and anchor.has_attr('href')
    ]

html = await get_html(f"https://bulbapedia.bulbagarden.net/wiki/Appendix:{GAME}_walkthrough", '#bodyContent')

Appendix:Red and Blue walkthrough - Bulbapedia, the community-driven Pokémon encyclopedia


In [2]:
# Collect the link of each walkthrough part from the index page's 'roundy' table.
# The parser is named explicitly (as in get_hrefs) so the parse does not depend
# on which optional parsers happen to be installed.
soup = BeautifulSoup(html, 'html.parser')
parts = soup.find('table', {'class': 'roundy'})
if parts is None:
    raise ValueError("walkthrough index: no <table class='roundy'> found")

hrefs = []
# The first row is skipped; the remaining rows are expected to carry the link
# in their first <th>.
for row in parts.find_all('tr')[1:]:
    header = row.find('th')
    link = header.find('a') if header is not None else None
    href = link.get('href') if link is not None else None
    # Rows without a header cell, a link, or an href are skipped explicitly
    # instead of relying on a bare except (which also hid unrelated errors).
    if href:
        hrefs.append(href)

In [3]:
for href in hrefs[:5]:
    html = await get_html(f"https://bulbapedia.bulbagarden.net{href}", '.mw-parser-output')
    soup = BeautifulSoup(html)
    pokemon_tables = pd.read_html(str(soup), match='A colored background')
    for table in pokemon_tables:
        if len(table) > 0:
            for pokemon in table['Pokémon']:
                if str(pokemon) != 'nan' and not str(pokemon).startswith('A colored background'):
                    if str(pokemon) == 'First partner Pokémon':
                        pokemon = STARTER
                    AVAILABLE_POKEMON.append(pokemon)

Appendix:Red and Blue walkthrough/Section 1 - Bulbapedia, the community-driven Pokémon encyclopedia
Appendix:Red and Blue walkthrough/Section 2 - Bulbapedia, the community-driven Pokémon encyclopedia
Appendix:Red and Blue walkthrough/Section 3 - Bulbapedia, the community-driven Pokémon encyclopedia
Appendix:Red and Blue walkthrough/Section 4 - Bulbapedia, the community-driven Pokémon encyclopedia
Appendix:Red and Blue walkthrough/Section 5 - Bulbapedia, the community-driven Pokémon encyclopedia


In [4]:
# Unique scraped names. NOTE(review): entries such as 'Gift Pokémon' and
# 'Special Pokémon' look like table section labels rather than species — confirm.
set(AVAILABLE_POKEMON)

{'Abra',
 'Bellsprout',
 'Bulbasaur',
 'Caterpie',
 'Clefairy',
 'Ekans',
 'Geodude',
 'Gift Pokémon',
 'Jigglypuff',
 'Kakuna',
 'Mankey',
 'Meowth',
 'Metapod',
 'Nidoran♀',
 'Nidoran♂',
 'Oddish',
 'Paras',
 'Pidgey',
 'Pikachu',
 'Rattata',
 'Sandshrew',
 'Spearow',
 'Special Pokémon',
 'Weedle',
 'Zubat'}

In [5]:
html = await get_html('https://pokemondb.net/pokedex/stats/gen1', '#main')
soup = BeautifulSoup(html)

Generation 1 new Pokémon stats | Pokémon Database


In [6]:
# Parse the table with id 'pokedex' into a DataFrame. The markup is wrapped in
# StringIO because passing a literal HTML string to read_html is deprecated in
# recent pandas releases.
stats_table = pd.read_html(StringIO(str(soup)), attrs={'id': 'pokedex'})[0]

In [12]:
# Order rows from highest to lowest 'Total' and renumber the index from 0.
stats_table = stats_table.sort_values(
    'Total',
    ascending=False,
    ignore_index=True,
)

In [13]:
# Display the stats table (sorted by 'Total', highest first, in the cell above).
stats_table

Unnamed: 0,#,Name,Type,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed
0,150,Mewtwo,Psychic,680,106,110,90,154,90,130
1,151,Mew,Psychic,600,100,100,100,100,100,100
2,149,Dragonite,Dragon Flying,600,91,134,95,100,100,80
3,144,Articuno,Ice Flying,580,90,85,100,95,125,85
4,146,Moltres,Fire Flying,580,90,100,90,125,85,90
...,...,...,...,...,...,...,...,...,...,...
146,14,Kakuna,Bug Poison,205,45,25,50,25,25,35
147,11,Metapod,Bug,205,50,20,55,25,25,30
148,129,Magikarp,Water,200,20,10,55,15,20,80
149,10,Caterpie,Bug,195,45,30,35,20,20,45


In [14]:
# Names of the scraped Pokémon that also appear in the stats table.
TEAM = list()

In [15]:
# Match names case-insensitively: STARTER is written in lowercase while the
# 'Name' column is capitalised, so an exact comparison would drop the starter.
available_names = {str(name).casefold() for name in AVAILABLE_POKEMON}

# Rebuild TEAM instead of appending so re-running this cell never duplicates
# entries. Names keep the order of stats_table's rows.
TEAM = [
    name
    for name in stats_table['Name']
    if str(name).casefold() in available_names
]

In [16]:
# Names collected into TEAM, in stats_table row order.
TEAM

['Clefairy',
 'Pikachu',
 'Oddish',
 'Bulbasaur',
 'Abra',
 'Mankey',
 'Sandshrew',
 'Geodude',
 'Bellsprout',
 'Meowth',
 'Ekans',
 'Paras',
 'Nidoran♀',
 'Nidoran♂',
 'Jigglypuff',
 'Spearow',
 'Rattata',
 'Pidgey',
 'Zubat',
 'Kakuna',
 'Metapod',
 'Caterpie',
 'Weedle']