In [1]:
import warnings # type: ignore
warnings.filterwarnings('ignore') # type: ignore
import asyncio
import time # type: ignore
from io import StringIO

from bs4 import BeautifulSoup # type: ignore
import pandas as pd # type: ignore
from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeout # type: ignore

# Walkthrough identifier; interpolated into the Bulbapedia appendix URL fetched in this cell.
GAME = 'Red_and_Blue'
# Module-level accumulator: later cells append scraped Pokémon names to this list.
AVAILABLE_POKEMON = []
# LOCATIONS = pd.DataFrame()

async def get_html(url, selector, sleep=5, retries=3):
    """Load ``url`` in a Playwright WebKit browser and return the inner HTML of ``selector``.

    Every attempt is preceded by a pause of ``sleep * attempt`` seconds, so the
    delay grows linearly with each retry (5s, 10s, 15s with the defaults).
    The page title is printed after each successful navigation.

    Args:
        url: Address of the page to open.
        selector: Selector passed to ``page.inner_html``.
        sleep: Base delay in seconds; multiplied by the 1-based attempt number.
        retries: Maximum number of attempts.

    Returns:
        The inner HTML string of the matched element, or ``None`` when every
        attempt hit a Playwright timeout (or ``retries`` is 0). Callers must
        check for ``None`` before parsing.
    """
    html = None
    for attempt in range(1, retries + 1):
        # Awaitable pause: a blocking time.sleep() here would freeze the event
        # loop that the notebook kernel itself runs on.
        await asyncio.sleep(sleep * attempt)
        try:
            async with async_playwright() as p:
                browser = await p.webkit.launch()
                try:
                    page = await browser.new_page()
                    await page.goto(url)
                    print(await page.title())
                    html = await page.inner_html(selector)
                finally:
                    # Release the browser even when navigation times out.
                    await browser.close()
        except PlaywrightTimeout:
            print(f"Timeout error on {url}")
            continue
        else:
            break
    return html

def get_hrefs(html):
    """Return the ``href`` of the first ``a.ent-name`` link inside each ``div.infocard``.

    Cards that contain no such link, or whose link has no ``href`` attribute,
    are skipped. Order follows the order of the cards in ``html``.
    """
    parsed = BeautifulSoup(html, 'html.parser')
    name_links = (
        card.find('a', class_='ent-name')
        for card in parsed.find_all('div', class_='infocard')
    )
    return [
        anchor['href']
        for anchor in name_links
        if anchor and anchor.has_attr('href')
    ]

# Fetch the walkthrough index page for GAME and keep the inner HTML of '#bodyContent'.
html = await get_html(f"https://bulbapedia.bulbagarden.net/wiki/Appendix:{GAME}_walkthrough", '#bodyContent')

Appendix:Red and Blue walkthrough - Bulbapedia, the community-driven Pokémon encyclopedia


In [2]:
# Collect the link found in the first header cell of each row of the 'roundy' table.
if html is None:
    raise RuntimeError("Walkthrough index page was not fetched (get_html returned None)")
# Name the parser explicitly so the result does not depend on which parsers are installed.
soup = BeautifulSoup(html, 'html.parser')
parts = soup.find('table', {'class':'roundy'})
if parts is None:
    raise ValueError("No table with class 'roundy' found in the walkthrough index page")
rows = parts.find_all('tr')
hrefs = []
for row in rows[1:]:  # the first row is skipped (presumably the table header — confirm)
    header_cells = row.find_all('th')
    if not header_cells:
        # Row without a <th>: nothing to link to.
        continue
    link = header_cells[0].find('a')
    href = link.get('href') if link is not None else None
    if href:
        # Only keep real targets; a missing href would otherwise yield a ".../None" URL.
        hrefs.append(href)

In [3]:
for href in hrefs[:3]:
    html = await get_html(f"https://bulbapedia.bulbagarden.net{href}", '.mw-parser-output')
    soup = BeautifulSoup(html)
    pokemon_tables = pd.read_html(str(soup), match='A colored background')
    for table in pokemon_tables:
        if len(table) > 0:
            for pokemon in table['Pokémon']:
                if str(pokemon) != 'nan' and not str(pokemon).startswith('A colored background'):
                    if str(pokemon) == 'First partner Pokémon':
                        pokemon = 'Bulbasaur'
                    AVAILABLE_POKEMON.append(pokemon)

Appendix:Red and Blue walkthrough/Section 1 - Bulbapedia, the community-driven Pokémon encyclopedia
Appendix:Red and Blue walkthrough/Section 2 - Bulbapedia, the community-driven Pokémon encyclopedia
Appendix:Red and Blue walkthrough/Section 3 - Bulbapedia, the community-driven Pokémon encyclopedia


In [4]:
# Display the distinct names collected so far (the list itself may hold duplicates).
set(AVAILABLE_POKEMON)

{'Caterpie',
 'First partner Pokémon',
 'Kakuna',
 'Metapod',
 'Nidoran♀',
 'Nidoran♂',
 'Pidgey',
 'Pikachu',
 'Rattata',
 'Spearow',
 'Weedle'}

In [5]:
html = await get_html('https://pokemondb.net/pokedex/stats/gen1', '#main')
soup = BeautifulSoup(html)

Generation 1 new Pokémon stats | Pokémon Database


In [6]:
# Parse the table with id 'pokedex' into a DataFrame. The markup is wrapped in
# StringIO because passing literal HTML strings to read_html is deprecated.
stats_table = pd.read_html(StringIO(str(soup)), attrs={'id':'pokedex'})[0]

In [9]:
# Rows whose 'Type' text contains 'Ground'; missing types count as no match.
is_ground = stats_table['Type'].str.contains('Ground', na=False)
stats_table.loc[is_ground]

Unnamed: 0,#,Name,Type,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed
26,27,Sandshrew,Ground,300,50,75,85,20,30,40
27,28,Sandslash,Ground,450,75,100,110,45,55,65
30,31,Nidoqueen,Poison Ground,505,90,92,87,75,85,76
33,34,Nidoking,Poison Ground,505,81,102,77,85,75,85
49,50,Diglett,Ground,265,10,55,25,35,45,95
50,51,Dugtrio,Ground,425,35,100,50,50,70,120
73,74,Geodude,Rock Ground,300,40,80,100,30,30,20
74,75,Graveler,Rock Ground,390,55,95,115,45,45,35
75,76,Golem,Rock Ground,495,80,120,130,55,65,45
94,95,Onix,Rock Ground,385,35,45,160,30,45,70
