In [4]:
import chromedriver_binary
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
import json
import time


In [5]:
class PokeScraper:
    """Scrape Pokemon data from pokemondb.net with a headless Chrome driver.

    Typical use is ``scrape_all_dex()`` followed by ``scrape_all_stats()``:
    the first fills ``self.national`` and the second reads it to tag every
    Pokemon with the games it appears in. Call ``close()`` when finished to
    shut the browser down.
    """

    def __init__(self) -> None:
        """Start a headless Chrome session and set up the game Pokedex URLs."""
        options = Options()
        options.add_argument("--headless")
        self.driver = webdriver.Chrome(options=options)
        self.pokedexes = [
            "https://pokemondb.net/pokedex/game/red-blue-yellow",
            "https://pokemondb.net/pokedex/game/firered-leafgreen",
            "https://pokemondb.net/pokedex/game/gold-silver-crystal",
            "https://pokemondb.net/pokedex/game/heartgold-soulsilver",
            "https://pokemondb.net/pokedex/game/ruby-sapphire-emerald",
            "https://pokemondb.net/pokedex/game/diamond-pearl",
            "https://pokemondb.net/pokedex/game/platinum",
            "https://pokemondb.net/pokedex/game/black-white",
            "https://pokemondb.net/pokedex/game/black-white-2",
            "https://pokemondb.net/pokedex/game/x-y",
            "https://pokemondb.net/pokedex/game/omega-ruby-alpha-sapphire",
            "https://pokemondb.net/pokedex/game/sun-moon",
            "https://pokemondb.net/pokedex/game/ultra-sun-ultra-moon",
            "https://pokemondb.net/pokedex/game/sword-shield",
            "https://pokemondb.net/pokedex/game/brilliant-diamond-shining-pearl",
            "https://pokemondb.net/pokedex/game/scarlet-violet",
        ]
        # Pokemon name -> list of game slugs (last URL path segment) whose
        # Pokedex page lists it. Populated by scrape_all_dex().
        self.national = {}

    def close(self) -> None:
        """Quit the browser session held in ``self.driver``."""
        self.driver.quit()

    def scrape_all_stats(
        self, generations=range(1, 10), output_path: str = "pokestats.json"
    ) -> None:
        """Scrape the per-generation stats tables and dump them as JSON.

        For every generation number the page
        ``https://pokemondb.net/pokedex/stats/gen<N>`` is loaded and each body
        row of ``table#pokedex`` becomes one dict with the keys ``sprite``,
        ``name``, ``types``, ``total_stats``, ``generation`` and ``url``.
        ``generation`` comes from ``self.national`` (``["no home"]`` when the
        name is not in it), so run ``scrape_all_dex()`` first.

        Args:
            generations: Iterable of generation numbers to scrape.
            output_path: File that receives the JSON list; it is overwritten.
        """
        res = []
        for i in generations:
            self.driver.get(f"https://pokemondb.net/pokedex/stats/gen{i}")
            # Presumably forces lazily loaded content to render -- TODO confirm.
            self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
            print(f"starting gen {i}")

            header_cells = self.driver.find_elements(
                By.CSS_SELECTOR, "table#pokedex > thead > tr > th"
            )
            # Only the last seven columns are kept; they pair up with the last
            # seven <td> cells of every row (presumably total + base stats).
            stat_names = [cell.text for cell in header_cells[-7:]]

            rows = self.driver.find_elements(
                By.CSS_SELECTOR, "table#pokedex > tbody > tr"
            )
            res.extend(self._parse_stats_row(row, stat_names) for row in rows)

        # Context manager guarantees the file is flushed and closed, which the
        # previous bare open() call left to the garbage collector.
        with open(output_path, "w", encoding="utf-8") as out_file:
            json.dump(res, out_file)

    def _parse_stats_row(self, row, stat_names):
        """Convert one stats-table row element into a JSON-serializable dict."""
        sprite = row.find_element(By.CSS_SELECTOR, "img.icon-pkmn").get_attribute(
            "src"
        )
        # NOTE(review): this is the text of the whole name cell, while
        # self.national is keyed by the text of a.ent-name links. If the cell
        # ever holds extra text the lookup below falls back to "no home" --
        # confirm that is intended.
        name = row.find_element(By.CSS_SELECTOR, "td.cell-name").text
        url = row.find_element(
            By.CSS_SELECTOR, "td.cell-name > a.ent-name"
        ).get_attribute("href")
        type_cell = row.find_element(By.CSS_SELECTOR, "td.cell-icon")
        # The type cell's text is split on newlines, one type per line.
        types = [type_name.lower() for type_name in type_cell.text.split("\n")]
        stat_cells = row.find_elements(By.CSS_SELECTOR, "td")[-7:]
        total_stats = dict(zip(stat_names, [cell.text for cell in stat_cells]))
        generation = self.national.get(name, ["no home"])

        return dict(
            sprite=sprite,
            name=name,
            types=types,
            total_stats=total_stats,
            generation=generation,
            url=url,
        )

    def scrape_all_dex(self) -> None:
        """Record, for every game Pokedex page, which Pokemon it lists.

        Each URL in ``self.pokedexes`` is loaded and the text of every
        ``a.ent-name`` link is added to ``self.national`` under the game's
        slug (the last path segment of the URL). A name that appears several
        times on one page gets the slug appended once per occurrence.
        """
        for dex in self.pokedexes:
            gen = dex.split("/")[-1]
            self.driver.get(dex)
            print(f"starting {gen}")
            for link in self.driver.find_elements(By.CSS_SELECTOR, "a.ent-name"):
                # Read .text once: every access asks the browser again.
                name = link.text
                self.national.setdefault(name, []).append(gen)


# Order matters: scrape_all_stats() looks every Pokemon up in ps.national,
# which is only filled in by scrape_all_dex().
ps = PokeScraper()
try:
    ps.scrape_all_dex()
    ps.scrape_all_stats()
finally:
    # Always shut the headless Chrome process down, even when a scrape fails;
    # otherwise every run leaves a browser process behind.
    ps.driver.quit()


starting red-blue-yellow
starting firered-leafgreen
starting gold-silver-crystal
starting heartgold-soulsilver
starting ruby-sapphire-emerald
starting diamond-pearl
starting platinum
starting black-white
starting black-white-2
starting x-y
starting omega-ruby-alpha-sapphire
starting sun-moon
starting ultra-sun-ultra-moon
starting sword-shield
starting brilliant-diamond-shining-pearl
starting scarlet-violet
starting gen 1
starting gen 2
starting gen 3
starting gen 4
starting gen 5
starting gen 6
starting gen 7
starting gen 8
starting gen 9
