In [2]:
import os
import json
import re
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
import traceback

In [3]:
# Function to check if JSON file exists, if not create it
def check_create_json(json_path):
    """Load the JSON document stored at ``json_path``, creating it if missing.

    Args:
        json_path: Path of the JSON file to read (or to create).

    Returns:
        The parsed JSON content of the file. When the file does not exist it
        is created containing an empty JSON object and ``{}`` is returned.
        An existing file that is empty or whitespace-only is also treated
        as ``{}``.

    Raises:
        json.JSONDecodeError: If the file has non-blank content that is not
            valid JSON.
    """
    # If the file doesn't exist, create it holding an empty JSON object
    if not os.path.exists(json_path):
        with open(json_path, 'w', encoding='utf-8') as file:
            json.dump({}, file, indent=4)
        return {}

    with open(json_path, 'r', encoding='utf-8') as file:
        content = file.read()

    # A zero-byte file (e.g. one truncated by an interrupted write) is not
    # valid JSON; treat it as "no data yet" instead of raising.
    if not content.strip():
        return {}
    return json.loads(content)

In [4]:
# Helper that breaks a combined type label into its individual types
def extract_type(type_string):
    """Split a type label such as "Water · Fighting" on the " · " separator.

    Returns a list of the pieces; a label without the separator yields a
    single-element list containing the whole string.
    """
    separator = " · "
    pieces = type_string.split(separator)
    return pieces

In [5]:
def _parse_pokemon_entry(entry):
    """Split a '#'-prefixed chart entry into (id, name, list of types).

    The entry is expected to hold at least three newline-separated lines:
    the '#'-prefixed id, the name, and the type label.
    """
    details = entry.split("\n")
    return details[0].strip("#"), details[1], extract_type(details[2])


def parse_evolution_chart(evo_chart):
    """Convert the flat text entries of an evolution chart into a dictionary.

    Args:
        evo_chart: Sequence of strings. Entries starting with "#" describe a
            Pokémon (id, name and types on separate lines); entries containing
            "Level", "use" or "Friendship" are treated as evolution conditions
            that apply to the most recent Pokémon entry.

    Returns:
        A dict keyed by Pokémon id (without the leading "#"). Each value has
        the keys "name", "type" (list of types) and "evolves_to", a list of
        dicts with "id", "name", "type" and "condition" (the condition text
        with surrounding parentheses stripped).

    Raises:
        IndexError: If a "#" entry has fewer than three lines.
    """
    # NOTE(review): conditions that contain none of the keywords below (for
    # example "(trade holding Kings Rock)") are skipped, and each condition is
    # attached to the most recent "#" entry, so branching charts are
    # flattened — confirm whether that is acceptable for the consumers.
    condition_keywords = ("Level", "use", "Friendship")

    result = {}
    current_evolutions = None  # "evolves_to" list of the Pokémon being filled

    # enumerate() gives the true position of each entry; looking the text up
    # with list.index() would return the first match and mis-link Pokémon
    # whenever the same condition text appears more than once.
    for position, data in enumerate(evo_chart):
        if data.startswith("#"):  # New Pokémon entry
            pokemon_id, name, types = _parse_pokemon_entry(data)
            current_evolutions = []
            # Register the entry immediately; later conditions append to the
            # same list object, so the stored record stays up to date.
            result[pokemon_id] = {
                "name": name,
                "type": types,
                "evolves_to": current_evolutions
            }

        elif any(keyword in data for keyword in condition_keywords):
            # Evolution condition detected
            if current_evolutions is None:
                continue  # Condition before any Pokémon entry: nothing to attach to

            next_index = position + 1
            if next_index < len(evo_chart) and evo_chart[next_index].startswith("#"):
                evo_id, evo_name, evo_types = _parse_pokemon_entry(evo_chart[next_index])
                current_evolutions.append({
                    "id": evo_id,
                    "name": evo_name,
                    "type": evo_types,
                    "condition": data.strip("()")  # Remove parentheses
                })
            else:
                print(f"Warning: Missing evolution data after condition '{data}'")

    return result


In [6]:
def scrape_pokemon_data_to_json(driver,url: str, json_path: str = './pokemon_data.json'):
    """Scrape one Pokémon page and merge the result into a JSON file.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        url: Address of the Pokémon page to scrape.
        json_path: JSON file holding the accumulated data; it is loaded via
            ``check_create_json`` and rewritten with the new entry added or
            replaced under the Pokémon's id.

    Returns:
        None. The scraped record is written to ``json_path`` and a success
        line is printed.

    Raises:
        NoSuchElementException: If the heading or the vitals table is not
            present on the page.
        IndexError: If the vitals table has fewer than five cells.
        ValueError: If an effectiveness cell holds text that is neither a
            known fraction symbol nor parseable by ``float``.
    """
    try:
        driver.get(url)
    except Exception as exc:
        # Best effort: a load error (presumably the page-load timeout set by
        # the caller — verify) may still leave usable content, so keep going
        # and let the element lookups below decide. Catching Exception rather
        # than using a bare except lets Ctrl-C / SystemExit propagate.
        print(f"Warning: loading {url} raised {type(exc).__name__}; continuing with current page")

    # Load existing data or create a new dictionary if the file doesn't exist
    pokemon_data = check_create_json(json_path)

    # Pokemon Name
    pokemon_name = driver.find_element(By.XPATH, '//*[@id="main"]/h1').text

    # Description: every <p> directly under #main, joined by newlines
    pokemon_description_list = driver.find_elements(By.XPATH, '//*[@id="main"]/p')
    description = "\n".join(element.text for element in pokemon_description_list)

    # Info: the first five cells of the first vitals table are read, in order,
    # as id, type(s), species, height and weight
    vitals_table = driver.find_element(By.CLASS_NAME, 'vitals-table')
    pokemon_info = [cell.text for cell in vitals_table.find_elements(By.TAG_NAME, 'td')]
    pokemon_id = pokemon_info[0]
    pokemon_type = pokemon_info[1].split()
    pokemon_species = pokemon_info[2]
    height_m = pokemon_info[3].split()[0]   # first token only; kept as a string
    weight_kg = pokemon_info[4].split()[0]  # first token only; kept as a string

    # Evolution Chart
    try:
        evo = driver.find_element(By.CLASS_NAME, 'infocard-list-evo')
        evo_chart = [e.text for e in evo.find_elements(By.TAG_NAME, 'span') if e.text.strip() != ""]
        evo_dict = parse_evolution_chart(evo_chart)
    except NoSuchElementException:
        print(f"No evolution chart found for {pokemon_name} (ID: {pokemon_id})")
        evo_dict = {}

    # Effectiveness: the header row names the types, the second row holds the
    # multipliers. Fraction glyphs and the empty cell are mapped to numbers;
    # anything else is handed to float() unchanged.
    multiplier_by_symbol = {'¼': 1/4, '½': 1/2, '⅛': 1/8, '': 1}
    def_effective_dict = {}
    for table in driver.find_elements(By.CLASS_NAME, 'type-table'):
        # Query each row once per table instead of once per cell; every
        # find_elements call is a round trip to the browser.
        rows = table.find_elements(By.TAG_NAME, 'tr')
        if len(rows) < 2:
            continue  # no value row to pair with the headers
        headers = rows[0].find_elements(By.TAG_NAME, 'th')
        cells = rows[1].find_elements(By.TAG_NAME, 'td')
        for header, cell in zip(headers, cells):
            raw_value = cell.text
            # Tables share one dict, so a type repeated in a later table
            # overwrites the earlier value.
            def_effective_dict[header.text] = float(multiplier_by_symbol.get(raw_value, raw_value))

    # Add or update Pokémon information in the existing dictionary
    pokemon_data[pokemon_id] = {
        "name": pokemon_name,
        "description": description,
        "species": pokemon_species,
        "type": pokemon_type,
        "height_m": height_m,
        "weight_kg": weight_kg,
        "evolution": evo_dict,
        "effectiveness": def_effective_dict
    }

    # Save after each Pokémon. Write to a sibling temp file and swap it in so
    # an interruption mid-write cannot leave json_path truncated or corrupt.
    tmp_path = f"{json_path}.tmp"
    with open(tmp_path, 'w') as json_file:
        json.dump(pokemon_data, json_file, indent=4)
    os.replace(tmp_path, json_path)

    print(f"{pokemon_name} Successful")

    

In [7]:
# Scrape configuration (edit here rather than inside the loop)
FIRST_POKEDEX_ID = 60        # first Pokédex number to scrape (inclusive)
LAST_POKEDEX_ID = 151        # last Pokédex number to scrape (inclusive)
PAGE_LOAD_TIMEOUT_S = 5      # passed to driver.set_page_load_timeout
POKEDEX_URL_TEMPLATE = 'https://pokemondb.net/pokedex/{:04}'  # id is zero-padded to 4 digits

driver = None  # Initialize driver variable so `finally` can tell whether it was created
try:
    chrome_options = Options()
    chrome_options.add_argument("--headless")  # Optional: Run in headless mode
    driver = webdriver.Chrome(options=chrome_options)
    print("Driver start.")
    driver.set_page_load_timeout(PAGE_LOAD_TIMEOUT_S)
    for i in range(FIRST_POKEDEX_ID, LAST_POKEDEX_ID + 1):
        url = POKEDEX_URL_TEMPLATE.format(i)
        scrape_pokemon_data_to_json(driver, url)

except Exception as e:
    # The first failing page stops the run; pages scraped so far were already
    # saved by scrape_pokemon_data_to_json.
    print(f"An error occurred: {e}")
    traceback.print_exc()
finally:
    # Close the driver if it was created, even after an error or interrupt
    if driver is not None:
        driver.quit()
        print("Driver quit.")
    

Driver start.
#0062
Poliwrath
Water · Fighting
(trade holding Kings Rock)
#0186
Politoed
Water'
Poliwag Successful
#0062
Poliwrath
Water · Fighting
(trade holding Kings Rock)
#0186
Politoed
Water'
Poliwhirl Successful
#0062
Poliwrath
Water · Fighting
(trade holding Kings Rock)
#0186
Politoed
Water'
Poliwrath Successful
Abra Successful
Kadabra Successful
Alakazam Successful
Machop Successful
Machoke Successful
Machamp Successful
Bellsprout Successful
Weepinbell Successful
Victreebel Successful
Tentacool Successful
Tentacruel Successful
Geodude Successful
Graveler Successful
Golem Successful
Ponyta Successful
Rapidash Successful
#0080
Slowbro
Water · Psychic
(trade holding Kings Rock)
#0199
Slowking
Water · Psychic'
Slowpoke Successful
#0080
Slowbro
Water · Psychic
(trade holding Kings Rock)
#0199
Slowking
Water · Psychic'
Slowbro Successful
Magnemite Successful
Magneton Successful
Farfetch'd Successful
Doduo Successful
Dodrio Successful
Seel Successful
Dewgong Successful
Grimer Successf

Traceback (most recent call last):
  File "/home/tanatorn/miniconda3/envs/pokedex/lib/python3.12/site-packages/urllib3/connectionpool.py", line 536, in _make_request
    response = conn.getresponse()
               ^^^^^^^^^^^^^^^^^^
  File "/home/tanatorn/miniconda3/envs/pokedex/lib/python3.12/site-packages/urllib3/connection.py", line 507, in getresponse
    httplib_response = super().getresponse()
                       ^^^^^^^^^^^^^^^^^^^^^
  File "/home/tanatorn/miniconda3/envs/pokedex/lib/python3.12/http/client.py", line 1428, in getresponse
    response.begin()
  File "/home/tanatorn/miniconda3/envs/pokedex/lib/python3.12/http/client.py", line 331, in begin
    version, status, reason = self._read_status()
                              ^^^^^^^^^^^^^^^^^^^
  File "/home/tanatorn/miniconda3/envs/pokedex/lib/python3.12/http/client.py", line 292, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/t

KeyboardInterrupt: 