In [4]:
import json
import requests
from dataclasses import dataclass
from selectolax.parser import HTMLParser



def get_html(url="https://www.wikidex.net/wiki/Lista_de_habilidades", timeout=30):
    """Download a page and return it wrapped in an ``HTMLParser``.

    Args:
        url: Address of the page to fetch. Defaults to the ability list page
            this script scrapes.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        An ``HTMLParser`` built from the response body text.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    # A browser-like User-Agent is sent with the request.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36"
    }

    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on an error status instead of parsing an error page and
    # silently producing an empty result further down.
    response.raise_for_status()
    return HTMLParser(response.text)


def extract(element, css_selector, option='text'):
    """Return data from the first descendant of ``element`` matching a selector.

    Args:
        element: Object exposing ``css_first(selector)`` (e.g. a parsed node).
        css_selector: CSS selector used to locate the descendant.
        option: What to return for the matched node:
            ``'text'``  - its stripped text content (default);
            ``'attrs'`` - its full attribute mapping;
            ``'title'`` - the value of its ``title`` attribute;
            anything else - the matched node itself.

    Returns:
        The requested value, or ``None`` when nothing matches the selector or
        when ``option='title'`` and the node has no ``title`` attribute.
    """
    selected_element = element.css_first(css_selector)
    if selected_element is None:
        return None

    if option == 'text':
        return selected_element.text(strip=True)
    if option == 'attrs':
        return selected_element.attributes
    if option == 'title':
        # .get() keeps "missing" consistent with the no-match case (None)
        # instead of raising KeyError for nodes without a title attribute.
        return selected_element.attributes.get(option)
    return selected_element


def save_json(data, filename):
    """Write ``data`` to ``filename`` as indented, UTF-8 encoded JSON.

    Non-ASCII characters are written as-is rather than as ``\\uXXXX`` escapes,
    and nested levels are indented by four spaces. An existing file is
    overwritten.
    """
    dump_options = {"indent": 4, "ensure_ascii": False}
    with open(filename, mode='w', encoding="UTF-8") as out_file:
        json.dump(data, out_file, **dump_options)




# Generation names (in Spanish) accepted by the scraper: third through sixth.
PERMITTED_GENS = [
    'Tercera generación',
    'Cuarta generación',
    'Quinta generación',
    'Sexta generación',
]

@dataclass(slots=True)
class Ability:
    """One scraped table row: an ability's two names and its generation.

    The script below fills every field through ``extract()``, which returns
    ``None`` when nothing matches, so a field may hold ``None`` at runtime
    despite the ``str`` annotation.
    """

    # Text of the link in the row's second cell; used as the key of the
    # translations mapping (presumably the Spanish name).
    spa: str
    # Text of the italic element in the same cell; used as the mapped value
    # (presumably the English name).
    eng: str
    # ``title`` attribute of the link in the row's fourth cell; compared
    # against the list of permitted generations.
    gen: str
    

# Fetch the ability list page and collect the rows of the abilities table.
# NOTE(review): two leading rows are skipped in total (one by the slice here,
# one by the slice in the loop) - presumably header rows; confirm against the
# page markup before changing either slice.
html = get_html()
rows = html.css(".tabpokemon tbody tr")[1:]

# Reuse the module-level constant rather than repeating the literal, so the
# accepted generations are defined in exactly one place.
permitted_gens = PERMITTED_GENS
translations = {}

for row in rows[1:]:
    columns = row.css("td")

    # Rows lacking the expected cells (e.g. header or separator rows) would
    # raise IndexError on columns[3]; skip them instead of aborting the run.
    if len(columns) < 4:
        continue

    ability = Ability(
        spa=extract(columns[1], "a"),
        eng=extract(columns[1], "i"),
        gen=extract(columns[3], "a", 'title'),
    )

    # extract() returns None when a cell has no matching element; such a row
    # would otherwise end up as a "null" key or value in the JSON output.
    if ability.spa is None or ability.eng is None:
        continue

    if ability.gen in permitted_gens:
        translations[ability.spa] = ability.eng


print(f"Done {len(translations)} translations.")
save_json(translations, "translations_abilities.json")

Done 191 translations.
