In [None]:
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

import logging
import json
import os

# Chrome is launched with the flags below; the driver binary is resolved
# through webdriver_manager and wrapped in a Selenium Service.
options = webdriver.ChromeOptions()
options.add_argument("--headless")
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")
options.add_argument("--disable-blink-features=AutomationControlled")
service = Service(ChromeDriverManager().install())

# Timestamped INFO-level logging, plus a logger named after this module.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)

# Page that is scraped and the JSON file the result is written to.
url = "https://www.wikidex.net/wiki/Lista_de_habilidades"
fileName = "i18n_abilities.json"

# Generations whose abilities are scraped, and Spanish ability names that
# are skipped even when their generation matches.
gens = [
    "Tercera generación",
    "Cuarta generación",
    "Quinta generación",
    "Sexta generación",
]
not_include = ["Cacofonía"]

def save_json(data, filename) -> None:
    """Serialize *data* as JSON into the file at *filename*.

    The file is opened for writing (any existing content is replaced) with
    UTF-8 encoding. Output is indented by four spaces and non-ASCII
    characters are written as-is rather than escaped.
    """
    with open(filename, mode="w", encoding="UTF-8") as out_file:
        json.dump(data, out_file, ensure_ascii=False, indent=4)

In [None]:
# Load the abilities page in Chrome, collect the Spanish -> English ability
# names for the generations listed in `gens`, and write them to `fileName`.
with webdriver.Chrome(service=service, options=options) as driver:
    logger.info("Extracting abilities translations..")
    driver.get(url)
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    table = soup.select_one("#tm-globalid-0")
    if table is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise RuntimeError(f"Abilities table '#tm-globalid-0' not found at {url}")

    translations = {}
    for row in table.select('tbody tr'):
        columns = row.select('td')
        # Rows without enough <td> cells (e.g. header rows made of <th>)
        # carry no ability data; skip them rather than raise IndexError.
        if len(columns) < 4:
            continue

        spa_tag = columns[1].select_one('a')
        eng_tag = columns[1].select_one('i')
        gen_tag = columns[3].select_one('a')
        if spa_tag is None or eng_tag is None or gen_tag is None:
            logger.warning("Skipping row with unexpected layout: %s",
                           row.get_text(" ", strip=True))
            continue

        spa = spa_tag.text
        eng = eng_tag.text
        gen = gen_tag.get('title')

        # Explicitly excluded abilities are dropped regardless of generation.
        if spa in not_include:
            continue

        if gen in gens:
            translations[spa] = eng
            logger.info("(%s): %s --> %s", gen, spa, eng)

    save_json(translations, fileName)
    # Report the file that was actually written.
    logger.info(f"Saved all {len(translations)} translations into {fileName}")
logger.info("Saved all. Exiting program")