In [None]:
import time
import csv
import os
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support import expected_conditions as EC

# URL of the dictionary page that the script opens and scrapes.
base_url = "https://www.ines.gov.br/dicionario-de-libras/"

# Chrome is launched with the "detach" experimental option so the browser
# window stays open after the script ends.
options = webdriver.ChromeOptions()
options.add_experimental_option("detach", True)
driver = webdriver.Chrome(options=options)
driver.get(base_url)  # Open the dictionary website

# Shared explicit-wait helper: waits up to 15 seconds for an element to appear.
wait = WebDriverWait(driver, 15)

# CSV file that collects the scraped metadata.
csv_filename = "INES_Metadata.csv"

# True only when the CSV already holds data. A zero-byte file (for example one
# left behind by a run that was killed before anything was flushed) is treated
# like a missing file, so that it still receives its header row.
file_exists = os.path.isfile(csv_filename) and os.path.getsize(csv_filename) > 0

# Walk through every letter link, let the user click it by hand, then record
# one CSV row per word offered in the word dropdown.
# The file is opened in append mode so rows from earlier runs are kept; the
# header row is written only when `file_exists` is false.
with open(csv_filename, "a", newline="", encoding="utf-8") as file:
    writer = csv.writer(file)

    if not file_exists:
        writer.writerow(["Letter", "Word", "Video URL", "Image URL"])

    # Letter links on the page.
    letters = driver.find_elements(By.CSS_SELECTOR, "ul.list-inline li a")

    # Read every label once, immediately after locating the links. `.text` is
    # a live WebDriver call that can raise (e.g. when the element has gone
    # stale), so it must not be evaluated inside the error handlers below or
    # repeated for every word.
    letter_names = [letter.text for letter in letters]

    for letter, letter_name in zip(letters, letter_names):
        try:
            # Outline the link in red so the user can see which one to click.
            driver.execute_script("arguments[0].style.border='3px solid red'", letter)
            print(f"Highlighted letter: {letter_name}")

            # The click itself is manual; the script resumes on Enter.
            input(f"Click '{letter_name}' in the browser, then press Enter here to continue...")

            # Fixed pause to give the word list time to load.
            time.sleep(5)

            # Dropdown that contains the word list for the clicked letter.
            select_element = wait.until(EC.presence_of_element_located((By.ID, "input-palavras")))
            select = Select(select_element)

            # Fetch each option's text exactly once (every `.text` access is a
            # round-trip to the browser) and drop blank entries.
            option_texts = (option.text.strip() for option in select.options)
            words = [text for text in option_texts if text]

            for word in words:
                try:
                    select.select_by_visible_text(word)
                    time.sleep(3)  # Give time for video & image to load

                    # Video URL: <source type="video/mp4"> inside the <video> element.
                    video_element = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#input-video video source[type='video/mp4']")))
                    video_url = video_element.get_attribute("src")

                    # Image URL is optional. find_elements returns an empty
                    # list when nothing matches, so no exception handling is
                    # needed for the "no image" case; any other failure is
                    # reported by the per-word handler instead of being
                    # silently recorded as a missing image.
                    image_elements = driver.find_elements(By.CSS_SELECTOR, "#input-imagem img")
                    if image_elements:
                        image_url = image_elements[0].get_attribute("src")
                    else:
                        image_url = "No Image Available"

                    writer.writerow([letter_name, word, video_url, image_url])
                    file.flush()  # Save progress immediately
                    print(f"Saved: {word} | Video: {video_url} | Image: {image_url}")

                except Exception as e:
                    # Best effort: report the failure and move on to the next word.
                    print(f"Error processing word '{word}': {e}")

        except Exception as e:
            # Best effort: report the failure and move on to the next letter.
            print(f"Error processing letter '{letter_name}': {e}")

print(f"Data saved in {csv_filename}")  # Let the user know the file is saved

# Keep the browser open for verification until the user presses Enter.
# The driver is quit in `finally` so the detached browser session is still
# shut down when the prompt is interrupted (Ctrl+C) or stdin is closed
# (EOFError); the exception itself is left to propagate.
try:
    input("Press Enter to close the browser...")
finally:
    driver.quit()
