# In [1]:
import string
import requests
from bs4 import BeautifulSoup
import pandas as pd
from concurrent.futures import ThreadPoolExecutor, as_completed

# Function to get words starting with a given letter
def get_words_starting_with(letter: str) -> list:
    """Scrape the Teanglann FGB index page for one letter.

    Requests ``https://www.teanglann.ie/en/fgb/_<letter>`` and collects the
    text of the first ``<a>`` inside every ``<span class="abcItem">``.

    Args:
        letter: The letter whose index page should be fetched.

    Returns:
        A list of ``(word, link)`` tuples, where ``link`` is the FGB base URL
        followed by the word text. Returns an empty list when the request
        fails for any reason (HTTP error status, connection failure,
        timeout); the failure is reported on stdout.
    """
    base_url = "https://www.teanglann.ie/en/fgb/"
    try:
        # Without a timeout a stalled connection would block this worker
        # thread (and therefore the whole scrape) forever.
        result = requests.get(f"{base_url}_{letter}", timeout=30)
        result.raise_for_status()
    except requests.exceptions.RequestException as e:
        # RequestException is the base class of HTTPError, ConnectionError
        # and Timeout, so every network-level failure degrades to "no words
        # for this letter" instead of escaping the function.
        print(f"Failed to fetch words for letter '{letter}': {e}")
        return []
    soup = BeautifulSoup(result.content, "html.parser")
    words = []
    for sample in soup.find_all("span", class_="abcItem"):
        anchor = sample.a
        if anchor is None:
            # A span without a link carries no word; skip it rather than
            # lose the whole page to an AttributeError.
            continue
        text = anchor.text
        words.append((text, f"{base_url}{text}"))
    return words

# Function to get all words from Teanglann.ie
def get_all_words() -> list:
    """Collect the word lists for every letter ``a``-``z`` concurrently.

    One ``get_words_starting_with`` call per lowercase ASCII letter is
    submitted to a pool of up to 10 threads. Results are gathered in
    submission (alphabetical) order, so the returned list has the same
    ordering on every run regardless of which request finishes first.

    Returns:
        A single flat list containing the items returned for each letter.
        A letter whose task raises is reported on stdout and contributes
        nothing to the result.
    """
    all_words = []
    letters = string.ascii_lowercase
    with ThreadPoolExecutor(max_workers=10) as executor:
        # Submit everything up front so the requests still overlap; only the
        # collection below is sequential.
        futures = [executor.submit(get_words_starting_with, letter) for letter in letters]
        for future in futures:
            try:
                all_words.extend(future.result())
            except Exception as e:
                # Best effort: one failing letter must not abort the others.
                print(f"Error fetching words: {e}")
    return all_words

# Save data to CSV
def save_data_to_csv(data: list, filename: str):
    """Write ``data`` to ``filename`` as a two-column CSV file.

    Args:
        data: Rows to write; each row supplies the values for the ``Word``
            and ``Link`` columns, in that order.
        filename: Destination path passed to ``DataFrame.to_csv``.
    """
    header = ['Word', 'Link']
    # index=False keeps the DataFrame's row index out of the file.
    pd.DataFrame(data=data, columns=header).to_csv(filename, index=False)

# Main script
if __name__ == "__main__":
    # Scrape every letter's word list, then write the combined result to a
    # CSV file in the current working directory.
    all_words = get_all_words()
    save_data_to_csv(data=all_words, filename='teanglann_words.csv')