In [1]:
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd
import warnings
from urllib.parse import urljoin
warnings.filterwarnings("ignore")

In [76]:
# Substrings of a clue-page href that identify the cryptic crossword series we keep.
_CRYPTIC_SOURCES = (
    "The-Times---Cryptic",
    "The-Telegraph---Cryptic",
    "The-Telegraph---Toughie",
    "The-Guardian---Cryptic-crossword",
)


def _get_soup(url, timeout):
    """Download `url` and return the parsed page; raises on network or HTTP errors."""
    # requests applies no timeout unless one is given, so always pass it explicitly.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return BeautifulSoup(response.content, 'html.parser')


def _last_page(soup):
    """Return the highest page number in the second `.pagination` element (at least 1)."""
    paginations = soup.select(".pagination")
    if len(paginations) < 2:
        # No second pager on the page: treat the result as a single page
        # instead of failing with IndexError and skipping the word.
        return 1
    labels = (link.get_text(strip=True) for link in paginations[1].find_all("a"))
    # isdecimal() (unlike isdigit()) only accepts text that int() can parse.
    numbers = [int(label) for label in labels if label.isdecimal()]
    return max([1, *numbers])


def _print_matching_clues(soup, base_url, word, target, timeout):
    """Follow the cryptic-series links on one result page and print clues answering `target`."""
    header = soup.find("th", string="Last Seen in these Crosswords & Puzzles")
    if not header:
        return
    table = header.find_parent("table")
    if table is None:
        return

    for a_tag in table.find_all("a", href=True):
        href = a_tag["href"]
        if not any(source in href for source in _CRYPTIC_SOURCES):
            continue
        full_url = urljoin(base_url, href)

        # A failure on one clue page is reported and must not stop the remaining links.
        try:
            clue_soup = _get_soup(full_url, timeout)
            for row in clue_soup.find_all("tr"):
                columns = row.find_all("td")
                if len(columns) == 2:
                    clue = columns[0].get_text(strip=True)
                    answer = columns[1].get_text(strip=True)
                    if answer.lower() == target:
                        print(f"{clue} ({len(word)})")
        except Exception as e:
            print(f"Error fetching clue for '{word}' at {full_url}: {e}")


def fetch_clues(words, timeout=10):
    """Print cryptic crossword clues from anagrammer.com for each answer word.

    For every word a header is printed, then each result page for that word
    is scanned for the "Last Seen in these Crosswords & Puzzles" table. Links
    in that table whose href contains one of `_CRYPTIC_SOURCES` are followed,
    and every two-column row on the linked page whose second column equals the
    word (case-insensitively) is printed as ``<clue> (<word length>)``.

    Errors are reported with `print` and never raised: a failure on the first
    page skips the word; a failure on a later page or a clue page skips only
    that page.

    Args:
        words: Iterable of answer words. A single string is treated as one word.
        timeout: Seconds passed to every `requests.get` call.

    Returns:
        None. All results are written to stdout.
    """
    if isinstance(words, str):
        # Iterating a bare string would look up each character separately.
        words = [words]

    for word in words:
        print(f"\nWord: {word}\n" + "=" * (6 + len(word)))

        query = word.strip()
        # Answers are compared case-insensitively, so normalise the word too.
        target = query.lower()
        base_url = f"https://www.anagrammer.com/crossword/answer/{query}/1/1/1/1/"

        try:
            first_soup = _get_soup(f"{base_url}1", timeout)
            last_page = _last_page(first_soup)
        except Exception as e:
            print(f"Error fetching pagination for '{word}': {e}")
            continue

        # Pages are 1-based and inclusive, so a single-page result still scans page 1.
        for page_num in range(1, last_page + 1):
            try:
                # Page 1 was already downloaded to read the pager; reuse it.
                soup = first_soup if page_num == 1 else _get_soup(f"{base_url}{page_num}", timeout)
                _print_matching_clues(soup, base_url, query, target, timeout)
            except Exception as e:
                print(f"Error fetching page {page_num} for '{word}': {e}")

In [77]:
# Demo run: look up cryptic clues for three four-letter answers.
words = "riel rued reek".split()
fetch_clues(words)


Word: riel
Oxford college without love or money (4)

Word: rued
Deplored coarse speaking (4)
Regretted being churlish, by the sound of it? (4)
Was unhappy about drive round much of Europe, heading west (4)
Regretted impolite sound (4)
Felt sorry about making vulgar broadcast (4)
Felt bad about heartless objection, in retrospect (4)
Regretted making left turn to enter (4)
Regretted vulgar sounds (4)
Regretted being coarse, it's said (4)
Here regretted having sounded cross (4)
Regretted having reflected grandeur of lost ancestor (4)
Was sorry for sounding discourteous (4)
Regretted sounding offensive (4)
Regretted being impolite to the audience (4)
Regretted sounding discourteous (4)
Herb's daughter was remorseful (4)
Was sorry when Herb died (4)
Regretted being impolite in speech (4)
Felt remorse for being ill-mannered in speech (4)
Regretted being offensive in speech (4)
Regretted being offensive in speech (4)
Regretted being offensive in speech (4)
Regretted being offensive in speech

In [78]:
# Demo run: a single answer still has to be wrapped in a list.
words = [
    "reef",
]
fetch_clues(words)


Word: reef
Person recruited starts to examine food chain in water (4)
Bank's not entirely free finance (4)
One can get free from this knot (4)
Admire efforts to ring bank (4)
Bank right to reject payment (4)
Hazard at sea -- do something to reduce canvas (4)
Official covering English bank (4)
The knot sailors try to avoid (4)
A knot that seamen consider dangerous (4)
Judge imprisoning head of European bank (4)
Knot, turnstone etc may feed here (4)
No charge first to last, one gathered on yacht (4)
Rocks about going over eastern France (4)
Rocky part of sail (4)
It's not too hard to get free from this knot (4)
Bank where judge keeps euros at first (4)
Such a knot not tied — fine to take the other end (4)
Bank not imposing a charge first to last (4)
River charge raised means potential danger for ships (4)
Part of sail that could spell danger to sailor (4)
Bank in free fall (4)
Knot sailors try to avoid (4)
Recalling Edward Elgar for starters: "Where Corals Lie"? (4)
Folding part of sail