In [1]:
# File name for the HolidayCheck scores CSV.
# NOTE(review): not referenced in the cells visible here — presumably the
# output path for the scraped scores; confirm against the rest of the notebook.
CSV_FILE = "holidaycheck_scores.csv"

In [2]:
# Maps a hotel display name to its hotel page URL on holidaycheck.de.
# NOTE(review): for the two NAU entries the URL slug names a different property
# than the key ("nau-sao-rafael-suites-all-inclusive" for "NAU São Rafael Atlântico",
# "nau-salgados-vila-das-lagoas-apartment" for "NAU Salgados Dunas Suites") —
# confirm these links point at the intended hotels.
URLS = {
    "Ananea Castelo Suites Hotel": "https://www.holidaycheck.de/hi/ananea-castelo-suites-algarve/069563af-47db-44a3-bdb1-3441ae3a2ac4",
    "PortoBay Falésia": "https://www.holidaycheck.de/hi/portobay-falesia/44a47534-85c4-3114-a6da-472d82e16e29",
    "Regency Salgados Hotel & Spa": "https://www.holidaycheck.de/hi/regency-salgados-hotel-spa/b0478236-7644-46b4-8fde-bd6cb1832cf8",
    "NAU São Rafael Atlântico": "https://www.holidaycheck.de/hi/nau-sao-rafael-suites-all-inclusive/739da55a-710e-3514-83f6-8e01149442a5",
    "NAU Salgados Dunas Suites": "https://www.holidaycheck.de/hi/nau-salgados-vila-das-lagoas-apartment/602ac74a-9c28-3d74-8dd9-37c47c53cd4a",
    "Vidamar Resort Hotel Algarve": "https://www.holidaycheck.de/hi/vidamar-hotel-resort-algarve/e641bc1e-59d5-37a0-832e-90e6bbb51977",
}

In [3]:
import re
import requests
from bs4 import BeautifulSoup

def get_holidaycheck_score(url: str, timeout: int = 15) -> float | None:
    """
    Fetch overall HolidayCheck score (0–6 scale) from a hotel page.

    Returns
    -------
    float or None
        Score if found, else None.
    """
    if not url:
        return None

    resp = requests.get(url, timeout=timeout)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")

    # You need to inspect the page once and adjust this selector.
    # Typical patterns include something like "4,5 / 6".
    text = soup.get_text(" ", strip=True)

    # Find patterns like "4,5 / 6" or "4.5 / 6"
    m = re.search(r"(\d+[.,]\d)\s*/\s*6", text)
    if not m:
        return None

    raw = m.group(1).replace(",", ".")
    try:
        return float(raw)
    except ValueError:
        return None


In [4]:
# Try the scraper on a single hotel; as the cell's last expression, the
# returned score is displayed as the cell output.
get_holidaycheck_score(URLS["Ananea Castelo Suites Hotel"])

5.5