# Test Booking Score Retrieval

In [62]:
import os
import re
import json
import random
from time import sleep
from datetime import datetime

import pandas as pd
import requests
from bs4 import BeautifulSoup


In [63]:
# Path of the CSV that stores the score history (one row per hotel).
CSV_FILE = "booking_scores.csv"

In [64]:
# Hotels to track: display name -> slug of the listing page.
# NOTE(review): a couple of slugs do not obviously match the display name
# (e.g. "NAU Salgados Dunas Suites" -> "westin-salgados-beach-resort-algarve");
# presumably these are renamed properties — confirm they point at the right listing.
_HOTEL_SLUGS = {
    "Ananea Castelo Suites Hotel": "castelo-suites",
    "PortoBay Falésia": "porto-bay-falesia",
    "Regency Salgados Hotel & Spa": "regency-salgados-amp-spa",
    "NAU São Rafael Atlântico": "sao-rafael-suites-all-inclusive",
    "NAU Salgados Dunas Suites": "westin-salgados-beach-resort-algarve",
    "Vidamar Resort Hotel Algarve": "vidamar-algarve-hotel",
}

# Full page URL per hotel, in the same order as the slugs above.
URLS = {
    name: f"https://www.booking.com/hotel/pt/{slug}.en-gb.html"
    for name, slug in _HOTEL_SLUGS.items()
}

In [65]:
# Browser-like request headers sent with every page fetch.
_USER_AGENT = " ".join(
    [
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
        "AppleWebKit/537.36 (KHTML, like Gecko)",
        "Chrome/124.0.0.0 Safari/537.36",
    ]
)

UA_HEADERS = {
    "User-Agent": _USER_AGENT,
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.8",
    "Referer": "https://www.booking.com/",
}

In [90]:
# Matches an integer or a decimal number written with either "." or "," as the
# decimal mark (e.g. "8", "8.7", "8,7").
# NOTE(review): not referenced by any other cell visible in this notebook.
NUM_RE = re.compile(
    r"\d+(?:[.,]\d+)?"
)


In [78]:
def _rating_from_jsonld(node):
    """Return the first usable ``aggregateRating.ratingValue`` in a JSON-LD node, or None.

    Handles a single object, a top-level list of objects, and objects nested
    under an ``@graph`` key. Falsy rating values (missing, empty, 0) and values
    that cannot be parsed as a number are skipped.
    """
    if isinstance(node, list):
        for item in node:
            found = _rating_from_jsonld(item)
            if found is not None:
                return found
        return None
    if not isinstance(node, dict):
        return None

    aggregate = node.get("aggregateRating")
    if isinstance(aggregate, dict):
        raw = aggregate.get("ratingValue")
        if raw:
            try:
                # Accept a decimal comma ("8,7") as well as a decimal point.
                return float(str(raw).replace(",", "."))
            except ValueError:
                pass  # malformed value: keep looking in the rest of the payload

    # Several entities can be bundled under "@graph"; a missing key yields None.
    return _rating_from_jsonld(node.get("@graph"))


def fetch_booking_rating(url, session, retries=2):
    """Fetch a hotel page and return its aggregate rating as a float.

    The rating is read from the page's ``<script type="application/ld+json">``
    blocks (``aggregateRating.ratingValue``).

    Parameters
    ----------
    url : str
        Page to download.
    session : object with a ``get(url, headers=..., timeout=...)`` method
        Typically a ``requests.Session``; reused across calls.
    retries : int, default 2
        Number of additional attempts after the first one.

    Returns
    -------
    float or None
        The rating, or None when no attempt produced one. Failures are
        reported with a ``[warn]`` line and never raised to the caller.
    """
    for attempt in range(retries + 1):
        try:
            r = session.get(url, headers=UA_HEADERS, timeout=20)
            r.raise_for_status()
            soup = BeautifulSoup(r.text, "html.parser")

            # look for any <script type="application/ld+json"> with aggregateRating
            for tag in soup.find_all("script", type="application/ld+json"):
                try:
                    data = json.loads(tag.string or "")
                except ValueError:
                    # Empty or invalid JSON in this tag; try the next one.
                    continue
                rating = _rating_from_jsonld(data)
                if rating is not None:
                    return rating
        except Exception as e:
            # Best-effort scraper: report the problem and fall through to a retry.
            print(f"[warn] {url} attempt {attempt+1} failed: {e}")
        # Back off before the next attempt only; sleeping after the final
        # attempt would just delay returning None.
        if attempt < retries:
            sleep(random.uniform(2.0, 4.0) * (attempt + 1))
    return None


In [79]:
# ---- 3) Load or create CSV ----
# The file is always stored with ";" as the separator. A freshly created file
# must use it too, otherwise a run interrupted before the final save leaves a
# ","-separated file that the read below cannot parse (no "Hotel" column).
if not os.path.exists(CSV_FILE):
    print(f"Creating {CSV_FILE} …")
    df = pd.DataFrame(index=list(URLS.keys()))
    df.index.name = "Hotel"
    df["Average Score"] = pd.NA
    df.to_csv(CSV_FILE, sep=";")
else:
    df = pd.read_csv(CSV_FILE, sep=";", index_col="Hotel")

In [80]:
# Make sure every tracked hotel has a row (empty for now) ...
missing_hotels = [name for name in URLS if name not in df.index]
for name in missing_hotels:
    df.loc[name] = pd.Series(dtype="float64")

# ... and that the summary column exists even in files written without it.
if "Average Score" not in df:
    df["Average Score"] = pd.NA


In [81]:
# ---- 4) Fetch today’s scores ----
# Column label for this run, formatted as YYYY-MM-DD (date only, no time part).
today_col = f"{datetime.now():%Y-%m-%d}"

# One shared HTTP session for all hotel pages.
session = requests.Session()

In [82]:
# Scrape each hotel in turn, pausing a random 2.5-5 s between pages.
new_scores = {}
for hotel, url in URLS.items():
    print(f"→ {hotel}")
    new_scores[hotel] = score = fetch_booking_rating(url, session)
    print("   (no score)" if score is None else f"   {score}/10")
    sleep(random.uniform(2.5, 5.0))

→ Ananea Castelo Suites Hotel
   8.8/10
→ PortoBay Falésia
   8.7/10
→ Regency Salgados Hotel & Spa
   8.8/10
→ NAU São Rafael Atlântico
   8.0/10
→ NAU Salgados Dunas Suites
   8.2/10
→ Vidamar Resort Hotel Algarve
   8.1/10


In [87]:
# Store this run's scores under today's date; assignment aligns on hotel name.
df[today_col] = pd.Series(new_scores)

# Columns whose label is exactly a YYYY-MM-DD date hold the per-day scores.
date_cols = list(
    filter(lambda label: re.fullmatch(r"\d{4}-\d{2}-\d{2}", str(label)), df.columns)
)

# Row-wise mean over the dated columns (non-numeric columns are left out).
if len(date_cols) > 0:
    df["Average Score"] = df[date_cols].mean(axis=1, numeric_only=True)

In [89]:
# Persist the table (";"-separated, index column labelled "Hotel") ...
df.to_csv(CSV_FILE, sep=";", index_label="Hotel")
print(f"\nSaved {CSV_FILE}. Added/updated column: {today_col}")

# ... then show the hotels that received a score in this run.
todays_scores = df.loc[:, [today_col]].dropna()
print(todays_scores)


Saved booking_scores.csv. Added/updated column: 2025-09-20
                              2025-09-20
Hotel                                   
Ananea Castelo Suites Hotel          8.8
PortoBay Falésia                     8.7
Regency Salgados Hotel & Spa         8.8
NAU São Rafael Atlântico             8.0
NAU Salgados Dunas Suites            8.2
Vidamar Resort Hotel Algarve         8.1
