In [2]:
import os

from dotenv import load_dotenv
from etg_client import GuestRoom

load_dotenv()

# ETG API Credentials
ETG_KEY_ID = os.environ["ETG_KEY_ID"]
ETG_API_KEY = os.environ["ETG_API_KEY"]
ETG_REQUEST_TIMEOUT = 30.0

# Gemini API Key
GEMINI_API_KEY = os.environ["GEMINI_API_KEY"]
GEMINI_REQUEST_TIMEOUT = 30.0

# Search Parameters
CITY = "Дрезден"
REGION_ID: int | None = None

CHECKIN_DATE = "2026-01-15"
CHECKOUT_DATE = "2026-01-17"

CURRENCY = "EUR"
LANGUAGE = "ru"
RESIDENCY = "RU"

GUESTS: list[GuestRoom] = [{"adults": 2, "children": [2, 4]}]
LIMIT = 1000

# User preferences for AI
USER_PREFERENCES = "Для семьи из 4 человек (2 взрослых, 2 маленьких детей 2 и 4 года). 2 комнаты не менее 50 м2. Высокие оценки. В отзывах нет информации про отсутствие интернета, грязь, грубость от сотрудников"

# Reviews settings
REVIEWS_PER_SEGMENT = 30
REVIEWS_MAX_AGE_YEARS = 5
NEUTRAL_RATING_THRESHOLD = 7.0
NEGATIVE_RATING_THRESHOLD = 5.0

# Filters
MIN_PRICE: float | None = 120.0  # None = no minimum
MAX_PRICE: float | None = 400.0  # None = no maximum

# Output
ARTIFACTS_DIR = "artifacts"

In [3]:
from etg_client import (
    ETGClient,
    Hotel,
    HotelContent,
)

client = ETGClient(ETG_KEY_ID, ETG_API_KEY, timeout=ETG_REQUEST_TIMEOUT)

In [4]:
def find_region_id(client: ETGClient, city_name: str, language: str) -> int | None:
    """Find region ID for a city name."""
    print(f"Looking up region ID for '{city_name}'...")
    regions = client.suggest_region(city_name, language)

    if not regions:
        print(f"  No regions found for '{city_name}'")
        return None

    # Only accept City type
    for region in regions:
        if region["type"] == "City":
            region_id = region["id"]
            print(f"  Found: {region['name']} ({region.get('country_code', '')}), region_id={region_id}")
            return region_id

    # No city found - show available options
    print(f"  No city found. Available regions:")
    for r in regions[:5]:
        print(f"    - {r['name']} (type: {r['type']}, id: {r['id']})")

    return None

In [5]:
# Find region
region_id = REGION_ID or find_region_id(client, CITY, LANGUAGE)
if not region_id:
    raise ValueError(f"Could not find region for '{CITY}'")

print(f"\nSearching hotels in {CITY}...")
print(f"  Dates: {CHECKIN_DATE} to {CHECKOUT_DATE}")
print(f"  Currency: {CURRENCY}, Limit: {LIMIT}")

Looking up region ID for 'Дрезден'...
  [ETG] /api/b2b/v3/search/multicomplete/ - 200 in 1.12s
  Found: Дрезден, Саксония (DE), region_id=1058

Searching hotels in Дрезден...
  Dates: 2026-01-15 to 2026-01-17
  Currency: EUR, Limit: 1000


In [6]:
import pandas as pd

# Search hotels
results = client.search_hotels_by_region(
    region_id=region_id,
    checkin=CHECKIN_DATE,
    checkout=CHECKOUT_DATE,
    residency=RESIDENCY,
    guests=GUESTS,
    currency=CURRENCY,
    language=LANGUAGE,
    hotels_limit=LIMIT,
)

hotels: list[Hotel] = results.get("hotels", [])
total_hotels = results.get("total_hotels", len(hotels))

if not hotels:
    print("No hotels found for the given criteria.")
    df_hotels = pd.DataFrame()
else:
    print(f"Found {len(hotels)} hotels (total available: {total_hotels})\n")

    # Create DataFrame
    hotels_data = []
    for hotel in hotels:
        rates = hotel.get("rates", [])
        if rates:
            first_rate = rates[0]
            payment_types = first_rate.get("payment_options", {}).get("payment_types", [])
            price = float(payment_types[0].get("show_amount", 0)) if payment_types else None
            room_name = first_rate.get("room_name", "")
            meal = first_rate.get("meal", "")
        else:
            price = None
            room_name = ""
            meal = ""
        
        hotels_data.append({
            "hotel_id": hotel["id"],
            "hid": hotel["hid"],
            "price": price,
            "currency": CURRENCY,
            "room": room_name[:40] if room_name else "",
            "meal": meal,
        })

    df_hotels = pd.DataFrame(hotels_data)
    df_hotels = df_hotels.sort_values("price", ascending=True).reset_index(drop=True)
    df_hotels.index += 1  # Start from 1

    # Display settings
    pd.set_option("display.max_colwidth", 50)
    pd.set_option("display.max_rows", 100)

df_hotels

  [ETG] /api/b2b/v3/search/serp/region/ - 200 in 8.70s
Found 41 hotels (total available: 41)



Unnamed: 0,hotel_id,hid,price,currency,room,meal
1,ao_dresden_hauptbahnhof,7491207,153.0,EUR,Кровать в общем номере (питание для дете,nomeal
2,dresden_zentrum_hotel,10303459,193.0,EUR,Двухместный номер Standard (2 отдельные,breakfast
3,bb_hotel_dresden,7373556,200.0,EUR,Номер Standard (питание для детей не вкл,nomeal
4,beherbergungsschiff_dpoppelmann,7902507,208.34,EUR,Четырёхместный номер Standard c 1 комнат,nomeal
5,aparthotel_munzgasse,7584077,219.0,EUR,Четырёхместные семейные апартаменты с 2,nomeal
6,bei_anton_in_dresden,6931888,222.0,EUR,Люкс Цокольный этаж с 2 комнатами (питан,nomeal
7,ferienwohnung_villa_kadenstrasse,8475090,228.0,EUR,Апартаменты Standard с 2 комнатами (пита,nomeal
8,trobischhof,9701365,228.84,EUR,Номер Standard с 2 комнатами с видом на,nomeal
9,primeflats_apartments_innere_neustadt_bautzner...,8906457,241.0,EUR,Люкс с 2 комнатами (питание для детей не,nomeal
10,felix_suiten_im_lebendigen_haus_am_zwinger,8980841,252.0,EUR,Четырёхместная студия (питание для детей,nomeal


In [7]:
def get_hotel_price(hotel: Hotel) -> float | None:
    """Extract minimum price from hotel rates."""
    rates = hotel.get("rates", [])
    if rates:
        payment_types = rates[0].get("payment_options", {}).get("payment_types", [])
        if payment_types:
            try:
                return float(payment_types[0].get("show_amount", 0))
            except (ValueError, TypeError):
                return None
    return None


def filter_hotels_by_price(
    hotels: list[Hotel],
    min_price: float | None = None,
    max_price: float | None = None,
) -> list[Hotel]:
    """Filter hotels by price range."""
    if min_price is None and max_price is None:
        return hotels
    
    filtered = []
    for hotel in hotels:
        price = get_hotel_price(hotel)
        if price is None:
            continue
        if min_price is not None and price < min_price:
            continue
        if max_price is not None and price > max_price:
            continue
        filtered.append(hotel)
    
    return filtered


# Apply price filter
hotels = filter_hotels_by_price(hotels, MIN_PRICE, MAX_PRICE)
print(f"After price filter: {len(hotels)} hotels")

After price filter: 32 hotels


In [8]:
def fetch_hotel_content(
    client: ETGClient,
    hotel_hids: list[int],
    language: str,
) -> dict[int, HotelContent]:
    """Fetch content for hotels."""
    print("Fetching hotel content...")
    content_map: dict[int, HotelContent] = {}

    batch_size = 100
    for i in range(0, len(hotel_hids), batch_size):
        batch = hotel_hids[i : i + batch_size]
        print(f"  Fetching content for {len(batch)} hotels...")

        content = client.get_hotel_content(hids=batch, language=language)
        for hotel in content:
            content_map[hotel["hid"]] = hotel

    print(f"  Total: {len(content_map)} hotels content fetched")
    return content_map

In [9]:
hotel_hids = [h["hid"] for h in hotels]

content_map = fetch_hotel_content(client, hotel_hids, LANGUAGE)

Fetching hotel content...
  Fetching content for 32 hotels...
  [ETG] /api/content/v1/hotel_content_by_ids/ - 200 in 0.93s
  Total: 32 hotels content fetched


In [10]:
content_data = []
for hid, content in content_map.items():
    content_data.append({
        "hid": hid,
        "name": content.get("name", "")[:40],
        "stars": content.get("star_rating", 0),
        "kind": content.get("kind", ""),
        "address": content.get("address", "")[:50],
        "latitude": content.get("latitude"),
        "longitude": content.get("longitude"),
        "check_in": content.get("check_in_time", ""),
        "check_out": content.get("check_out_time", ""),
    })

df_content = pd.DataFrame(content_data)

# Merge with prices from df_hotels
df_full = df_hotels.merge(df_content, on="hid", how="left")
df_full = df_full[["hotel_id", "name", "stars", "kind", "price", "meal", "address"]]
df_full = df_full.sort_values("price", ascending=True).reset_index(drop=True)
df_full.index += 1

df_full

Unnamed: 0,hotel_id,name,stars,kind,price,meal,address
1,ao_dresden_hauptbahnhof,a&o Dresden Hauptbahnhof - Hostel,0.0,Hostel,153.0,nomeal,"Strehlener Strasse 10, Дрезден"
2,dresden_zentrum_hotel,Holiday Inn Express Dresden Zentrum by I,0.0,Hotel,193.0,breakfast,"Prager Str. 13, Дрезден"
3,bb_hotel_dresden,B&B HOTEL Dresden-Messe,0.0,BNB,200.0,nomeal,"Weißeritzstraße 10, Дрезден"
4,beherbergungsschiff_dpoppelmann,Schiffsherberge Pöppelmann,0.0,Hotel,208.34,nomeal,"Uferstraße 14, Дрезден"
5,aparthotel_munzgasse,Aparthotel Münzgasse,0.0,Apart-hotel,219.0,nomeal,"Muenzgasse 10, Дрезден"
6,bei_anton_in_dresden,Limehome Dresden Antonstraße,0.0,Apartment,222.0,nomeal,"Antonstraße 5a, Дрезден"
7,ferienwohnung_villa_kadenstrasse,Ferienwohnung Villa Kadenstraße,0.0,Villas_and_Bungalows,228.0,nomeal,"Kadenstr. 59, Дрезден"
8,trobischhof,Pension Trobischhof,0.0,Guesthouse,228.84,nomeal,"Alttrachau 41, Дрезден"
9,primeflats_apartments_innere_neustadt_bautzner...,Limehome Dresden Hoyerswerdaerstraße,0.0,Apartment,241.0,nomeal,"Hoyerswerdaer Straße 26, Дрезден"
10,felix_suiten_im_lebendigen_haus_am_zwinger,Felix Suiten am Zwinger,0.0,Apart-hotel,252.0,nomeal,"Kleine Brüdergasse 5, Дрезден"


In [11]:
from datetime import datetime, timedelta
from typing import TypedDict

from etg_client import ETGAPIError


class HotelReviewsFiltered(TypedDict):
    reviews: list
    total_reviews: int
    positive_count: int
    neutral_count: int
    negative_count: int


# Base languages for reviews
BASE_REVIEW_LANGUAGES = ["ru", "en"]


def fetch_reviews(
    client: ETGClient,
    hotel_hids: list[int],
    language: str,
) -> dict[int, list]:
    """Fetch reviews for hotels in multiple languages (zh, ru, en + user language)."""
    print("Fetching hotel reviews...")

    # Build language list: base languages + user language if not in base
    languages = BASE_REVIEW_LANGUAGES.copy()
    if language not in languages:
        languages.append(language)

    print(f"  Languages: {', '.join(languages)}")

    reviews_map: dict[int, list] = {}
    total_fetched = 0

    batch_size = 100
    for lang in languages:
        lang_count = 0
        try:
            for i in range(0, len(hotel_hids), batch_size):
                batch = hotel_hids[i : i + batch_size]
                if i == 0:
                    print(f"  [{lang}] Fetching reviews for {len(hotel_hids)} hotels...")

                hotels_reviews = client.get_hotel_reviews(hids=batch, language=lang)

                for hotel_data in hotels_reviews:
                    hid = hotel_data["hid"]
                    reviews = hotel_data["reviews"]

                    # Add language tag to each review
                    for r in reviews:
                        r["_lang"] = lang

                    if hid not in reviews_map:
                        reviews_map[hid] = []
                    reviews_map[hid].extend(reviews)
                    lang_count += len(reviews)

            total_fetched += lang_count
            print(f"  [{lang}] {lang_count} reviews")
        except ETGAPIError as e:
            print(f"  [{lang}] Skipped (API error: {str(e)[:50]}...)")
            continue

    print(f"  Total: {total_fetched} reviews fetched")
    return reviews_map


def filter_reviews(
    reviews_map: dict[int, list],
    max_age_years: int = REVIEWS_MAX_AGE_YEARS,
    reviews_per_segment: int = REVIEWS_PER_SEGMENT,
) -> dict[int, HotelReviewsFiltered]:
    """Filter reviews by date and segment by rating."""
    print("Filtering reviews...")

    cutoff_date = (datetime.now() - timedelta(days=max_age_years * 365)).isoformat()

    filtered_map: dict[int, HotelReviewsFiltered] = {}
    stats = {"positive": 0, "neutral": 0, "negative": 0}

    for hid, reviews in reviews_map.items():
        total_reviews = len(reviews)

        # Filter by date and rating > 0
        valid_reviews = [
            r for r in reviews
            if r["created"] >= cutoff_date and r["rating"] > 0
        ]

        # Split by segments
        positive = [r for r in valid_reviews if r["rating"] >= NEUTRAL_RATING_THRESHOLD]
        neutral = [r for r in valid_reviews if NEGATIVE_RATING_THRESHOLD <= r["rating"] < NEUTRAL_RATING_THRESHOLD]
        negative = [r for r in valid_reviews if r["rating"] < NEGATIVE_RATING_THRESHOLD]

        # Sort each segment by date (newest first) and limit
        positive.sort(key=lambda x: x["created"], reverse=True)
        neutral.sort(key=lambda x: x["created"], reverse=True)
        negative.sort(key=lambda x: x["created"], reverse=True)

        positive = positive[:reviews_per_segment]
        neutral = neutral[:reviews_per_segment]
        negative = negative[:reviews_per_segment]

        stats["positive"] += len(positive)
        stats["neutral"] += len(neutral)
        stats["negative"] += len(negative)

        # Combine back into single list
        combined_reviews = positive + neutral + negative

        filtered_map[hid] = {
            "reviews": combined_reviews,
            "total_reviews": total_reviews,
            "positive_count": len(positive),
            "neutral_count": len(neutral),
            "negative_count": len(negative),
        }

    print(f"  Filtered: +{stats['positive']} / ~{stats['neutral']} / -{stats['negative']} (last {max_age_years} years)")
    return filtered_map


raw_reviews = fetch_reviews(client, hotel_hids, LANGUAGE)
reviews_map = filter_reviews(raw_reviews)

Fetching hotel reviews...
  Languages: ru, en
  [ru] Fetching reviews for 32 hotels...
  [ETG] /api/content/v1/hotel_reviews_by_ids/ - 200 in 0.41s
  [ru] 102 reviews
  [en] Fetching reviews for 32 hotels...
  [ETG] /api/content/v1/hotel_reviews_by_ids/ - 200 in 0.38s
  [en] 15 reviews
  Total: 117 reviews fetched
Filtering reviews...
  Filtered: +15 / ~0 / -1 (last 5 years)


In [12]:
# Create DataFrame with reviews summary
reviews_data = []
for hid, data in reviews_map.items():
    hotel_id = next((h["id"] for h in hotels if h["hid"] == hid), "")
    reviews_data.append({
        "hotel_id": hotel_id,
        "hid": hid,
        "total": data["total_reviews"],
        "positive": data["positive_count"],
        "neutral": data["neutral_count"],
        "negative": data["negative_count"],
    })

df_reviews = pd.DataFrame(reviews_data)
df_reviews = df_reviews.sort_values("total", ascending=False).reset_index(drop=True)
df_reviews.index += 1

# Merge with hotel info
df_reviews_full = df_reviews.merge(
    df_content[["hid", "name", "stars"]],
    on="hid",
    how="left"
)
df_reviews_full = df_reviews_full[["hotel_id", "name", "stars", "total", "positive", "neutral", "negative"]]


def show_reviews(hotel_id: str, segment: str = "all", limit: int = 5) -> None:
    """
    Show reviews for a hotel.
    
    Args:
        hotel_id: Hotel ID (e.g. 'rosewood_hong_kong')
        segment: 'positive', 'negative', 'neutral', or 'all'
        limit: Number of reviews to show per segment
    """
    hid = next((h["hid"] for h in hotels if h["id"] == hotel_id), None)
    if not hid:
        print(f"Hotel '{hotel_id}' not found")
        return
    
    data = reviews_map.get(hid)
    if not data:
        print(f"No reviews for hotel '{hotel_id}'")
        return
    
    hotel_name = content_map.get(hid, {}).get("name", hotel_id)
    print(f"{'='*60}")
    print(f"{hotel_name}")
    print(f"Total: {data['total_reviews']} | +{data['positive_count']} / ~{data['neutral_count']} / -{data['negative_count']}")
    print(f"{'='*60}\n")
    
    reviews = data["reviews"]
    
    def print_segment(name: str, filter_fn, limit: int):
        segment_reviews = [r for r in reviews if filter_fn(r)][:limit]
        if not segment_reviews:
            return
        print(f"--- {name} ({len(segment_reviews)}) ---")
        for r in segment_reviews:
            rating = r["rating"]
            date = r["created"][:10]
            lang = r.get("_lang", "?")
            plus = r.get("review_plus", "").strip()
            minus = r.get("review_minus", "").strip()
            print(f"\n[{rating}/10] {date} [{lang}]")
            if plus:
                print(f"  + {plus[:300]}")
            if minus:
                print(f"  - {minus[:300]}")
        print()
    
    if segment in ("all", "positive"):
        print_segment("POSITIVE", lambda r: r["rating"] >= NEUTRAL_RATING_THRESHOLD, limit)
    if segment in ("all", "neutral"):
        print_segment("NEUTRAL", lambda r: NEGATIVE_RATING_THRESHOLD <= r["rating"] < NEUTRAL_RATING_THRESHOLD, limit)
    if segment in ("all", "negative"):
        print_segment("NEGATIVE", lambda r: r["rating"] < NEGATIVE_RATING_THRESHOLD, limit)


df_reviews_full

Unnamed: 0,hotel_id,name,stars,total,positive,neutral,negative
0,ibis_dresden_bastei_2,ibis Dresden Zentrum,3,35,3,0,0
1,star_inn_hotel_premium_dresden_im_haus_altmarkt,Star G Hotel Premium Dresden Altmarkt,3,17,3,0,0
2,ao_dresden_hauptbahnhof,a&o Dresden Hauptbahnhof - Hostel,0,12,2,0,1
3,nh_dresden,NH Dresden Neustadt,4,11,1,0,0
4,aparthotel_munzgasse,Aparthotel Münzgasse,0,8,0,0,0
5,achat_comfort_hotel_dresden,ACHAT Hotel Dresden Altstadt,3,8,2,0,0
6,maritim_hotel_dresden_2,Maritim Hotel & Internationales Congress,4,5,1,0,0
7,ramada_hotel_dresden,Ramada by Wyndham Dresden,4,4,1,0,0
8,innside_by_melia_dresden,INNSiDE by Meliá Dresden,4,3,1,0,0
9,amedia_plaza_dresden,"Amedia Plaza Dresden, Trademark Collecti",0,3,0,0,0


In [13]:
# Example: view reviews for a specific hotel
# show_reviews("four_seasons_st_petersburg")              # all segments, 5 per segment
# show_reviews("four_seasons_st_petersburg", "negative")  # only negative
# show_reviews("four_seasons_st_petersburg", "all", 10)   # all segments, 10 per segment

show_reviews("four_seasons_st_petersburg", limit=3)

Hotel 'four_seasons_st_petersburg' not found


In [14]:
class HotelCombined(Hotel, HotelContent):
    """Combined hotel data from search, content, and reviews."""
    reviews: HotelReviewsFiltered


def combine_hotel_data(
    hotels: list[Hotel],
    content_map: dict[int, HotelContent],
    reviews_map: dict[int, HotelReviewsFiltered],
) -> list[HotelCombined]:
    """Combine search results, content, and reviews."""
    print("Combining hotel data...")
    combined: list[HotelCombined] = []

    for hotel in hotels:
        hid = hotel["hid"]
        content = content_map.get(hid, {})
        reviews = reviews_map.get(hid, {
            "reviews": [],
            "total_reviews": 0,
            "positive_count": 0,
            "neutral_count": 0,
            "negative_count": 0,
        })

        combined.append({  # type: ignore[arg-type]
            **hotel,
            **content,
            "reviews": reviews,
        })

    print(f"  Combined {len(combined)} hotels")
    return combined

In [15]:
combined = combine_hotel_data(hotels, content_map, reviews_map)

Combining hotel data...
  Combined 32 hotels


In [16]:
import json

# Estimate token count for combined data
combined_json = json.dumps(combined, ensure_ascii=False)
char_count = len(combined_json)
# Rough estimate: ~4 chars per token for English, ~2-3 for Russian/mixed
estimated_tokens = char_count // 3

print(f"JSON size: {char_count:,} chars ({char_count / 1024 / 1024:.2f} MB)")
print(f"Estimated tokens: ~{estimated_tokens:,}")

JSON size: 3,375,073 chars (3.22 MB)
Estimated tokens: ~1,125,024


In [17]:
import asyncio
import json
import time
from typing import Any

from pydantic import BaseModel, ValidationError
from google.genai.types import HarmBlockThreshold, HarmCategory

from pydantic_ai import Agent
from pydantic_ai.models.google import GoogleModel, GoogleModelSettings


class HotelScore(BaseModel):
    hotel_id: str
    score: int
    top_reasons: list[str]
    score_penalties: list[str]


class ScoringResponse(BaseModel):
    results: list[HotelScore]


MODEL_NAME = "gemini-3-flash-preview"
settings = GoogleModelSettings(
    temperature=0.2,
)
model = GoogleModel(MODEL_NAME)
scoring_agent = Agent(
    model,
    output_type=ScoringResponse,
    model_settings=settings,
)


SCORING_PROMPT = """Score hotels for user preferences. Return JSON that matches the schema.

User: {user_preferences}

Hotels: {hotels_json}

Rules:
- Score 0-100.
- Return ALL hotels sorted by score desc.
- top_reasons: 2-3 short phrases (<=10 words each).
- score_penalties: up to 5 short facts, ordered by severity, explaining why the score is lower; empty list if none.
- Do not include markdown or extra text.
"""


def prepare_hotel_for_llm(hotel: dict[str, Any]) -> dict[str, Any]:
    """Prepare hotel data for LLM scoring with key information."""
    # Extract rates info (room types, meals, prices)
    rates_info = []
    for rate in hotel.get("rates", [])[:5]:  # Top 5 rates
        pt = rate.get("payment_options", {}).get("payment_types", [])
        price = pt[0].get("show_amount") if pt else None
        meal_data = rate.get("meal_data", {})

        rate_info = {
            "room": rate.get("room_name", "")[:60],
            "price": f"{price} {CURRENCY}" if price else None,
            "meal": meal_data.get("value", rate.get("meal", "")),
            "has_breakfast": meal_data.get("has_breakfast", False),
        }

        # Add cancellation info if available
        cancel = None
        for p in pt:
            cp = p.get("cancellation_penalties", {})
            if cp.get("free_cancellation_before"):
                cancel = cp["free_cancellation_before"][:10]
                break
        if cancel:
            rate_info["free_cancel_before"] = cancel

        rates_info.append(rate_info)

    # Extract amenities
    amenities = []
    for g in hotel.get("amenity_groups", []):
        for a in g.get("amenities", []):
            amenities.append(a.get("name", "") if isinstance(a, dict) else str(a))

    # Extract reviews
    reviews = []
    hr = hotel.get("reviews", {})
    for r in (hr.get("reviews", []) if isinstance(hr, dict) else [])[:10]:
        reviews.append({
            "id": r.get("id"),
            "rating": r.get("rating"),
            "plus": (r.get("review_plus") or "")[:150],
            "minus": (r.get("review_minus") or "")[:150],
        })

    return {
        "hotel_id": hotel.get("id", ""),
        "name": hotel.get("name", ""),
        "stars": hotel.get("star_rating", 0),
        "kind": hotel.get("kind", ""),
        "rates": rates_info,
        "amenities": amenities[:20],
        "reviews": reviews,
    }


async def score_hotels_batch(hotels_data: list[dict], user_preferences: str, retries: int = 3) -> ScoringResponse | None:
    prompt = SCORING_PROMPT.format(
        user_preferences=user_preferences,
        hotels_json=json.dumps(hotels_data, ensure_ascii=False),
    )

    for attempt in range(retries):
        try:
            result = await scoring_agent.run(prompt)
            return result.output
        except (ValidationError, ValueError) as e:
            if attempt < retries - 1:
                print(f"      Retry {attempt+1}: {str(e)[:60]}...")
                await asyncio.sleep(1)
                continue
            print(f"      Failed after {retries} attempts: {e}")
            return None
    return None


async def score_all_hotels(hotels: list[dict], user_preferences: str, batch_size: int = 25) -> list[dict]:
    print(f"Scoring {len(hotels)} hotels...")
    hotels_for_llm = [prepare_hotel_for_llm(h) for h in hotels]
    all_results = []
    total = (len(hotels_for_llm) + batch_size - 1) // batch_size

    for i in range(0, len(hotels_for_llm), batch_size):
        batch = hotels_for_llm[i:i + batch_size]
        n = i // batch_size + 1
        print(f"  Batch {n}/{total}: {len(batch)} hotels...")
        t = time.perf_counter()

        result = await score_hotels_batch(batch, user_preferences)

        if result:
            print(f"    Done in {time.perf_counter()-t:.1f}s, scored {len(result.results)} hotels")
            all_results.extend([h.model_dump() for h in result.results])
        else:
            print(f"    Batch failed, skipping {len(batch)} hotels")

    all_results.sort(key=lambda x: x.get("score", 0), reverse=True)
    print(f"\nScored {len(all_results)}/{len(hotels)} hotels")
    return all_results



In [18]:
# Score hotels
scoring_results = await score_all_hotels(combined, USER_PREFERENCES, batch_size=25)

Scoring 32 hotels...
  Batch 1/2: 25 hotels...
    Done in 43.2s, scored 24 hotels
  Batch 2/2: 7 hotels...
    Done in 40.9s, scored 7 hotels

Scored 31/32 hotels


In [19]:
# Country code to URL name mapping
COUNTRY_URL_MAP = {
    "DE": "germany", "RU": "russia", "FR": "france", "IT": "italy", "ES": "spain",
    "GB": "united_kingdom", "US": "usa", "CN": "china", "JP": "japan", "TH": "thailand",
    "AE": "uae", "TR": "turkey", "GR": "greece", "AT": "austria", "CH": "switzerland",
    "NL": "netherlands", "BE": "belgium", "PT": "portugal", "CZ": "czech_republic",
    "PL": "poland", "HU": "hungary", "SE": "sweden", "NO": "norway", "DK": "denmark",
    "FI": "finland", "IE": "ireland", "AU": "australia", "NZ": "new_zealand",
}


def get_ostrovok_url(hotel_id: str, hid: int, city: str, country_code: str) -> str:
    """Generate Ostrovok hotel URL."""
    country = COUNTRY_URL_MAP.get(country_code, country_code.lower())
    city_slug = city.lower().replace(" ", "_")
    return f"https://ostrovok.ru/hotel/{country}/{city_slug}/mid{hid}/{hotel_id}/"


def display_top_hotels(
    results: list[dict[str, Any]],
    hotels_data: list[dict[str, Any]],
    city: str,
    country_code: str = "DE",
    top_n: int = 10,
) -> pd.DataFrame:
    """Display top N scored hotels with details and Ostrovok links."""
    # Build hotel_id -> hid mapping
    hid_map = {h.get("id", ""): h.get("hid", 0) for h in hotels_data}
    name_map = {h.get("id", ""): h.get("name", h.get("id", "")) for h in hotels_data}
    
    print(f"\n{'='*80}")
    print(f"TOP {top_n} HOTELS")
    print(f"{'='*80}\n")

    data = []
    for i, hotel in enumerate(results[:top_n], 1):
        hotel_id = hotel.get("hotel_id", "")
        hid = hid_map.get(hotel_id, 0)
        score = hotel.get("score", 0)
        name = name_map.get(hotel_id, hotel_id)
        reasons = hotel.get("top_reasons", [])
        penalties = hotel.get("score_penalties", [])
        
        url = get_ostrovok_url(hotel_id, hid, city, country_code) if hid else ""

        # Print detailed info
        print(f"{i}. {name}")
        print(f"   Score: {score}/100")
        if reasons:
            print(f"   + {'; '.join(reasons[:3])}")
        if penalties:
            print(f"   - {'; '.join(penalties[:5])}")
        print(f"   {url}")
        print()
        
        # Collect for DataFrame
        data.append({
            "name": name[:40],
            "score": score,
            "reasons": "; ".join(reasons[:2])[:80] if reasons else "",
            "penalties": "; ".join(penalties[:5])[:120] if penalties else "",
            "url": url,
        })
    
    df = pd.DataFrame(data)
    df.index = range(1, len(df) + 1)
    total_found = len(hotels_data)
    selected = min(top_n, len(results))
    print(f"Всего найдено {total_found} отелей на эти даты. ")
    print(f"Подобраны лучшие {selected} по вашим критериям.")
    return df


# Get country code from first hotel's region
first_hotel = combined[0] if combined else {}
region = first_hotel.get("region", {})
country_code = region.get("country_code", "DE")

pd.set_option("display.max_colwidth", 100)
display_top_hotels(scoring_results, combined, CITY, country_code, top_n=10)


TOP 10 HOTELS

1. Aparthotel am Schloss
   Score: 95/100
   + Предлагает апартаменты с 2 отдельными комнатами; Идеально подходит для размещения семьи из 4 человек; Наличие кухни и балкона в номере
   https://ostrovok.ru/hotel/germany/дрезден/mid7767302/aparthotel_am_schloss/

2. Aparthotel Münzgasse
   Score: 95/100
   + Наличие семейных апартаментов с 2 комнатами; Просторное размещение для взрослых и детей; Центральное расположение и наличие кухни
   https://ostrovok.ru/hotel/germany/дрезден/mid7584077/aparthotel_munzgasse/

3. Aparthotel Neumarkt
   Score: 92/100
   + Исключительно высокие оценки (10/10); В отзывах отмечены очень большие номера; Превосходное расположение в центре города
   - Количество комнат не указано явно в описании тарифа
   https://ostrovok.ru/hotel/germany/дрезден/mid7487828/aparthotel_neumarkt/

4. Felix Suiten am Zwinger
   Score: 92/100
   + Апартаменты с 2 комнатами идеально подходят для семьи; Наличие лифта и бесплатного Wi-Fi
   - Точная площадь номера н

Unnamed: 0,name,score,reasons,penalties,url
1,Aparthotel am Schloss,95,Предлагает апартаменты с 2 отдельными комнатами; Идеально подходит для размещени,,https://ostrovok.ru/hotel/germany/дрезден/mid7767302/aparthotel_am_schloss/
2,Aparthotel Münzgasse,95,Наличие семейных апартаментов с 2 комнатами; Просторное размещение для взрослых,,https://ostrovok.ru/hotel/germany/дрезден/mid7584077/aparthotel_munzgasse/
3,Aparthotel Neumarkt,92,Исключительно высокие оценки (10/10); В отзывах отмечены очень большие номера,Количество комнат не указано явно в описании тарифа,https://ostrovok.ru/hotel/germany/дрезден/mid7487828/aparthotel_neumarkt/
4,Felix Suiten am Zwinger,92,Апартаменты с 2 комнатами идеально подходят для семьи; Наличие лифта и бесплатно,Точная площадь номера не указана,https://ostrovok.ru/hotel/germany/дрезден/mid8980841/felix_suiten_im_lebendigen_haus_am_zwinger/
5,Holiday Inn Express Dresden Zentrum by I,90,Наличие двухкомнатного люкса; Завтрак включен в стоимость проживания,Точная площадь номера не указана,https://ostrovok.ru/hotel/germany/дрезден/mid10303459/dresden_zentrum_hotel/
6,Schiffsherberge Pöppelmann,88,Люкс с 2 отдельными комнатами; Наличие стиральной машины и кухни,Отсутствие лифта в отеле; Расположение на цокольном этаже,https://ostrovok.ru/hotel/germany/дрезден/mid7902507/beherbergungsschiff_dpoppelmann/
7,Appartements Rehn,88,Три комнаты обеспечивают дополнительное пространство для семьи; Наличие кухни и,Отсутствие лифта в здании; Точная площадь номера не указана,https://ostrovok.ru/hotel/germany/дрезден/mid7828293/apartements_rehn/
8,Aparthotel Altes Dresden,82,Формат апарт-отеля удобен для семей с детьми; Наличие кухни и стиральной машины,Нет точной информации о количестве комнат и площади,https://ostrovok.ru/hotel/germany/дрезден/mid7443151/aparthotel_altes_dresden/
9,Limehome Dresden Hoyerswerdaerstraße,82,Двухкомнатный люкс для четырех человек; Бесплатный Wi-Fi и бесконтактное заселен,Отсутствие лифта в здании; Точная площадь номера не указана,https://ostrovok.ru/hotel/germany/дрезден/mid8906457/primeflats_apartments_innere_neustadt_bautz...
10,Ferienwohnung Villa Kadenstraße,80,Двухкомнатные апартаменты с бесплатным Wi-Fi; Наличие парковки и сада,Отсутствие лифта в здании; Точная площадь номера не указана,https://ostrovok.ru/hotel/germany/дрезден/mid8475090/ferienwohnung_villa_kadenstrasse/
