In [1]:
import os

from dotenv import load_dotenv
from etg import GuestRoom

load_dotenv()

# ETG API Credentials
ETG_KEY_ID = os.environ["ETG_KEY_ID"]
ETG_API_KEY = os.environ["ETG_API_KEY"]
ETG_REQUEST_TIMEOUT = 30.0

# Search Parameters
CITY = "–ú–æ—Å–∫–≤–∞"

CHECKIN_DATE = "2026-01-30"
CHECKOUT_DATE = "2026-02-04"

CURRENCY = "RUB"
LANGUAGE = "ru"
RESIDENCY = "RU"

GUESTS: list[GuestRoom] = [{"adults": 2, "children": [4, 2]}]
LIMIT = 1000

# User preferences for AI
USER_PREFERENCES = "–û–±—è–∑–∞—Ç–µ–ª—å–Ω–æ –¥–≤–µ –∫–æ–º–Ω–∞—Ç—ã –∏ –¥–≤–µ –∫—Ä–æ–≤–∞—Ç–∏. –•–æ—Ä–æ—à–∏–µ –æ—Ç–∑—ã–≤—ã. –ß–∏—Å—Ç–æ—Ç–∞"

# Filters
MIN_PRICE: float | None = 3000.0  # None = no minimum
MAX_PRICE: float | None = 20000.0  # None = no maximum

In [2]:
from etg import ETGClient, GuestRoom, Hotel, HotelContent
from utils import ostrovok_url

client = ETGClient(ETG_KEY_ID, ETG_API_KEY, timeout=ETG_REQUEST_TIMEOUT)

In [3]:
async def find_region_id(client: ETGClient, city_name: str, language: str) -> int | None:
    """Find region ID for a city name."""
    print(f"Looking up region ID for '{city_name}'...")
    regions = await client.suggest_region(city_name, language)

    if not regions:
        print(f"  No regions found for '{city_name}'")
        return None

    # Only accept City type
    for region in regions:
        if region["type"] == "City":
            region_id = region["id"]
            print(f"  Found: {region['name']} ({region.get('country_code', '')}), region_id={region_id}")
            return region_id

    # No city found - show available options
    print(f"  No city found. Available regions:")
    for r in regions[:5]:
        print(f"    - {r['name']} (type: {r['type']}, id: {r['id']})")

    return None

In [4]:
# Find region by city name.
# ETG API requires region_id for hotel search, so we first
# lookup the region ID via suggest_region by city name.
region_id = await find_region_id(client, CITY, LANGUAGE)
if not region_id:
    raise ValueError(f"Could not find region for '{CITY}'")

print(f"\nSearching hotels in {CITY}...")
print(f"  Dates: {CHECKIN_DATE} to {CHECKOUT_DATE}")
print(f"  Currency: {CURRENCY}, Limit: {LIMIT}")

Looking up region ID for '–ú–æ—Å–∫–≤–∞'...
  Found: –ú–æ—Å–∫–≤–∞ (RU), region_id=2395

Searching hotels in –ú–æ—Å–∫–≤–∞...
  Dates: 2026-01-30 to 2026-02-04
  Currency: RUB, Limit: 1000


In [5]:
import pandas as pd

from services import search_hotels

# Search available hotels in the region with given parameters.
# Filters by price range if MIN_PRICE/MAX_PRICE are set.
# Returns short hotel info: id, hid, and rates (room name, price, meal).
# Full content (name, address, amenities) is fetched separately via hotel content API.
search_result = await search_hotels(
    client=client,
    region_id=region_id,
    checkin=CHECKIN_DATE,
    checkout=CHECKOUT_DATE,
    residency=RESIDENCY,
    guests=GUESTS,
    currency=CURRENCY,
    language=LANGUAGE,
    hotels_limit=LIMIT,
    min_price=MIN_PRICE,
    max_price=MAX_PRICE,
)

hotels = search_result["hotels"]
total_available = search_result["total_available"]
total_after_filter = search_result["total_after_filter"]

In [6]:
if not hotels:
    print("No hotels found for the given criteria.")
    df_hotels = pd.DataFrame()
else:
    print(f"Found {total_after_filter} hotels after price filter (total available: {total_available})\n")

    # Create DataFrame
    hotels_data = []
    for hotel in hotels:
        rates = hotel.get("rates", [])
        if rates:
            first_rate = rates[0]
            payment_types = first_rate.get("payment_options", {}).get("payment_types", [])
            price = float(payment_types[0].get("show_amount", 0)) if payment_types else None
            room_name = first_rate.get("room_name", "")
            meal = first_rate.get("meal", "")
        else:
            price = None
            room_name = ""
            meal = ""

        hotels_data.append({
            "hotel_id": hotel["id"],
            "hid": hotel["hid"],
            "price": price,
            "currency": CURRENCY,
            "room": room_name[:40] if room_name else "",
            "meal": meal,
        })

    df_hotels = pd.DataFrame(hotels_data)
    df_hotels = df_hotels.sort_values("price", ascending=True).reset_index(drop=True)
    df_hotels.index += 1  # Start from 1

    # Display settings
    pd.set_option("display.max_colwidth", 50)
    pd.set_option("display.max_rows", 100)

df_hotels

Found 202 hotels after price filter (total available: 227)



Unnamed: 0,hotel_id,hid,price,currency,room,meal
1,vozle_parka_botanicheskiy_sad_flat,11282356,16671.0,RUB,–ß–µ—Ç—ã—Ä—ë—Ö–º–µ—Å—Ç–Ω—ã–µ –∞–ø–∞—Ä—Ç–∞–º–µ–Ω—Ç—ã Standard —Å –±–∞,nomeal
2,minihotel_ladomir_na_yauze,8662675,16729.0,RUB,–î–≤—É—Ö–º–µ—Å—Ç–Ω—ã–π –Ω–æ–º–µ—Ä –°–µ–º–µ–π–Ω—ã–π –£–ª—É—á—à–µ–Ω–Ω—ã–π 2,nomeal
3,zvezda_hotel_6,10437016,17500.0,RUB,–¢—Ä—ë—Ö–º–µ—Å—Ç–Ω—ã–π –Ω–æ–º–µ—Ä Comfort (–ø–∏—Ç–∞–Ω–∏–µ –¥–ª—è –¥,nomeal
4,knokey_tekstilschiki_apartments,13313968,17687.0,RUB,–ê–ø–∞—Ä—Ç–∞–º–µ–Ω—Ç—ã (–ø–∏—Ç–∞–Ω–∏–µ –¥–ª—è –¥–µ—Ç–µ–π –Ω–µ –≤–∫–ª—é—á–µ,nomeal
5,ladomir_na_zelevom_minihotel,9024162,17908.0,RUB,–õ—é–∫—Å Superior (–ø–∏—Ç–∞–Ω–∏–µ –¥–ª—è –¥–µ—Ç–µ–π –Ω–µ –≤–∫–ª—é,nomeal
...,...,...,...,...,...,...
198,peter_1,7467388,97500.0,RUB,–î–≤—É—Ö–º–µ—Å—Ç–Ω—ã–π –Ω–æ–º–µ—Ä Standard (2 –æ—Ç–¥–µ–ª—å–Ω—ã–µ,nomeal
199,otel_radisson_slavyanskaya_and_business_centre,7467357,97600.0,RUB,–î–≤—É—Ö–º–µ—Å—Ç–Ω—ã–π –ª—é–∫—Å —Å 2 –∫–æ–º–Ω–∞—Ç–∞–º–∏ (–¥–≤—É—Å–ø–∞–ª—å,nomeal
200,doubletree_by_hilton_moscow_vnukovo_airport,8848335,97750.0,RUB,–î–≤—É—Ö–º–µ—Å—Ç–Ω—ã–π –Ω–æ–º–µ—Ä Deluxe —Å –∫—Ä–∞—Å–∏–≤—ã–º –≤–∏–¥–æ,nomeal
201,grand_revival_hotel,10004948,98647.0,RUB,–ß–µ—Ç—ã—Ä—ë—Ö–º–µ—Å—Ç–Ω—ã–π –Ω–æ–º–µ—Ä K–æ–º—Ñ–æ—Ä—Ç —Å –¥–∂–∞–∫—É–∑–∏ (,nomeal


In [7]:
from services import batch_get_content

hotel_hids = [h["hid"] for h in hotels]

print(f"[batch_get_content_start] –ó–∞–≥—Ä—É–∑–∫–∞ –∫–æ–Ω—Ç–µ–Ω—Ç–∞ –¥–ª—è {len(hotel_hids)} –æ—Ç–µ–ª–µ–π...")
content_result = await batch_get_content(client, hotel_hids, LANGUAGE)
content_map = content_result["content"]
print(
    f"[batch_get_content_done] –ó–∞–≥—Ä—É–∂–µ–Ω –∫–æ–Ω—Ç–µ–Ω—Ç –¥–ª—è {content_result['total_loaded']} "
    f"–∏–∑ {content_result['total_requested']} –æ—Ç–µ–ª–µ–π ({content_result['total_batches']} –±–∞—Ç—á–µ–π)"
)

[batch_get_content_start] –ó–∞–≥—Ä—É–∑–∫–∞ –∫–æ–Ω—Ç–µ–Ω—Ç–∞ –¥–ª—è 202 –æ—Ç–µ–ª–µ–π...
[batch_get_content_done] –ó–∞–≥—Ä—É–∂–µ–Ω –∫–æ–Ω—Ç–µ–Ω—Ç –¥–ª—è 202 –∏–∑ 202 –æ—Ç–µ–ª–µ–π (3 –±–∞—Ç—á–µ–π)


In [8]:
content_data = []
for hid, content in content_map.items():
    content_data.append({
        "hid": hid,
        "name": content.get("name", "")[:40],
        "stars": content.get("star_rating", 0),
        "kind": content.get("kind", ""),
        "address": content.get("address", "")[:50],
        "latitude": content.get("latitude"),
        "longitude": content.get("longitude"),
        "check_in": content.get("check_in_time", ""),
        "check_out": content.get("check_out_time", ""),
    })

df_content = pd.DataFrame(content_data)

# Merge with prices from df_hotels
df_full = df_hotels.merge(df_content, on="hid", how="left")
df_full = df_full[["hotel_id", "name", "stars", "kind", "price", "meal", "address"]]
df_full = df_full.sort_values("price", ascending=True).reset_index(drop=True)
df_full.index += 1

df_full

Unnamed: 0,hotel_id,name,stars,kind,price,meal,address
1,vozle_parka_botanicheskiy_sad_flat,–ö–≤–∞—Ä—Ç–∏—Ä–∞ –≤–æ–∑–ª–µ –ø–∞—Ä–∫–∞ –ë–æ—Ç–∞–Ω–∏—á–µ—Å–∫–∏–π —Å–∞–¥,0,Apartment,16671.0,nomeal,"—É–ª–∏—Ü–∞ –°–µ–ª—å—Å–∫–æ—Ö–æ–∑—è–π—Å—Ç–≤–µ–Ω–Ω–∞—è, –¥.17–∫4, –ú–æ—Å–∫–≤–∞"
2,minihotel_ladomir_na_yauze,–û—Ç–µ–ª—å –õ–∞–¥–æ–º–∏—Ä –Ω–∞ –Ø—É–∑–µ,0,Hotel,16729.0,nomeal,"–ë–æ–ª—å—à–æ–π –ú–∞—Ç—Ä–æ—Å—Å–∫–∏–π –ø–µ—Ä–µ—É–ª–æ–∫, –¥.1 –ø–æ–º–µ—â 1/1, –ú–æ..."
3,zvezda_hotel_6,–ì–æ—Ä–æ–¥—Å–∫–æ–π –æ—Ç–µ–ª—å –ó–≤–µ–∑–¥–∞,0,Hotel,17500.0,nomeal,"–≥. –õ—é–±–µ—Ä—Ü—ã —É–ª. 3-–µ –ü–æ—á—Ç–æ–≤–æ–µ –û—Ç–¥–µ–ª–µ–Ω–∏–µ, –¥. 44–ê,..."
4,knokey_tekstilschiki_apartments,–ê–ø–∞—Ä—Ç–∞–º–µ–Ω—Ç—ã Knokey –¢–µ–∫—Å—Ç–∏–ª—å—â–∏–∫–∏,0,Apartment,17687.0,nomeal,"—É–ª–∏—Ü–∞ –ì—Ä–∞–π–≤–æ—Ä–æ–Ω–æ–≤—Å–∫–∞—è, –¥.4, —Å—Ç—Ä–æ–µ–Ω–∏–µ 1, –ú–æ—Å–∫–≤–∞"
5,ladomir_na_zelevom_minihotel,–û—Ç–µ–ª—å –õ–∞–¥–æ–º–∏—Ä –Ω–∞ –ó–µ–ª—å–µ–≤–æ–º,0,Hotel,17908.0,nomeal,"–ü—Ä–µ–æ–±—Ä–∞–∂–µ–Ω—Å–∫–æ–µ, –ø–µ—Ä –ó–µ–ª—å–µ–≤, –¥. 3, –ø–æ–º–µ—â. 3/1, ..."
...,...,...,...,...,...,...,...
198,peter_1,–û—Ç–µ–ª—å –ü—ë—Ç—Ä I,5,Hotel,97500.0,nomeal,"—É–ª–∏—Ü–∞ –ù–µ–≥–ª–∏–Ω–Ω–∞—è, 17, —Å—Ç—Ä. 1, –ú–æ—Å–∫–≤–∞"
199,otel_radisson_slavyanskaya_and_business_centre,–û—Ç–µ–ª—å Radisson Slavyanskaya Hotel & Busi,4,Hotel,97600.0,nomeal,"–ü–ª–æ—â–∞–¥—å –ï–≤—Ä–∞–∑–∏–∏, –¥.2, –ú–æ—Å–∫–≤–∞"
200,doubletree_by_hilton_moscow_vnukovo_airport,DoubleTree by Hilton Moscow ‚Äî Vnukovo Ai,4,Hotel,97750.0,nomeal,"—É–ª–∏—Ü–∞ 2-—è –†–µ–π—Å–æ–≤–∞—è, –¥–æ–º 2, –ú–æ—Å–∫–≤–∞"
201,grand_revival_hotel,–û—Ç–µ–ª—å Revival Hotel,3,Hotel,98647.0,nomeal,"—É–ª–∏—Ü–∞ –ü–µ—Ç—Ä–æ–≤–∫–∞ 19 —Å—Ç—Ä.3, –ú–æ—Å–∫–≤–∞"


In [9]:
from services import batch_get_reviews, filter_reviews, HotelReviewsFiltered

print(f"[batch_get_reviews_start] –ó–∞–≥—Ä—É–∑–∫–∞ –æ—Ç–∑—ã–≤–æ–≤ –¥–ª—è {len(hotel_hids)} –æ—Ç–µ–ª–µ–π...")
raw_reviews = await batch_get_reviews(client, hotel_hids, LANGUAGE)
reviews_map = filter_reviews(raw_reviews)

total_raw = sum(len(revs) for revs in raw_reviews.values())
total_filtered = sum(len(rd["reviews"]) for rd in reviews_map.values())
total_filtered_by_age = sum(rd["filtered_by_age"] for rd in reviews_map.values())
hotels_with_reviews = len(reviews_map)
total_positive = sum(rd["positive_count"] for rd in reviews_map.values())
total_neutral = sum(rd["neutral_count"] for rd in reviews_map.values())
total_negative = sum(rd["negative_count"] for rd in reviews_map.values())

print(
    f"[batch_get_reviews_done] –í—Å–µ–≥–æ {hotels_with_reviews} –æ—Ç–µ–ª–µ–π —Å –æ—Ç–∑—ã–≤–∞–º–∏ –∏–∑ {len(hotel_hids)}"
)
print(
    f"  –û–±—Ä–∞–±–æ—Ç–∞–Ω–æ {total_raw} –æ—Ç–∑—ã–≤–æ–≤ ‚Üí {total_filtered} —Ä–µ–ª–µ–≤–∞–Ω—Ç–Ω—ã—Ö "
    f"(–æ—Ç—Å–µ—á–µ–Ω–æ –ø–æ –¥–∞–≤–Ω–æ—Å—Ç–∏: {total_filtered_by_age})"
)
print(
    f"  –°–µ–≥–º–µ–Ω—Ç—ã: {total_positive} –ø–æ–∑–∏—Ç–∏–≤–Ω—ã—Ö, {total_neutral} –Ω–µ–π—Ç—Ä–∞–ª—å–Ω—ã—Ö, {total_negative} –Ω–µ–≥–∞—Ç–∏–≤–Ω—ã—Ö"
)

[batch_get_reviews_start] –ó–∞–≥—Ä—É–∑–∫–∞ –æ—Ç–∑—ã–≤–æ–≤ –¥–ª—è 202 –æ—Ç–µ–ª–µ–π...
[batch_get_reviews_done] –í—Å–µ–≥–æ 202 –æ—Ç–µ–ª–µ–π —Å –æ—Ç–∑—ã–≤–∞–º–∏ –∏–∑ 202
  –û–±—Ä–∞–±–æ—Ç–∞–Ω–æ 37609 –æ—Ç–∑—ã–≤–æ–≤ ‚Üí 9327 —Ä–µ–ª–µ–≤–∞–Ω—Ç–Ω—ã—Ö (–æ—Ç—Å–µ—á–µ–Ω–æ –ø–æ –¥–∞–≤–Ω–æ—Å—Ç–∏: 6290)
  –°–µ–≥–º–µ–Ω—Ç—ã: 5273 –ø–æ–∑–∏—Ç–∏–≤–Ω—ã—Ö, 2257 –Ω–µ–π—Ç—Ä–∞–ª—å–Ω—ã—Ö, 1797 –Ω–µ–≥–∞—Ç–∏–≤–Ω—ã—Ö


In [10]:
# Create DataFrame with reviews summary
reviews_data = []
for hid, data in reviews_map.items():
    hotel_id = next((h["id"] for h in hotels if h["hid"] == hid), "")
    reviews_data.append({
        "hotel_id": hotel_id,
        "hid": hid,
        "total": data["total_reviews"],
        "filtered_by_age": data["filtered_by_age"],
        "positive": data["positive_count"],
        "neutral": data["neutral_count"],
        "negative": data["negative_count"],
    })

df_reviews = pd.DataFrame(reviews_data)
df_reviews = df_reviews.sort_values("total", ascending=False).reset_index(drop=True)
df_reviews.index += 1

# Merge with hotel info
df_reviews_full = df_reviews.merge(
    df_content[["hid", "name", "stars"]],
    on="hid",
    how="left"
)
df_reviews_full = df_reviews_full[["hotel_id", "name", "stars", "total", "filtered_by_age", "positive", "neutral", "negative"]]

# Rating thresholds for display
NEUTRAL_THRESHOLD = 7.0
NEGATIVE_THRESHOLD = 5.0


def show_reviews(hotel_id: str, segment: str = "all", limit: int = 5) -> None:
    """
    Show reviews for a hotel.
    
    Args:
        hotel_id: Hotel ID (e.g. 'rosewood_hong_kong')
        segment: 'positive', 'negative', 'neutral', or 'all'
        limit: Number of reviews to show per segment
    """
    hid = next((h["hid"] for h in hotels if h["id"] == hotel_id), None)
    if not hid:
        print(f"Hotel '{hotel_id}' not found")
        return
    
    data = reviews_map.get(hid)
    if not data:
        print(f"No reviews for hotel '{hotel_id}'")
        return
    
    hotel_name = content_map.get(hid, {}).get("name", hotel_id)
    print(f"{'='*60}")
    print(f"{hotel_name}")
    print(f"Total: {data['total_reviews']} | +{data['positive_count']} / ~{data['neutral_count']} / -{data['negative_count']}")
    print(f"{'='*60}\n")
    
    reviews = data["reviews"]
    
    def print_segment(name: str, filter_fn, limit: int):
        segment_reviews = [r for r in reviews if filter_fn(r)][:limit]
        if not segment_reviews:
            return
        print(f"--- {name} ({len(segment_reviews)}) ---")
        for r in segment_reviews:
            rating = r["rating"]
            date = r["created"][:10]
            lang = r.get("_lang", "?")
            plus = r.get("review_plus", "").strip()
            minus = r.get("review_minus", "").strip()
            print(f"\n[{rating}/10] {date} [{lang}]")
            if plus:
                print(f"  + {plus[:300]}")
            if minus:
                print(f"  - {minus[:300]}")
        print()
    
    if segment in ("all", "positive"):
        print_segment("POSITIVE", lambda r: r["rating"] >= NEUTRAL_THRESHOLD, limit)
    if segment in ("all", "neutral"):
        print_segment("NEUTRAL", lambda r: NEGATIVE_THRESHOLD <= r["rating"] < NEUTRAL_THRESHOLD, limit)
    if segment in ("all", "negative"):
        print_segment("NEGATIVE", lambda r: r["rating"] < NEGATIVE_THRESHOLD, limit)


df_reviews_full

Unnamed: 0,hotel_id,name,stars,total,filtered_by_age,positive,neutral,negative
0,katyusha_hotel_3,–û—Ç–µ–ª—å –ö–∞—Ç—é—à–∞,3,1186,89,30,30,23
1,hotel_kurortno_razvlekatelny_kompleks_vnukovo_...,–û—Ç–µ–ª—å –ö—É—Ä–æ—Ä—Ç–Ω–æ - —Ä–∞–∑–≤–ª–µ–∫–∞—Ç–µ–ª—å–Ω—ã–π –∫–æ–º–ø–ª–µ–∫,4,1121,38,30,30,30
2,best_western_vega_hotel,–û—Ç–µ–ª—å –í–µ–≥–∞ –ò–∑–º–∞–π–ª–æ–≤–æ,4,1064,533,30,23,18
3,otel_kholidei_inn_moskva_sokolniki,–û—Ç–µ–ª—å –•–æ–ª–∏–¥–µ–π –ò–Ω–Ω –ú–æ—Å–∫–≤–∞ –°–æ–∫–æ–ª—å–Ω–∏–∫–∏,4,1022,220,30,30,7
4,otel_radisson_slavyanskaya_and_business_centre,–û—Ç–µ–ª—å Radisson Slavyanskaya Hotel & Busi,4,800,125,30,30,22
...,...,...,...,...,...,...,...,...
197,bolshaya_dvukhkomnatnaya_u_metro_dinamo_beloru...,–ö–≤–∞—Ä—Ç–∏—Ä–∞ –ë–æ–ª—å—à–∞—è –¥–≤—É—Ö–∫–æ–º–Ω–∞—Ç–Ω–∞—è —É –º–µ—Ç—Ä–æ –î,0,2,0,2,0,0
198,dlya_6_gostey_na_marshala_tukhachevskogo_flat,–ö–≤–∞—Ä—Ç–∏—Ä–∞ –¥–ª—è 6 –≥–æ—Å—Ç–µ–π –Ω–∞ –ú–∞—Ä—à–∞–ª–∞ –¢—É—Ö–∞—á–µ–≤,0,2,0,2,0,0
199,4komnatnye_na_chistykh_prudakh_apartments,–ê–ø–∞—Ä—Ç–∞–º–µ–Ω—Ç—ã 4-–∫–æ–º–Ω–∞—Ç–Ω—ã–µ –Ω–∞ –ß–∏—Å—Ç—ã—Ö –ø—Ä—É–¥–∞—Ö,0,1,0,1,0,0
200,vmestimostyyu_do_5_gostey_ryadom_s_metro_ot_ts...,–ö–≤–∞—Ä—Ç–∏—Ä–∞ –í–º–µ—Å—Ç–∏–º–æ—Å—Ç—å—é –¥–æ 5 –ì–æ—Å—Ç–µ–π –†—è–¥–æ–º,0,1,0,1,0,0


In [11]:
# Example: view reviews for a specific hotel
# show_reviews("four_seasons_st_petersburg")              # all segments, 5 per segment
# show_reviews("four_seasons_st_petersburg", "negative")  # only negative
# show_reviews("four_seasons_st_petersburg", "all", 10)   # all segments, 10 per segment

show_reviews("four_seasons_st_petersburg", limit=3)

Hotel 'four_seasons_st_petersburg' not found


In [12]:
from services import combine_hotels_data, HotelFull

combined = combine_hotels_data(hotels, content_map, reviews_map)
print(f"Combined {len(combined)} hotels with content and reviews")

Combined 202 hotels with content and reviews


In [13]:
import json

from services import estimate_tokens, prepare_hotel_for_llm, presort_hotels, score_hotels

# Estimate tokens before presort
hotels_for_llm_all = [prepare_hotel_for_llm(h) for h in combined]
tokens_before = estimate_tokens(json.dumps(hotels_for_llm_all, ensure_ascii=False))

# Pre-sort by hotel kind tier and prescore, limit to top 100 for LLM scoring
PRESORT_LIMIT = 100
top_hotels = presort_hotels(combined, reviews_map, limit=PRESORT_LIMIT)

# Estimate tokens after presort
hotels_for_llm_top = [prepare_hotel_for_llm(h) for h in top_hotels]
tokens_after = estimate_tokens(json.dumps(hotels_for_llm_top, ensure_ascii=False))

print(f"[presort_done] {len(combined)} –æ—Ç–µ–ª–µ–π ‚Üí {len(top_hotels)} (–ª–∏–º–∏—Ç {PRESORT_LIMIT})")
print(f"  –¢–æ–∫–µ–Ω—ã: ~{tokens_before:,} ‚Üí ~{tokens_after:,} (—ç–∫–æ–Ω–æ–º–∏—è {tokens_before - tokens_after:,})")

[presort_done] 202 –æ—Ç–µ–ª–µ–π ‚Üí 100 (–ª–∏–º–∏—Ç 100)
  –¢–æ–∫–µ–Ω—ã: ~223,115 ‚Üí ~116,854 (—ç–∫–æ–Ω–æ–º–∏—è 106,261)


In [14]:
import time

from services import score_hotels

# Score hotels using single LLM request
# Returns top 10 scored hotels with summary explaining the selection
print(f"[scoring_start] Scoring {len(top_hotels)} hotels...")
start_time = time.time()

scoring_result = await score_hotels(top_hotels, USER_PREFERENCES)

elapsed = time.time() - start_time

if scoring_result["error"]:
    print(f"\n‚ùå ERROR: {scoring_result['error']}")
    scoring_results = None
else:
    scoring_results = scoring_result["results"]
    print(f"[scoring_done] {len(scoring_results)} hotels scored ‚Äî {elapsed:.1f}s")
    print(f"  Estimated tokens: ~{scoring_result['estimated_tokens']:,}")
    print(f"\nüìã Summary:\n{scoring_result['summary']}")

[scoring_start] Scoring 100 hotels...
[scoring_done] 10 hotels scored ‚Äî 11.3s
  Estimated tokens: ~117,678

üìã Summary:
–î–ª—è –ø–æ–¥–±–æ—Ä–∞ –±—ã–ª–∏ –ø—Ä–æ–∞–Ω–∞–ª–∏–∑–∏—Ä–æ–≤–∞–Ω—ã 100 –æ—Ç–µ–ª–µ–π. –û—Å–Ω–æ–≤–Ω—ã–º –∫—Ä–∏—Ç–µ—Ä–∏–µ–º –±—ã–ª–æ –Ω–∞–ª–∏—á–∏–µ –¥–≤—É—Ö–∫–æ–º–Ω–∞—Ç–Ω—ã—Ö –Ω–æ–º–µ—Ä–æ–≤ —Å –¥–≤—É–º—è –æ—Ç–¥–µ–ª—å–Ω—ã–º–∏ —Å–ø–∞–ª—å–Ω—ã–º–∏ –º–µ—Å—Ç–∞–º–∏ –∏ –≤—ã—Å–æ–∫–∏–µ –æ—Ü–µ–Ω–∫–∏ —á–∏—Å—Ç–æ—Ç—ã. 

–õ–∏–¥–µ—Ä–æ–º —Å—Ç–∞–ª Wellion –í–æ–¥–Ω—ã–π (id: wellion_vodny_hotel, 44 090 —Ä—É–±.), —Ç–∞–∫ –∫–∞–∫ –æ–Ω –ø—Ä–µ–¥–ª–∞–≥–∞–µ—Ç –ø–æ–ª–Ω–æ—Ü–µ–Ω–Ω—ã–µ —Ç—Ä–µ—Ö–∫–æ–º–Ω–∞—Ç–Ω—ã–µ –∞–ø–∞—Ä—Ç–∞–º–µ–Ω—Ç—ã, —á—Ç–æ –≥–∞—Ä–∞–Ω—Ç–∏—Ä—É–µ—Ç –ø—Ä–∏–≤–∞—Ç–Ω–æ—Å—Ç—å –∏ –∫–æ–º—Ñ–æ—Ä—Ç. –û—Ç–µ–ª–∏ Kunlun (id: kunlun, 39 132 —Ä—É–±.) –∏ –ü–æ–≥–æ—Å—Ç–∏.—Ä—É (id: pogosti_na_altufevskom_shosse, 44 230 —Ä—É–±.) —Ç–∞–∫–∂–µ –ø—Ä–µ–¥–ª–∞–≥–∞—é—Ç –æ—Ç–ª–∏—á–Ω—ã–µ –¥–≤—É—Ö–∫–æ–º–Ω–∞—Ç–Ω—ã–µ –≤–∞—Ä–∏–∞–Ω—Ç—ã, –Ω–æ —Ä–∞—Å–ø–æ–ª–æ–∂–µ–Ω—ã –¥–∞–ª—å—à–µ –æ—Ç —Ü–µ–Ω—Ç—Ä–∞.

–¶–µ–Ω–æ–≤–æ–π –¥–∏–∞–ø

In [15]:
from typing import Any


def display_top_hotels(
    results: list[dict[str, Any]],
    hotels_data: list[dict[str, Any]],
    top_n: int = 10,
) -> pd.DataFrame:
    """Display top N scored hotels with details and Ostrovok links."""
    # Build lookup maps
    name_map = {h.get("id", ""): h.get("name", h.get("id", "")) for h in hotels_data}
    hid_map = {h.get("id", ""): h.get("hid", 0) for h in hotels_data}
    
    print(f"\n{'='*80}")
    print(f"TOP {top_n} HOTELS")
    print(f"{'='*80}\n")

    data = []
    for i, hotel in enumerate(results[:top_n], 1):
        hotel_id = hotel.get("hotel_id", "")
        score = hotel.get("score", 0)
        name = name_map.get(hotel_id, hotel_id)
        hid = hid_map.get(hotel_id, 0)
        reasons = hotel.get("top_reasons", [])
        penalties = hotel.get("score_penalties", [])
        
        # Generate Ostrovok URL
        url = ostrovok_url(
            hotel_id=hotel_id,
            hid=hid,
            checkin=CHECKIN_DATE,
            checkout=CHECKOUT_DATE,
            guests=GUESTS,
            region_id=region_id,
        )

        # Print detailed info
        print(f"{i}. {name}")
        print(f"   Score: {score}/100")
        if reasons:
            print(f"   + {'; '.join(reasons[:3])}")
        if penalties:
            print(f"   - {'; '.join(penalties[:5])}")
        print(f"   üîó {url}")
        print()
        
        # Collect for DataFrame
        data.append({
            "name": name[:40],
            "score": score,
            "reasons": "; ".join(reasons[:2])[:80] if reasons else "",
            "url": url,
        })
    
    df = pd.DataFrame(data)
    df.index = range(1, len(df) + 1)
    total_found = len(hotels_data)
    selected = min(top_n, len(results))
    print(f"–í—Å–µ–≥–æ –Ω–∞–π–¥–µ–Ω–æ {total_found} –æ—Ç–µ–ª–µ–π –Ω–∞ —ç—Ç–∏ –¥–∞—Ç—ã. ")
    print(f"–ü–æ–¥–æ–±—Ä–∞–Ω—ã –ª—É—á—à–∏–µ {selected} –ø–æ –≤–∞—à–∏–º –∫—Ä–∏—Ç–µ—Ä–∏—è–º.")
    return df


pd.set_option("display.max_colwidth", 100)
display_top_hotels(scoring_results, combined, top_n=10)


TOP 10 HOTELS

1. –û—Ç–µ–ª—å –í–µ–ª–ª–∏–æ–Ω –í–æ–¥–Ω—ã–π
   Score: 98/100
   + –ù–∞–ª–∏—á–∏–µ –∞–ø–∞—Ä—Ç–∞–º–µ–Ω—Ç–æ–≤ —Å 3 –∫–æ–º–Ω–∞—Ç–∞–º–∏ (–≥–∞—Ä–∞–Ω—Ç–∏—Ä–æ–≤–∞–Ω–Ω—ã–µ 2+ —Å–ø–∞–ª—å–Ω–∏); –í—ã—Å–æ–∫–∏–µ –æ—Ü–µ–Ω–∫–∏ –∑–∞ —á–∏—Å—Ç–æ—Ç—É –∏ –≤–∫—É—Å–Ω—ã–µ –∑–∞–≤—Ç—Ä–∞–∫–∏; –£–¥–æ–±–Ω–æ–µ —Ä–∞—Å–ø–æ–ª–æ–∂–µ–Ω–∏–µ —Ä—è–¥–æ–º —Å –º–µ—Ç—Ä–æ
   üîó https://ostrovok.ru/hotel/russia/moscow/mid8854273/wellion_vodny_hotel/?dates=30.01.2026-04.02.2026&guests=4&q=2395

2. –û—Ç–µ–ª—å –ö—É–Ω—å –õ—É–Ω—å
   Score: 96/100
   + –î–≤—É—Ö–∫–æ–º–Ω–∞—Ç–Ω—ã–µ –ª—é–∫—Å—ã —Å –±–æ–ª—å—à–æ–π –ø–ª–æ—â–∞–¥—å—é; –ù–∞–ª–∏—á–∏–µ –¥–≤—É—Å–ø–∞–ª—å–Ω–æ–π –∫—Ä–æ–≤–∞—Ç–∏ –∏ –¥–∏–≤–∞–Ω–∞ (–¥–≤–µ –∫—Ä–æ–≤–∞—Ç–∏); –û—á–µ–Ω—å –ø—Ä–æ—Å—Ç–æ—Ä–Ω—ã–µ –∏ —á–∏—Å—Ç—ã–µ –Ω–æ–º–µ—Ä–∞ –≤ –∫–∏—Ç–∞–π—Å–∫–æ–º —Å—Ç–∏–ª–µ
   - –î–∞–ª–µ–∫–æ –æ—Ç —Ü–µ–Ω—Ç—Ä–∞ (19 –∫–º)
   üîó https://ostrovok.ru/hotel/russia/moscow/mid10013909/kunlun/?dates=30.01.2026-04.02.2026&guests=4&q=2395

3. –ì–æ—Å—Ç–∏–Ω–∏—Ü–∞ –ü–µ—Ä–≤–æ–º–∞–π—Å–∫–∞—è
   Score

Unnamed: 0,name,score,reasons,url
1,–û—Ç–µ–ª—å –í–µ–ª–ª–∏–æ–Ω –í–æ–¥–Ω—ã–π,98,–ù–∞–ª–∏—á–∏–µ –∞–ø–∞—Ä—Ç–∞–º–µ–Ω—Ç–æ–≤ —Å 3 –∫–æ–º–Ω–∞—Ç–∞–º–∏ (–≥–∞—Ä–∞–Ω—Ç–∏—Ä–æ–≤–∞–Ω–Ω—ã–µ 2+ —Å–ø–∞–ª—å–Ω–∏); –í—ã—Å–æ–∫–∏–µ –æ—Ü–µ–Ω–∫–∏,https://ostrovok.ru/hotel/russia/moscow/mid8854273/wellion_vodny_hotel/?dates=30.01.2026-04.02.2...
2,–û—Ç–µ–ª—å –ö—É–Ω—å –õ—É–Ω—å,96,–î–≤—É—Ö–∫–æ–º–Ω–∞—Ç–Ω—ã–µ –ª—é–∫—Å—ã —Å –±–æ–ª—å—à–æ–π –ø–ª–æ—â–∞–¥—å—é; –ù–∞–ª–∏—á–∏–µ –¥–≤—É—Å–ø–∞–ª—å–Ω–æ–π –∫—Ä–æ–≤–∞—Ç–∏ –∏ –¥–∏–≤–∞–Ω–∞ (–¥–≤,https://ostrovok.ru/hotel/russia/moscow/mid10013909/kunlun/?dates=30.01.2026-04.02.2026&guests=4...
3,–ì–æ—Å—Ç–∏–Ω–∏—Ü–∞ –ü–µ—Ä–≤–æ–º–∞–π—Å–∫–∞—è,95,–°–ø–µ—Ü–∏–∞–ª—å–Ω—ã–π —Å–µ–º–µ–π–Ω—ã–π –Ω–æ–º–µ—Ä —Å 2 –∫–æ–º–Ω–∞—Ç–∞–º–∏; –ó–∞–≤—Ç—Ä–∞–∫ –≤–∫–ª—é—á–µ–Ω –≤ —Å—Ç–æ–∏–º–æ—Å—Ç—å,https://ostrovok.ru/hotel/russia/moscow/mid7668742/gostinitsa_pervomaiskaia_2/?dates=30.01.2026-...
4,–û—Ç–µ–ª—å –ü–æ–≥–æ—Å—Ç–∏.—Ä—É –Ω–∞ –ê–ª—Ç—É—Ñ—å–µ–≤—Å–∫–æ–º –®–æ—Å—Å–µ,94,–î–≤—É—Ö–∫–æ–º–Ω–∞—Ç–Ω—ã–µ –ª—é–∫—Å—ã –≤ –Ω–∞–ª–∏—á–∏–∏; –ò—Å–∫–ª—é—á–∏—Ç–µ–ª—å–Ω–æ –≤—ã—Å–æ–∫–∏–µ —Ä–µ–π—Ç–∏–Ω–≥–∏ (–º–Ω–æ–≥–æ 10/10),https://ostrovok.ru/hotel/russia/moscow/mid8015493/pogosti_na_altufevskom_shosse/?dates=30.01.20...
5,–û—Ç–µ–ª—å Riverside,92,–õ—é–∫—Å —Å 2 –∫–æ–º–Ω–∞—Ç–∞–º–∏ –≤ –ø—Ä–µ—Å—Ç–∏–∂–Ω–æ–º —Ä–∞–π–æ–Ω–µ; –ì–∞—Ä–∞–Ω—Ç–∏—Ä–æ–≤–∞–Ω–Ω–∞—è —á–∏—Å—Ç–æ—Ç–∞ –∏ –≤—ã—Å–æ–∫–∏–π —É—Ä–æ–≤–µ–Ω,https://ostrovok.ru/hotel/russia/moscow/mid8737742/riverside_hotel_40/?dates=30.01.2026-04.02.20...
6,–û—Ç–µ–ª—å –ê—Å—Ç—Ä–æ –ü–ª–∞–∑–∞,90,–î–≤—É—Ö–∫–æ–º–Ω–∞—Ç–Ω—ã–µ –ª—é–∫—Å—ã —Å –ø–∞–Ω–æ—Ä–∞–º–Ω—ã–º–∏ –æ–∫–Ω–∞–º–∏; –í—ã—Å–æ–∫–∏–π —Ä–µ–π—Ç–∏–Ω–≥ –∏ –æ—Ç–ª–∏—á–Ω—ã–µ –æ—Ç–∑—ã–≤—ã –æ —á–∏,https://ostrovok.ru/hotel/russia/moscow/mid9091582/astro_plaza_hotel/?dates=30.01.2026-04.02.202...
7,–û—Ç–µ–ª—å Edge Seligerskaya Moscow,88,–°–µ–º–µ–π–Ω—ã–µ –ª—é–∫—Å—ã —Å 2 –∫–æ–º–Ω–∞—Ç–∞–º–∏; –ù–∞–ª–∏—á–∏–µ –±–∞—Å—Å–µ–π–Ω–∞ –∏ —Å–∞—É–Ω—ã,https://ostrovok.ru/hotel/russia/moscow/mid7600189/iris_congress_hotel/?dates=30.01.2026-04.02.2...
8,–ì–æ—Å—Ç–∏–Ω–∏—Ü–∞ –í–æ—Å—Ç–æ–∫,87,–°–µ–º–µ–π–Ω—ã–µ –Ω–æ–º–µ—Ä–∞ –∏–∑ 2 –∫–æ–º–Ω–∞—Ç –ø–æ –¥–æ—Å—Ç—É–ø–Ω–æ–π —Ü–µ–Ω–µ; –ó–∞–≤—Ç—Ä–∞–∫–∏ –≤–∫–ª—é—á–µ–Ω—ã,https://ostrovok.ru/hotel/russia/moscow/mid10620117/vostok_gostinichnaya_9_hotel/?dates=30.01.20...
9,–ì–æ—Ä–æ–¥—Å–∫–æ–π –æ—Ç–µ–ª—å –î–æ–º –ö—É–ø—Ü–∞ –ë–∞–≤—ã–∫–∏–Ω–∞,85,–°–µ–º–µ–π–Ω—ã–π –Ω–æ–º–µ—Ä –∏–∑ 2 –∫–æ–º–Ω–∞—Ç –≤ –∏—Å—Ç–æ—Ä–∏—á–µ—Å–∫–æ–º –∑–¥–∞–Ω–∏–∏; –ü—Ä–æ–µ–∫—Ü–∏–æ–Ω–Ω—ã–π –∫–∏–Ω–æ—Ç–µ–∞—Ç—Ä –≤ –Ω–æ–º–µ—Ä,https://ostrovok.ru/hotel/russia/moscow/mid11347687/dom_kuptsa_bavykina_mini_hotel/?dates=30.01....
10,–û—Ç–µ–ª—å Norke Prime –ë–∞—É–º–∞–Ω—Å–∫–∞—è,82,–ß–µ—Ç—ã—Ä–µ—Ö–º–µ—Å—Ç–Ω—ã–µ –ø–æ–ª—É–ª—é–∫—Å—ã (–≤–∏–∑—É–∞–ª—å–Ω–æ —Ä–∞–∑–¥–µ–ª–µ–Ω–Ω—ã–µ –∑–æ–Ω—ã); –°–æ–≤—Ä–µ–º–µ–Ω–Ω—ã–π –¥–∏–∑–∞–π–Ω –∏ —á–∏—Å—Ç,https://ostrovok.ru/hotel/russia/moscow/mid11161354/norke_prime_baumanskaya_hotel/?dates=30.01.2...
