In [None]:
import os

from dotenv import load_dotenv
from etg import GuestRoom

# Populate os.environ from a .env file (python-dotenv) before the lookups below.
load_dotenv()

# ETG API Credentials — required; a missing variable raises KeyError right here.
ETG_KEY_ID = os.environ["ETG_KEY_ID"]
ETG_API_KEY = os.environ["ETG_API_KEY"]
ETG_REQUEST_TIMEOUT = 30.0  # passed as ETGClient(timeout=...); presumably seconds — TODO confirm

# Model identifier handed to estimate_tokens(); required environment variable.
SCORING_MODEL = os.environ["SCORING_MODEL"]

# Search Parameters
# City name; it is resolved to an ETG region ID through suggest_region.
CITY = "–ú–æ—Å–∫–≤–∞"

# Stay dates, written as "YYYY-MM-DD" strings.
CHECKIN_DATE = "2026-02-02"
CHECKOUT_DATE = "2026-02-04"

CURRENCY = "RUB"
LANGUAGE = "ru"
RESIDENCY = "RU"

# One dict per room: number of adults plus a list of children's ages.
GUESTS: list[GuestRoom] = [{"adults": 2, "children": [4, 2]}]
LIMIT = 1000  # forwarded as hotels_limit to the region search

# User preferences for AI (free text passed to score_hotels)
USER_PREFERENCES = "–û–±—è–∑–∞—Ç–µ–ª—å–Ω–æ –¥–≤–µ –∫–æ–º–Ω–∞—Ç—ã –∏ –¥–≤–µ –∫—Ä–æ–≤–∞—Ç–∏. –•–æ—Ä–æ—à–∏–µ –æ—Ç–∑—ã–≤—ã. –ß–∏—Å—Ç–æ—Ç–∞"

# Filters (applied by filter_hotels_by_price and forwarded to the LLM helpers)
MIN_PRICE: float | None = 3000.0  # None = no minimum
MAX_PRICE: float | None = 20000.0  # None = no maximum

# Save results to JSON
SAVE_RESULTS = True  # Set to False to disable JSON export

In [2]:
from etg import ETGClient
from utils import ostrovok_url

# Single ETG API client shared by every API-calling cell below
# (region suggest, hotel search, content and review batches).
client = ETGClient(ETG_KEY_ID, ETG_API_KEY, timeout=ETG_REQUEST_TIMEOUT)

In [3]:
async def find_region_id(client: ETGClient, city_name: str, language: str) -> int | None:
    """Find region ID for a city name."""
    print(f"Looking up region ID for '{city_name}'...")
    regions = await client.suggest_region(city_name, language)

    if not regions:
        print(f"  No regions found for '{city_name}'")
        return None

    # Only accept City type
    for region in regions:
        if region["type"] == "City":
            region_id = region["id"]
            print(f"  Found: {region['name']} ({region.get('country_code', '')}), region_id={region_id}")
            return region_id

    # No city found - show available options
    print(f"  No city found. Available regions:")
    for region in regions[:5]:
        print(f"    - {region['name']} (type: {region['type']}, id: {region['id']})")

    return None

In [4]:
# Find region by city name.
# ETG API requires region_id for hotel search, so we first
# lookup the region ID via suggest_region by city name.
region_id = await find_region_id(client, CITY, LANGUAGE)
if not region_id:
    raise ValueError(f"Could not find region for '{CITY}'")

print(f"\nSearching hotels in {CITY}...")
print(f"  Dates: {CHECKIN_DATE} to {CHECKOUT_DATE}")
print(f"  Currency: {CURRENCY}, Limit: {LIMIT}")

Looking up region ID for '–ú–æ—Å–∫–≤–∞'...
  Found: –ú–æ—Å–∫–≤–∞ (RU), region_id=2395

Searching hotels in –ú–æ—Å–∫–≤–∞...
  Dates: 2026-02-02 to 2026-02-04
  Currency: RUB, Limit: 1000


In [5]:
import pandas as pd

from services import filter_hotels_by_price

# Search available hotels in the region with given parameters.
# Returns short hotel info: id, hid, and rates (room name, price, meal).
# Full content (name, address, amenities) is fetched separately via hotel content API.
search_results = await client.search_hotels_by_region(
    region_id=region_id,
    checkin=CHECKIN_DATE,
    checkout=CHECKOUT_DATE,
    residency=RESIDENCY,
    guests=GUESTS,
    currency=CURRENCY,
    language=LANGUAGE,
    hotels_limit=LIMIT,
)

# Missing keys fall back to an empty list / the number of hotels actually returned.
all_hotels = search_results.get("hotels", [])
total_available = search_results.get("total_hotels", len(all_hotels))

# Filter by price range (MIN_PRICE / MAX_PRICE; None means that bound is not set).
hotels = filter_hotels_by_price(all_hotels, MIN_PRICE, MAX_PRICE)
total_after_filter = len(hotels)

In [6]:
from services import batch_get_content

# Collect the "hid" of every price-filtered hotel; the same list is reused
# for the reviews batch in the next cell.
hotel_ids = [h["hid"] for h in hotels]

# Fetch hotel content in batch and report how many of the requested hotels
# ended up in the returned mapping.
print(f"[batch_get_content_start] –ó–∞–≥—Ä—É–∑–∫–∞ –∫–æ–Ω—Ç–µ–Ω—Ç–∞ –¥–ª—è {len(hotel_ids)} –æ—Ç–µ–ª–µ–π...")
content_map = await batch_get_content(client, hotel_ids, LANGUAGE)
print(f"[batch_get_content_done] –ó–∞–≥—Ä—É–∂–µ–Ω –∫–æ–Ω—Ç–µ–Ω—Ç –¥–ª—è {len(content_map)} –∏–∑ {len(hotel_ids)} –æ—Ç–µ–ª–µ–π")

[batch_get_content_done] –ó–∞–≥—Ä—É–∂–µ–Ω –∫–æ–Ω—Ç–µ–Ω—Ç –¥–ª—è 234 –∏–∑ 234 –æ—Ç–µ–ª–µ–π


In [7]:
from services import batch_get_reviews, filter_reviews

print(f"[batch_get_reviews_start] –ó–∞–≥—Ä—É–∑–∫–∞ –æ—Ç–∑—ã–≤–æ–≤ –¥–ª—è {len(hotel_ids)} –æ—Ç–µ–ª–µ–π...")
raw_reviews_payload = await batch_get_reviews(client, hotel_ids, LANGUAGE)
reviews_map = filter_reviews(raw_reviews_payload)

total_raw = sum(rd["total_reviews"] for rd in raw_reviews_payload.values())
total_filtered = sum(len(rd["reviews"]) for rd in reviews_map.values())
# Compute filtered_by_age: how many reviews were filtered out
total_raw_in_filtered = sum(rd["total_reviews"] for rd in reviews_map.values())
total_filtered_by_age = total_raw_in_filtered - total_filtered
hotels_with_reviews = len(reviews_map)

# Calculate average rating across all hotels
all_avg_ratings = [rd["avg_rating"] for rd in reviews_map.values() if rd["avg_rating"] is not None]
overall_avg = sum(all_avg_ratings) / len(all_avg_ratings) if all_avg_ratings else 0

print(
    f"[batch_get_reviews_done] –í—Å–µ–≥–æ {hotels_with_reviews} –æ—Ç–µ–ª–µ–π —Å –æ—Ç–∑—ã–≤–∞–º–∏ –∏–∑ {len(hotel_ids)}"
)
print(
    f"  –û–±—Ä–∞–±–æ—Ç–∞–Ω–æ {total_raw} –æ—Ç–∑—ã–≤–æ–≤ ‚Üí {total_filtered} —Ä–µ–ª–µ–≤–∞–Ω—Ç–Ω—ã—Ö "
    f"(–æ—Ç—Å–µ—á–µ–Ω–æ –ø–æ –¥–∞–≤–Ω–æ—Å—Ç–∏: {total_filtered_by_age})"
)
print(f"  –°—Ä–µ–¥–Ω–∏–π —Ä–µ–π—Ç–∏–Ω–≥: {overall_avg:.1f}/10")

[batch_get_reviews_done] –í—Å–µ–≥–æ 232 –æ—Ç–µ–ª–µ–π —Å –æ—Ç–∑—ã–≤–∞–º–∏ –∏–∑ 234
  –û–±—Ä–∞–±–æ—Ç–∞–Ω–æ 39727 –æ—Ç–∑—ã–≤–æ–≤ ‚Üí 30589 —Ä–µ–ª–µ–≤–∞–Ω—Ç–Ω—ã—Ö (–æ—Ç—Å–µ—á–µ–Ω–æ –ø–æ –¥–∞–≤–Ω–æ—Å—Ç–∏: 9138)
  –°—Ä–µ–¥–Ω–∏–π —Ä–µ–π—Ç–∏–Ω–≥: 8.5/10


In [8]:
from services import combine_hotels_data

# Merge the price-filtered search results with the fetched content and the
# filtered reviews; `combined` is what the export, presort and finalize cells use.
combined = combine_hotels_data(hotels, content_map, reviews_map)
print(f"Combined {len(combined)} hotels with content and reviews")

Combined 234 hotels with content and reviews


In [None]:
import json
from pathlib import Path

if not SAVE_RESULTS:
    print("‚è≠Ô∏è  JSON export disabled (SAVE_RESULTS = False)")
else:
    # Output goes to ./.artifacts (created on demand)
    artifacts_dir = Path(".artifacts")
    artifacts_dir.mkdir(exist_ok=True)

    # File name pattern: City_CheckinDate_CheckoutDate.json
    filename = f"{CITY}_{CHECKIN_DATE}_{CHECKOUT_DATE}.json"
    filepath = artifacts_dir / filename

    # Search metadata first, then the hotels themselves, then summary counters
    export_data = {
        "search": dict(
            city=CITY,
            checkin=CHECKIN_DATE,
            checkout=CHECKOUT_DATE,
            guests=GUESTS,
            min_price=MIN_PRICE,
            max_price=MAX_PRICE,
            currency=CURRENCY,
            language=LANGUAGE,
            residency=RESIDENCY,
        ),
        "hotels": combined,  # HotelFull[] - already has all data
        "stats": dict(
            total_hotels=len(combined),
            total_available=total_available,
            total_after_filter=len(hotels),
        ),
    }

    # Write human-readable JSON, keeping non-ASCII text unescaped
    with filepath.open("w", encoding="utf-8") as f:
        json.dump(export_data, f, ensure_ascii=False, indent=2)

    # One fragment per room; children (count and ages) are appended only when present
    guests_str = "; ".join(
        f"{room['adults']} adults"
        + (
            f" + {len(room['children'])} children (ages: {', '.join(str(age) for age in room['children'])})"
            if room.get("children")
            else ""
        )
        for room in GUESTS
    )

    print(f"‚úÖ Saved {len(combined)} hotels to {filepath}")
    print(f"   Search: {CITY}, {CHECKIN_DATE} ‚Üí {CHECKOUT_DATE}")
    print(f"   Guests: {guests_str}")
    print(f"   Price: {MIN_PRICE} - {MAX_PRICE} {CURRENCY}")

In [9]:
import json

from services import estimate_tokens, prepare_hotel_for_llm, presort_hotels

# Parameters for review sampling
MAX_REVIEWS_PER_HOTEL = 30
REVIEW_TEXT_MAX_LENGTH = 512


def _llm_payload_with_tokens(hotels_subset):
    """Prepare hotels for the LLM and estimate the token count of their JSON dump.

    Returns a ``(payload, tokens)`` pair: the list produced by calling
    ``prepare_hotel_for_llm`` on each hotel, and ``estimate_tokens`` applied
    to that list serialized with ``ensure_ascii=False`` for SCORING_MODEL.
    """
    payload = [
        prepare_hotel_for_llm(h, MIN_PRICE, MAX_PRICE, MAX_REVIEWS_PER_HOTEL, REVIEW_TEXT_MAX_LENGTH)
        for h in hotels_subset
    ]
    return payload, estimate_tokens(json.dumps(payload, ensure_ascii=False), SCORING_MODEL)


# Estimate tokens before presort
hotels_for_llm_all, tokens_before = _llm_payload_with_tokens(combined)

# Pre-sort by hotel kind tier and prescore, keep at most PRESORT_LIMIT hotels for LLM scoring
PRESORT_LIMIT = 120
top_hotels = presort_hotels(combined, reviews_map, limit=PRESORT_LIMIT)

# Estimate tokens after presort
hotels_for_llm_top, tokens_after = _llm_payload_with_tokens(top_hotels)

print(f"[presort_done] {len(combined)} –æ—Ç–µ–ª–µ–π ‚Üí {len(top_hotels)} (–ª–∏–º–∏—Ç {PRESORT_LIMIT})")
print(f"  –¢–æ–∫–µ–Ω—ã: ~{tokens_before:,} ‚Üí ~{tokens_after:,} (—ç–∫–æ–Ω–æ–º–∏—è {tokens_before - tokens_after:,})")

[presort_done] 234 –æ—Ç–µ–ª–µ–π ‚Üí 120 (–ª–∏–º–∏—Ç 120)
  –¢–æ–∫–µ–Ω—ã: ~618,973 ‚Üí ~362,593 (—ç–∫–æ–Ω–æ–º–∏—è 256,380)


In [14]:
import time

from services import finalize_scored_hotels, score_hotels

# Score hotels using single LLM request
# Returns top N scored hotels with summary explaining the selection
print(f"[scoring_start] Scoring {len(top_hotels)} hotels...")
start_time = time.time()

scoring_result = await score_hotels(
    top_hotels,
    USER_PREFERENCES,
    guests=GUESTS,
    max_reviews=MAX_REVIEWS_PER_HOTEL,
    review_text_max_length=REVIEW_TEXT_MAX_LENGTH,
    min_price=MIN_PRICE,
    max_price=MAX_PRICE,
    currency=CURRENCY,
    top_count=10
)

elapsed = time.time() - start_time

if scoring_result["error"]:
    print(f"\n‚ùå ERROR: {scoring_result['error']}")
    scored_hotels = None
else:
    scoring_results = scoring_result["results"]
    print(f"[scoring_done] {len(scoring_results)} hotels scored ‚Äî {elapsed:.1f}s")
    print(f"  Estimated tokens: ~{scoring_result['estimated_tokens']:,}")
    print(f"\nüìã Summary:\n{scoring_result['summary']}")
    
    # Finalize scored hotels - merge scoring results with full hotel data
    scored_hotels = finalize_scored_hotels(combined, scoring_results)
    print(f"\n[finalize_done] {len(scored_hotels)} hotels with complete data")

[scoring_done] 30 hotels scored ‚Äî 55.2s
  Estimated tokens: ~364,504

üìã Summary:
The Moscow hotel market for family-sized accommodations offers a diverse range of options, from high-end 5-star suites to practical 3-star apart-hotels. Pricing for properties that can comfortably house two adults and two children typically falls between 7,000 and 19,000 RUB per night, with central locations commanding a significant premium. The analyzed set highlights a strong preference for multi-room configurations and high cleanliness standards, which are critical for families with young children. For instance, the Moscow Marriott Hotel Novy Arbat (moscow_marriott_hotel_novy_arbat, avg_rating: 9.2) was excluded from the top picks because its available rates only offered single-room layouts, failing the user's strict requirement for two separate rooms. Final recommendations prioritize properties that guarantee the requested space and maintain superior hygiene ratings to ensure a comfortable stay.



In [16]:
from services import HotelScored


def display_top_hotels(
    scored_hotels: list[HotelScored],
    top_n: int = 10,
) -> pd.DataFrame:
    """Display top N scored hotels with details and Ostrovok links."""
    print(f"\n{'='*80}")
    print(f"TOP {top_n} HOTELS")
    print(f"{'='*80}\n")

    data = []
    for i, hotel in enumerate(scored_hotels[:top_n], 1):
        hotel_id = hotel["id"]
        name = hotel["name"]
        hid = hotel["hid"]
        kind = hotel.get("kind", "")
        score = hotel["score"]
        reasons = hotel.get("top_reasons", [])
        penalties = hotel.get("score_penalties", [])
        selected_hash = hotel.get("selected_rate_hash")
        
        # Find selected rate by hash
        rates = hotel.get("rates", [])
        selected_rate = next((r for r in rates if r.get("match_hash") == selected_hash), None)
        
        # Get rate details
        if selected_rate:
            room_name = selected_rate.get("room_name", "")[:50]
            meal_data = selected_rate.get("meal_data", {})
            meal = meal_data.get("value", selected_rate.get("meal", ""))
            
            # Calculate prices from daily_prices
            daily_prices = selected_rate.get("daily_prices", [])
            if daily_prices:
                # Convert string prices to float and sum
                total_price = sum(float(p) for p in daily_prices)
                num_nights = len(daily_prices)
                avg_price_per_night = total_price / num_nights if num_nights > 0 else 0
                
                # Get currency
                pt = selected_rate.get("payment_options", {}).get("payment_types", [])
                currency = pt[0].get("show_currency_code", "") if pt else ""
                
                total_price_str = f"{total_price:.0f} {currency}"
                avg_price_str = f"{avg_price_per_night:.0f} {currency}"
            else:
                # Fallback to payment_types if daily_prices not available
                pt = selected_rate.get("payment_options", {}).get("payment_types", [])
                if pt:
                    total_price = float(pt[0].get("show_amount", 0))
                    currency = pt[0].get("show_currency_code", "")
                    total_price_str = f"{total_price:.0f} {currency}"
                    avg_price_str = f"{total_price:.0f} {currency}"
                else:
                    total_price_str = "N/A"
                    avg_price_str = "N/A"
        else:
            room_name = "N/A"
            meal = "N/A"
            total_price_str = "N/A"
            avg_price_str = "N/A"
        
        # Get reviews data
        reviews = hotel.get("reviews")
        avg_rating = reviews.get("avg_rating") if reviews else None
        detailed = reviews.get("detailed_averages", {}) if reviews else {}
        
        # Generate Ostrovok URL
        url = ostrovok_url(
            hotel_id=hotel_id,
            hid=hid,
            checkin=CHECKIN_DATE,
            checkout=CHECKOUT_DATE,
            guests=GUESTS,
            region_id=region_id,
        )

        # Print detailed info
        print(f"{i}. {name} [{kind}]")
        print(f"   Score: {score}/100 | Rating: {avg_rating}/10" if avg_rating else f"   Score: {score}/100")
        print(f"   Room: {room_name}")
        print(f"   Total: {total_price_str} | Avg per night: {avg_price_str} | Meal: {meal}")
        if reasons:
            print(f"   + {'; '.join(reasons[:3])}")
        if penalties:
            print(f"   - {'; '.join(penalties[:5])}")
        print(f"   üîó {url}")
        print()
        
        # Collect for DataFrame
        data.append({
            "name": name[:35],
            "kind": kind,
            "room": room_name[:30],
            "total": total_price_str,
            "avg/night": avg_price_str,
            "meal": meal,
            "score": score,
            "rating": avg_rating,
            "clean": detailed.get("cleanness"),
            "url": url,
        })
    
    df = pd.DataFrame(data)
    df.index = range(1, len(df) + 1)
    selected = min(top_n, len(scored_hotels))
    print(f"–í—Å–µ–≥–æ –Ω–∞–π–¥–µ–Ω–æ {len(combined)} –æ—Ç–µ–ª–µ–π –Ω–∞ —ç—Ç–∏ –¥–∞—Ç—ã.")
    print(f"–ü–æ–¥–æ–±—Ä–∞–Ω—ã –ª—É—á—à–∏–µ {selected} –ø–æ –≤–∞—à–∏–º –∫—Ä–∏—Ç–µ—Ä–∏—è–º.")
    return df


# Allow up to 100 characters per column so long values such as URLs are truncated less.
pd.set_option("display.max_colwidth", 100)
# Last expression of the cell: the returned DataFrame becomes the cell output.
display_top_hotels(scored_hotels, top_n=10)

Unnamed: 0,name,kind,room,total,avg/night,meal,score,rating,clean,url
1,–ë—É—Ç–∏–∫-–æ—Ç–µ–ª—å –ö–∞–∑–Ω–∞—á–µ–π—Å–∫–∏–π,Boutique_and_Design,–î–≤—É—Ö–º–µ—Å—Ç–Ω—ã–π –ª—é–∫—Å –ú–∞–Ω—Å–∞—Ä–¥–∞ —Å 2,35660 RUB,17830 RUB,nomeal,98,9.3,9.6,https://ostrovok.ru/hotel/russia/moscow/mid11323757/kaznacheyskiy_boutique_hotel/?dates=02.02.20...
2,–û—Ç–µ–ª—å Hampton by Hilton Moscow –†–æ–≥–æ,Hotel,–ù–æ–º–µ—Ä —Å –¥–∏–≤–∞–Ω–æ–º –°–µ–º–µ–π–Ω—ã–π —Å –∫—Ä–æ,38728 RUB,19364 RUB,breakfast,97,9.3,9.5,https://ostrovok.ru/hotel/russia/moscow/mid9770069/hampton_by_hilton_moscow_rogozhsky_val/?dates...
3,–ì–æ—Ä–æ–¥—Å–∫–æ–π –æ—Ç–µ–ª—å –î–æ–º –ö—É–ø—Ü–∞ –ë–∞–≤—ã–∫–∏–Ω–∞,Hotel,–ß–µ—Ç—ã—Ä—ë—Ö–º–µ—Å—Ç–Ω—ã–π –Ω–æ–º–µ—Ä —Å –ø—Ä–æ–µ–∫—Ü–∏,17967 RUB,8984 RUB,nomeal,96,9.6,9.8,https://ostrovok.ru/hotel/russia/moscow/mid11347687/dom_kuptsa_bavykina_mini_hotel/?dates=02.02....
4,–ê–ø–∞—Ä—Ç-–æ—Ç–µ–ª—å Idera,Hotel,–ù–æ–º–µ—Ä —Å–µ–º–µ–π–Ω—ã–π –î–µ–ª—é–∫—Å (–ø–∏—Ç–∞–Ω–∏–µ,15025 RUB,7512 RUB,nomeal,95,9.3,9.6,https://ostrovok.ru/hotel/russia/moscow/mid13106596/idera_hotel/?dates=02.02.2026-04.02.2026&gue...
5,–û—Ç–µ–ª—å –°–µ–≤–∞—Å—Ç–æ–ø–æ–ª—å –ì—Ä–∞–Ω–¥ –ö–ª–∞—Å—Å–∏–∫,Hotel,–ß–µ—Ç—ã—Ä—ë—Ö–º–µ—Å—Ç–Ω—ã–µ –∞–ø–∞—Ä—Ç–∞–º–µ–Ω—Ç—ã (–ø–∏,27086 RUB,13543 RUB,nomeal,94,9.2,9.3,https://ostrovok.ru/hotel/russia/moscow/mid7625812/sevastopol_hotel_bld2/?dates=02.02.2026-04.02...
6,–ì–æ—Å—Ç–∏–Ω–∏—Ü–∞ –ê–ø–∞—Ä—Ç-–æ—Ç–µ–ª—å Raido.Moscow,Hotel,–ß–µ—Ç—ã—Ä—ë—Ö–º–µ—Å—Ç–Ω—ã–π –ª—é–∫—Å Premier (–∫,25740 RUB,12870 RUB,nomeal,93,9.4,9.4,https://ostrovok.ru/hotel/russia/moscow/mid13099355/moscow_history_apartments/?dates=02.02.2026-...
7,–û—Ç–µ–ª—å Mamaison All-Suites Spa Pokro,Hotel,Suite Deluxe 1 Bedroom (–ø–∏—Ç–∞–Ω–∏,28000 RUB,14000 RUB,nomeal,92,8.8,8.9,https://ostrovok.ru/hotel/russia/moscow/mid7590428/mamaison_allsuites_spa_hotel_pokrovka/?dates=...
8,–û—Ç–µ–ª—å –§–∞—Ä—Ñ–∞–ª–ª–µ,Hotel,–õ—é–∫—Å —Å 2 –∫–æ–º–Ω–∞—Ç–∞–º–∏ —Å –∫—Ä–∞—Å–∏–≤—ã–º,25000 RUB,12500 RUB,breakfast,91,9.2,9.4,https://ostrovok.ru/hotel/russia/moscow/mid8744883/fafralle_minihotel/?dates=02.02.2026-04.02.20...
9,–û—Ç–µ–ª—å –õ–µ—Å–Ω–∞—è –°–∞—Ñ–º–∞—Ä (–±—ã–≤—à–∏–π –•–æ–ª–∏–¥–µ–π,Hotel,–î–≤—É—Ö–º–µ—Å—Ç–Ω—ã–π –ª—é–∫—Å —Å –±–æ–ª—å—à–æ–π –¥–≤—É,37300 RUB,18650 RUB,breakfast,90,9.0,9.1,https://ostrovok.ru/hotel/russia/moscow/mid7467380/kholidei_inn_moskva_lesnaia/?dates=02.02.2026...
10,–û—Ç–µ–ª—å –ú–æ—Å–∫–≤–∞ –ö—Ä–∞—Å–Ω–æ—Å–µ–ª—å—Å–∫–∞—è (ex. Hi,Hotel,–ß–µ—Ç—ã—Ä—ë—Ö–º–µ—Å—Ç–Ω—ã–π –Ω–æ–º–µ—Ä —Å–º–µ–∂–Ω—ã–π –î,32600 RUB,16300 RUB,nomeal,89,8.9,9.0,https://ostrovok.ru/hotel/russia/moscow/mid9751449/hilton_garden_inn_moscow_krasnoselskaya_hotel...
