In [10]:
import json
import os
from pathlib import Path

from dotenv import load_dotenv

# Pull variables from a local .env file into the process environment.
load_dotenv()

# Name of the scoring model; a missing variable raises KeyError right here.
SCORING_MODEL = os.environ["SCORING_MODEL"]

# Artifact to read from the .artifacts directory.
# Naming scheme: City_CheckinDate_CheckoutDate.json
ARTIFACT_FILE = "–ú–æ—Å–∫–≤–∞_2026-02-02_2026-02-04.json"  # Change this to your file

# Resolve the artifact path and parse it as JSON
artifacts_dir = Path(".artifacts")
filepath = artifacts_dir.joinpath(ARTIFACT_FILE)

if filepath.exists():
    with filepath.open(mode="r", encoding="utf-8") as f:
        data = json.load(f)
else:
    raise FileNotFoundError(f"File not found: {filepath}")

# Search parameters stored alongside the results in the artifact
search = data["search"]
CITY, REGION_ID = search["city"], search["region_id"]
CHECKIN_DATE, CHECKOUT_DATE = search["checkin"], search["checkout"]
GUESTS = search["guests"]
MIN_PRICE, MAX_PRICE = search["min_price"], search["max_price"]
CURRENCY, LANGUAGE, RESIDENCY = search["currency"], search["language"], search["residency"]

# Hotel records and the counters saved with them
combined = data["hotels"]  # HotelFull[]
stats = data["stats"]

# Rebuild the hid -> reviews lookup; hotels lacking either value are skipped
reviews_map = {}
for hotel in combined:
    hid, reviews = hotel.get("hid"), hotel.get("reviews")
    if not (hid and reviews):
        continue
    reviews_map[hid] = reviews

# Short load report: source file, search window, hotel count and saved stats
print(
    f"‚úÖ Loaded data from {ARTIFACT_FILE}",
    f"   Search: {CITY}, {CHECKIN_DATE} ‚Üí {CHECKOUT_DATE}",
    f"   Hotels: {len(combined)}",
    f"   Stats: {stats}",
    sep="\n",
)

# Free-text preferences passed to the scoring step (edit as needed)
USER_PREFERENCES = "–û–±—è–∑–∞—Ç–µ–ª—å–Ω–æ –¥–≤–µ –∫–æ–º–Ω–∞—Ç—ã –∏ –¥–≤–µ –∫—Ä–æ–≤–∞—Ç–∏. –•–æ—Ä–æ—à–∏–µ –æ—Ç–∑—ã–≤—ã. –ß–∏—Å—Ç–æ—Ç–∞"

✅ Loaded data from Москва_2026-02-02_2026-02-04.json
   Search: Москва, 2026-02-02 → 2026-02-04
   Hotels: 234
   Stats: {'total_hotels': 234, 'total_available': 258, 'total_after_filter': 234}


In [11]:
import json

from services import estimate_tokens, prepare_hotel_for_llm, presort_hotels

# Parameters for review sampling
MAX_REVIEWS_PER_HOTEL = 30
REVIEW_TEXT_MAX_LENGTH = 512

# Maximum number of hotels kept after pre-sorting and sent on to LLM scoring
PRESORT_LIMIT = 120


def _prepare_hotels_for_llm(hotels):
    """Return the LLM payload for every hotel in `hotels`.

    Uses the notebook-level price bounds and the review sampling limits
    defined at the top of this cell.
    """
    return [
        prepare_hotel_for_llm(h, MIN_PRICE, MAX_PRICE, MAX_REVIEWS_PER_HOTEL, REVIEW_TEXT_MAX_LENGTH)
        for h in hotels
    ]


def _estimate_payload_tokens(payload):
    """Estimate the token count of `payload` serialized as JSON for SCORING_MODEL.

    Non-ASCII characters are kept verbatim (ensure_ascii=False) so the
    estimate is made on the same text that json.dumps produces for the prompt.
    """
    return estimate_tokens(json.dumps(payload, ensure_ascii=False), SCORING_MODEL)


# Estimate tokens before presort
hotels_for_llm_all = _prepare_hotels_for_llm(combined)
tokens_before = _estimate_payload_tokens(hotels_for_llm_all)

# Pre-sort hotels and keep at most PRESORT_LIMIT of them for LLM scoring
top_hotels = presort_hotels(combined, reviews_map, limit=PRESORT_LIMIT)

# Estimate tokens after presort
hotels_for_llm_top = _prepare_hotels_for_llm(top_hotels)
tokens_after = _estimate_payload_tokens(hotels_for_llm_top)

print(f"[presort_done] {len(combined)} –æ—Ç–µ–ª–µ–π ‚Üí {len(top_hotels)} (–ª–∏–º–∏—Ç {PRESORT_LIMIT})")
print(f"  –¢–æ–∫–µ–Ω—ã: ~{tokens_before:,} ‚Üí ~{tokens_after:,} (—ç–∫–æ–Ω–æ–º–∏—è {tokens_before - tokens_after:,})")

[presort_done] 234 отелей → 120 (лимит 120)
  Токены: ~618,973 → ~362,593 (экономия 256,380)


In [14]:
import time

from services import finalize_scored_hotels, score_hotels

# Score hotels using single LLM request
# Returns top N scored hotels with summary explaining the selection
print(f"[scoring_start] Scoring {len(top_hotels)} hotels...")
start_time = time.time()

scoring_result = await score_hotels(
    top_hotels,
    USER_PREFERENCES,
    guests=GUESTS,
    max_reviews=MAX_REVIEWS_PER_HOTEL,
    review_text_max_length=REVIEW_TEXT_MAX_LENGTH,
    min_price=MIN_PRICE,
    max_price=MAX_PRICE,
    currency=CURRENCY,
    top_count=10
)

elapsed = time.time() - start_time

if scoring_result["error"]:
    print(f"\n‚ùå ERROR: {scoring_result['error']}")
    scored_hotels = None
else:
    scoring_results = scoring_result["results"]
    print(f"[scoring_done] {len(scoring_results)} hotels scored ‚Äî {elapsed:.1f}s")
    print(f"  Estimated tokens: ~{scoring_result['estimated_tokens']:,}")
    print(f"\nüìã Summary:\n{scoring_result['summary']}")
    
    # Finalize scored hotels - merge scoring results with full hotel data
    scored_hotels = finalize_scored_hotels(combined, scoring_results)
    print(f"\n[finalize_done] {len(scored_hotels)} hotels with complete data")

[scoring_done] 10 hotels scored — 40.1s
  Estimated tokens: ~364,504

📋 Summary:
–ê–Ω–∞–ª–∏–∑ –º–æ—Å–∫–æ–≤—Å–∫–æ–≥–æ —Ä—ã–Ω–∫–∞ –ø–æ–∫–∞–∑–∞–ª —à–∏—Ä–æ–∫–∏–π –≤—ã–±–æ—Ä –æ—Ç–µ–ª–µ–π –∫–∞—Ç–µ–≥–æ—Ä–∏–π 3-5 –∑–≤–µ–∑–¥, –ø—Ä–µ–¥–ª–∞–≥–∞—é—â–∏—Ö –º–Ω–æ–≥–æ–∫–æ–º–Ω–∞—Ç–Ω—ã–µ –Ω–æ–º–µ—Ä–∞ –¥–ª—è —Å–µ–º–µ–π–Ω–æ–≥–æ —Ä–∞–∑–º–µ—â–µ–Ω–∏—è –≤ –¥–∏–∞–ø–∞–∑–æ–Ω–µ –æ—Ç 8 000 –¥–æ 19 000 —Ä—É–±–ª–µ–π –∑–∞ –Ω–æ—á—å. –í —Ä–∞–º–∫–∞—Ö –∑–∞–¥–∞–Ω–Ω–æ–≥–æ –±—é–¥–∂–µ—Ç–∞ –æ—Ç 3 000 –¥–æ 20 000 —Ä—É–±–ª–µ–π –±—ã–ª–∏ –æ—Ç–æ–±—Ä–∞–Ω—ã –≤–∞—Ä–∏–∞–Ω—Ç—ã, —Å—Ç—Ä–æ–≥–æ —Å–æ–æ—Ç–≤–µ—Ç—Å—Ç–≤—É—é—â–∏–µ —Ç—Ä–µ–±–æ–≤–∞–Ω–∏—é –æ –Ω–∞–ª–∏—á–∏–∏ –¥–≤—É—Ö –∫–æ–º–Ω–∞—Ç –∏ –≤—ã—Å–æ–∫–æ–≥–æ —Ä–µ–π—Ç–∏–Ω–≥–∞ —á–∏—Å—Ç–æ—Ç—ã. –û—Ç–µ–ª—å Golden Ring Hotel (hotel_id: golden_ring_hotel_2, —Ä–µ–π—Ç–∏–Ω–≥ 9.1) –±—ã–ª –æ—Ç–∫–ª–æ–Ω–µ–Ω, —Ç–∞–∫ –∫–∞–∫ –¥–æ—Å—Ç—É–ø–Ω—ã–µ —Ç–∞—Ä–∏—Ñ—ã –¥–ª—è –∫–∞—Ç–µ–≥–æ—Ä–∏–∏ Junior Suite –Ω–µ –≥–∞—Ä–∞–Ω—Ç–∏—Ä—É—é—Ç –Ω–∞–ª–∏—á–∏–µ –¥–≤—É—Ö –∏–∑–æ–ª–∏—Ä–æ–≤–∞–Ω–Ω—ã—Ö –∫–æ–º–Ω–∞—Ç, —Ç—Ä–µ–±—É–µ–º—ã—Ö 

In [15]:
import pandas as pd
from services import HotelScored
from utils import ostrovok_url


def _stay_nights(checkin, checkout):
    """Return the number of nights between two dates, or 0 when it cannot be determined.

    Assumes ISO-formatted date strings (YYYY-MM-DD), matching the dates in the
    artifact file name; anything else yields 0 instead of raising.
    """
    from datetime import date  # local import keeps this cell self-contained

    try:
        nights = (date.fromisoformat(checkout) - date.fromisoformat(checkin)).days
    except (TypeError, ValueError):
        return 0
    return max(nights, 0)


def _describe_rate(selected_rate, num_nights):
    """Return (room_name, meal, total_price_str, avg_price_str) for one rate dict.

    Every element is "N/A" when no rate was selected; the two price strings are
    "N/A" when the rate carries neither daily prices nor payment types.
    """
    if not selected_rate:
        return "N/A", "N/A", "N/A", "N/A"

    # `or` fallbacks tolerate keys that are present but explicitly None
    room_name = (selected_rate.get("room_name") or "")[:50]
    meal_data = selected_rate.get("meal_data") or {}
    meal = meal_data.get("value", selected_rate.get("meal", ""))

    payment_types = (selected_rate.get("payment_options") or {}).get("payment_types") or []
    first_payment = payment_types[0] if payment_types else {}
    currency = first_payment.get("show_currency_code", "")

    daily_prices = selected_rate.get("daily_prices") or []
    if daily_prices:
        # Daily prices arrive as strings: convert, sum, and average per night
        total_price = sum(float(p) for p in daily_prices)
        avg_price = total_price / len(daily_prices)
    elif payment_types:
        # Fallback: only an amount for the stay is available, so the nightly
        # average comes from the length of the stay rather than repeating the
        # total (the total is not a per-night price for multi-night stays).
        total_price = float(first_payment.get("show_amount", 0))
        avg_price = total_price / num_nights if num_nights > 0 else None
    else:
        return room_name, meal, "N/A", "N/A"

    total_price_str = f"{total_price:.0f} {currency}"
    avg_price_str = f"{avg_price:.0f} {currency}" if avg_price is not None else "N/A"
    return room_name, meal, total_price_str, avg_price_str


def display_top_hotels(
    scored_hotels: list[HotelScored],
    top_n: int = 10,
) -> pd.DataFrame:
    """Display top N scored hotels with details and Ostrovok links.

    Prints a text card per hotel and returns the same data as a DataFrame
    indexed from 1. Reads the notebook globals CHECKIN_DATE, CHECKOUT_DATE,
    GUESTS, REGION_ID and combined.

    Args:
        scored_hotels: Scored hotel dicts, best first. None or an empty list
            (e.g. after a failed scoring run) produces an empty table.
        top_n: Maximum number of hotels to show.

    Returns:
        DataFrame with one row per displayed hotel.
    """
    hotels = scored_hotels or []
    num_nights = _stay_nights(CHECKIN_DATE, CHECKOUT_DATE)

    print(f"\n{'='*80}")
    print(f"TOP {top_n} HOTELS")
    print(f"{'='*80}\n")

    data = []
    for i, hotel in enumerate(hotels[:top_n], 1):
        hotel_id = hotel["id"]
        name = hotel["name"]
        hid = hotel["hid"]
        kind = hotel.get("kind", "")
        score = hotel["score"]
        reasons = hotel.get("top_reasons", [])
        penalties = hotel.get("score_penalties", [])
        selected_hash = hotel.get("selected_rate_hash")

        # Find selected rate by hash
        rates = hotel.get("rates") or []
        selected_rate = next((r for r in rates if r.get("match_hash") == selected_hash), None)
        room_name, meal, total_price_str, avg_price_str = _describe_rate(selected_rate, num_nights)

        # Get reviews data
        reviews = hotel.get("reviews") or {}
        avg_rating = reviews.get("avg_rating")
        detailed = reviews.get("detailed_averages") or {}

        # Generate Ostrovok URL
        url = ostrovok_url(
            hotel_id=hotel_id,
            hid=hid,
            checkin=CHECKIN_DATE,
            checkout=CHECKOUT_DATE,
            guests=GUESTS,
            region_id=REGION_ID,
        )

        # Print detailed info; the rating is shown only when one is available
        print(f"{i}. {name} [{kind}]")
        if avg_rating:
            print(f"   Score: {score}/100 | Rating: {avg_rating}/10")
        else:
            print(f"   Score: {score}/100")
        print(f"   Room: {room_name}")
        print(f"   Total: {total_price_str} | Avg per night: {avg_price_str} | Meal: {meal}")
        if reasons:
            print(f"   + {'; '.join(reasons[:3])}")
        if penalties:
            print(f"   - {'; '.join(penalties[:5])}")
        print(f"   üîó {url}")
        print()

        # Collect for DataFrame (long text fields are truncated for the table)
        data.append({
            "name": name[:35],
            "kind": kind,
            "room": room_name[:30],
            "total": total_price_str,
            "avg/night": avg_price_str,
            "meal": meal,
            "score": score,
            "rating": avg_rating,
            "clean": detailed.get("cleanness"),
            "url": url,
        })

    df = pd.DataFrame(data)
    df.index = range(1, len(df) + 1)
    selected = min(top_n, len(hotels))
    print(f"–í—Å–µ–≥–æ –Ω–∞–π–¥–µ–Ω–æ {len(combined)} –æ—Ç–µ–ª–µ–π –Ω–∞ —ç—Ç–∏ –¥–∞—Ç—ã.")
    print(f"–ü–æ–¥–æ–±—Ä–∞–Ω—ã –ª—É—á—à–∏–µ {selected} –ø–æ –≤–∞—à–∏–º –∫—Ä–∏—Ç–µ—Ä–∏—è–º.")
    return df


# Allow up to 100 characters per cell so the URL column is truncated less in the rendered table
pd.options.display.max_colwidth = 100
# Last expression of the cell: the returned DataFrame is rendered as the cell output
display_top_hotels(scored_hotels, top_n=10)

Unnamed: 0,name,kind,room,total,avg/night,meal,score,rating,clean,url
1,–ì–æ—Ä–æ–¥—Å–∫–æ–π –æ—Ç–µ–ª—å –î–æ–º –ö—É–ø—Ü–∞ –ë–∞–≤—ã–∫–∏–Ω–∞,Hotel,–ß–µ—Ç—ã—Ä—ë—Ö–º–µ—Å—Ç–Ω—ã–π –Ω–æ–º–µ—Ä —Å –ø—Ä–æ–µ–∫—Ü–∏,17967 RUB,8984 RUB,nomeal,98,9.6,9.8,https://ostrovok.ru/hotel/russia/moscow/mid11347687/dom_kuptsa_bavykina_mini_hotel/?dates=02.02....
2,–û—Ç–µ–ª—å Hampton by Hilton Moscow –†–æ–≥–æ,Hotel,–ù–æ–º–µ—Ä —Å –¥–∏–≤–∞–Ω–æ–º –°–µ–º–µ–π–Ω—ã–π —Å –∫—Ä–æ,38728 RUB,19364 RUB,breakfast,95,9.3,9.5,https://ostrovok.ru/hotel/russia/moscow/mid9770069/hampton_by_hilton_moscow_rogozhsky_val/?dates...
3,–ë—É—Ç–∏–∫-–æ—Ç–µ–ª—å –ö–∞–∑–Ω–∞—á–µ–π—Å–∫–∏–π,Boutique_and_Design,–î–≤—É—Ö–º–µ—Å—Ç–Ω—ã–π –ª—é–∫—Å –ú–∞–Ω—Å–∞—Ä–¥–∞ —Å 2,35660 RUB,17830 RUB,nomeal,94,9.3,9.6,https://ostrovok.ru/hotel/russia/moscow/mid11323757/kaznacheyskiy_boutique_hotel/?dates=02.02.20...
4,–û—Ç–µ–ª—å –°–µ–≤–∞—Å—Ç–æ–ø–æ–ª—å –ì—Ä–∞–Ω–¥ –ö–ª–∞—Å—Å–∏–∫,Hotel,–ß–µ—Ç—ã—Ä—ë—Ö–º–µ—Å—Ç–Ω—ã–µ –∞–ø–∞—Ä—Ç–∞–º–µ–Ω—Ç—ã,37533 RUB,18766 RUB,breakfast,93,9.2,9.3,https://ostrovok.ru/hotel/russia/moscow/mid7625812/sevastopol_hotel_bld2/?dates=02.02.2026-04.02...
5,Select Hotel Paveletskaya,Hotel,–î–≤—É—Ö–º–µ—Å—Ç–Ω—ã–π –ª—é–∫—Å —Å 2 –∫–æ–º–Ω–∞—Ç–∞–º–∏,21000 RUB,10500 RUB,nomeal,92,9.1,9.2,https://ostrovok.ru/hotel/russia/moscow/mid7596982/tatiana/?dates=02.02.2026-04.02.2026&guests=2...
6,–û—Ç–µ–ª—å –õ–µ—Å–Ω–∞—è –°–∞—Ñ–º–∞—Ä (–±—ã–≤—à–∏–π –•–æ–ª–∏–¥–µ–π,Hotel,–î–≤—É—Ö–º–µ—Å—Ç–Ω—ã–π –ª—é–∫—Å —Å –±–æ–ª—å—à–æ–π –¥–≤—É,37300 RUB,18650 RUB,breakfast,91,9.0,9.1,https://ostrovok.ru/hotel/russia/moscow/mid7467380/kholidei_inn_moskva_lesnaia/?dates=02.02.2026...
7,–û—Ç–µ–ª—å –ú–æ—Å–∫–≤–∞ –ö—Ä–∞—Å–Ω–æ—Å–µ–ª—å—Å–∫–∞—è (ex. Hi,Hotel,–ß–µ—Ç—ã—Ä—ë—Ö–º–µ—Å—Ç–Ω—ã–π –Ω–æ–º–µ—Ä —Å–º–µ–∂–Ω—ã–π –î,32600 RUB,16300 RUB,nomeal,90,8.9,9.0,https://ostrovok.ru/hotel/russia/moscow/mid9751449/hilton_garden_inn_moscow_krasnoselskaya_hotel...
8,–û—Ç–µ–ª—å Mamaison All-Suites Spa Pokro,Hotel,Suite Deluxe 1 Bedroom,35200 RUB,17600 RUB,breakfast,89,8.8,8.9,https://ostrovok.ru/hotel/russia/moscow/mid7590428/mamaison_allsuites_spa_hotel_pokrovka/?dates=...
9,–û—Ç–µ–ª—å –î–µ—Ä–∂–∞–≤–Ω—ã–π,Hotel,–î–≤—É—Ö–º–µ—Å—Ç–Ω—ã–π –ª—é–∫—Å —Å 2 –∫–æ–º–Ω–∞—Ç–∞–º–∏,21113 RUB,10556 RUB,breakfast,88,8.8,8.8,https://ostrovok.ru/hotel/russia/moscow/mid8874860/derzhavnyij_hotel/?dates=02.02.2026-04.02.202...
10,–û—Ç–µ–ª—å Radisson Slavyanskaya Hotel &,Hotel,–î–≤—É—Ö–º–µ—Å—Ç–Ω—ã–π –ª—é–∫—Å —Å 2 –∫–æ–º–Ω–∞—Ç–∞–º–∏,39500 RUB,19750 RUB,nomeal,87,8.8,8.7,https://ostrovok.ru/hotel/russia/moscow/mid7467357/otel_radisson_slavyanskaya_and_business_centr...
