In [None]:
import json
import os
from pathlib import Path

from dotenv import load_dotenv

# Load environment variables via python-dotenv before os.environ is read below.
load_dotenv()

# Model identifier later passed to estimate_tokens; raises KeyError if the variable is unset.
SCORING_MODEL = os.environ["SCORING_MODEL"]

# Specify JSON file to load (from .artifacts directory)
# Format: City_CheckinDate_CheckoutDate.json
# NOTE(review): the non-ASCII literals in this cell appear mis-encoded in this view -
# confirm the notebook is saved as UTF-8 and that this name matches the file on disk.
ARTIFACT_FILE = "–ú–æ—Å–∫–≤–∞_2026-02-02_2026-02-04.json"  # Change this to your file

# Load data from JSON
# The path is relative, so it is resolved against the kernel's current working directory.
artifacts_dir = Path(".artifacts")
filepath = artifacts_dir / ARTIFACT_FILE

# Fail early with an explicit message that includes the resolved path.
if not filepath.exists():
    raise FileNotFoundError(f"File not found: {filepath}")

with open(filepath, "r", encoding="utf-8") as f:
    data = json.load(f)

# Extract search parameters
# Every key below is required: a missing one raises KeyError.
search = data["search"]
CITY = search["city"]
CHECKIN_DATE = search["checkin"]
CHECKOUT_DATE = search["checkout"]
GUESTS = search["guests"]
MIN_PRICE = search["min_price"]
MAX_PRICE = search["max_price"]
CURRENCY = search["currency"]
LANGUAGE = search["language"]
RESIDENCY = search["residency"]

# Extract hotels data
combined = data["hotels"]  # HotelFull[]
stats = data["stats"]

# Reconstruct reviews_map from combined data
# Maps hid -> reviews; hotels whose "hid" or "reviews" is missing or falsy are skipped.
reviews_map = {}
for hotel in combined:
    hid = hotel.get("hid")
    reviews = hotel.get("reviews")
    if hid and reviews:
        reviews_map[hid] = reviews

# Short summary of what was loaded.
print(f"‚úÖ Loaded data from {ARTIFACT_FILE}")
print(f"   Search: {CITY}, {CHECKIN_DATE} ‚Üí {CHECKOUT_DATE}")
print(f"   Hotels: {len(combined)}")
print(f"   Stats: {stats}")

# User preferences for scoring (can be modified)
# Free-text string handed to score_hotels in the scoring cell.
USER_PREFERENCES = "–û–±—è–∑–∞—Ç–µ–ª—å–Ω–æ –¥–≤–µ –∫–æ–º–Ω–∞—Ç—ã –∏ –¥–≤–µ –∫—Ä–æ–≤–∞—Ç–∏. –•–æ—Ä–æ—à–∏–µ –æ—Ç–∑—ã–≤—ã. –ß–∏—Å—Ç–æ—Ç–∞"

In [None]:
import json

from services import estimate_tokens, prepare_hotel_for_llm, presort_hotels

# Review-sampling limits forwarded to prepare_hotel_for_llm
MAX_REVIEWS_PER_HOTEL = 30
REVIEW_TEXT_MAX_LENGTH = 512

# Maximum number of hotels requested from presort_hotels
PRESORT_LIMIT = 120


def _prepare_for_llm(hotels):
    """Run prepare_hotel_for_llm over `hotels` with the notebook's price and review settings."""
    return [
        prepare_hotel_for_llm(
            hotel,
            MIN_PRICE,
            MAX_PRICE,
            MAX_REVIEWS_PER_HOTEL,
            REVIEW_TEXT_MAX_LENGTH,
        )
        for hotel in hotels
    ]


def _estimate_payload_tokens(payload):
    """Serialize `payload` to JSON (non-ASCII kept as-is) and estimate its tokens for SCORING_MODEL."""
    serialized = json.dumps(payload, ensure_ascii=False)
    return estimate_tokens(serialized, SCORING_MODEL)


# Token estimate for the full hotel list, taken before any presorting
hotels_for_llm_all = _prepare_for_llm(combined)
tokens_before = _estimate_payload_tokens(hotels_for_llm_all)

# Presort the hotels and keep at most PRESORT_LIMIT of them for LLM scoring
top_hotels = presort_hotels(combined, reviews_map, limit=PRESORT_LIMIT)

# Token estimate for the presorted subset
hotels_for_llm_top = _prepare_for_llm(top_hotels)
tokens_after = _estimate_payload_tokens(hotels_for_llm_top)

# Report how many hotels remain and how many tokens the presort saves
print(f"[presort_done] {len(combined)} –æ—Ç–µ–ª–µ–π ‚Üí {len(top_hotels)} (–ª–∏–º–∏—Ç {PRESORT_LIMIT})")
print(f"  –¢–æ–∫–µ–Ω—ã: ~{tokens_before:,} ‚Üí ~{tokens_after:,} (—ç–∫–æ–Ω–æ–º–∏—è {tokens_before - tokens_after:,})")

In [None]:
import time

from services import finalize_scored_hotels, score_hotels

# Score hotels using single LLM request
# Returns top N scored hotels with summary explaining the selection
print(f"[scoring_start] Scoring {len(top_hotels)} hotels...")
start_time = time.time()

scoring_result = await score_hotels(
    top_hotels,
    USER_PREFERENCES,
    guests=GUESTS,
    max_reviews=MAX_REVIEWS_PER_HOTEL,
    review_text_max_length=REVIEW_TEXT_MAX_LENGTH,
    min_price=MIN_PRICE,
    max_price=MAX_PRICE,
    currency=CURRENCY,
    top_count=10
)

elapsed = time.time() - start_time

if scoring_result["error"]:
    print(f"\n‚ùå ERROR: {scoring_result['error']}")
    scored_hotels = None
else:
    scoring_results = scoring_result["results"]
    print(f"[scoring_done] {len(scoring_results)} hotels scored ‚Äî {elapsed:.1f}s")
    print(f"  Estimated tokens: ~{scoring_result['estimated_tokens']:,}")
    print(f"\nüìã Summary:\n{scoring_result['summary']}")
    
    # Finalize scored hotels - merge scoring results with full hotel data
    scored_hotels = finalize_scored_hotels(combined, scoring_results)
    print(f"\n[finalize_done] {len(scored_hotels)} hotels with complete data")

In [None]:
import pandas as pd
from services import HotelScored
from utils import ostrovok_url


def _format_amount(amount, currency):
    """Render `amount` without decimals, followed by the currency code when there is one."""
    return f"{amount:.0f} {currency}".strip()


def _describe_rate(rate):
    """Summarise a rate dict as (room_name, meal, total_price_str, avg_price_str).

    A missing or empty rate yields "N/A" for all four values. Nested fields that
    are absent or None are treated as empty instead of raising.
    """
    if not rate:
        return "N/A", "N/A", "N/A", "N/A"

    room_name = (rate.get("room_name") or "")[:50]
    meal_data = rate.get("meal_data") or {}
    meal = meal_data.get("value", rate.get("meal", ""))

    # Only the first payment type is consulted, for both currency and fallback amount.
    payment_types = (rate.get("payment_options") or {}).get("payment_types") or []
    first_payment = payment_types[0] if payment_types else None
    currency = first_payment.get("show_currency_code", "") if first_payment else ""

    daily_prices = rate.get("daily_prices") or []
    if daily_prices:
        # Daily prices may be strings, so convert before summing.
        total_price = sum(float(price) for price in daily_prices)
        avg_price = total_price / len(daily_prices)
        return (
            room_name,
            meal,
            _format_amount(total_price, currency),
            _format_amount(avg_price, currency),
        )

    if first_payment:
        # Fallback when daily_prices is unavailable.
        # NOTE(review): the same amount is shown as total and as average per night,
        # because the number of nights is not known here - confirm this is intended.
        amount_str = _format_amount(float(first_payment.get("show_amount", 0)), currency)
        return room_name, meal, amount_str, amount_str

    return room_name, meal, "N/A", "N/A"


def display_top_hotels(
    # Quoted so that the "| None" union does not have to be evaluated at definition time.
    scored_hotels: "list[HotelScored] | None",
    top_n: int = 10,
) -> pd.DataFrame:
    """Print the top N scored hotels with details and Ostrovok links.

    Reads the notebook globals CHECKIN_DATE, CHECKOUT_DATE and GUESTS to build
    each link, and `combined` for the closing summary line.

    Args:
        scored_hotels: Scored hotel dicts, best first. Each must provide "id",
            "name", "hid" and "score"; every other field is optional. None or an
            empty list is accepted (the scoring cell assigns None on failure).
        top_n: Maximum number of hotels to print and return.

    Returns:
        A DataFrame with one row per displayed hotel, indexed from 1. It has the
        same columns, and no rows, when there is nothing to display.
    """
    columns = [
        "name", "kind", "room", "total", "avg/night",
        "meal", "score", "rating", "clean", "url",
    ]

    # Scoring may have failed or returned nothing; report it instead of crashing.
    if not scored_hotels:
        print("No scored hotels to display.")
        return pd.DataFrame(columns=columns)

    print(f"\n{'='*80}")
    print(f"TOP {top_n} HOTELS")
    print(f"{'='*80}\n")

    rows = []
    for i, hotel in enumerate(scored_hotels[:top_n], 1):
        hotel_id = hotel["id"]
        name = hotel["name"]
        hid = hotel["hid"]
        kind = hotel.get("kind", "")
        score = hotel["score"]
        reasons = hotel.get("top_reasons") or []
        penalties = hotel.get("score_penalties") or []
        selected_hash = hotel.get("selected_rate_hash")

        # Find selected rate by hash. Without a hash nothing is selected, so a rate
        # that merely lacks "match_hash" is never picked by accident.
        selected_rate = None
        if selected_hash is not None:
            rates = hotel.get("rates") or []
            selected_rate = next(
                (rate for rate in rates if rate.get("match_hash") == selected_hash),
                None,
            )

        room_name, meal, total_price_str, avg_price_str = _describe_rate(selected_rate)

        # Get reviews data
        reviews = hotel.get("reviews") or {}
        avg_rating = reviews.get("avg_rating")
        detailed = reviews.get("detailed_averages") or {}

        # Generate Ostrovok URL (note: region_id not available, URL may not work perfectly)
        url = ostrovok_url(
            hotel_id=hotel_id,
            hid=hid,
            checkin=CHECKIN_DATE,
            checkout=CHECKOUT_DATE,
            guests=GUESTS,
            region_id=None,  # Not stored in artifacts
        )

        # Print detailed info; the rating is shown only when it is truthy
        print(f"{i}. {name} [{kind}]")
        if avg_rating:
            print(f"   Score: {score}/100 | Rating: {avg_rating}/10")
        else:
            print(f"   Score: {score}/100")
        print(f"   Room: {room_name}")
        print(f"   Total: {total_price_str} | Avg per night: {avg_price_str} | Meal: {meal}")
        if reasons:
            print(f"   + {'; '.join(reasons[:3])}")
        if penalties:
            print(f"   - {'; '.join(penalties[:5])}")
        print(f"   üîó {url}")
        print()

        # Collect for DataFrame (long names are truncated to keep the table compact)
        rows.append({
            "name": name[:35],
            "kind": kind,
            "room": room_name[:30],
            "total": total_price_str,
            "avg/night": avg_price_str,
            "meal": meal,
            "score": score,
            "rating": avg_rating,
            "clean": detailed.get("cleanness"),
            "url": url,
        })

    df = pd.DataFrame(rows, columns=columns)
    df.index = range(1, len(df) + 1)
    selected = min(top_n, len(scored_hotels))
    print(f"–í—Å–µ–≥–æ –Ω–∞–π–¥–µ–Ω–æ {len(combined)} –æ—Ç–µ–ª–µ–π –Ω–∞ —ç—Ç–∏ –¥–∞—Ç—ã.")
    print(f"–ü–æ–¥–æ–±—Ä–∞–Ω—ã –ª—É—á—à–∏–µ {selected} –ø–æ –≤–∞—à–∏–º –∫—Ä–∏—Ç–µ—Ä–∏—è–º.")
    return df


# Allow up to 100 characters per column when pandas renders a frame
pd.set_option("display.max_colwidth", 100)

# Keep the table in a name, then leave it as the cell's last expression so the notebook renders it
top_hotels_table = display_top_hotels(scored_hotels, top_n=10)
top_hotels_table