# City Explorer: Multi-Source Attraction & Activity Discovery with Routing

This notebook is intentionally **readable** and **step-based**.

## What it does
1. Loads required environment variables (API keys + DB path)
2. Initializes a shared SQLite database (migrations + tables)
3. Retrieves **Top 10** items for a city (ranked by review count) from:
   - Google Places (New) — tourist attractions
   - TripAdvisor Content API — attractions or activities, selected via `item_type` (the exact results depend on how the API categorizes each location)
4. Uses a city-level snapshot cache (`city_top10`) and an item-level cache (`item_summary`)
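5. Provides an interactive UI using **ipywidgets**
6. Optionally demonstrates a straight-line "closest two" lookup among the Google results (fallback for routing)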

## Required environment variables
- `GOOGLE_MAPS_API_KEY`
- `TRIPADVISOR_API_KEY`
- `DABN23_DB_PATH` (full file path, e.g. `G:\My Drive\dabn23_SharedDatabase\dabn23_cache.sqlite`)


In [1]:
# 0) Dependency check (optional)
# Packages are only reported, never installed, unless AUTO_INSTALL is switched on.
# Keeping installation opt-in makes the environment explicit and reproducible.
AUTO_INSTALL = False

# Each entry is (name used by `import`, name used by `pip install`).
required = [
    ("requests", "requests"),
    ("pandas", "pandas"),
    ("ipywidgets", "ipywidgets"),
]

# Try importing every module and remember the pip name of each one that fails.
missing = []
for module_name, package_name in required:
    try:
        __import__(module_name)
    except ImportError:
        missing.append(package_name)

if not missing:
    print("All required packages are installed.")
else:
    print("Missing packages:", ", ".join(missing))
    print("Install command:")
    print("  pip install " + " ".join(missing))
    if AUTO_INSTALL:
        import sys
        import subprocess

        # Run pip through the kernel's own interpreter so the packages land
        # in the environment this notebook is actually using.
        subprocess.check_call([sys.executable, "-m", "pip", "install", *missing])
        print("Installed. Re-run this cell if needed.")


All required packages are installed.


In [2]:
# 1) Make sure we can import from /src (works when running from notebooks/ folder)
import sys
from pathlib import Path

# Start from the working directory; if the kernel was launched inside
# notebooks/, the project root is the folder one level above it.
PROJECT_ROOT = Path.cwd()
if PROJECT_ROOT.name == "notebooks":
    PROJECT_ROOT = PROJECT_ROOT.parent

# Put the root at the front of sys.path exactly once, so re-running the cell
# does not keep adding duplicate entries.
root_entry = str(PROJECT_ROOT)
if root_entry not in sys.path:
    sys.path.insert(0, root_entry)

print("Project root:", PROJECT_ROOT)


Project root: c:\Users\megiv\Desktop\Git Clone\dabn23-project1\dabn23


In [3]:
# 2) Load configuration (API keys + DB path)
# The import itself is the validation step: config.py is expected to fail fast
# with a helpful error message if something is missing.

from src.config import GOOGLE_API_KEY, TA_API_KEY, DB_PATH

# Show only the length of each key so the secrets never end up in saved output.
for label, secret in (
    ("Google API key loaded (length):", GOOGLE_API_KEY),
    ("TripAdvisor API key loaded (length):", TA_API_KEY),
):
    print(label, len(secret))

print("DB_PATH:", DB_PATH)


Google API key loaded (length): 39
TripAdvisor API key loaded (length): 32
DB_PATH: G:\My Drive\dabn23_SharedDatabase\dabn23_cache.sqlite


In [4]:
# 3) Initialize the shared SQLite database (creates the file if it doesn't exist)

from pathlib import Path
from src.db import connect, migrate_if_needed, create_tables

# SQLite will create a missing database file but not a missing directory,
# so make sure the containing folder is there before connecting.
db_folder = Path(DB_PATH).parent
db_folder.mkdir(parents=True, exist_ok=True)

# `conn` is the shared connection used by the pipeline functions further down.
conn = connect(DB_PATH)
migrate_if_needed(conn)   # handles legacy schemas (e.g., place_ids_json -> item_ids_json)
create_tables(conn)

# Quick sanity check: list the table names recorded in SQLite's catalog.
tables = conn.execute("SELECT name FROM sqlite_master WHERE type='table';").fetchall()
table_names = [row[0] for row in tables]
print("✅ DB ready. Tables:", table_names)


✅ DB ready. Tables: ['city_top10', 'item_summary']


In [5]:
# 4) Top-10 pipelines (snapshot + cache)
# These functions keep the notebook readable while avoiding duplicated logic.

from typing import Any, Dict, List

from src.cache import (
    get_city_snapshot_item_ids,
    save_city_snapshot_item_ids,
    get_cached_item_summary,
    upsert_item_summary,
)

import src.google_places as g
import src.tripadvisor as ta


def top10_google_attractions(city: str, n: int = 10, language: str = "en", search_pool: int = 50) -> List[Dict[str, Any]]:
    """Return the top-`n` Google tourist attractions for `city`, ranked by review count.

    The ranking is a static per-city snapshot. The first call for a city runs a
    text search, keeps candidates whose ``types`` include ``tourist_attraction``,
    sorts them by ``userRatingCount`` (descending) and stores the resulting IDs.
    Later calls reuse the stored IDs instead of searching again.

    Note: the stored snapshot is cut to the `n` of the call that created it, so
    a later call with a larger `n` returns at most the stored number of items.

    Args:
        city: City name; surrounding whitespace is ignored.
        n: Maximum number of items to return.
        language: Language code passed to the Google search and details calls.
        search_pool: Maximum number of search candidates to rank.

    Returns:
        A list of summary dicts. Each carries two extra keys: ``_source``
        ("cache" or "api") and ``_city_source`` ("city_snapshot" or "computed").
        The list is empty when `city` is blank, `n` is not positive, or the
        search yields no tourist attraction.
    """
    city = (city or "").strip()
    if not city or n <= 0:
        # Nothing sensible to search for; also avoids writing a junk snapshot
        # (a negative n would otherwise slice the ranking from the wrong end).
        return []

    source, item_type = "google", "attraction"

    # 1) Snapshot lookup
    ids = get_city_snapshot_item_ids(conn, city, source, item_type)
    city_source = "city_snapshot" if ids else "computed"

    # 2) Compute snapshot once (if missing)
    if not ids:
        candidates = g.text_search_many(
            f"tourist attractions in {city}",
            language_code=language,
            max_results=search_pool,
        )

        # Strict filter: only tourist attractions, and only entries that
        # actually carry an id (without one they cannot be stored or resolved).
        attractions = [
            p for p in (candidates or [])
            if p.get("id") and "tourist_attraction" in (p.get("types") or [])
        ]

        # Rank by number of reviews
        ranked = sorted(attractions, key=lambda p: int(p.get("userRatingCount", 0) or 0), reverse=True)

        # dict.fromkeys drops repeated ids while keeping the ranking order.
        ids = list(dict.fromkeys(p["id"] for p in ranked))[:n]

        if not ids:
            # Do not persist an empty snapshot; just report "nothing found".
            return []
        save_city_snapshot_item_ids(conn, city, source, item_type, ids)

    # 3) Resolve IDs -> cached details (or fetch once, then cache)
    results: List[Dict[str, Any]] = []
    for pid in ids[:n]:
        cached = get_cached_item_summary(conn, source, pid)
        if cached:
            # Copy before tagging so the object owned by the cache layer
            # is never modified.
            item = dict(cached)
            item["_source"] = "cache"
        else:
            details = g.place_details(pid, language_code=language)
            summary = g.summarize(details)
            upsert_item_summary(conn, summary)
            item = dict(summary)
            item["_source"] = "api"

        item["_city_source"] = city_source
        results.append(item)

    return results


def top10_tripadvisor(city: str, item_type: str = "attraction", n: int = 10, language: str = "en") -> List[Dict[str, Any]]:
    """Return the top-`n` TripAdvisor locations for `city`, ranked by review count.

    The ranking is a static snapshot per (city, item_type). The first call
    resolves the city to a location, searches it for `item_type`, sorts the
    candidates by ``num_reviews`` (descending) and stores the resulting
    location IDs as strings. Later calls reuse the stored IDs.

    Note: the stored snapshot is cut to the `n` of the call that created it, so
    a later call with a larger `n` returns at most the stored number of items.

    Args:
        city: City name; surrounding whitespace is ignored.
        item_type: Kind of location to search for (passed through to the
            TripAdvisor search and used as part of the snapshot key).
        n: Maximum number of items to return.
        language: Language code passed to the TripAdvisor calls.

    Returns:
        A list of summary dicts. Each carries two extra keys: ``_source``
        ("cache" or "api") and ``_city_source`` ("city_snapshot" or "computed").
        The list is empty when `city` is blank, `n` is not positive, the city
        cannot be located, or the search yields no usable location.
    """
    city = (city or "").strip()
    if not city or n <= 0:
        # Nothing sensible to search for; also avoids writing a junk snapshot
        # (a negative n would otherwise slice the ranking from the wrong end).
        return []

    source = "tripadvisor"

    # 1) Snapshot lookup
    ids = get_city_snapshot_item_ids(conn, city, source, item_type)
    city_source = "city_snapshot" if ids else "computed"

    # 2) Compute snapshot once (if missing)
    if not ids:
        city_geo = ta.get_city_location(city, language=language)
        if city_geo is None:
            # NOTE(review): assumes an unresolved city is signalled by None — confirm in src/tripadvisor.
            return []
        candidates = ta.search(city_geo, item_type=item_type, language=language)

        # Entries without a location_id cannot be stored or resolved later.
        usable = [p for p in (candidates or []) if p.get("location_id") not in (None, "")]
        ranked = sorted(usable, key=lambda p: int(p.get("num_reviews", 0) or 0), reverse=True)

        # dict.fromkeys drops repeated ids while keeping the ranking order.
        ids = list(dict.fromkeys(str(p["location_id"]) for p in ranked))[:n]

        if not ids:
            # Do not persist an empty snapshot; just report "nothing found".
            return []
        save_city_snapshot_item_ids(conn, city, source, item_type, ids)

    # 3) Resolve IDs -> cached details (or fetch once, then cache)
    results: List[Dict[str, Any]] = []
    for lid in ids[:n]:
        cached = get_cached_item_summary(conn, source, lid)
        if cached:
            # Copy before tagging so the object owned by the cache layer
            # is never modified.
            item = dict(cached)
            item["_source"] = "cache"
        else:
            details = ta.details(lid, language=language)
            summary = ta.summarize(details)
            upsert_item_summary(conn, summary)
            item = dict(summary)
            item["_source"] = "api"

        item["_city_source"] = city_source
        results.append(item)

    return results


def unified_search(city: str, source: str, item_type: str) -> List[Dict[str, Any]]:
    """Single entry point for the UI: route (city, source, item_type) to a Top-10 pipeline.

    Returns the pipeline's results, or an empty list for any combination that
    has no pipeline (an unknown source, or Google with a non-attraction type).
    """
    if source == "tripadvisor":
        return top10_tripadvisor(city, item_type=item_type)

    # The Google pipeline only exists for attractions at the moment.
    if source == "google" and item_type == "attraction":
        return top10_google_attractions(city)

    return []


## 5) Interactive search UI (ipywidgets)

Use the controls to choose:
- city
- data source (Google or TripAdvisor)
- type (attraction/activity) — Google currently supports attractions only, so Google + activity returns no results

Then click **Search Top 10**.


In [6]:
from src.ui import build_search_widget

# build_search_widget expects: search_fn(city, source, item_type) -> results
# unified_search has exactly that signature, so it is passed in directly.
# The call is left as the last expression in the cell so that its return value
# (a VBox in the saved output) is rendered as the cell's output.
build_search_widget(unified_search)


VBox(children=(HBox(children=(Text(value='Paris', description='City:', layout=Layout(width='420px'), placehold…

## 6) Optional: "closest two" demo (fallback)

This uses a straight-line (Haversine) distance as a fallback, so the demo works even before
the Google Routes API is integrated into `src/routing.py`.


In [7]:
from src.routing import closest_two_fallback

# Example: compute closest two among Google top-10 (needs lat/lng)
city = "Paris"
results = top10_google_attractions(city)

if not results:
    # Without this guard an empty result list (no matches, or the API returned
    # nothing) would make results[0] raise IndexError and break "Run All".
    print(f"No Google attractions found for {city!r}; skipping the closest-two demo.")
else:
    # The first (most-reviewed) result is the starting point; every other
    # result is a candidate neighbour.
    start, others = results[0], results[1:]

    closest = closest_two_fallback(start, others)

    print("Start:", start.get("name"))
    print("Closest two (fallback distance):")
    for c in closest:
        print(" -", c.get("name"))


Start: Eiffel Tower
Closest two (fallback distance):
 - Champ de Mars
 - Arc de Triomphe
