# 🏠 Smart Host Advisor

This notebook is a simple, interactive interface for property owners.
All core data and models are **precomputed**, so results are fast and easy to explore.
Before running, set the following values in the first code cell:
1. storage_account
2. container
3. sas_token

Please run the cells from top to bottom.

In [0]:
from IPython.display import display, clear_output, HTML, Markdown
from pyspark.sql.functions import col, max as spark_max
from pyspark.sql import functions as F, Window
import ipywidgets as widgets
import json, requests, time
import re

# Azure storage settings: replace both placeholder values before running.
storage_account = "replace_by_storage_account"  
container = "replace_by_container"

# SAS token for the storage account; any leading '?' characters are stripped
# on the next line before the token is handed to Spark.
sas_token="replace_with_your_sas_token"
sas_token = sas_token.lstrip('?')
# Configure Spark to authenticate against this storage account with the
# fixed SAS token. These settings must be in place before the abfss:// read.
spark.conf.set(f"fs.azure.account.auth.type.{storage_account}.dfs.core.windows.net", "SAS")
spark.conf.set(f"fs.azure.sas.token.provider.type.{storage_account}.dfs.core.windows.net", "org.apache.hadoop.fs.azurebfs.sas.FixedSASTokenProvider")
spark.conf.set(f"fs.azure.sas.fixed.token.{storage_account}.dfs.core.windows.net", sas_token)

# Location of the airbnb_1_12_parquet dataset inside the configured container.
path = f"abfss://{container}@{storage_account}.dfs.core.windows.net/airbnb_1_12_parquet"

airbnb = spark.read.parquet(path)
# Load DF with Amenities (read from DBFS, not from the Azure container).
df = spark.read.parquet("dbfs:/airbnb/df_with_amenities")
# Base DBFS folder for the precomputed tables loaded below.
OUTPUT_BASE = "dbfs:/FileStore/airbnb"
popular_price = spark.read.parquet(f"{OUTPUT_BASE}/popular_price")
popular_rating = spark.read.parquet(f"{OUTPUT_BASE}/popular_rating")
# Bayesian price / rating models (Delta tables) queried by render_bayesian_html.
bayes_price_model = spark.read.format("delta").load(f"{OUTPUT_BASE}/bayes_price_model_delta")
bayes_rating_model = spark.read.format("delta").load(f"{OUTPUT_BASE}/bayes_rating_model_delta")
# Popular-amenity table queried by render_market_html.
popular_price_df = spark.read.parquet(f"{OUTPUT_BASE}/popular_price_final")

# review_df is the table render_review_html filters on a non-null llm_text;
# review_df_no_api is the input of the optional live-summary cell.
review_df = spark.read.format("delta").load(f"{OUTPUT_BASE}/airbnb_api_property_issue_evidence_ITALY_1")
review_df_no_api = spark.read.format("delta").load(f"{OUTPUT_BASE}/airbnb_property_issue_sentence_full_ITALY_1")
# NOTE(review): airbnb, df, popular_price, popular_rating and
# df_italy_properties are not referenced by the cells visible in this
# notebook — confirm whether they are still needed.
df_italy_properties = spark.read.parquet("dbfs:/FileStore/airbnb/property_ids")

## 🎯 Recommendations & Review Insights

In this step, you define what to analyze and view the main results.

You can adjust:
- **Property ID** - the property to analyze
- **Max Amenity Cost ($)** *(optional)* - budget limit for recommendations
- **α (price ↔ rating)** - balance between price uplift and rating improvement

After clicking **Run Recommendations**, the system presents:
- Bayesian amenity recommendations (price uplift, cost, confidence)
- Market Standard Amenities
- Review-based insights focusing on recurring issues raised by guests

All results here are generated from precomputed data.

In [0]:
# =====================================
# Properties with PRECOMPUTED review analysis
# =====================================

# Property IDs offered to the user as examples; the first one is the default
# value of the Property ID box.
example_property_ids = [
    "1042005770541410920",
    "10566180",
    "11251348",
    "1166472471627354698",
    "1259994760695336460",
    "14410929",
    "1773055"
]

# Hint text shown under the inputs (rendered by examples_label below).
examples_text = "Property ID examples (with review analysis): " + ", ".join(example_property_ids)

# =====================================
# UI Elements
# =====================================

# Free-text box for the property to analyze; on_run_clicked converts it with int().
property_input = widgets.Text(
    value=example_property_ids[0],
    description="Property ID:",
    layout=widgets.Layout(width="420px"),
    style={"description_width": "140px"}
)

# Optional budget; the text is parsed by parse_budget, where an empty box
# means "no limit".
budget_input = widgets.Text(
    value="",
    description="Max Amenity Cost ($):",
    placeholder="leave empty for no limit",
    layout=widgets.Layout(width="420px"),
    style={"description_width": "140px"}
)

examples_label = widgets.HTML(
    value=f"<span style='color:gray;font-size:12px'>{examples_text}</span>"
)

# Weight passed to render_bayesian_html as alpha (0.0 .. 1.0, default 0.6).
alpha_slider = widgets.FloatSlider(
    value=0.6,
    min=0.0,
    max=1.0,
    step=0.05,
    description="α (price ↔ rating):",
    readout_format=".2f",
    layout=widgets.Layout(width="520px"),
    style={"description_width": "180px"}
)

run_button = widgets.Button(
    description="Run Recommendations",
    button_style="primary",
    layout=widgets.Layout(width="220px")
)

# loading_label carries the run status text set by on_run_clicked;
# hint_label is a static usage hint.
loading_label = widgets.Label(value="")
hint_label = widgets.Label(value="← Press button and wait ~30 seconds")

button_row = widgets.HBox([run_button, loading_label, hint_label])
# All rendered HTML cards are displayed inside this output area.
output = widgets.Output()

# =====================================
# Helpers
# =====================================

def esc(x):
    """Return ``x`` as text with ``&``, ``<`` and ``>`` replaced by HTML entities.

    ``None`` becomes the empty string; any other value is converted with
    ``str()`` first. Quote characters are left untouched.
    """
    if x is None:
        return ""
    # A single translate pass gives the same result as replacing "&" first
    # and the angle brackets afterwards.
    entities = str.maketrans({"&": "&amp;", "<": "&lt;", ">": "&gt;"})
    return str(x).translate(entities)

def clean_name(a):
    """Turn an amenity key such as ``a_sauna_room`` into a display name.

    Only a leading ``a_`` prefix is removed; the remaining underscores become
    spaces. Removing every ``a_`` occurrence (as a plain ``str.replace``
    would) also eats the end of words like "sauna_" or "spa_" and glues the
    neighbouring words together.
    """
    if a.startswith("a_"):
        a = a[2:]
    return a.replace("_", " ")

def parse_budget(txt):
    """Parse the budget text box into a float.

    Returns ``None`` when the text is blank or cannot be converted by
    ``float()``; callers treat ``None`` as "no budget limit".
    """
    cleaned = txt.strip()
    if not cleaned:
        return None
    try:
        amount = float(cleaned)
    except ValueError:
        amount = None
    return amount

def li(text):
    """Wrap ``text`` in an ``<li>`` element with the notebook's list spacing.

    ``text`` is inserted as-is (no HTML escaping is applied here).
    """
    return "<li style='margin:6px 0'>{}</li>".format(text)

# =====================================
# Bayesian Renderer (with budget filter)
# =====================================

def render_bayesian_html(property_id: int, alpha: float, budget=None):
    """Build the "Bayesian Amenity Recommendations" HTML card for one property.

    Reads ``bayes_price_model`` and ``bayes_rating_model``, keeps the amenities
    that have a non-null ``score_lcb_log`` in both, optionally drops amenities
    whose ``estimated_cost`` exceeds ``budget``, and lists the top three
    amenities by price score, by rating score, and by the combined score
    ``alpha * price_score + (1 - alpha) * rating_score``.

    Args:
        property_id: Value matched against the ``property_id`` column of both
            model tables.
        alpha: Weight of the price score in the combined ranking; the rating
            score is weighted by ``1 - alpha``.
        budget: Maximum amenity cost, or ``None`` for no limit. Amenities with
            a missing or zero ``estimated_cost`` are always kept.

    Returns:
        An HTML fragment as a string. When no amenity remains, a short
        "no recommendations" card is returned instead.
    """
    price_rec = (
        bayes_price_model
        .filter(
            (F.col("property_id") == property_id) &
            F.col("score_lcb_log").isNotNull()
        )
        .select(
            "amenity_key",
            F.col("mu_post").alias("price_uplift"),
            F.col("score_lcb_log").alias("price_score"),
            F.col("var_post").alias("price_var"),
            "estimated_cost"
        )
    )

    rating_rec = (
        bayes_rating_model
        .filter(
            (F.col("property_id") == property_id) &
            F.col("score_lcb_log").isNotNull()
        )
        .select(
            "amenity_key",
            F.col("mu_post").alias("rating_uplift"),
            F.col("score_lcb_log").alias("rating_score"),
            F.col("var_post").alias("rating_var")
        )
    )

    # Only amenities scored by BOTH models are considered.
    rows = price_rec.join(rating_rec, on="amenity_key", how="inner").collect()

    # -----------------------------
    # Budget filter
    # -----------------------------
    if budget is not None:
        rows = [
            r for r in rows
            if r["estimated_cost"] is None or r["estimated_cost"] == 0 or r["estimated_cost"] <= budget
        ]

    if not rows:
        return """
        <div style="font-family:Arial;margin-top:24px;">
          <h2>🎯 Bayesian Amenity Recommendations</h2>
          <div style="padding:12px;border-radius:12px;border:1px dashed #d1d5db;background:#f9fafb;">
            No Bayesian recommendations available under the selected budget.
          </div>
        </div>
        """

    # -----------------------------
    # Confidence thresholds (quantiles of the posterior variances among the
    # remaining rows; lower variance = higher confidence)
    # -----------------------------
    price_vars = sorted(r["price_var"] for r in rows if r["price_var"] is not None)
    rating_vars = sorted(r["rating_var"] for r in rows if r["rating_var"] is not None)

    def q(vals, p):
        # Lower empirical quantile of an already sorted list; None if empty.
        if not vals:
            return None
        return vals[int(p * (len(vals) - 1))]

    price_q15 = q(price_vars, 0.15)
    price_q50 = q(price_vars, 0.50)

    rating_q15 = q(rating_vars, 0.15)
    rating_q50 = q(rating_vars, 0.50)

    def confidence_badge(var, q15, q50):
        if var is None or q15 is None or q50 is None:
            return ""
        if var <= q15:
            return "<span style='color:#15803d;font-weight:700;'>High confidence</span>"
        if var >= q50:
            return "<span style='color:#b91c1c;font-weight:700;'>Low confidence</span>"
        return "<span style='color:#ca8a04;font-weight:700;'>Medium confidence</span>"

    def cost_str(x):
        # Missing or zero costs are not displayed.
        if x is None or x == 0:
            return ""
        return f" | cost ${x:.0f}"

    # Rankings
    top_price = sorted(rows, key=lambda r: r["price_score"], reverse=True)[:3]
    top_rating = sorted(rows, key=lambda r: r["rating_score"], reverse=True)[:3]

    scored = [(r, alpha * r["price_score"] + (1 - alpha) * r["rating_score"]) for r in rows]
    top_combined = sorted(scored, key=lambda x: x[1], reverse=True)[:3]

    parts = ["""
<div style="font-family:Arial;max-width:1100px;margin-top:24px;">
  <h2>🎯 Bayesian Amenity Recommendations</h2>
  <div style="border:1px solid #e6e6e6;border-radius:16px;padding:16px;
              box-shadow:0 2px 10px rgba(0,0,0,0.05);background:#fff;">
"""]

    # -----------------------------
    # Price
    # -----------------------------
    parts.append("<h3>🔝 Top Price Uplift</h3><ul>")
    for r in top_price:
        parts.append(li(
            f"{clean_name(r['amenity_key'])} — "
            f"+${r['price_uplift']:.2f}/night"
            f"{cost_str(r['estimated_cost'])} | "
            f"{confidence_badge(r['price_var'], price_q15, price_q50)}"
        ))
    parts.append("</ul>")

    # -----------------------------
    # Rating
    # -----------------------------
    parts.append("<h3>⭐ Top Rating Uplift</h3><ul>")
    for r in top_rating:
        parts.append(li(
            f"{clean_name(r['amenity_key'])} — "
            f"+{r['rating_uplift']:.3f} rating"
            f"{cost_str(r['estimated_cost'])} | "
            f"{confidence_badge(r['rating_var'], rating_q15, rating_q50)}"
        ))
    parts.append("</ul>")

    # -----------------------------
    # Combined
    # -----------------------------
    parts.append(f"<h3>⚖️ Combined Ranking (α = {alpha:.2f})</h3><ul>")
    # alpha is the weight of the PRICE score, so the badge must follow the
    # price model when alpha >= 0.5 and the rating model otherwise.
    price_dominates = alpha >= 0.5
    for r, _ in top_combined:
        if price_dominates:
            badge = confidence_badge(r["price_var"], price_q15, price_q50)
        else:
            badge = confidence_badge(r["rating_var"], rating_q15, rating_q50)

        parts.append(li(
            f"{clean_name(r['amenity_key'])} — "
            f"+${r['price_uplift']:.2f}/night | "
            f"+{r['rating_uplift']:.3f} rating"
            f"{cost_str(r['estimated_cost'])} | "
            f"{badge}"
        ))
    parts.append("</ul>")

    parts.append("</div></div>")
    return "".join(parts)

# =====================================
# Market Standard Amenities 
# =====================================

def render_market_html(property_id: int):
    """Build the "Market Standard Amenities" HTML card for one property.

    Selects the rows of ``popular_price_df`` whose ``target_id`` equals
    ``property_id`` and whose ``percentage`` is at least 0.9, and lists each
    amenity with its percentage. A placeholder box is rendered when no row
    qualifies.

    Returns:
        An HTML fragment as a string.
    """
    is_target = F.col("target_id") == property_id
    is_standard = F.col("percentage") >= 0.9
    matches = (
        popular_price_df
        .filter(is_target & is_standard)
        .select("amenity", "percentage")
        .collect()
    )

    # Card header (opening tags are closed at the end of the function).
    pieces = ["""
<div style="font-family:Arial;max-width:1100px;margin-top:24px;">
  <h2>🧱 Market Standard Amenities</h2>
  <div style="border:1px solid #e6e6e6;border-radius:16px;padding:16px;
              box-shadow:0 2px 10px rgba(0,0,0,0.05);background:#fff;">
"""]

    if not matches:
        pieces.append("""
    <div style="padding:12px;border-radius:12px;border:1px dashed #d1d5db;background:#f9fafb;">
      No strong market-standard amenities found.
    </div>
""")
    else:
        pieces.append("<ul>")
        pieces.extend(
            li(f"{clean_name(row['amenity'])} — {row['percentage']*100:.0f}% of nearby listings")
            for row in matches
        )
        pieces.append("</ul>")

    pieces.append("</div></div>")
    return "".join(pieces)


from pyspark.sql import functions as F, Window

# Maximum number of sentences kept per issue (render_review_html keeps rows
# with sent_rank <= this value).
MAX_SENTENCES_PER_ISSUE = 5
# When True, similarity ("match") chips are rendered wherever a similarity
# value is available.
SHOW_SIMILARITY = True


def render_review_html(property_id: str) -> str:
    """Render the precomputed guest-review analysis card for one property.

    Filters ``review_df`` to rows of ``property_id`` that have a non-null
    ``llm_text``, ranks issues by ``mentions`` and ``avg_neg_prob``, keeps at
    most ``MAX_SENTENCES_PER_ISSUE`` sentences per issue, and renders the LLM
    summary followed by one box per issue.

    Args:
        property_id: Value matched against ``review_df.property_id``.

    Returns:
        An HTML fragment as a string. When the property has no precomputed
        analysis, a short "not available" card is returned instead.
    """
    # -----------------------------
    # Helpers (match the "nice" visual style)
    # -----------------------------
    def _esc(x):
        if x is None:
            return ""
        s = str(x)
        return s.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")

    def _pct(x, digits=0):
        # Fixed-point formatting so digits=0 prints "87%" rather than "87.0%"
        # (round(value, 0) returns a float and keeps the trailing ".0").
        if x is None:
            return "—"
        try:
            return f"{float(x) * 100:.{max(int(digits), 0)}f}%"
        except (TypeError, ValueError, OverflowError):
            return "—"

    def _badge(text, bg="#eef2ff", fg="#1e3a8a", border="#c7d2fe"):
        text = _esc(text)
        return f"""<span style="display:inline-block;padding:3px 8px;border-radius:999px;
                   background:{bg};color:{fg};border:1px solid {border};font-size:12px;">{text}</span>"""

    def _priority_badge(p):
        p = (p or "").strip()
        if p == "P0":
            return _badge("P0 – Urgent", bg="#fee2e2", fg="#991b1b", border="#fecaca")
        if p == "P1":
            return _badge("P1 – Important", bg="#ffedd5", fg="#9a3412", border="#fed7aa")
        return _badge(p or "—", bg="#f3f4f6", fg="#374151", border="#e5e7eb")

    def _extract_priority_from_llm_text(txt):
        if not txt:
            return None
        t = txt.upper()
        if "PRIORITY:" in t:
            after = t.split("PRIORITY:", 1)[1].strip()
            if after.startswith("P0"):
                return "P0"
            if after.startswith("P1"):
                return "P1"
        # Fallback: look for a bare P0 / P1 near the start of the text.
        head = t[:200]
        if "P0" in head:
            return "P0"
        if "P1" in head:
            return "P1"
        return None

    df_show = review_df.filter(
        (F.col("property_id") == property_id) &
        (F.col("llm_text").isNotNull())
    )

    # take(1) only needs one matching row, so the emptiness check does not
    # have to count every row of the property.
    if not df_show.take(1):
        return f"""
        <div style="margin-top:24px;padding:16px;border-radius:14px;
                    border:2px dashed #c7d2fe;background:#eef2ff;font-family:Arial;">
          <b>🔑 Review analysis not available.</b><br>
          No precomputed review analysis exists for property {_esc(property_id)}.<br>
          Please run the next cell with YOUR API key and the desire property_id.
        </div>
        """

    # -----------------------------
    # Rank issues
    # -----------------------------
    w_issue = (
        Window.partitionBy("property_id", "seller_id")
        .orderBy(F.desc("mentions"), F.desc("avg_neg_prob"))
    )
    ranked = df_show.withColumn("issue_rank", F.dense_rank().over(w_issue))

    # -----------------------------
    # Rank sentences per issue
    # -----------------------------
    has_issue_sim = "issue_sim" in ranked.columns
    has_avg_issue_sim = "avg_issue_sim" in ranked.columns

    order_cols = []
    if has_issue_sim:
        order_cols.append(F.desc("issue_sim"))
    order_cols.append(F.desc("neg_prob"))

    w_sent = (
        Window.partitionBy("property_id", "seller_id", "issue")
        .orderBy(*order_cols)
    )

    ranked = (
        ranked
        .withColumn("sent_rank", F.row_number().over(w_sent))
        .filter(F.col("sent_rank") <= int(MAX_SENTENCES_PER_ISSUE))
    )

    rows = ranked.orderBy("issue_rank", "sent_rank").collect()

    # -----------------------------
    # Build structure (single card)
    # -----------------------------
    seller_id = None
    llm_text = None
    issues = {}

    for r in rows:
        # seller_id / llm_text end up holding the values of the last row.
        seller_id = r["seller_id"]
        llm_text = r["llm_text"]

        iss = r["issue"]
        if iss not in issues:
            issues[iss] = {
                "mentions": r["mentions"],
                "avg_neg_prob": r["avg_neg_prob"],
                "avg_issue_sim": r["avg_issue_sim"] if has_avg_issue_sim else None,
                "sentences": []
            }

        issues[iss]["sentences"].append({
            "sentence": r["sentence"],
            "neg_prob": r["neg_prob"],
            "issue_sim": r["issue_sim"] if has_issue_sim else None
        })

    llm_text = (llm_text or "").strip()
    pr = _extract_priority_from_llm_text(llm_text)
    pr_badge = _priority_badge(pr) if pr else ""

    # -----------------------------
    # Render HTML
    # -----------------------------
    html = f"""
<div style="font-family: Arial, sans-serif; line-height:1.4; max-width: 1100px; margin-top:24px;">
  <h2 style="margin:0 0 14px 0;">🧠 What Guests Are Really Saying – Review Analysis</h2>
  <div style="color:#555; margin-bottom:14px;">
    The following card summarizes the analysis of guest reviews and the resulting recommendations.
  </div>

  <div style="border:1px solid #e6e6e6; border-radius:16px; padding:16px 16px 12px 16px; margin:14px 0;
              box-shadow:0 2px 10px rgba(0,0,0,0.05); background:#fff;">
    <div style="display:flex; align-items:center; justify-content:space-between; gap:12px;">
      <div style="font-size:18px; font-weight:700;">
        Property <span style="color:#0b57d0;">{_esc(property_id)}</span>
      </div>
      <div>{pr_badge}</div>
    </div>

    <div style="margin-top:6px; color:#555;">
      <span style="margin-right:10px;"><b>Seller:</b> {_esc(seller_id)}</span>
    </div>

    <div style="margin-top:12px; background:#f7f7fb; border:1px solid #ececf6; padding:12px 12px; border-radius:12px;">
      <div style="font-weight:700; margin-bottom:6px;">
        Review Analysis Summary & Recommendations
      </div>
      <div style="white-space:pre-wrap; color:#111;">{_esc(llm_text)}</div>
    </div>

    <div style="margin-top:14px;">
      <div style="font-weight:700; margin-bottom:8px;">Issues Identified from Guest Reviews</div>
"""

    for j, (iss, meta) in enumerate(issues.items(), start=1):
        mentions = meta.get("mentions")
        avg_neg = meta.get("avg_neg_prob")
        avg_sim = meta.get("avg_issue_sim")

        chip_mentions = _badge(f"mentions: {mentions}", bg="#f3f4f6", fg="#111827", border="#e5e7eb")
        chip_neg = _badge(f"negativity: {_pct(avg_neg)}", bg="#fff7ed", fg="#9a3412", border="#fed7aa")

        chip_sim = ""
        if SHOW_SIMILARITY and (avg_sim is not None):
            chip_sim = _badge(f"match: {_pct(avg_sim)}", bg="#ecfeff", fg="#155e75", border="#a5f3fc")

        html += f"""
      <div style="padding:10px 10px; border:1px solid #f0f0f0; border-radius:12px; margin:10px 0; background:#fcfcfd;">
        <div style="display:flex; align-items:center; justify-content:space-between; gap:10px; flex-wrap:wrap;">
          <div style="font-weight:700; color:#111827;">
            {j}. {_esc(iss)}
          </div>
          <div style="display:flex; gap:8px; flex-wrap:wrap;">
            {chip_mentions}
            {chip_neg}
            {chip_sim}
          </div>
        </div>

        <ul style="margin:8px 0 0 18px; padding:0;">
"""

        for s in meta["sentences"]:
            sent = _esc(s.get("sentence") or "")
            negp = _pct(s.get("neg_prob"), digits=0)

            simp = ""
            if SHOW_SIMILARITY and (s.get("issue_sim") is not None):
                simp = f" · match {_pct(s.get('issue_sim'), digits=0)}"

            html += f"""
          <li style="margin:8px 0;">
            <div style="color:#111827;">{sent}</div>
            <div style="color:#6b7280; font-size:12px; margin-top:2px;">
              negativity {negp}{simp}
            </div>
          </li>
"""

        html += """
        </ul>
      </div>
"""

    html += """
    </div>
  </div>
</div>
"""
    return html

# =====================================
# Button callback
# =====================================

def on_run_clicked(b):
    """Handle a click on "Run Recommendations".

    Reads the property id, alpha and budget from the widgets, then renders the
    Bayesian, market-standard and review cards into ``output``. The button is
    disabled while the callback runs and the status label reports the actual
    outcome (done / invalid input / failed) instead of always showing "Done".

    Args:
        b: The button instance passed by ipywidgets (unused).
    """
    run_button.disabled = True
    loading_label.value = "⏳ Running…"
    # Pessimistic default: kept if an unexpected exception escapes below.
    status = "❌ Failed"

    with output:
        clear_output()
        try:
            pid = int(property_input.value.strip())
            alpha = float(alpha_slider.value)
            budget = parse_budget(budget_input.value)

            display(HTML(render_bayesian_html(pid, alpha, budget)))
            display(HTML(render_market_html(pid)))
            # render_review_html is declared to take the id as a string
            # (and the initial render passes one), so convert back.
            display(HTML(render_review_html(str(pid))))
            status = "✅ Done"

        except ValueError:
            print("Invalid input")
            status = "⚠️ Invalid input"
        finally:
            loading_label.value = status
            run_button.disabled = False

# Register the click handler on the Run button.
run_button.on_click(on_run_clicked)

# =====================================
# Display
# =====================================

display(widgets.VBox([
    property_input,
    budget_input,
    examples_label,
    alpha_slider,
    button_row,
    output
]))

# Initial render for the default example property, so the cards are visible
# before the first click. Each renderer receives the id in the type its
# signature declares (int for the Bayesian and market cards, str for the
# review card), matching what on_run_clicked passes for the first two.
with output:
    display(HTML(render_bayesian_html(int(example_property_ids[0]), alpha_slider.value)))
    display(HTML(render_market_html(int(example_property_ids[0]))))
    display(HTML(render_review_html(example_property_ids[0])))

VBox(children=(Text(value='1042005770541410920', description='Property ID:', layout=Layout(width='420px'), sty…

## 🧠 Review Analysis – Optional Live Summary

The previous step already includes the full analytical output.
Readable summaries were pre-generated for a selected set of properties
to keep the experience fast and smooth.

To generate a readable summary for any other property:
- **Enter a Property ID**
- **Add your Google API key (required — the cell calls the Gemini API)**
- Run this cell to generate the explanation on demand

This step does not change the analysis — it only adds a human-readable summary.


In [0]:
# =============================
# USER INPUT
# =============================
# REQUIRED: set both PROPERTY_ID and GOOGLE_API_KEY

# Key sent with the Gemini request further down in this cell.
GOOGLE_API_KEY = "INSERT YOUR API KEY"

# Example - PROPERTY_ID = "1070313407533478024"  
# The value is converted with str() before it is compared to property_id.
PROPERTY_ID = "CHOOSE YOUR PROPERTY_ID HERE" 

# -----------------------------
# Visual params
# -----------------------------
# Maximum number of sentences listed under each issue.
MAX_SENTENCES_PER_ISSUE = 5
# When True, "match" values are shown wherever a similarity is available.
SHOW_SIMILARITY = True
# Gemini request settings: model path used in the request URL, pause after
# the call (passed to time.sleep) and request timeout (passed to requests).
GEMINI_MODEL = "models/gemini-2.5-pro"
API_SLEEP_SEC = 0.25
API_TIMEOUT_SEC = 120

# -----------------------------
# Helpers (same style)
# -----------------------------
def _esc(x):
    if x is None:
        return ""
    s = str(x)
    return s.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")

def _pct(x, digits=0):
    try:
        if x is None:
            return "—"
        return f"{round(float(x)*100, digits)}%"
    except Exception:
        return "—"

def _badge(text, bg="#eef2ff", fg="#1e3a8a", border="#c7d2fe"):
    """Return a pill-shaped ``<span>`` containing the HTML-escaped ``text``.

    ``bg``, ``fg`` and ``border`` are inserted verbatim as the CSS background,
    text and border colours.
    """
    label = _esc(text)
    return f"""<span style="display:inline-block;padding:3px 8px;border-radius:999px;
               background:{bg};color:{fg};border:1px solid {border};font-size:12px;">{label}</span>"""

def _priority_badge(p):
    """Return the coloured badge for a priority code.

    "P0" and "P1" (after stripping whitespace) get dedicated labels and
    colours; anything else is shown as-is in a neutral grey badge, with "—"
    standing in for an empty value.
    """
    code = (p or "").strip()
    # label, background, foreground, border
    known = {
        "P0": ("P0 – Urgent", "#fee2e2", "#991b1b", "#fecaca"),
        "P1": ("P1 – Important", "#ffedd5", "#9a3412", "#fed7aa"),
    }
    neutral = (code or "—", "#f3f4f6", "#374151", "#e5e7eb")
    label, background, foreground, outline = known.get(code, neutral)
    return _badge(label, bg=background, fg=foreground, border=outline)

def _extract_priority_from_llm_text(txt):
    if not txt:
        return None
    t = txt.upper()
    if "PRIORITY:" in t:
        after = t.split("PRIORITY:", 1)[1].strip()
        if after.startswith("P0"):
            return "P0"
        if after.startswith("P1"):
            return "P1"
    head = t[:200]
    if "P0" in head:
        return "P0"
    if "P1" in head:
        return "P1"
    return None

# -----------------------------
# Stop clearly if API key missing
# -----------------------------
# The untouched placeholder "INSERT YOUR API KEY" is a non-empty (truthy)
# string, so it has to be rejected explicitly; otherwise the cell would go on
# and send the placeholder to the API.
if str(GOOGLE_API_KEY or "").strip() in ("", "INSERT YOUR API KEY"):
    display(HTML("""
    <div style="font-family:Arial;max-width:900px;padding:16px;border:2px solid #fecaca;border-radius:14px;background:#fff1f2;">
      <div style="font-size:20px;font-weight:800;color:#991b1b;margin-bottom:8px;">⛔ Missing GOOGLE_API_KEY</div>
      <div style="color:#7f1d1d;">
        Please set <b>GOOGLE_API_KEY</b> in the dedicated cell (your own key) and rerun this cell.
      </div>
    </div>
    """))
    raise ValueError("GOOGLE_API_KEY is not set")

# -----------------------------
# Filter ONE property only
# -----------------------------
# All rows of review_df_no_api for the requested property; PROPERTY_ID is
# compared as a string.
df_prop = review_df_no_api.filter(F.col("property_id") == str(PROPERTY_ID))

# Stop with a visible warning card when the property has no rows at all.
if df_prop.count() == 0:
    display(HTML(f"""
    <div style="font-family:Arial;max-width:900px;padding:16px;border:2px solid #fde68a;border-radius:14px;background:#fffbeb;">
      <div style="font-size:20px;font-weight:800;color:#92400e;margin-bottom:8px;">⚠️ Property not found</div>
      <div style="color:#92400e;">No rows found for property_id: <b>{_esc(PROPERTY_ID)}</b></div>
    </div>
    """))
    raise ValueError("Property not found in review_df")

# seller_id is taken from the first row returned for the property.
seller_id = df_prop.select("seller_id").first()["seller_id"]

# -----------------------------
# Build payload (from df_prop, i.e. review_df_no_api, only)
# -----------------------------
# The 120 rows with the highest neg_prob are sent to the model.
df_payload = (
    df_prop.orderBy(F.desc("neg_prob"))
           .limit(120)
           .select("sentence", "neg_prob", "issue")
)
payload = {
    "entity_type": "property",
    "property_id": str(PROPERTY_ID),
    "seller_id": str(seller_id),
    "sentences": [r.asDict(True) for r in df_payload.collect()]
}

# -----------------------------
# PROMPT (unchanged)
# -----------------------------
# Prompt sent to the model: fixed instructions followed by the JSON-serialized
# payload after the "DATA:" marker. The "Priority: P0 or P1" line of the
# requested output format is what _extract_priority_from_llm_text looks for.
prompt_text = (
    "You are an expert hospitality operations consultant.\n\n"
    "Analyze the following Airbnb feedback for ONE property.\n"
    "Each item includes: sentence, neg_prob (probability the sentence is negative), and issue category.\n\n"
    "Return a short summary and practical recommendations for the host.\n\n"
    "OUTPUT FORMAT (strict):\n"
    "Summary: <2-3 concise sentences describing the main problems and their impact>\n"
    "Priority: P0 or P1 (P0 = urgent/critical, P1 = important but not critical)\n"
    "Practical recommendations:\n"
    "- <actionable recommendation 1>\n"
    "- <actionable recommendation 2>\n"
    "- <actionable recommendation 3>\n"
    "(2-6 bullets total; be specific and implementable, e.g., noise -> door seals / double glazing / thick curtains)\n\n"
    "Do NOT return JSON. Do NOT add extra sections or titles. Do NOT cite sentence_ids.\n\n"
    # ensure_ascii=False keeps non-ASCII review text readable in the prompt.
    f"DATA:\n{json.dumps(payload, ensure_ascii=False, indent=2)}"
)

# -----------------------------
# CALL GEMINI
# -----------------------------
# The API key is sent in the x-goog-api-key header rather than in the query
# string: requests includes the full URL in the HTTPError raised by
# raise_for_status(), which would print the key into the notebook output.
url = f"https://generativelanguage.googleapis.com/v1beta/{GEMINI_MODEL}:generateContent"
body = {"contents": [{"role": "user", "parts": [{"text": prompt_text}]}]}
resp = requests.post(
    url,
    headers={"x-goog-api-key": GOOGLE_API_KEY},
    json=body,
    timeout=int(API_TIMEOUT_SEC),
)
resp.raise_for_status()
try:
    llm_text = resp.json()["candidates"][0]["content"]["parts"][0]["text"]
except (KeyError, IndexError, TypeError) as err:
    # A successful HTTP response can still carry no candidate text.
    raise RuntimeError(
        "Gemini returned no text for this request "
        "(the response contained no usable candidate)."
    ) from err
time.sleep(float(API_SLEEP_SEC))

# -----------------------------
# Prepare issues + sentences (all rows for that property)
# top-N sentences per issue for readability
# -----------------------------
# Sentence ordering inside an issue: by similarity first when that column
# exists, then by negativity.
order_cols = [F.desc("neg_prob")]
if "issue_sim" in df_prop.columns:
    order_cols.insert(0, F.desc("issue_sim"))

w_sent = (
    Window.partitionBy("property_id", "seller_id", "issue")
    .orderBy(*order_cols)
)
# Keep only the top MAX_SENTENCES_PER_ISSUE sentences of every issue.
ranked = (
    df_prop
    .withColumn("sent_rank", F.row_number().over(w_sent))
    .filter(F.col("sent_rank") <= F.lit(int(MAX_SENTENCES_PER_ISSUE)))
)

# Columns to collect; the two similarity columns are optional.
select_cols = ["issue", "mentions"]
if "avg_issue_sim" in ranked.columns:
    select_cols.append("avg_issue_sim")
select_cols += ["avg_neg_prob", "sentence"]
if "issue_sim" in ranked.columns:
    select_cols.append("issue_sim")
select_cols.append("neg_prob")

rows = ranked.select(*select_cols).orderBy("issue", "sent_rank").collect()

# Group the collected rows: one entry per issue holding its aggregate values
# (taken from the first row seen) and the list of its sentences.
issues = {}
for r in rows:
    d = r.asDict()
    iss = d["issue"]
    if iss not in issues:
        issues[iss] = {
            "mentions": d.get("mentions"),
            "avg_neg_prob": d.get("avg_neg_prob"),
            "avg_issue_sim": d.get("avg_issue_sim"),
            "sentences": [],
        }
    issues[iss]["sentences"].append({
        "sentence": d.get("sentence"),
        "neg_prob": d.get("neg_prob"),
        "issue_sim": d.get("issue_sim"),
    })

# -----------------------------
# Render HTML (same look)
# -----------------------------
# Priority badge derived from the model's answer; empty when no P0/P1 is found.
pr = _extract_priority_from_llm_text(llm_text)
pr_badge = _priority_badge(pr) if pr else ""

# Card header: property id, seller and the (HTML-escaped) LLM summary.
html = f"""
<div style="font-family: Arial, sans-serif; line-height:1.4; max-width: 1100px;">
  <h2 style="margin:0 0 14px 0;">🧠 What Guests Are Really Saying – Review Analysis</h2>

  <div style="border:1px solid #e6e6e6; border-radius:16px; padding:16px 16px 12px 16px; margin:14px 0;
              box-shadow:0 2px 10px rgba(0,0,0,0.05); background:#fff;">
    <div style="display:flex; align-items:center; justify-content:space-between; gap:12px;">
      <div style="font-size:18px; font-weight:700;">
        Property <span style="color:#0b57d0;">{_esc(PROPERTY_ID)}</span>
      </div>
      <div>{pr_badge}</div>
    </div>

    <div style="margin-top:6px; color:#555;">
      <span style="margin-right:10px;"><b>Seller:</b> {_esc(seller_id)}</span>
    </div>

    <div style="margin-top:12px; background:#f7f7fb; border:1px solid #ececf6; padding:12px 12px; border-radius:12px;">
      <div style="font-weight:700; margin-bottom:6px;">Review Analysis Summary & Recommendations</div>
      <div style="white-space:pre-wrap; color:#111;">{_esc(llm_text)}</div>
    </div>

    <div style="margin-top:14px;">
      <div style="font-weight:700; margin-bottom:8px;">Issues Identified from Guest Reviews</div>
"""

# One numbered box per issue, in the iteration order of the issues dict.
j = 1
for iss, meta in issues.items():
    mentions = meta.get("mentions")
    avg_neg = meta.get("avg_neg_prob")
    avg_sim = meta.get("avg_issue_sim")

    chip_mentions = _badge(f"mentions: {mentions}", bg="#f3f4f6", fg="#111827", border="#e5e7eb")
    chip_neg = _badge(f"negativity: {_pct(avg_neg)}", bg="#fff7ed", fg="#9a3412", border="#fed7aa")

    # The similarity chip is optional: it needs SHOW_SIMILARITY and a value.
    chip_sim = ""
    if SHOW_SIMILARITY and (avg_sim is not None):
        chip_sim = _badge(f"match: {_pct(avg_sim)}", bg="#ecfeff", fg="#155e75", border="#a5f3fc")

    html += f"""
      <div style="padding:10px 10px; border:1px solid #f0f0f0; border-radius:12px; margin:10px 0; background:#fcfcfd;">
        <div style="display:flex; align-items:center; justify-content:space-between; gap:10px; flex-wrap:wrap;">
          <div style="font-weight:700; color:#111827;">{j}. {_esc(iss)}</div>
          <div style="display:flex; gap:8px; flex-wrap:wrap;">
            {chip_mentions}
            {chip_neg}
            {chip_sim}
          </div>
        </div>

        <ul style="margin:8px 0 0 18px; padding:0;">
    """

    # Sentences of this issue, each with its negativity and optional match value.
    for s in meta["sentences"]:
        sent = _esc(s.get("sentence") or "")
        negp = _pct(s.get("neg_prob"), digits=0)

        simp = ""
        if SHOW_SIMILARITY and (s.get("issue_sim") is not None):
            simp = f" · match {_pct(s.get('issue_sim'), digits=0)}"

        html += f"""
          <li style="margin:8px 0;">
            <div style="color:#111827;">{sent}</div>
            <div style="color:#6b7280; font-size:12px; margin-top:2px;">
              negativity {negp}{simp}
            </div>
          </li>
        """

    # Close the sentence list and the issue box.
    html += """
        </ul>
      </div>
    """
    j += 1

# Close the issues section, the card and the outer container.
html += """
    </div>
  </div>
</div>
"""

display(HTML(html))
