In [None]:
import pandas as pd
import numpy as np

# Criteria every neighborhood is scored on (column order of `values`).
_criteria = [
    "Commute Convenience",
    "Safety",
    "Noise",
    "Amenity Convenience",
    "Green Space Accessibility",
    "Job Opportunities",
    "Education Access",
    "Political Leaning",
]

# Neighborhoods being compared (row order of `values` and `costs`).
_neighborhoods = ["Hudson Yards", "Morningside Heights", "Harlem"]

# Every criterion starts with the same importance.
weights = pd.Series(1, index=_criteria)

# One row per neighborhood, one score per criterion (same order as `_criteria`).
values = pd.DataFrame(
    [
        [0.39, 0.90, 0.5, 0.39, 0.69, 0.91, 0.11, 0.35],
        [0.21, 0.57, 0.5, 0.11, 0.76, 0.27, 0.1, 0.45],
        [0.19, 0.62, 0.5, 0.08, 0.74, 0.28, 0.1, 0.55],
    ],
    index=_neighborhoods,
    columns=_criteria,
)

# Cost figure per neighborhood, aligned with `_neighborhoods`.
costs = pd.Series([18000, 16000, 10000], index=_neighborhoods)

In [None]:
# This is the overall quality of life rating, finding the "best" possible location to live in, based on the user's preferences
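# In this mode, cost is meant to be one of the weighted factors: it takes part in the calculation only when a "Cost"
# entry exists in both `weights` and `values` (neither defines one at the moment). (To disable, set its weight to 0.)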
# The function returns a value between 0 and 1, where 0 indicates minimal fit, and 1 indicates maximum fit.
# We can think of this as answering: "Where would I be most happy?"

def calculate_fit_index(location: str) -> float:
    """Return the weighted average of the criterion scores for *location*.

    The module-level ``weights`` Series is multiplied label-by-label with the
    row of ``values`` for *location*; labels present in only one of the two
    produce NaN and are skipped by ``sum``. The total is divided by the sum of
    the weights, so with scores in [0, 1] and non-negative weights the result
    also lies in [0, 1].

    Args:
        location: Row label in ``values``.

    Returns:
        The weighted average score, or 0.0 when the weights sum to zero
        (no criterion matters, so there is nothing to average).

    Raises:
        KeyError: If *location* is not a row of ``values``.
    """
    weights_sum = weights.sum()
    if weights_sum == 0:
        # Avoid 0/0 -> NaN when every weight has been disabled.
        return 0.0

    qol_value = (weights * values.loc[location]).sum()
    return qol_value / weights_sum


# This is the best value rating, finding places that give you the most "bang for your buck".
# The QoL value is calculated WITHOUT taking cost into consideration.
# This QoL value is then divided by the cost value; hence, "bang for your buck". Or in other words: QoL per Dollar.
# We can think of this as answering: "Where is the best deal?"

def calculate_return_on_investment(location: str) -> float:
    """Return the quality-of-life score of *location* per unit of log-cost.

    The quality-of-life value is the weighted sum (not the average) of the
    criterion scores, with any "Cost" criterion left out; it is then divided
    by the natural log of ``costs[location]``.

    Args:
        location: Label present in both ``values`` (row) and ``costs``.

    Returns:
        Weighted quality-of-life sum divided by ``log(cost)``.

    Raises:
        KeyError: If *location* is missing from ``values`` or ``costs``.
        ValueError: If the cost is <= 1, where ``log(cost)`` is zero or
            negative and the ratio is meaningless.
    """
    # "Cost" may or may not be one of the criteria; errors="ignore" makes the
    # drop a no-op when it is absent instead of raising KeyError.
    criterion_weights = weights.drop(labels="Cost", errors="ignore")
    criterion_scores = values.drop(columns="Cost", errors="ignore").loc[location]
    qol_value = (criterion_weights * criterion_scores).sum()

    cost = costs[location]
    if cost <= 1:
        raise ValueError(
            f"Cost for {location!r} must be greater than 1 to compute ROI, got {cost}"
        )

    # Note that we take the natural log of the cost of living, because the significance of cost is not linear.
    # For example, consider:
        # Neighborhood A: QoL = 0.5, Cost = 2 (Very Cheap). Ratio = 0.25
        # Neighborhood B: QoL = 1.0 (Perfect!), Cost = 5 (Average). Ratio = 0.20
    # If we use a linear scale for the cost, Neighborhood A has a better cost ratio.
    # Any neighborhood with a low cost value like 1 or 2, regardless of their actual fit, would get a very good result. We don't want that.
    return qol_value / np.log(cost)

In [None]:
# Score every neighborhood and store each metric as its own column of `values`.
# The row labels are captured first so both columns are filled in the same order.
locations = values.index.tolist()

values["Fit Index"] = [calculate_fit_index(name) for name in locations]
values["ROI"] = [calculate_return_on_investment(name) for name in locations]

values.head()

In [None]:
"""
RENTSENSE - Question Engine v2
Hackathon: NexHacks 2026

Gemini-powered contextual question generation.
Questions feel personal, not like a survey.
"""

import json
import os

from google import genai

# ============================================================
# CONFIG
# ============================================================

# Prefer a key supplied through the GEMINI_API_KEY environment variable so the
# secret does not have to be written into the notebook; the placeholder is kept
# as the fallback for anyone who still edits the value by hand.
API_KEY = os.environ.get("GEMINI_API_KEY") or "API_KEY"  # Replace with your key

# Preference dimensions that receive a weight; the labels match the criterion
# columns used when scoring neighborhoods.
DIMENSIONS = [
    "Commute Convenience",
    "Safety",
    "Noise",
    "Amenity Convenience",
    "Green Space Accessibility",
    "Job Opportunities",
    "Education Access",
    "Political Leaning",
]

# ============================================================
# GEMINI CLIENT
# ============================================================

# Shared Gemini client, created once when the cell runs.
client = genai.Client(api_key=API_KEY)

def call_gemini(prompt):
    """Send *prompt* to the Gemini model and return the response text.

    Args:
        prompt: Prompt passed as ``contents`` to ``generate_content``.

    Returns:
        The text of the response. An empty string is returned when the
        response carries no text, so callers can treat the result as a string
        (their JSON parsing then fails in the usual, handled way).
    """
    response = client.models.generate_content(
        model='gemini-2.5-flash',
        contents=prompt
    )
    # NOTE(review): `response.text` is understood to be None when the response
    # has no text parts (e.g. a blocked prompt) - confirm against the SDK docs.
    return response.text or ""


# ============================================================
# QUESTION ENGINE v2
# ============================================================

class QuestionEngineV2:
    """Turn a free-text housing brief into per-dimension preference weights.

    The engine asks Gemini (through ``call_gemini``) to classify every entry
    of ``DIMENSIONS`` as clear, ambiguous or missing, generates follow-up
    questions for the unresolved ones, and folds each answer into
    ``self.weights``. After every update the weights are clamped to be
    non-negative and rescaled so they sum to ``_WEIGHT_TOTAL``.
    """

    # Sum the weights are rescaled to after every update.
    _WEIGHT_TOTAL = 8

    # Order in which never-mentioned dimensions are asked about.
    _PRIORITY_ORDER = (
        "Safety",
        "Commute Convenience",
        "Noise",
        "Amenity Convenience",
        "Green Space Accessibility",
        "Education Access",
        "Job Opportunities",
        "Political Leaning",
    )

    def __init__(self, mode="discovery"):
        """Create an engine with equal weights.

        Args:
            mode: "discovery" for newcomers; any other value is described to
                the model as someone already living in the city.
        """
        self.mode = mode
        self.conversation_history = []

        # Initialize weights (8 dimensions, each starts at 1.0, sum = 8)
        self.weights = {dim: 1.0 for dim in DIMENSIONS}

        # Track state
        self.questions_asked = 0
        self.dimensions_covered = set()
        self.user_context = ""
        self.zip_code = None  # For migration mode

    @staticmethod
    def _parse_json_response(raw):
        """Strip optional Markdown code fences from *raw* and decode it as JSON.

        Raises:
            json.JSONDecodeError: If the remaining text is not valid JSON
                (including an empty or missing response).
        """
        text = (raw or "").strip()
        if text.startswith("```json"):
            text = text[7:]
        if text.startswith("```"):
            text = text[3:]
        if text.endswith("```"):
            text = text[:-3]
        return json.loads(text.strip())

    @staticmethod
    def _coerce_delta(value):
        """Return *value* as a float, or 0.0 when the model sent something non-numeric."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return 0.0

    @staticmethod
    def _normalize_question(question_data):
        """Return *question_data* if it is usable by the runners, else None.

        A usable question is a dict with a non-empty "question" string and a
        non-empty "options" list whose entries each have an "id" and a
        "label". Option ids are upper-cased so they match the upper-cased
        answer typed by the user, and a missing "format" defaults to
        "MCQ_SINGLE".
        """
        if not isinstance(question_data, dict):
            return None
        question = question_data.get("question")
        options = question_data.get("options")
        if not isinstance(question, str) or not question.strip():
            return None
        if not isinstance(options, list) or not options:
            return None
        for option in options:
            if not isinstance(option, dict) or "id" not in option or "label" not in option:
                return None
            option["id"] = str(option["id"]).strip().upper()
            option["label"] = str(option["label"])
        question_data.setdefault("format", "MCQ_SINGLE")
        return question_data

    def analyze_input(self, user_input):
        """Use Gemini to analyze what's CLEAR, AMBIGUOUS, and MISSING.

        Records *user_input* in the conversation history, applies the weight
        deltas of every clearly stated dimension and normalizes the weights.

        Returns:
            The analysis dict with "clear" and "ambiguous" guaranteed to be
            dicts and "missing" a list. When the response cannot be parsed, a
            fallback is returned in which every dimension is missing.
        """

        self.user_context = user_input
        self.conversation_history.append({"role": "user", "content": user_input})

        mode_hint = (
            "new to the city"
            if self.mode == "discovery"
            else "already lives here, moving within city"
        )

        prompt = f"""
You are analyzing a user's housing preference input to understand what they care about.

USER INPUT: "{user_input}"

MODE: {self.mode} ({mode_hint})

DIMENSIONS TO EVALUATE:
{json.dumps(DIMENSIONS, indent=2)}

Analyze the input and categorize each dimension:

1. CLEAR - User explicitly mentioned this, we know their preference
2. AMBIGUOUS - User hinted at this but needs clarification
3. MISSING - User didn't mention this at all

Also extract any specific details (job location, budget mentioned, specific concerns, etc.)

Respond in this exact JSON format:
{{
    "clear": {{
        "dimension_name": {{"preference": "high/medium/low", "evidence": "what they said", "weight_delta": 1.5}},
        ...
    }},
    "ambiguous": {{
        "dimension_name": {{"hint": "what they hinted at", "needs_clarification": "what to ask"}},
        ...
    }},
    "missing": ["dimension1", "dimension2", ...],
    "extracted_details": {{
        "job_location": "if mentioned",
        "budget": "if mentioned",
        "specific_concerns": ["list of concerns"],
        "other": "any other relevant details"
    }}
}}

Respond with ONLY the JSON, no other text.
"""

        response = call_gemini(prompt)

        # Parse JSON (handle potential formatting issues)
        try:
            analysis = self._parse_json_response(response)
            if not isinstance(analysis, dict):
                raise ValueError("expected a JSON object")
        except ValueError as e:  # json.JSONDecodeError is a ValueError
            print(f"Warning: Could not parse analysis JSON: {e}")
            print(f"Raw response: {response}")
            # Hand out a copy: callers remove entries from "missing", which
            # must never mutate the shared DIMENSIONS list.
            return {"clear": {}, "ambiguous": {}, "missing": list(DIMENSIONS), "extracted_details": {}}

        # Make the three sections safe to iterate regardless of what came back.
        clear = analysis.get("clear")
        ambiguous = analysis.get("ambiguous")
        missing = analysis.get("missing")
        analysis["clear"] = clear if isinstance(clear, dict) else {}
        analysis["ambiguous"] = ambiguous if isinstance(ambiguous, dict) else {}
        analysis["missing"] = list(missing) if isinstance(missing, list) else []

        # Apply weight deltas for CLEAR dimensions
        for dim, info in analysis["clear"].items():
            if dim in self.weights and isinstance(info, dict) and "weight_delta" in info:
                self.weights[dim] += self._coerce_delta(info["weight_delta"])
                self.dimensions_covered.add(dim)

        # Normalize after applying deltas
        self._normalize_weights()

        return analysis

    def generate_question(self, analysis):
        """Use Gemini to generate a contextual question based on analysis.

        Ambiguous dimensions are asked about first (ignoring keys that are not
        real dimensions); otherwise the highest-priority dimension that is
        missing and not yet covered is chosen.

        Returns:
            The question dict (with "target_dimension" added), or None when
            there is nothing left to ask or the response is unusable.
        """

        # Determine what to focus on
        ambiguous = analysis.get("ambiguous")
        if not isinstance(ambiguous, dict):
            ambiguous = {}
        missing = analysis.get("missing") or []

        target = next((dim for dim in ambiguous if dim in self.weights), None)
        if target is not None:
            focus = "ambiguous"
            context = ambiguous[target]
        else:
            focus = "missing"
            # Pick most important missing dimension
            target = next(
                (
                    dim
                    for dim in self._PRIORITY_ORDER
                    if dim in missing and dim not in self.dimensions_covered
                ),
                None,
            )
            if not target:
                return None  # Nothing to ask
            context = {}

        if focus == "ambiguous":
            reason = "User hinted at this but needs clarification"
            hint = context.get("hint", "") if isinstance(context, dict) else ""
            hint_line = f"HINT FROM USER: {hint}"
        else:
            reason = "User hasn't mentioned this yet"
            hint_line = ""

        prompt = f"""
You are a friendly housing consultant helping someone find their perfect neighborhood.

USER'S ORIGINAL INPUT: "{self.user_context}"

CONVERSATION SO FAR:
{json.dumps(self.conversation_history, indent=2)}

YOU NEED TO ASK ABOUT: {target}
REASON: {reason}
{hint_line}

Generate a contextual, conversational question that:
1. References something specific from their input (makes it feel personal)
2. Asks about {target} in a natural way
3. Provides clear options with different preference levels

Choose the best FORMAT:
- MCQ_SINGLE: When there are distinct preference levels (use most often)
- MCQ_MULTI: When user might value multiple things (good for amenities, what they love/hate)
- YES_NO: For simple binary questions (do you have kids, do you have a car)

Respond in this exact JSON format:
{{
    "format": "MCQ_SINGLE" or "MCQ_MULTI" or "YES_NO",
    "dimension": "{target}",
    "question": "Your contextual question here",
    "options": [
        {{"id": "A", "label": "Option text", "weight_delta": 1.5}},
        {{"id": "B", "label": "Option text", "weight_delta": 0.5}},
        {{"id": "C", "label": "Option text", "weight_delta": -0.3}}
    ]
}}

RULES FOR weight_delta:
- High importance to user → +1.0 to +1.5
- Medium importance → +0.3 to +0.5
- Low importance / don't care → -0.3 to -0.5
- For MCQ_MULTI, use smaller deltas (+0.3 to +0.5 each) since they can select multiple

Make the question feel PERSONAL and CONVERSATIONAL, not like a survey.
Respond with ONLY the JSON, no other text.
"""

        response = call_gemini(prompt)

        try:
            question_data = self._parse_json_response(response)
        except json.JSONDecodeError as e:
            print(f"Warning: Could not parse question JSON: {e}")
            print(f"Raw response: {response}")
            return None

        question_data = self._normalize_question(question_data)
        if question_data is None:
            # Valid JSON but not a usable question; callers index "question"
            # and "options" directly, so reject it here.
            print("Warning: Question JSON is missing required fields")
            print(f"Raw response: {response}")
            return None

        question_data["target_dimension"] = target
        return question_data

    def process_answer(self, question_data, selected_ids):
        """Process user's answer and update weights.

        Args:
            question_data: Question dict as returned by ``generate_question``.
            selected_ids: Ids of the options the user picked.
        """

        self.questions_asked += 1

        chosen = [opt for opt in question_data["options"] if opt["id"] in selected_ids]

        # Prefer the dimension echoed by the model, but fall back to the one we
        # actually asked about when the echoed name is not a known dimension.
        candidates = (question_data.get("dimension"), question_data.get("target_dimension"))
        dim = next((name for name in candidates if name and name in self.weights), None)

        # Apply the weight delta of every selected option
        if dim is not None:
            for option in chosen:
                self.weights[dim] += self._coerce_delta(option.get("weight_delta", 0))
                self.dimensions_covered.add(dim)

        # Add to conversation history
        self.conversation_history.append({
            "role": "assistant",
            "content": question_data["question"]
        })
        self.conversation_history.append({
            "role": "user",
            "content": ", ".join(str(option["label"]) for option in chosen)
        })

        # Normalize weights to sum to 8
        self._normalize_weights()

    def _normalize_weights(self):
        """Clamp weights at zero and rescale them to sum to ``_WEIGHT_TOTAL`` (8).

        Negative deltas can push a weight below zero; a negative weight would
        turn "don't care" into an active penalty, so it is treated as zero.
        If every weight ends up at zero, all dimensions get an equal share so
        the sum invariant still holds.
        """
        clamped = {dim: max(weight, 0.0) for dim, weight in self.weights.items()}
        total = sum(clamped.values())
        if total > 0:
            self.weights = {
                dim: (weight / total) * self._WEIGHT_TOTAL
                for dim, weight in clamped.items()
            }
        elif clamped:
            equal_share = self._WEIGHT_TOTAL / len(clamped)
            self.weights = {dim: equal_share for dim in clamped}

    def get_weights(self):
        """Return current weights, sorted by value (always normalized to sum to 8)."""
        self._normalize_weights()  # Ensure normalized before returning
        return dict(sorted(self.weights.items(), key=lambda item: -item[1]))

    def should_continue(self, max_questions=5):
        """Check if we should ask more questions.

        Returns False once *max_questions* have been asked or all but two
        dimensions have been covered.
        """
        if self.questions_asked >= max_questions:
            return False
        if len(self.dimensions_covered) >= len(DIMENSIONS) - 2:  # Most covered
            return False
        return True


# ============================================================
# MAIN RUNNER
# ============================================================

def _mark_resolved(analysis, dimension):
    """Drop *dimension* from the pending sections so it is not asked about again."""
    missing = analysis.get("missing")
    if isinstance(missing, list) and dimension in missing:
        missing.remove(dimension)
    ambiguous = analysis.get("ambiguous")
    if isinstance(ambiguous, dict):
        ambiguous.pop(dimension, None)


def _print_understood(analysis, heading):
    """Print *heading* followed by the evidence for every clearly stated dimension."""
    clear = analysis.get("clear")
    if not clear:
        return
    print(heading)
    for dim, info in clear.items():
        evidence = info.get('evidence', 'mentioned') if isinstance(info, dict) else 'mentioned'
        print(f"   • {dim}: {evidence}")


def _ask_follow_up_questions(engine, analysis, max_questions):
    """Ask generated questions until the engine is satisfied or the user stops.

    Each question is displayed with its options; the user may answer with one
    or more option ids, skip it with "S", or stop with "X". Skipped and
    unusable answers count against *max_questions* and retire the dimension so
    the same question is not generated again.
    """
    # Work on a private copy: entries are removed from "missing" below, and the
    # list handed in may be shared with other code.
    missing = analysis.get("missing")
    if isinstance(missing, list):
        analysis["missing"] = list(missing)

    while engine.should_continue(max_questions):
        # Generate contextual question
        question_data = engine.generate_question(analysis)

        if not question_data:
            break

        target = question_data.get("target_dimension")
        options = question_data.get("options") or []
        question_text = question_data.get("question")

        if not question_text or not options:
            # Nothing presentable came back; move on to the next dimension.
            print("Invalid question received, skipping...")
            engine.questions_asked += 1
            _mark_resolved(analysis, target)
            continue

        # Display question
        print(f"\n{'─' * 50}")
        print(f"Question {engine.questions_asked + 1}")
        print(f"{'─' * 50}")
        print(f"\n{question_text}\n")

        for opt in options:
            print(f"  {opt['id']}) {opt['label']}")

        if question_data.get("format") == "MCQ_MULTI":
            print("\n  (Select multiple, comma-separated: A,C)")

        print("\n  S) Skip")
        print("  X) Show results now")

        # Get answer
        answer = input("\nYour choice: ").strip().upper()

        if answer == "X":
            break

        if answer == "S":
            engine.questions_asked += 1
            _mark_resolved(analysis, target)
            continue

        valid_ids = {opt["id"] for opt in options}
        selected = [
            choice
            for choice in (part.strip() for part in answer.split(","))
            if choice in valid_ids
        ]

        if selected:
            engine.process_answer(question_data, selected)
        else:
            print("Invalid choice, skipping...")
            engine.questions_asked += 1

        # Answered or skipped, the dimension is done either way.
        _mark_resolved(analysis, target)


def run_discovery_mode():
    """Interview a newcomer to NYC and print the resulting preference weights."""
    print("\n" + "=" * 60)
    print("   RENTSENSE - Discovery Mode")
    print("   New to NYC? Let's find your perfect neighborhood.")
    print("=" * 60)

    engine = QuestionEngineV2(mode="discovery")

    # Get initial input
    print("\nTell me about yourself and what you're looking for.")
    print("(Job location, concerns, lifestyle, budget, anything relevant)\n")
    user_input = input("You: ")

    # Analyze input
    print("\n🔍 Understanding your needs...")
    analysis = engine.analyze_input(user_input)

    # Show what we understood
    _print_understood(analysis, "\n✓ Got it! I understood:")

    # Question loop (discovery gets more questions)
    _ask_follow_up_questions(engine, analysis, max_questions=4)

    # Show results
    show_results(engine)


def run_migration_mode():
    """Interview a current NYC resident and print the resulting preference weights."""
    print("\n" + "=" * 60)
    print("   RENTSENSE - Migration Mode")
    print("   Already in NYC? Let's find somewhere better.")
    print("=" * 60)

    engine = QuestionEngineV2(mode="migration")

    # Get ZIP
    zip_code = input("\nWhat's your current ZIP code? ")
    engine.zip_code = zip_code

    # Get pros/cons
    print(f"\n📍 Got it — {zip_code}")
    print("\nTell me: What do you love AND what would you change")
    print("about your current neighborhood?\n")
    user_input = input("You: ")

    # Analyze input
    print("\n🔍 Understanding your experience...")
    analysis = engine.analyze_input(user_input)

    # Show what we understood
    _print_understood(analysis, "\n✓ I heard you:")

    # Question loop (fewer for migration)
    _ask_follow_up_questions(engine, analysis, max_questions=2)

    # Show results
    show_results(engine)


def format_weight_bar(weight, width=10, cells_per_unit=5):
    """Return a fixed-width text bar for *weight*.

    ``cells_per_unit`` filled cells are drawn per unit of weight; the filled
    part is clamped to the range 0..*width* so the bar is always exactly
    *width* characters long.
    """
    filled = min(width, max(0, int(weight * cells_per_unit)))
    return "█" * filled + "░" * (width - filled)


def show_results(engine):
    """Display final weights.

    Prints one line per dimension (highest weight first) with a bar, then the
    total, the number of questions asked, the coverage, and the top three
    priorities.
    """
    ranked = engine.get_weights()

    print(f"\n{'=' * 60}")
    print("YOUR PREFERENCE WEIGHTS (sum to 8)")
    print(f"{'=' * 60}\n")

    for dimension, weight in ranked.items():
        print(f"  {dimension:28s} {weight:.2f}  {format_weight_bar(weight)}")

    total = sum(ranked.values())

    print(f"\n{'─' * 60}")
    print(f"Total: {total:.2f}")
    print(f"Questions asked: {engine.questions_asked}")
    print(f"Dimensions covered: {len(engine.dimensions_covered)}/{len(DIMENSIONS)}")
    print(f"{'─' * 60}")

    # Show what we learned
    print("\n📋 Summary:")
    top_3 = list(ranked)[:3]
    print(f"   Your top priorities: {', '.join(top_3)}")


# ============================================================
# MAIN
# ============================================================

if __name__ == "__main__":
    # Entry point: ask which interview to run, then hand over to that mode.
    print("\n" + "=" * 60)
    print("   🏠 RENTSENSE - Neighborhood Recommendation Engine")
    print("=" * 60)

    print("\nAre you new to NYC or already living here?\n")
    print("  1) I'm new to NYC (Discovery Mode)")
    print("  2) I already live here (Migration Mode)")

    choice = input("\nYour choice (1 or 2): ").strip()

    # Anything other than "2" falls back to discovery mode.
    if choice == "2":
        run_migration_mode()
    else:
        run_discovery_mode()

    print("\n✓ Done! Next step: Use these weights to score neighborhoods.\n")


   🏠 RENTSENSE - Neighborhood Recommendation Engine

Are you new to NYC or already living here?

  1) I'm new to NYC (Discovery Mode)
  2) I already live here (Migration Mode)

Your choice (1 or 2): 2

   RENTSENSE - Migration Mode
   Already in NYC? Let's find somewhere better.

What's your current ZIP code? 11355

📍 Got it — 11355

Tell me: What do you love AND what would you change
about your current neighborhood?

You: I like the access to amenities like supermarkets and parks, but I think the nearby education opportunities are not very good. I'd prefer a neighborhood with nearby schools

🔍 Understanding your experience...

✓ I heard you:
   • Amenity Convenience: I like the access to amenities like supermarkets
   • Green Space Accessibility: I like the access to amenities like supermarkets and parks
   • Education Access: I think the nearby education opportunities are not very good. I'd prefer a neighborhood with nearby schools

‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚