In [2]:
import pandas as pd
import numpy as np

weights = pd.Series(
    index=["Commute Convenience", "Safety", "Noise", "Amenity Convenience", "Green Space Accessibility", "Job Opportunities", "Education Access", "Political Leaning"],
    data=[1, 1, 1, 1, 1, 1, 1, 1]
)

values = pd.DataFrame(
    index = ["Hudson Yards", "Morningside Heights", "Harlem"],
    data = {
        "Commute Convenience": [0.39, 0.21, 0.19],
        "Safety": [0.90, 0.57, 0.62],
        "Noise": [0.5, 0.5, 0.5],
        "Amenity Convenience": [0.39, 0.11, 0.08],
        "Green Space Accessibility": [0.69, 0.76, 0.74],
        "Job Opportunities": [0.91, 0.27, 0.28],
        "Education Access": [0.11, 0.1, 0.1],
        "Political Leaning": [0.35, 0.45, 0.55]
    }
)

costs = pd.Series(
    index = ["Hudson Yards", "Morningside Heights", "Harlem"],
    data = [18000, 16000, 10000]
)

In [3]:
# This is the overall quality of life rating, finding the "best" possible location to live in, based on the user's preferences
# In this mode, cost is a factor that is taken into consideration in the calculation. (To disable, set its weight to 0.)
# The function returns a value between 0 and 1, where 0 indicates minimal fit, and 1 indicates maximum fit.
# We can think of this as answering: "Where would I be most happy?"

def calculate_fit_index(location: str):
    qol_value = (weights * values.loc[location]).sum()
    weights_sum = weights.sum()
    return qol_value / weights_sum


# This is the best value rating, finding places that give you the most "bang for your buck".
# The QoL value is calculated WITHOUT taking cost into consideration.
# This QoL value is then divided by the cost value; hence, "bang for your buck". Or in other words: QoL per Dollar.
# We can think of this as answering: "Where is the best deal?"

def calculate_return_on_investment(location: str):
    qol_value = (weights.drop(labels="Cost") * values.drop(columns="Cost").loc[location]).sum()
    cost = costs[location]

    # Note that we take the natural log of the cost of living, because the significance of cost is not linear.
    # For example, consider:
        # Neighborhood A: QoL = 0.5, Cost = 2 (Very Cheap). Ratio = 0.25
        # Neighborhood B: QoL = 1.0 (Perfect!), Cost = 5 (Average). Ratio = 0.20
    # If we use a linear scale for the cost, Neighborhood A has a better cost ratio.
    # Any neighborhood with a low cost value like 1 or 2, regardless of their actual fit, would get a very good result. We don't want that.
    return qol_value / np.log(cost)

In [None]:
for location in values.index.tolist():
    values.loc[location, "Fit Index"] = calculate_fit_index(location)

for location in values.index.tolist():
    values.loc[location, "ROI"] = calculate_return_on_investment(location)

values.head()

KeyError: "['Cost'] not found in axis"

In [None]:
print(calculate_fit_index("Harlem"))

0.4144444444444445


In [4]:
from google import genai

# Create a single client object
client = genai.Client(api_key="AIzaSyCtdxcowK6rV1aBg27bZdOl680qUOEL8ag")
user_inputs = []

user_inputs.append(input(f"Please describe what you are looking for in a residential neighborhood. \nPotential topics include (but not limited to): \n{", ".join(weights.index.tolist())}.\n"))

# Access API methods through services on the client object
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=f"""
    You are a consultant, and your client is describing what they are looking for in a residential neighborhood.
    Here is their description: {user_inputs[0]}. Your goal is to understand how important the following categories (i.e. location factors) are to the user:
    {", ".join(weights.index.tolist())}.
    You may ask the user up to three clarifying questions, that will help you better understand their preferences.
    Choose and phrase your questions carefully to maximize how much clarifying information you can get.
    It may be in your best interests to ask questions about categories the user may not have mentioned preferences for or against.\n
    """
)

user_inputs.append(input(f"{response.text}\n"))

response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=f"""
    You are a consultant, and your client is describing what they are looking for in a residential neighborhood.
    Here is their description: {user_inputs[0]}. You then asked this client the following clarifying questions: {response.text}, and they responded with the following: {user_inputs[1]}.
    Now, based on the description your client gave earlier, and their response to your questions, you will assign a weight to each of the following categories:
    {", ".join(weights.index.tolist())}.
    The higher the weight, the more important you believe that category is to the user.
    The weights can be decimals, but all of them must still sum to the number of categories we've just provided.
    Your response must be a string of space-delimited decimals, such as: 0.2 0.3 0.4
    Each decimal will correspond with the respective category. For instance, the first decimal in your string of decimals corresponds to {weights.index.tolist()[0]}.\n
    """
)

new_weights = [float(x) for x in response.text.split()]
weights = pd.Series(
    index=["Commute Convenience", "Safety", "Noise", "Amenity Convenience", "Green Space Accessibility", "Job Opportunities", "Education Access", "Political Leaning"],
    data=new_weights
)

Please describe what you are looking for in a residential neighborhood. 
Potential topics include (but not limited to): 
Commute Convenience, Safety, Noise, Amenity Convenience, Green Space Accessibility, Job Opportunities, Education Access, Political Leaning.
JOb, Education, safety
Thank you for sharing your initial thoughts on what's important. "Job, Education, safety" gives us a great starting point. To help me understand your preferences more deeply and identify the perfect neighborhood, I have a few clarifying questions:

1.  You've highlighted **Job, Education, and Safety** as crucial. Could you help me understand their relative priority â€“ perhaps ranking them or telling me if any are non-negotiable? Building on that, how important are daily practicalities such as your **commute convenience** and easy **access to amenities** like shops and services?
2.  Regarding the living environment, what are your preferences concerning **noise levels** in the neighborhood, and how important

In [5]:
weights.head(15)

Unnamed: 0,0
Commute Convenience,1.0
Safety,1.5
Noise,0.5
Amenity Convenience,1.0
Green Space Accessibility,0.5
Job Opportunities,1.5
Education Access,1.5
Political Leaning,0.5
