In [None]:
import pandas as pd
import numpy as np

# Relative importance of every criterion in the weighted scores below.
# "Cost" counts ten times as much as each of the other criteria.
weights = pd.Series(
    {
        "Cost": 10,
        "Commute Convenience": 1,
        "Safety": 1,
        "Noise": 1,
        "Amenity Convenience": 1,
        "Green Space Accessibility": 1,
        "Job Opportunities": 1,
        "Education Access": 1,
        "Political Leaning": 1,
    }
)

# Per-neighborhood score for every criterion: one row per neighborhood,
# one column per criterion (column labels match the labels used in `weights`).
values = pd.DataFrame.from_dict(
    {
        "Hudson Yards": [0.05, 0.39, 0.90, 0.5, 0.39, 0.69, 0.91, 0.11, 0.35],
        "Morningside Heights": [0.30, 0.21, 0.57, 0.5, 0.11, 0.76, 0.27, 0.1, 0.45],
        "Harlem": [0.44, 0.19, 0.62, 0.5, 0.08, 0.74, 0.28, 0.1, 0.55],
    },
    orient="index",
    columns=[
        "Cost",
        "Commute Convenience",
        "Safety",
        "Noise",
        "Amenity Convenience",
        "Green Space Accessibility",
        "Job Opportunities",
        "Education Access",
        "Political Leaning",
    ],
)

# Raw cost figure for each neighborhood, keyed by the same labels as the rows of `values`.
costs = pd.Series(
    {
        "Hudson Yards": 18000,
        "Morningside Heights": 16000,
        "Harlem": 10000,
    }
)

def calculate_fit_index(location: str) -> float:
    """Return the overall quality-of-life fit of *location*.

    This answers "Where would I be most happy?": it looks for the best
    possible place to live given the user's preferences. Each criterion score
    of the location (its row in ``values``) is multiplied by the matching
    entry of ``weights`` and the total is divided by the sum of the weights,
    i.e. the result is a weighted average of the scores.

    Cost takes part in this calculation like any other criterion. To disable
    it, set its weight to 0.

    Args:
        location: Row label of the neighborhood in ``values``.

    Returns:
        The weighted average of the location's scores. With scores in [0, 1]
        and non-negative weights this lies between 0 (minimal fit) and
        1 (maximum fit).

    Raises:
        KeyError: If ``location`` is not a row label of ``values``.
        ValueError: If the weights sum to zero, in which case the weighted
            average is undefined (it would otherwise silently come out as NaN).
    """
    # pandas aligns the two Series on their labels, so each weight is applied
    # to the score with the same criterion name regardless of ordering.
    weighted_total = (weights * values.loc[location]).sum()

    total_weight = weights.sum()
    if total_weight == 0:
        raise ValueError(
            "The weights sum to zero; at least one criterion needs a non-zero weight."
        )

    return weighted_total / total_weight


def calculate_value_for_money(location: str) -> float:
    """Return the "bang for your buck" rating of *location*.

    This answers "Where is the best deal?". The quality-of-life (QoL) value is
    computed WITHOUT the "Cost" criterion: it is the plain weighted sum of the
    remaining scores (not divided by the sum of the weights). That value is
    then divided by the natural log of the location's entry in ``costs``,
    giving a "QoL per (log) dollar" figure.

    The log is used because the significance of cost is not linear. Dividing
    by the raw cost would let very cheap neighborhoods score well almost
    regardless of their actual fit. The log compresses cost differences
    (doubling the cost only adds ln 2, about 0.69, to the divisor), so QoL
    keeps more influence on the result.

    Args:
        location: Label of the neighborhood in both ``values`` and ``costs``.

    Returns:
        The cost-excluded weighted QoL sum divided by ``ln(cost)``.

    Raises:
        KeyError: If ``location`` is missing from ``values`` or ``costs``, or
            if there is no "Cost" entry to drop.
        ValueError: If the cost is not greater than 1.
    """
    # Select the location's row first, then discard "Cost" from it; this
    # avoids copying the whole table just to remove one column.
    scores = values.loc[location].drop(labels="Cost")
    qol_value = (weights.drop(labels="Cost") * scores).sum()

    cost = costs[location]
    # ln(cost) is 0 at cost == 1 and negative (or NaN) below it, which would
    # turn the ratio into inf, a sign-flipped score, or NaN. `not cost > 1`
    # also rejects NaN costs.
    if not cost > 1:
        raise ValueError(
            f"Cost for {location!r} must be greater than 1 to take its logarithm, got {cost!r}."
        )

    return qol_value / np.log(cost)

In [None]:
# Quick check: overall fit index for Harlem with the weights and scores defined
# in the previous cell. The recorded output of this cell is 0.4144444444444445.
print(calculate_fit_index("Harlem"))

0.4144444444444445
