In [15]:
import pandas as pd
import numpy as np
import folium
from geopy.distance import geodesic
import joblib  # to load the trained price model


## 1. Data Input

In [16]:
df = pd.read_csv("../../data/processed/cleaned_resale_data.csv")

# Using simulated data for demonstration purposes first.
# The noise comes from a dedicated, seeded generator so that "Restart & Run All"
# reproduces the same predicted prices (and therefore the same shortlist) every
# time, without touching the global NumPy random state used by later cells.
_price_noise_rng = np.random.default_rng(42)
df["predicted_price"] = df["resale_price"] * _price_noise_rng.uniform(0.95, 1.05, len(df))  # Simulated: resale price +/- 5%


## 2. Budget Input

In [17]:
# Placeholder budget used as the upper bound when filtering flats by predicted price.
budget_temp = 500_000

## 3. Filter Affordable Choices

In [18]:
def get_affordable_flats(predicted_price, resale_df, top_n=50):
    """
    Return up to ``top_n`` flats whose predicted price is within the given bound.

    Parameters
    ----------
    predicted_price : int or float
        Upper price bound (the user's budget). Rows whose ``"predicted_price"``
        column is less than or equal to this value are kept.
    resale_df : pandas.DataFrame
        Frame containing a ``"predicted_price"`` column. It is not modified.
    top_n : int, default 50
        Maximum number of rows returned. Rows are taken in the existing row
        order of ``resale_df``; no sorting is applied.

    Returns
    -------
    pandas.DataFrame
        An independent copy of the first ``top_n`` matching rows, so callers
        can add columns to it without writing into a slice of another frame.

    Notes
    -----
    Prints the total number of matching rows (before truncation to ``top_n``).
    """
    within_budget = resale_df["predicted_price"] <= predicted_price
    print(f"✅ Found {int(within_budget.sum())} flats under budget ${predicted_price:,.0f}")
    # Truncate first, then copy: only the rows actually returned are duplicated,
    # and the result is a standalone frame rather than a slice produced by head().
    return resale_df.loc[within_budget].head(top_n).copy()

# The budget is passed as the price ceiling; the result holds at most the default top_n rows.
affordable_flats = get_affordable_flats(predicted_price=budget_temp, resale_df=df)
affordable_flats.head()


✅ Found 110253 flats under budget $500,000


Unnamed: 0,time,address,storey_avg,floor_area_sqm,flat_type_encoded,flat_model,remaining_lease_months,resale_price,predicted_price
0,0,406 ANG MO KIO AVE 10,11,44.0,2,Improved,736,232000.0,221599.015931
1,0,108 ANG MO KIO AVE 4,2,67.0,3,New Generation,727,250000.0,250783.865789
2,0,602 ANG MO KIO AVE 5,2,67.0,3,New Generation,749,262000.0,263064.640186
3,0,465 ANG MO KIO AVE 10,5,68.0,3,New Generation,745,265000.0,268641.89239
4,0,601 ANG MO KIO AVE 5,2,67.0,3,New Generation,749,265000.0,270991.420344


## Getting coordinates

In [19]:
# Using Simulated Data for Housing Locations, MRT and Schools
# The OneMap API is too slow in this environment, so we will simulate the data instead.
# Bounding box used for the uniform draws (latitude / longitude limits).
SG_LAT_MIN, SG_LAT_MAX = 1.22, 1.47
SG_LNG_MIN, SG_LNG_MAX = 103.6, 104.0


def _simulate_places(name_col, prefix, count):
    """Build a frame of `count` random points inside the bounding box.

    Columns are `name_col` (values "<prefix>_<i>"), "lat" and "lng".
    Latitudes are drawn before longitudes from the global NumPy RNG.
    """
    return pd.DataFrame({
        name_col: [f"{prefix}_{i}" for i in range(count)],
        "lat": np.random.uniform(SG_LAT_MIN, SG_LAT_MAX, count),
        "lng": np.random.uniform(SG_LNG_MIN, SG_LNG_MAX, count),
    })


# Seed once so every draw below is reproducible; the order of the draws
# (flats, MRT, schools, amenities; lat before lng) determines the values.
np.random.seed(42)

# assign() returns a new frame instead of writing columns into the frame
# produced by head(), which avoids chained-assignment warnings and makes the
# cell safe to re-run.
affordable_flats = affordable_flats.assign(
    lat=np.random.uniform(SG_LAT_MIN, SG_LAT_MAX, len(affordable_flats)),
    lng=np.random.uniform(SG_LNG_MIN, SG_LNG_MAX, len(affordable_flats)),
)

mrt_df = _simulate_places("station", "MRT", 30)
schools_df = _simulate_places("school", "School", 50)
amenities_df = _simulate_places("amenity", "Amenity", 20)





## Add Scoring Function

In [20]:
from geopy.distance import geodesic

def score_location(lat, lng, mrt_df, schools_df, amenities_df):
    """
    Score a location from 0 to 10 by the number of nearby points of interest.

    Parameters
    ----------
    lat, lng : float
        Coordinates of the location being scored.
    mrt_df, schools_df, amenities_df : pandas.DataFrame
        Frames with "lat" and "lng" columns, one row per point of interest.
        Empty frames are allowed and contribute zero points.

    Returns
    -------
    float
        Sum of the three components below, capped at 10 and rounded to 2 dp:
        - MRT stations within 0.8 km: 2 points each, at most 3 counted
        - schools within 1.0 km: 1.5 points each, at most 3 counted
        - amenities within 1.0 km: 1 point each, at most 2 counted
    """
    origin = (lat, lng)

    def count_within(df, radius_km):
        # Walk the coordinate columns directly instead of a row-wise
        # DataFrame.apply: this always yields a plain integer (a row-wise apply
        # on an empty frame does not reliably reduce to a scalar) and avoids
        # building a Series object for every row.
        return sum(
            geodesic(origin, (point_lat, point_lng)).km <= radius_km
            for point_lat, point_lng in zip(df["lat"], df["lng"])
        )

    score = 0
    score += min(count_within(mrt_df, 0.8), 3) * 2        # MRT: up to 6 pts
    score += min(count_within(schools_df, 1.0), 3) * 1.5  # Schools: up to 4.5 pts
    score += min(count_within(amenities_df, 1.0), 2) * 1  # Amenities: up to 2 pts

    # The raw maximum is 12.5, so the cap keeps the result on a 0-10 scale.
    return round(min(score, 10), 2)

In [21]:
def _score_row(flat):
    """Score one flat (a row with "lat"/"lng") against the simulated MRT, school and amenity frames."""
    return score_location(flat["lat"], flat["lng"], mrt_df, schools_df, amenities_df)


affordable_flats["score"] = affordable_flats.apply(_score_row, axis=1)

# Rank by score, best first, and keep the 20 highest-scoring flats.
ranked_flats = affordable_flats.sort_values(by="score", ascending=False)
top_flats = ranked_flats.head(20)
top_flats[["address", "predicted_price", "score", "lat", "lng"]]


Unnamed: 0,address,predicted_price,score,lat,lng
13,219 ANG MO KIO AVE 1,310740.657916,3.5,1.273085,103.742701
27,256 ANG MO KIO AVE 4,338742.644999,2.5,1.348559,103.629618
41,463 ANG MO KIO AVE 10,438832.757914,2.0,1.343794,103.885298
16,570 ANG MO KIO AVE 3,297268.588332,2.0,1.296061,103.65637
30,302 ANG MO KIO AVE 3,367627.780914,1.5,1.371886,103.945241
61,151 BEDOK RESERVOIR RD,285026.398938,1.5,1.356678,103.610168
62,54 CHAI CHEE ST,283810.207379,1.5,1.266214,103.643157
31,575 ANG MO KIO AVE 10,384669.229142,1.5,1.262631,103.849319
56,101 BEDOK NTH AVE 4,226730.969191,1.5,1.44733,103.824511
3,465 ANG MO KIO AVE 10,268641.89239,1.0,1.369665,103.957931


In [22]:
#Alt: Calling OneMap API to get coordinates for addresses
import requests
import time
import pandas as pd

def get_coordinates(address):
    """
    Fetch coordinates (lat, lng) for a given address using the OneMap search API.

    Parameters
    ----------
    address : str
        Free-text address sent as the ``searchVal`` query parameter.

    Returns
    -------
    tuple
        ``(lat, lng)`` as floats, taken from the first search result, or
        ``(None, None)`` when there are no results or the request fails.

    Notes
    -----
    This is best-effort: any exception (network error, timeout, HTTP error
    status, malformed response) is printed and swallowed rather than raised.
    """
    url = "https://www.onemap.gov.sg/api/common/elastic/search"
    params = {
        "searchVal": address,
        "returnGeom": "Y",
        "getAddrDetails": "Y",
        "pageNum": 1
    }
    try:
        response = requests.get(url, params=params, timeout=5)
        # Report HTTP error statuses (e.g. rate limiting or server errors)
        # instead of silently treating their body as "no results".
        response.raise_for_status()
        results = response.json().get("results", [])
        if results:
            best_match = results[0]
            return float(best_match["LATITUDE"]), float(best_match["LONGITUDE"])
    except Exception as e:
        print(f"❌ Error fetching {address}: {e}")
    return None, None

In [23]:
import folium

# Base map centred on Singapore
m = folium.Map(location=[1.35, 103.82], zoom_start=12)

# One green "home" marker per shortlisted flat, with address, price and score in the popup
for _, flat in top_flats.iterrows():
    popup_html = (
        f"<b>{flat['address']}</b><br>"
        f"Predicted Price: ${int(flat['predicted_price']):,}<br>"
        f"Score: {flat['score']}/10"
    )
    marker = folium.Marker(
        location=[flat["lat"], flat["lng"]],
        popup=folium.Popup(popup_html, max_width=300),
        icon=folium.Icon(color="green", icon="home", prefix="fa"),
    )
    marker.add_to(m)

m  # This displays the map in the notebook


## A brief Overview and Summary

- Reducing the time spent on OneMap API requests remains a challenge; the real coordinates are replaced with simulated data here to keep the runtime of each iteration manageable.

- As a further improvement, the visualization can be adapted to work with real data instead of simulated data. UI components can also be added to the map so that users can click on each housing choice to see its details.