# Imports

In [2]:
from pathlib import Path
import geopandas as gpd
import folium
from folium.plugins import MarkerCluster
import pandas as pd
DATA =  Path("..") / "datasets" / "VMFEAT"# adjust if needed

In [3]:
foi_points = gpd.read_file(DATA / "FOI_POINT.shp")  

# Cleaning and preprocess for FOI POINTS

In [4]:
cols_to_drop = [
    "UFI", "PFI", "FEATURE_ID", "PARENTFTID",
    "SUPER_PFI", "CRDATE_PFI", "CRDATE_UFI",
    "FEATURE_UF", "FEATURE_CR", "NAME_LABEL",
    "PARENTNAME", "VICNMSTATC" , "CHILDEXIST",
    "AUTHORGC", "AUTHORGID", "AUTHORGVER",
    "VMADD_PFI", "VICNAMESID" ,"THEME1","THEME2",
    "FEATSTATUS" 
]

foi_points_clean = foi_points.drop(columns=[c for c in cols_to_drop if c in foi_points.columns])
foi_points_clean = foi_points_clean[foi_points_clean["STATE"].str.upper() == "VIC"].copy()

Map each FOI points to SA2 code and name

In [5]:
from shapely.geometry import MultiPoint
def to_point(g):
    return g.geoms[0] if isinstance(g, MultiPoint) and len(g.geoms) > 0 else g
foi_points_clean["geometry"] = foi_points_clean.geometry.apply(to_point)

# Load ABS SA2 2021 polygons
sa2 = gpd.read_file(DATA / "SA2_2021_AUST_GDA2020.shp")

# Ensure CRS match
foi_points_clean = foi_points_clean.to_crs(sa2.crs)

# Spatial join: assign SA2 to each FOI
foi_points_clean = gpd.sjoin(
    foi_points_clean,
    sa2[["SA2_CODE21","SA2_NAME21","geometry"]],
    how="left",
    predicate="intersects"
).drop(columns=["index_right"])


In [6]:
#For the points that is not within any boundary we put them to the closest point
needs = foi_points_clean["SA2_CODE21"].isna()
if needs.any():
    sa2_pts = sa2.copy()
    sa2_pts["geometry"] = sa2_pts.geometry.representative_point()
    fix = gpd.sjoin_nearest(
        foi_points_clean.loc[needs, ["geometry"]],
        sa2_pts[["SA2_CODE21","SA2_NAME21","geometry"]],
        how="left",
        distance_col="dist_to_sa2_m"
    )
    foi_points_clean.loc[needs, ["SA2_CODE21","SA2_NAME21"]] = fix[["SA2_CODE21","SA2_NAME21"]].values




In [7]:
foi_gdf = gpd.GeoDataFrame(
    foi_points_clean,
    geometry="geometry",  # use the existing geometry column as-is
    crs="EPSG:4326"
)

foi_gdf = foi_gdf.to_crs(epsg=3857)

#Group the categories so that we can find the nearest distance
#  school, hospital,entertainment, grocery stor, melbourne cbd/ melb central
education =['primary school', 'secondary school', 'primary/secondary school','university']

health = ['maternal/child health centre', 'community health centre', 'day procedure centre', 'disability support centre',
          'general hospital', 'general hospital (emergency)',
          'bush nursing hospital', 'ambulance station']

tourist = ['tourist information centre', 'tourist attraction']

cultural = ['church', 'mosque', 'monastry', 'vihara (buddhist)', 'mandir (hindu)']

def assign_group(category):
    if category in education:
        return 'education'
    elif category in health:
        return 'health'
    elif category in tourist:
        return 'tourist'
    elif category in cultural:
        return 'cultural'
    else:
        return 'others'

foi_gdf['group'] = foi_gdf['FEATSUBTYP'].apply(assign_group)

IMPORT DOMAIN DATASET 

In [8]:
domain_df = pd.read_csv('/home/eeamanda/project-2-group-real-estate-industry-project-7-2025/datasets/domain_cleaned.csv')
#Creating as geodataframe to read in the latitude and longitude 
domain_gdf = gpd.GeoDataFrame(
    domain_df,
    geometry=gpd.points_from_xy(domain_df.lon, domain_df.lat),
    crs="EPSG:4326"  # WGS84 (lat/lon)
)

foi_gdf = gpd.GeoDataFrame(
    foi_points_clean,
    geometry="geometry",  # use the existing geometry column as-is
    crs="EPSG:4326"
)
domain_gdf = domain_gdf.to_crs(epsg=3857)

# CHECKING THE SHORTEST DISTANCE USING OPENROUTHREVICE

In [None]:
from shapely.geometry import Point
import openrouteservice
import numpy as np
import time

# ORS client
client = openrouteservice.Client(key="") #Enter key in the ""

In [None]:
import time
import numpy as np
import geopandas as gpd
import openrouteservice as ors
from openrouteservice.exceptions import ApiError
from math import radians, cos, sin, asin, sqrt


# Config
MAX_ROUTES = 3500      # ORS max per request
BATCH_DELAY = 2        # seconds between requests
DAILY_RESET_WAIT = 60 * 60  # wait 1h if daily quota exceeded
MAX_RETRIES = 3        # retry attempts per batch

# ----------------------------------------------------
# Utility: chunk a list into smaller lists
def chunk_list(lst, n):
    for i in range(0, len(lst), n):
        yield lst[i:i+n]

# Utility: Haversine distance in meters
def haversine(lon1, lat1, lon2, lat2):
    R = 6371000  # Earth radius in meters
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    return 2 * R * asin(sqrt(a))

# ----------------------------------------------------
# Safe ORS distance matrix with retries + fallback
def safe_matrix(origins, destinations):
    for attempt in range(MAX_RETRIES):
        try:
            result = client.distance_matrix(
                locations=origins + destinations,
                profile="driving-car",
                sources=list(range(len(origins))),
                destinations=list(range(len(origins), len(origins) + len(destinations))),
                metrics=["distance"]
            )
            return result["distances"]

        except ApiError as e:
            err_msg = str(e)

            if "Quota exceeded" in err_msg or "403" in err_msg:
                print("⚠️ Daily quota exceeded. Waiting before retry...")
                time.sleep(DAILY_RESET_WAIT)

            elif "429" in err_msg or "Rate limit" in err_msg:
                print("⚠️ Rate limit exceeded. Waiting 10s...")
                time.sleep(10)

            else:
                print(f"⚠️ ORS error: {err_msg}. Retrying in 5s...")
                time.sleep(5)

    # Fallback: return straight-line (Haversine) distances
    print("⚠️ ORS failed after retries. Using Haversine fallback.")
    distances = []
    for (lon1, lat1) in origins:
        row = [haversine(lon1, lat1, lon2, lat2) for (lon2, lat2) in destinations]
        distances.append(row)
    return distances

# ----------------------------------------------------
# Main nearest POI calculation
def nearest_poi_batch(domain_points, foi_points, max_dest=50):
    results = []

    # Limit FOIs if too many (pre-filtering)
    if len(foi_points) > max_dest:
        foi_points = foi_points[:max_dest]

    # Max origins allowed in one batch
    max_origins = max(1, MAX_ROUTES // len(foi_points))

    for domain_chunk in chunk_list(domain_points, max_origins):
        distances = safe_matrix(domain_chunk, foi_points)
        if distances is None:
            print(f"⚠️ Failed batch {domain_chunk[:3]}... skipping")
            continue

        for row in distances:
            nearest = min(row) if row else None
            results.append(nearest)

        time.sleep(BATCH_DELAY)

    return results

# ----------------------------------------------------
# Prepare data
# Ensure both domain and foi are in EPSG:4326 (lat/lon)
domain_gdf = domain_gdf.to_crs(epsg=4326)
foi_gdf = foi_gdf.to_crs(epsg=4326)

# Extract lon/lat
foi_gdf["lon"] = foi_gdf.geometry.x
foi_gdf["lat"] = foi_gdf.geometry.y

# Domain points list
domain_points = list(zip(domain_gdf["lon"], domain_gdf["lat"]))

# ----------------------------------------------------
# Run nearest POI per category
for cat in ["education", "health", "tourist", "cultural"]:
    foi_points_cat = list(zip(
        foi_gdf.loc[foi_gdf["FEATSUBTYP"] == cat, "lon"],
        foi_gdf.loc[foi_gdf["FEATSUBTYP"] == cat, "lat"]
    ))

    if len(foi_points_cat) == 0:
        print(f"⚠️ No FOIs found for category {cat}. Skipping.")
        domain_gdf[f"nearest_{cat}_dist_m"] = np.nan
        continue

    print(f"Processing {cat} with {len(foi_points_cat)} FOIs...")
    distances = nearest_poi_batch(domain_points, foi_points_cat, max_dest=50)
    domain_gdf[f"nearest_{cat}_dist_m"] = distances

# ----------------------------------------------------
# Save results
domain_gdf.to_csv("domain_with_nearest_pois.csv", index=False)
print("✅ Saved results to domain_with_nearest_pois.csv")



KeyError: 'group'

In [11]:
foi_gdf

Unnamed: 0,FTYPE,FEATSUBTYP,NAME,STATE,geometry,SA2_CODE21,SA2_NAME21,lon,lat
0,control point,survey monument,,VIC,POINT (143.52433 -38.84588),217031476,Otway,143.524329,-38.845880
1,control point,survey monument,,VIC,POINT (146.16829 -36.72986),204021064,Benalla Surrounds,146.168293,-36.729864
2,control point,survey monument,,VIC,POINT (145.1764 -37.09963),204011060,Seymour Surrounds,145.176397,-37.099626
3,control point,survey monument,,VIC,POINT (144.12032 -37.4246),201021011,Daylesford,144.120324,-37.424595
4,control point,survey monument,,VIC,POINT (144.52872 -36.06312),216011408,Lockington - Gunbower,144.528723,-36.063124
...,...,...,...,...,...,...,...,...,...
50672,place of worship,church,,VIC,POINT (146.38005 -36.57062),204021067,Wangaratta Surrounds,146.380055,-36.570616
50673,place of worship,church,APSLEY CATHOLIC CHURCH,VIC,POINT (141.08392 -36.96737),215011393,West Wimmera,141.083922,-36.967375
50675,community space,camp ground,LODDON FLOODWAY - MIDDLE BEND CAMPING AREA,VIC,POINT (143.69958 -35.3812),215031405,Swan Hill Surrounds,143.699581,-35.381201
50676,health facility,maternal/child health centre,DIGGERS REST MATERNAL AND CHILD HEALTH,VIC,POINT (144.70974 -37.62036),210041539,Diggers Rest,144.709739,-37.620356


In [None]:
"""
# Ensure foi_gdf has lon/lat columns
foi_gdf["lon"] = foi_gdf.geometry.x
foi_gdf["lat"] = foi_gdf.geometry.y

# Define domain_points from domain_gdf
domain_points = list(zip(domain_gdf["lon"], domain_gdf["lat"]))

for cat in ["education", "health", "tourist", "cultural"]:
    foi_points_cat = list(zip(
        foi_gdf.loc[foi_gdf["group"] == cat, "lon"],
        foi_gdf.loc[foi_gdf["group"] == cat, "lat"]
    ))
    
    print(f"Processing {cat}...")
    distances = nearest_poi_batch(domain_points, foi_points_cat, max_dest=50)
    domain_gdf[f"nearest_{cat}_dist_m"] = distances
"""
