# Imports

In [1]:
from pathlib import Path
import geopandas as gpd
import folium
from folium.plugins import MarkerCluster
import pandas as pd
# Folder that holds the VMFEAT shapefiles, relative to this notebook (adjust if needed).
DATA = Path("..", "datasets", "VMFEAT")

In [2]:
# Read the FOI point layer from the VMFEAT data folder.
foi_points = gpd.read_file(DATA.joinpath("FOI_POINT.shp"))

# Cleaning and preprocessing the FOI points

In [3]:
# Columns removed from the FOI layer before analysis.
cols_to_drop = [
    "UFI", "PFI", "FEATURE_ID", "PARENTFTID", "SUPER_PFI",
    "CRDATE_PFI", "CRDATE_UFI", "FEATURE_UF", "FEATURE_CR",
    "NAME_LABEL", "PARENTNAME", "VICNMSTATC", "CHILDEXIST",
    "AUTHORGC", "AUTHORGID", "AUTHORGVER", "VMADD_PFI",
    "VICNAMESID", "THEME1", "THEME2", "FEATSTATUS",
]

# Drop whichever of those columns are present, then keep only rows whose STATE is "VIC"
# (case-insensitive); rows with a missing STATE are excluded.
foi_points_clean = (
    foi_points
    .drop(columns=cols_to_drop, errors="ignore")
    .loc[lambda frame: frame["STATE"].str.upper().eq("VIC")]
    .copy()
)

Map each FOI point to its SA2 code and name

In [4]:
from shapely.geometry import MultiPoint
def to_point(g):
    """Return the first member of a non-empty MultiPoint; return anything else unchanged."""
    if not isinstance(g, MultiPoint):
        return g
    members = g.geoms
    if len(members) == 0:
        # An empty MultiPoint has no member to extract, so it is passed through as-is.
        return g
    return members[0]
# Collapse MultiPoint geometries to a single Point.
foi_points_clean["geometry"] = foi_points_clean.geometry.apply(to_point)

# Load ABS SA2 2021 polygons
sa2 = gpd.read_file(DATA / "SA2_2021_AUST_GDA2020.shp")

# Ensure CRS match
foi_points_clean = foi_points_clean.to_crs(sa2.crs)

# Spatial join: assign SA2 to each FOI
foi_points_clean = gpd.sjoin(
    foi_points_clean,
    sa2[["SA2_CODE21", "SA2_NAME21", "geometry"]],
    how="left",
    predicate="intersects"
).drop(columns=["index_right"])

# "intersects" emits one output row per matching polygon, so a point lying exactly on a
# shared SA2 boundary appears once per neighbouring SA2. Keep a single row per FOI
# (the first match) so every FOI maps to exactly one SA2 and the index stays unique.
foi_points_clean = foi_points_clean[~foi_points_clean.index.duplicated(keep="first")].copy()


In [None]:
# FOI points that fall outside every SA2 polygon are assigned to the nearest SA2.
needs = foi_points_clean["SA2_CODE21"].isna()
if needs.any():
    # Nearest-neighbour search needs planar distances: in a geographic CRS the distances
    # are in degrees (and geopandas warns). Project both layers to GDA2020 / Vicgrid
    # (EPSG:7899) so that dist_to_sa2_m really is in metres.
    metric_crs = "EPSG:7899"

    # Skip SA2 records without a usable geometry.
    sa2_usable = sa2.loc[
        sa2.geometry.notna() & ~sa2.geometry.is_empty,
        ["SA2_CODE21", "SA2_NAME21", "geometry"],
    ]

    # Match against the SA2 polygons themselves rather than one representative point per
    # SA2, so a point just outside a large SA2 is not pulled into a smaller neighbour.
    fix = gpd.sjoin_nearest(
        foi_points_clean.loc[needs, ["geometry"]].to_crs(metric_crs),
        sa2_usable.to_crs(metric_crs),
        how="left",
        distance_col="dist_to_sa2_m"
    )

    # Equidistant matches come back as extra rows; keep one per FOI and put the result in
    # the same order as the rows being filled, so the positional assignment cannot misalign.
    fix = fix[~fix.index.duplicated(keep="first")].reindex(foi_points_clean.index[needs])
    foi_points_clean.loc[needs, ["SA2_CODE21", "SA2_NAME21"]] = (
        fix[["SA2_CODE21", "SA2_NAME21"]].to_numpy()
    )




In [6]:
# foi_points_clean already carries the CRS of the SA2 layer (set by the earlier
# to_crs(sa2.crs) call). Re-declaring it as EPSG:4326 would mislabel the coordinates, and
# recent geopandas versions reject a crs= argument that disagrees with the data's CRS.
# Reproject from the CRS the data actually has, straight to Web Mercator.
foi_gdf = foi_points_clean.to_crs(epsg=3857)

# Group the FOI sub-types into broad categories so the nearest distance can be computed
# per category (school, hospital, entertainment, grocery store, Melbourne CBD / Melbourne Central).
education = [
    'primary school',
    'secondary school',
    'primary/secondary school',
    'university',
]

health = [
    'maternal/child health centre',
    'community health centre',
    'day procedure centre',
    'disability support centre',
    'general hospital',
    'general hospital (emergency)',
    'bush nursing hospital',
    'ambulance station',
]

tourist = ['tourist information centre', 'tourist attraction']

# NOTE(review): 'monastry' is kept exactly as written; confirm it matches the spelling
# used in the FEATSUBTYP column.
cultural = ['church', 'mosque', 'monastry', 'vihara (buddhist)', 'mandir (hindu)']


def assign_group(category):
    """Return the group label for a sub-type, or 'others' when it is in none of the lists.

    The lists are checked in the order education, health, tourist, cultural.
    """
    labelled_lists = (
        ('education', education),
        ('health', health),
        ('tourist', tourist),
        ('cultural', cultural),
    )
    for label, members in labelled_lists:
        if category in members:
            return label
    return 'others'

# Label every FOI with its broad category, derived from the FEATSUBTYP column.
foi_gdf['group'] = foi_gdf['FEATSUBTYP'].map(assign_group)

Import the Domain dataset

In [7]:
# Locate the cleaned Domain listings relative to the shared datasets folder (the parent of
# DATA) instead of an absolute path inside one user's home directory, so the notebook runs
# on any checkout. Adjust if the CSV lives elsewhere.
domain_df = pd.read_csv(DATA.parent / "domain_cleaned.csv")

# Build a GeoDataFrame with point geometry from the lon/lat columns.
domain_gdf = gpd.GeoDataFrame(
    domain_df,
    geometry=gpd.points_from_xy(domain_df.lon, domain_df.lat),
    crs="EPSG:4326"  # WGS84 (lat/lon)
)


# CHECKING THE SHORTEST DISTANCE USING OPENROUTESERVICE

In [87]:
from shapely.geometry import Point
import openrouteservice
import numpy as np
import time

import os

# ORS client
# The API key is read from the ORS_API_KEY environment variable so that the secret is
# never stored in the notebook. The key that was previously committed here should be
# treated as leaked and revoked.
ors_api_key = os.environ.get("ORS_API_KEY")
if not ors_api_key:
    raise RuntimeError(
        "Set the ORS_API_KEY environment variable to your openrouteservice API key."
    )
client = openrouteservice.Client(key=ors_api_key)

# Calculating the driving distance and time to the nearest schools and hospitals

In [None]:
import os
import time
import numpy as np
import geopandas as gpd
import openrouteservice as ors
from openrouteservice.exceptions import ApiError

# =========================
# CONFIG
# =========================
# Output file for the per-row results.
OUTPUT_CSV = "nearest_drive_metrics2.csv"

# FOI groups to compute nearest-drive metrics for.
WANTED_CATS = ["education", "health"]

# Number of straight-line nearest FOIs kept as candidates per origin.
K_NEAREST = 3

# Upper bound on sources × destinations per Matrix call.
ORS_ELEMENTS_LIMIT = 3_500

# Polite delay between API calls, in seconds.
BATCH_DELAY = 2

# =========================
# UTILITIES
# =========================
def haversine_vec(orig_lons, orig_lats, dest_lons, dest_lats):
    """Haversine distances in meters between every origin and every destination.

    Inputs are coordinates in degrees; the result has shape (n_origins, n_destinations).
    """
    earth_radius_m = 6371000.0

    # Origins become column vectors and destinations row vectors, so the arithmetic
    # below broadcasts to the full origin × destination grid.
    lon_o = np.radians(orig_lons)[:, np.newaxis]
    lat_o = np.radians(orig_lats)[:, np.newaxis]
    lon_d = np.radians(dest_lons)[np.newaxis, :]
    lat_d = np.radians(dest_lats)[np.newaxis, :]

    lon_gap = lon_d - lon_o
    lat_gap = lat_d - lat_o

    hav = (
        np.sin(lat_gap / 2.0) ** 2
        + np.cos(lat_o) * np.cos(lat_d) * np.sin(lon_gap / 2.0) ** 2
    )
    return 2.0 * earth_radius_m * np.arcsin(np.sqrt(hav))

def wait_until_matrix_metrics(origins, destinations, max_retries=None):
    """
    Call ORS Matrix for driving distance (meters) and duration (seconds).

    By default this WAITS/RETRIES indefinitely until success (no fallback).

    Parameters:
        origins: list of (lon, lat) pairs used as matrix sources.
        destinations: list of (lon, lat) pairs used as matrix destinations.
        max_retries: optional cap on the number of failed attempts that are retried.
            None (the default) keeps retrying forever; with a number, the last error
            is re-raised once that many retries have been used up, so a request that
            can never succeed does not block the notebook indefinitely.

    Returns: (dist_m, dur_s) as np.ndarrays shape (n_origins, n_destinations).
    Raises: the last caught exception, only when max_retries is set and exhausted.
    """
    failures = 0
    while True:
        try:
            resp = client.distance_matrix(
                locations=origins + destinations,
                profile="driving-car",
                sources=list(range(len(origins))),
                destinations=list(range(len(origins), len(origins) + len(destinations))),
                metrics=["distance", "duration"]
            )
            dist = np.array(resp["distances"], dtype=float)
            dur  = np.array(resp["durations"], dtype=float)
            return dist, dur

        except ApiError as e:
            last_error = e
            msg = str(e)
            # Prefer the status carried by the exception: searching the message text
            # for "403"/"429" can match unrelated digits (e.g. inside a coordinate).
            status = getattr(e, "status", None)
            if status is not None:
                is_quota = str(status).strip() == "403"
                is_rate = str(status).strip() == "429"
            else:
                is_quota = "403" in msg
                is_rate = "429" in msg
            # Backoff rules:
            if is_quota or "Quota exceeded" in msg:
                print("⚠️ ORS daily quota exceeded. Waiting 1 hour before retry...")
                wait_s = 60 * 60
            elif is_rate or "Rate limit" in msg:
                print("⚠️ ORS rate limit hit. Waiting 10s before retry...")
                wait_s = 10
            else:
                print(f"⚠️ ORS error: {msg}. Waiting 5s before retry...")
                wait_s = 5
        except Exception as e:
            last_error = e
            print(f"⚠️ Unexpected error: {e}. Waiting 10s before retry...")
            wait_s = 10

        failures += 1
        if max_retries is not None and failures > max_retries:
            raise last_error
        time.sleep(wait_s)

def split_gdf(gdf, chunk_size=30, start=0):
    """Yield copies of consecutive row slices of `gdf`, `chunk_size` rows at a time, from row `start`."""
    total_rows = len(gdf)
    for lower in range(start, total_rows, chunk_size):
        upper = lower + chunk_size
        piece = gdf.iloc[lower:upper]
        yield piece.copy()

# =========================
# CORE
# =========================
def _candidate_sets(dmat, k_nearest, limit_radius_m):
    """Per origin row of `dmat`, the set of column indices of its k nearest finite destinations."""
    if limit_radius_m is not None:
        dmask = dmat.copy()
        dmask[dmask > float(limit_radius_m)] = np.inf
    else:
        dmask = dmat

    k = min(k_nearest, dmask.shape[1])
    topk_idx = np.argpartition(dmask, kth=k-1, axis=1)[:, :k]
    # Non-finite entries (beyond the radius, or NaN from missing coordinates) are dropped,
    # so an origin can end up with fewer than k candidates, or none at all.
    return [
        {int(idx) for idx in row_idx if np.isfinite(dmask[r, idx])}
        for r, row_idx in enumerate(topk_idx)
    ]


def _finite_min(values):
    """Smallest finite entry of `values`, or NaN when there is none."""
    finite = values[np.isfinite(values)]
    return float(np.min(finite)) if finite.size > 0 else np.nan


def compute_nearest_drive_metrics_chunked(
    domain_gdf: gpd.GeoDataFrame,
    foi_gdf: gpd.GeoDataFrame,
    output_csv: str = OUTPUT_CSV,
    chunk_size: int = 30,
    start_row: int = 7470,
    limit_radius_m: float | None = None
):
    """
    For each domain row and each category:
      - pick K nearest FOIs by straight-line (optionally within radius),
      - dedupe FOIs across the chunk,
      - call ORS Matrix (distance+duration) in safe batches (no fallback),
      - store min driving distance (meters) and time (minutes):
        nearest_{cat}_dist_m, nearest_{cat}_time_mins

    The minimum distance and the minimum time are taken independently over an origin's
    candidates, so they may refer to different FOIs.

    Parameters:
        domain_gdf: origins; reprojected to EPSG:4326 internally.
        foi_gdf: destinations with a "group" column; only groups in WANTED_CATS are used.
        output_csv: CSV that each processed chunk is appended to.
        chunk_size: number of domain rows per chunk.
        start_row: positional row to start from. NOTE: the default is 7470, not 0.
        limit_radius_m: optional straight-line cap (meters) on candidate FOIs.

    Output file handling:
        - start_row == 0 is a fresh run: an existing output_csv is deleted first.
        - start_row > 0 is a resume: an existing output_csv is kept and appended to
          (previously it was deleted, discarding earlier results). Running the same
          resume twice therefore appends the rows twice; delete the file first to redo it.
        The header row is written only when the file is new or empty.

    Rows with no candidate FOI (including rows with missing coordinates) get NaN and are
    not sent to ORS.
    """
    # Ensure WGS84 & lon/lat columns
    domain = domain_gdf.to_crs(epsg=4326).copy()
    foi = foi_gdf.to_crs(epsg=4326).copy()
    foi = foi[foi["group"].isin(WANTED_CATS)].copy()

    for g in (domain, foi):
        if "lon" not in g.columns or "lat" not in g.columns:
            g["lon"] = g.geometry.x
            g["lat"] = g.geometry.y

    # Pre-split FOIs by category (coords only)
    foi_by_cat = {}
    for cat in WANTED_CATS:
        sub = foi[foi["group"] == cat]
        foi_by_cat[cat] = {
            "lon": sub["lon"].to_numpy(),
            "lat": sub["lat"].to_numpy()
        }

    # Fresh output only for a run that starts at the first row; a resume keeps the file.
    if start_row == 0 and os.path.exists(output_csv):
        os.remove(output_csv)

    for ch_i, chunk in enumerate(split_gdf(domain, chunk_size=chunk_size, start=start_row)):
        print(f"Processing chunk {ch_i+1} (rows {chunk.index.min()}–{chunk.index.max()}), size={len(chunk)}")
        o_lon = chunk["lon"].to_numpy()
        o_lat = chunk["lat"].to_numpy()
        n_o = len(chunk)

        for cat in WANTED_CATS:
            dest_lon = foi_by_cat[cat]["lon"]
            dest_lat = foi_by_cat[cat]["lat"]

            # Results default to NaN and are only filled for origins that get routed.
            best_dist = np.full(n_o, np.nan, dtype=float)
            best_time_s = np.full(n_o, np.nan, dtype=float)

            if dest_lon.size > 0:
                # 1) Top-K by Haversine per origin (optionally within radius)
                dmat = haversine_vec(o_lon, o_lat, dest_lon, dest_lat)  # meters
                cand_sets = _candidate_sets(dmat, K_NEAREST, limit_radius_m)

                # Only origins with at least one candidate are worth a Matrix request.
                routable = [r for r in range(n_o) if cand_sets[r]]

                if routable:
                    # 2) Deduplicate candidate destinations across the chunk
                    dest_unique = sorted(set().union(*cand_sets))
                    dest_pos = {d: j for j, d in enumerate(dest_unique)}
                    dest_list = [(float(dest_lon[d]), float(dest_lat[d])) for d in dest_unique]

                    # 3) Matrix calls in element-safe batches
                    sources_per_call = max(1, ORS_ELEMENTS_LIMIT // len(dest_list))

                    for lo in range(0, len(routable), sources_per_call):
                        batch_rows = routable[lo:lo + sources_per_call]
                        sub_origins = [(float(o_lon[r]), float(o_lat[r])) for r in batch_rows]

                        # WAIT until ORS succeeds (no fallback)
                        dist_m, dur_s = wait_until_matrix_metrics(sub_origins, dest_list)

                        # For each origin in this sub-batch, only consider its own candidates
                        for local_r, global_r in enumerate(batch_rows):
                            cols = [dest_pos[d] for d in cand_sets[global_r]]
                            best_dist[global_r] = _finite_min(dist_m[local_r, cols])
                            best_time_s[global_r] = _finite_min(dur_s[local_r, cols])

                        time.sleep(BATCH_DELAY)

            # 4) Write category result for this chunk
            chunk[f"nearest_{cat}_dist_m"] = best_dist
            chunk[f"nearest_{cat}_time_mins"] = (best_time_s / 60.0)  # seconds -> minutes

        # Append chunk to CSV; the header goes in only when the file is new or empty.
        write_header = (not os.path.exists(output_csv)) or os.path.getsize(output_csv) == 0
        chunk.to_csv(output_csv, index=False, mode="a", header=write_header)
        print(f"✅ Saved chunk {ch_i+1} -> {output_csv}")


In [86]:
# Run the nearest education/health drive-metric computation for the domain rows from
# positional row 7470 onward, writing the results to nearest_drive_metrics2.csv.
compute_nearest_drive_metrics_chunked(
    domain_gdf=domain_gdf,
    foi_gdf=foi_gdf,
    output_csv="nearest_drive_metrics2.csv",
    chunk_size=30,          # adjust to fit quota/limits
    start_row=7470,            # positional row to start from (0 = first row)
    limit_radius_m=15000    # optional cap (15 km straight-line for candidates)
)

Processing chunk 1 (rows 7470–7499), size=30
✅ Saved chunk 1 -> nearest_drive_metrics2.csv
Processing chunk 2 (rows 7500–7529), size=30
✅ Saved chunk 2 -> nearest_drive_metrics2.csv
Processing chunk 3 (rows 7530–7559), size=30
✅ Saved chunk 3 -> nearest_drive_metrics2.csv
Processing chunk 4 (rows 7560–7589), size=30
✅ Saved chunk 4 -> nearest_drive_metrics2.csv
Processing chunk 5 (rows 7590–7619), size=30
✅ Saved chunk 5 -> nearest_drive_metrics2.csv
Processing chunk 6 (rows 7620–7649), size=30
✅ Saved chunk 6 -> nearest_drive_metrics2.csv
Processing chunk 7 (rows 7650–7679), size=30
✅ Saved chunk 7 -> nearest_drive_metrics2.csv
Processing chunk 8 (rows 7680–7709), size=30
✅ Saved chunk 8 -> nearest_drive_metrics2.csv
Processing chunk 9 (rows 7710–7739), size=30
✅ Saved chunk 9 -> nearest_drive_metrics2.csv
Processing chunk 10 (rows 7740–7769), size=30
✅ Saved chunk 10 -> nearest_drive_metrics2.csv
Processing chunk 11 (rows 7770–7799), size=30




✅ Saved chunk 11 -> nearest_drive_metrics2.csv
Processing chunk 12 (rows 7800–7829), size=30
✅ Saved chunk 12 -> nearest_drive_metrics2.csv
Processing chunk 13 (rows 7830–7859), size=30
✅ Saved chunk 13 -> nearest_drive_metrics2.csv
Processing chunk 14 (rows 7860–7889), size=30
✅ Saved chunk 14 -> nearest_drive_metrics2.csv
Processing chunk 15 (rows 7890–7919), size=30
✅ Saved chunk 15 -> nearest_drive_metrics2.csv
Processing chunk 16 (rows 7920–7949), size=30
✅ Saved chunk 16 -> nearest_drive_metrics2.csv
Processing chunk 17 (rows 7950–7979), size=30
✅ Saved chunk 17 -> nearest_drive_metrics2.csv
Processing chunk 18 (rows 7980–8009), size=30
✅ Saved chunk 18 -> nearest_drive_metrics2.csv
Processing chunk 19 (rows 8010–8039), size=30
✅ Saved chunk 19 -> nearest_drive_metrics2.csv
Processing chunk 20 (rows 8040–8069), size=30
✅ Saved chunk 20 -> nearest_drive_metrics2.csv
Processing chunk 21 (rows 8070–8099), size=30
✅ Saved chunk 21 -> nearest_drive_metrics2.csv
Processing chunk 22 (ro



✅ Saved chunk 42 -> nearest_drive_metrics2.csv
Processing chunk 43 (rows 8730–8759), size=30




✅ Saved chunk 43 -> nearest_drive_metrics2.csv
Processing chunk 44 (rows 8760–8789), size=30




✅ Saved chunk 44 -> nearest_drive_metrics2.csv
Processing chunk 45 (rows 8790–8819), size=30
✅ Saved chunk 45 -> nearest_drive_metrics2.csv
Processing chunk 46 (rows 8820–8849), size=30
✅ Saved chunk 46 -> nearest_drive_metrics2.csv
Processing chunk 47 (rows 8850–8879), size=30
✅ Saved chunk 47 -> nearest_drive_metrics2.csv
Processing chunk 48 (rows 8880–8909), size=30
✅ Saved chunk 48 -> nearest_drive_metrics2.csv
Processing chunk 49 (rows 8910–8939), size=30
✅ Saved chunk 49 -> nearest_drive_metrics2.csv
Processing chunk 50 (rows 8940–8969), size=30
✅ Saved chunk 50 -> nearest_drive_metrics2.csv
Processing chunk 51 (rows 8970–8999), size=30
✅ Saved chunk 51 -> nearest_drive_metrics2.csv
Processing chunk 52 (rows 9000–9029), size=30
✅ Saved chunk 52 -> nearest_drive_metrics2.csv
Processing chunk 53 (rows 9030–9059), size=30
✅ Saved chunk 53 -> nearest_drive_metrics2.csv
Processing chunk 54 (rows 9060–9089), size=30
✅ Saved chunk 54 -> nearest_drive_metrics2.csv
Processing chunk 55 (ro



✅ Saved chunk 78 -> nearest_drive_metrics2.csv
Processing chunk 79 (rows 9810–9839), size=30
✅ Saved chunk 79 -> nearest_drive_metrics2.csv
Processing chunk 80 (rows 9840–9869), size=30
✅ Saved chunk 80 -> nearest_drive_metrics2.csv
Processing chunk 81 (rows 9870–9899), size=30
✅ Saved chunk 81 -> nearest_drive_metrics2.csv
Processing chunk 82 (rows 9900–9929), size=30
✅ Saved chunk 82 -> nearest_drive_metrics2.csv
Processing chunk 83 (rows 9930–9959), size=30
✅ Saved chunk 83 -> nearest_drive_metrics2.csv
Processing chunk 84 (rows 9960–9989), size=30
✅ Saved chunk 84 -> nearest_drive_metrics2.csv
Processing chunk 85 (rows 9990–10019), size=30
✅ Saved chunk 85 -> nearest_drive_metrics2.csv
Processing chunk 86 (rows 10020–10049), size=30
✅ Saved chunk 86 -> nearest_drive_metrics2.csv
Processing chunk 87 (rows 10050–10079), size=30
✅ Saved chunk 87 -> nearest_drive_metrics2.csv
Processing chunk 88 (rows 10080–10109), size=30
✅ Saved chunk 88 -> nearest_drive_metrics2.csv
Processing chunk



✅ Saved chunk 89 -> nearest_drive_metrics2.csv
Processing chunk 90 (rows 10140–10169), size=30
✅ Saved chunk 90 -> nearest_drive_metrics2.csv
Processing chunk 91 (rows 10170–10199), size=30
✅ Saved chunk 91 -> nearest_drive_metrics2.csv
Processing chunk 92 (rows 10200–10229), size=30
✅ Saved chunk 92 -> nearest_drive_metrics2.csv
Processing chunk 93 (rows 10230–10259), size=30
✅ Saved chunk 93 -> nearest_drive_metrics2.csv
Processing chunk 94 (rows 10260–10289), size=30
✅ Saved chunk 94 -> nearest_drive_metrics2.csv
Processing chunk 95 (rows 10290–10319), size=30
✅ Saved chunk 95 -> nearest_drive_metrics2.csv
Processing chunk 96 (rows 10320–10349), size=30
✅ Saved chunk 96 -> nearest_drive_metrics2.csv
Processing chunk 97 (rows 10350–10379), size=30
✅ Saved chunk 97 -> nearest_drive_metrics2.csv
Processing chunk 98 (rows 10380–10409), size=30
✅ Saved chunk 98 -> nearest_drive_metrics2.csv
Processing chunk 99 (rows 10410–10439), size=30




✅ Saved chunk 99 -> nearest_drive_metrics2.csv
Processing chunk 100 (rows 10440–10469), size=30
✅ Saved chunk 100 -> nearest_drive_metrics2.csv
Processing chunk 101 (rows 10470–10499), size=30
✅ Saved chunk 101 -> nearest_drive_metrics2.csv
Processing chunk 102 (rows 10500–10529), size=30
✅ Saved chunk 102 -> nearest_drive_metrics2.csv
Processing chunk 103 (rows 10530–10559), size=30
✅ Saved chunk 103 -> nearest_drive_metrics2.csv
Processing chunk 104 (rows 10560–10589), size=30
✅ Saved chunk 104 -> nearest_drive_metrics2.csv
Processing chunk 105 (rows 10590–10619), size=30
✅ Saved chunk 105 -> nearest_drive_metrics2.csv
Processing chunk 106 (rows 10620–10649), size=30
✅ Saved chunk 106 -> nearest_drive_metrics2.csv
Processing chunk 107 (rows 10650–10679), size=30
✅ Saved chunk 107 -> nearest_drive_metrics2.csv
Processing chunk 108 (rows 10680–10709), size=30
✅ Saved chunk 108 -> nearest_drive_metrics2.csv
Processing chunk 109 (rows 10710–10739), size=30
✅ Saved chunk 109 -> nearest_dr



✅ Saved chunk 115 -> nearest_drive_metrics2.csv
Processing chunk 116 (rows 10920–10949), size=30
✅ Saved chunk 116 -> nearest_drive_metrics2.csv
Processing chunk 117 (rows 10950–10979), size=30
✅ Saved chunk 117 -> nearest_drive_metrics2.csv
Processing chunk 118 (rows 10980–11009), size=30
✅ Saved chunk 118 -> nearest_drive_metrics2.csv
Processing chunk 119 (rows 11010–11039), size=30
✅ Saved chunk 119 -> nearest_drive_metrics2.csv
Processing chunk 120 (rows 11040–11069), size=30
✅ Saved chunk 120 -> nearest_drive_metrics2.csv
Processing chunk 121 (rows 11070–11099), size=30
✅ Saved chunk 121 -> nearest_drive_metrics2.csv
Processing chunk 122 (rows 11100–11129), size=30
✅ Saved chunk 122 -> nearest_drive_metrics2.csv
Processing chunk 123 (rows 11130–11159), size=30
✅ Saved chunk 123 -> nearest_drive_metrics2.csv
Processing chunk 124 (rows 11160–11189), size=30
✅ Saved chunk 124 -> nearest_drive_metrics2.csv
Processing chunk 125 (rows 11190–11219), size=30
✅ Saved chunk 125 -> nearest_d



✅ Saved chunk 146 -> nearest_drive_metrics2.csv
Processing chunk 147 (rows 11850–11879), size=30
✅ Saved chunk 147 -> nearest_drive_metrics2.csv
Processing chunk 148 (rows 11880–11909), size=30
✅ Saved chunk 148 -> nearest_drive_metrics2.csv
Processing chunk 149 (rows 11910–11939), size=30
✅ Saved chunk 149 -> nearest_drive_metrics2.csv
Processing chunk 150 (rows 11940–11969), size=30
✅ Saved chunk 150 -> nearest_drive_metrics2.csv
Processing chunk 151 (rows 11970–11999), size=30
✅ Saved chunk 151 -> nearest_drive_metrics2.csv
Processing chunk 152 (rows 12000–12029), size=30
✅ Saved chunk 152 -> nearest_drive_metrics2.csv
Processing chunk 153 (rows 12030–12059), size=30
✅ Saved chunk 153 -> nearest_drive_metrics2.csv
Processing chunk 154 (rows 12060–12089), size=30
✅ Saved chunk 154 -> nearest_drive_metrics2.csv
Processing chunk 155 (rows 12090–12119), size=30
✅ Saved chunk 155 -> nearest_drive_metrics2.csv
Processing chunk 156 (rows 12120–12149), size=30
✅ Saved chunk 156 -> nearest_d

# Calculating the drive time and distance to the CBD

In [None]:
import os
import time
import numpy as np
import geopandas as gpd
import openrouteservice as ors
from openrouteservice.exceptions import ApiError

# =========================
# CONFIG
# =========================
# Output file for the per-row CBD results.
OUTPUT_CSV = "domain_to_cbd.csv"

# Melbourne CBD as (lon, lat).
CBD_COORD = (144.9671, -37.8183)

# Element budget used to size each Matrix call.
ORS_ELEMENTS_LIMIT = 3_500

# Pause after each Matrix call (passed to time.sleep).
BATCH_DELAY = 2

# =========================
# UTILITIES
# =========================
def wait_until_matrix_metrics(origins, destinations, max_retries=None):
    """
    Call ORS Matrix for driving distance (meters) and duration (seconds).

    By default this retries indefinitely until success.

    Parameters:
        origins: list of (lon, lat) pairs used as matrix sources.
        destinations: list of (lon, lat) pairs used as matrix destinations.
        max_retries: optional cap on the number of failed attempts that are retried.
            None (the default) keeps retrying forever; with a number, the last error
            is re-raised once that many retries have been used up, so a request that
            can never succeed does not block the notebook indefinitely.

    Returns: (dist_m, dur_s)
    Raises: the last caught exception, only when max_retries is set and exhausted.
    """
    failures = 0
    while True:
        try:
            resp = client.distance_matrix(
                locations=origins + destinations,
                profile="driving-car",
                sources=list(range(len(origins))),
                destinations=list(range(len(origins), len(origins) + len(destinations))),
                metrics=["distance", "duration"]
            )
            dist = np.array(resp["distances"], dtype=float)
            dur  = np.array(resp["durations"], dtype=float)
            return dist, dur
        except ApiError as e:
            last_error = e
            msg = str(e)
            # Prefer the status carried by the exception: searching the message text
            # for "403"/"429" can match unrelated digits (e.g. inside a coordinate).
            status = getattr(e, "status", None)
            if status is not None:
                is_quota = str(status).strip() == "403"
                is_rate = str(status).strip() == "429"
            else:
                is_quota = "403" in msg
                is_rate = "429" in msg
            if is_quota or "Quota exceeded" in msg:
                print("⚠️ ORS daily quota exceeded. Waiting 1 hour...")
                wait_s = 60 * 60
            elif is_rate or "Rate limit" in msg:
                print("⚠️ ORS rate limit hit. Waiting 10s...")
                wait_s = 10
            else:
                print(f"⚠️ ORS error: {msg}. Retrying in 5s...")
                wait_s = 5
        except Exception as e:
            last_error = e
            print(f"⚠️ Unexpected error: {e}. Waiting 10s...")
            wait_s = 10

        failures += 1
        if max_retries is not None and failures > max_retries:
            raise last_error
        time.sleep(wait_s)

def split_gdf(gdf, chunk_size=30, start=0):
    """Yield copies of consecutive row slices of `gdf`, `chunk_size` rows at a time, from row `start`."""
    row_count = len(gdf)
    for first in range(start, row_count, chunk_size):
        window = slice(first, first + chunk_size)
        yield gdf.iloc[window].copy()

# =========================
# CORE
# =========================
def compute_domain_to_cbd(
    domain_gdf: gpd.GeoDataFrame,
    output_csv: str = OUTPUT_CSV,
    chunk_size: int = 30,
    start_row: int = 0
):
    """
    For each domain row, compute driving distance (m) and time (mins)
    from domain location to Melbourne CBD.

    Results are written as columns cbd_dist_m and cbd_time_mins, and each processed
    chunk is appended to output_csv.

    Parameters:
        domain_gdf: origins; reprojected to EPSG:4326 internally.
        output_csv: CSV that each processed chunk is appended to.
        chunk_size: number of domain rows per chunk.
        start_row: positional row to start from.

    Output file handling:
        - start_row == 0 is a fresh run: an existing output_csv is deleted first.
        - start_row > 0 is a resume: an existing output_csv is kept and appended to
          (previously it was deleted, discarding earlier results). Running the same
          resume twice therefore appends the rows twice; delete the file first to redo it.
        The header row is written only when the file is new or empty.

    Rows with a missing lon or lat get NaN results and are not sent to ORS.
    """
    domain = domain_gdf.to_crs(epsg=4326).copy()

    if "lon" not in domain.columns or "lat" not in domain.columns:
        domain["lon"] = domain.geometry.x
        domain["lat"] = domain.geometry.y

    # Fresh output only for a run that starts at the first row; a resume keeps the file.
    if start_row == 0 and os.path.exists(output_csv):
        os.remove(output_csv)

    dest_list = [CBD_COORD]
    sources_per_call = max(1, ORS_ELEMENTS_LIMIT // len(dest_list))

    for ch_i, chunk in enumerate(split_gdf(domain, chunk_size=chunk_size, start=start_row)):
        print(f"Processing chunk {ch_i+1} (rows {chunk.index.min()}–{chunk.index.max()}), size={len(chunk)}")

        o_lon = chunk["lon"].to_numpy(dtype=float)
        o_lat = chunk["lat"].to_numpy(dtype=float)
        n_o = len(chunk)

        best_dist = np.full(n_o, np.nan, dtype=float)
        best_time_s = np.full(n_o, np.nan, dtype=float)

        # Only rows with usable coordinates are routed: a NaN coordinate can never
        # produce a valid request and would keep the retry loop spinning.
        routable = np.flatnonzero(np.isfinite(o_lon) & np.isfinite(o_lat))

        # batch calls
        for lo in range(0, len(routable), sources_per_call):
            batch_rows = routable[lo:lo + sources_per_call]
            sub_origins = [(float(o_lon[r]), float(o_lat[r])) for r in batch_rows]

            dist_m, dur_s = wait_until_matrix_metrics(sub_origins, dest_list)

            # Single destination, so column 0 holds the value for the CBD.
            best_dist[batch_rows] = dist_m[:, 0]
            best_time_s[batch_rows] = dur_s[:, 0]

            time.sleep(BATCH_DELAY)

        chunk["cbd_dist_m"] = best_dist
        chunk["cbd_time_mins"] = (best_time_s / 60.0)  # seconds -> minutes

        # Append to CSV; the header goes in only when the file is new or empty.
        write_header = (not os.path.exists(output_csv)) or os.path.getsize(output_csv) == 0
        chunk.to_csv(output_csv, index=False, mode="a", header=write_header)
        print(f"✅ Saved chunk {ch_i+1} -> {output_csv}")


In [89]:
# Compute the driving distance and time to the Melbourne CBD for every domain row,
# starting from the first row and writing the results to domain_to_cbd.csv.

compute_domain_to_cbd(
    domain_gdf=domain_gdf,
    output_csv="domain_to_cbd.csv",
    chunk_size=30,
    start_row=0
)


Processing chunk 1 (rows 0–29), size=30
✅ Saved chunk 1 -> domain_to_cbd.csv
Processing chunk 2 (rows 30–59), size=30
✅ Saved chunk 2 -> domain_to_cbd.csv
Processing chunk 3 (rows 60–89), size=30
✅ Saved chunk 3 -> domain_to_cbd.csv
Processing chunk 4 (rows 90–119), size=30
✅ Saved chunk 4 -> domain_to_cbd.csv
Processing chunk 5 (rows 120–149), size=30
✅ Saved chunk 5 -> domain_to_cbd.csv
Processing chunk 6 (rows 150–179), size=30
✅ Saved chunk 6 -> domain_to_cbd.csv
Processing chunk 7 (rows 180–209), size=30
✅ Saved chunk 7 -> domain_to_cbd.csv
Processing chunk 8 (rows 210–239), size=30
✅ Saved chunk 8 -> domain_to_cbd.csv
Processing chunk 9 (rows 240–269), size=30
✅ Saved chunk 9 -> domain_to_cbd.csv
Processing chunk 10 (rows 270–299), size=30
✅ Saved chunk 10 -> domain_to_cbd.csv
Processing chunk 11 (rows 300–329), size=30
✅ Saved chunk 11 -> domain_to_cbd.csv
Processing chunk 12 (rows 330–359), size=30
✅ Saved chunk 12 -> domain_to_cbd.csv
Processing chunk 13 (rows 360–389), size=3



✅ Saved chunk 41 -> domain_to_cbd.csv
Processing chunk 42 (rows 1230–1259), size=30
✅ Saved chunk 42 -> domain_to_cbd.csv
Processing chunk 43 (rows 1260–1289), size=30
✅ Saved chunk 43 -> domain_to_cbd.csv
Processing chunk 44 (rows 1290–1319), size=30
✅ Saved chunk 44 -> domain_to_cbd.csv
Processing chunk 45 (rows 1320–1349), size=30
✅ Saved chunk 45 -> domain_to_cbd.csv
Processing chunk 46 (rows 1350–1379), size=30
✅ Saved chunk 46 -> domain_to_cbd.csv
Processing chunk 47 (rows 1380–1409), size=30
✅ Saved chunk 47 -> domain_to_cbd.csv
Processing chunk 48 (rows 1410–1439), size=30
✅ Saved chunk 48 -> domain_to_cbd.csv
Processing chunk 49 (rows 1440–1469), size=30
✅ Saved chunk 49 -> domain_to_cbd.csv
Processing chunk 50 (rows 1470–1499), size=30
✅ Saved chunk 50 -> domain_to_cbd.csv
Processing chunk 51 (rows 1500–1529), size=30
✅ Saved chunk 51 -> domain_to_cbd.csv
Processing chunk 52 (rows 1530–1559), size=30
✅ Saved chunk 52 -> domain_to_cbd.csv
Processing chunk 53 (rows 1560–1589), 



✅ Saved chunk 66 -> domain_to_cbd.csv
Processing chunk 67 (rows 1980–2009), size=30
✅ Saved chunk 67 -> domain_to_cbd.csv
Processing chunk 68 (rows 2010–2039), size=30
✅ Saved chunk 68 -> domain_to_cbd.csv
Processing chunk 69 (rows 2040–2069), size=30
✅ Saved chunk 69 -> domain_to_cbd.csv
Processing chunk 70 (rows 2070–2099), size=30
✅ Saved chunk 70 -> domain_to_cbd.csv
Processing chunk 71 (rows 2100–2129), size=30
✅ Saved chunk 71 -> domain_to_cbd.csv
Processing chunk 72 (rows 2130–2159), size=30
✅ Saved chunk 72 -> domain_to_cbd.csv
Processing chunk 73 (rows 2160–2189), size=30
✅ Saved chunk 73 -> domain_to_cbd.csv
Processing chunk 74 (rows 2190–2219), size=30
✅ Saved chunk 74 -> domain_to_cbd.csv
Processing chunk 75 (rows 2220–2249), size=30
✅ Saved chunk 75 -> domain_to_cbd.csv
Processing chunk 76 (rows 2250–2279), size=30
✅ Saved chunk 76 -> domain_to_cbd.csv
Processing chunk 77 (rows 2280–2309), size=30
✅ Saved chunk 77 -> domain_to_cbd.csv
Processing chunk 78 (rows 2310–2339), 



✅ Saved chunk 115 -> domain_to_cbd.csv
Processing chunk 116 (rows 3450–3479), size=30
✅ Saved chunk 116 -> domain_to_cbd.csv
Processing chunk 117 (rows 3480–3509), size=30
✅ Saved chunk 117 -> domain_to_cbd.csv
Processing chunk 118 (rows 3510–3539), size=30
✅ Saved chunk 118 -> domain_to_cbd.csv
Processing chunk 119 (rows 3540–3569), size=30
✅ Saved chunk 119 -> domain_to_cbd.csv
Processing chunk 120 (rows 3570–3599), size=30
✅ Saved chunk 120 -> domain_to_cbd.csv
Processing chunk 121 (rows 3600–3629), size=30
✅ Saved chunk 121 -> domain_to_cbd.csv
Processing chunk 122 (rows 3630–3659), size=30
✅ Saved chunk 122 -> domain_to_cbd.csv
Processing chunk 123 (rows 3660–3689), size=30
✅ Saved chunk 123 -> domain_to_cbd.csv
Processing chunk 124 (rows 3690–3719), size=30
✅ Saved chunk 124 -> domain_to_cbd.csv
Processing chunk 125 (rows 3720–3749), size=30
✅ Saved chunk 125 -> domain_to_cbd.csv
Processing chunk 126 (rows 3750–3779), size=30
✅ Saved chunk 126 -> domain_to_cbd.csv
Processing chun



✅ Saved chunk 163 -> domain_to_cbd.csv
Processing chunk 164 (rows 4890–4919), size=30
✅ Saved chunk 164 -> domain_to_cbd.csv
Processing chunk 165 (rows 4920–4949), size=30
✅ Saved chunk 165 -> domain_to_cbd.csv
Processing chunk 166 (rows 4950–4979), size=30
✅ Saved chunk 166 -> domain_to_cbd.csv
Processing chunk 167 (rows 4980–5009), size=30
✅ Saved chunk 167 -> domain_to_cbd.csv
Processing chunk 168 (rows 5010–5039), size=30
✅ Saved chunk 168 -> domain_to_cbd.csv
Processing chunk 169 (rows 5040–5069), size=30
✅ Saved chunk 169 -> domain_to_cbd.csv
Processing chunk 170 (rows 5070–5099), size=30
✅ Saved chunk 170 -> domain_to_cbd.csv
Processing chunk 171 (rows 5100–5129), size=30
✅ Saved chunk 171 -> domain_to_cbd.csv
Processing chunk 172 (rows 5130–5159), size=30
✅ Saved chunk 172 -> domain_to_cbd.csv
Processing chunk 173 (rows 5160–5189), size=30
✅ Saved chunk 173 -> domain_to_cbd.csv
Processing chunk 174 (rows 5190–5219), size=30
✅ Saved chunk 174 -> domain_to_cbd.csv
Processing chun



✅ Saved chunk 203 -> domain_to_cbd.csv
Processing chunk 204 (rows 6090–6119), size=30
✅ Saved chunk 204 -> domain_to_cbd.csv
Processing chunk 205 (rows 6120–6149), size=30
✅ Saved chunk 205 -> domain_to_cbd.csv
Processing chunk 206 (rows 6150–6179), size=30
✅ Saved chunk 206 -> domain_to_cbd.csv
Processing chunk 207 (rows 6180–6209), size=30
✅ Saved chunk 207 -> domain_to_cbd.csv
Processing chunk 208 (rows 6210–6239), size=30
✅ Saved chunk 208 -> domain_to_cbd.csv
Processing chunk 209 (rows 6240–6269), size=30
✅ Saved chunk 209 -> domain_to_cbd.csv
Processing chunk 210 (rows 6270–6299), size=30
✅ Saved chunk 210 -> domain_to_cbd.csv
Processing chunk 211 (rows 6300–6329), size=30
✅ Saved chunk 211 -> domain_to_cbd.csv
Processing chunk 212 (rows 6330–6359), size=30
✅ Saved chunk 212 -> domain_to_cbd.csv
Processing chunk 213 (rows 6360–6389), size=30
✅ Saved chunk 213 -> domain_to_cbd.csv
Processing chunk 214 (rows 6390–6419), size=30
✅ Saved chunk 214 -> domain_to_cbd.csv
Processing chun



✅ Saved chunk 260 -> domain_to_cbd.csv
Processing chunk 261 (rows 7800–7829), size=30
✅ Saved chunk 261 -> domain_to_cbd.csv
Processing chunk 262 (rows 7830–7859), size=30
✅ Saved chunk 262 -> domain_to_cbd.csv
Processing chunk 263 (rows 7860–7889), size=30
✅ Saved chunk 263 -> domain_to_cbd.csv
Processing chunk 264 (rows 7890–7919), size=30
✅ Saved chunk 264 -> domain_to_cbd.csv
Processing chunk 265 (rows 7920–7949), size=30
✅ Saved chunk 265 -> domain_to_cbd.csv
Processing chunk 266 (rows 7950–7979), size=30
✅ Saved chunk 266 -> domain_to_cbd.csv
Processing chunk 267 (rows 7980–8009), size=30
✅ Saved chunk 267 -> domain_to_cbd.csv
Processing chunk 268 (rows 8010–8039), size=30
✅ Saved chunk 268 -> domain_to_cbd.csv
Processing chunk 269 (rows 8040–8069), size=30
✅ Saved chunk 269 -> domain_to_cbd.csv
Processing chunk 270 (rows 8070–8099), size=30
✅ Saved chunk 270 -> domain_to_cbd.csv
Processing chunk 271 (rows 8100–8129), size=30
✅ Saved chunk 271 -> domain_to_cbd.csv
Processing chun



✅ Saved chunk 287 -> domain_to_cbd.csv
Processing chunk 288 (rows 8610–8639), size=30
✅ Saved chunk 288 -> domain_to_cbd.csv
Processing chunk 289 (rows 8640–8669), size=30
✅ Saved chunk 289 -> domain_to_cbd.csv
Processing chunk 290 (rows 8670–8699), size=30
✅ Saved chunk 290 -> domain_to_cbd.csv
Processing chunk 291 (rows 8700–8729), size=30
✅ Saved chunk 291 -> domain_to_cbd.csv
Processing chunk 292 (rows 8730–8759), size=30
✅ Saved chunk 292 -> domain_to_cbd.csv
Processing chunk 293 (rows 8760–8789), size=30
✅ Saved chunk 293 -> domain_to_cbd.csv
Processing chunk 294 (rows 8790–8819), size=30
✅ Saved chunk 294 -> domain_to_cbd.csv
Processing chunk 295 (rows 8820–8849), size=30
✅ Saved chunk 295 -> domain_to_cbd.csv
Processing chunk 296 (rows 8850–8879), size=30




✅ Saved chunk 296 -> domain_to_cbd.csv
Processing chunk 297 (rows 8880–8909), size=30
✅ Saved chunk 297 -> domain_to_cbd.csv
Processing chunk 298 (rows 8910–8939), size=30
✅ Saved chunk 298 -> domain_to_cbd.csv
Processing chunk 299 (rows 8940–8969), size=30
✅ Saved chunk 299 -> domain_to_cbd.csv
Processing chunk 300 (rows 8970–8999), size=30
✅ Saved chunk 300 -> domain_to_cbd.csv
Processing chunk 301 (rows 9000–9029), size=30
✅ Saved chunk 301 -> domain_to_cbd.csv
Processing chunk 302 (rows 9030–9059), size=30
✅ Saved chunk 302 -> domain_to_cbd.csv
Processing chunk 303 (rows 9060–9089), size=30
✅ Saved chunk 303 -> domain_to_cbd.csv
Processing chunk 304 (rows 9090–9119), size=30
✅ Saved chunk 304 -> domain_to_cbd.csv
Processing chunk 305 (rows 9120–9149), size=30
✅ Saved chunk 305 -> domain_to_cbd.csv
Processing chunk 306 (rows 9150–9179), size=30
✅ Saved chunk 306 -> domain_to_cbd.csv
Processing chunk 307 (rows 9180–9209), size=30
✅ Saved chunk 307 -> domain_to_cbd.csv
Processing chun



✅ Saved chunk 341 -> domain_to_cbd.csv
Processing chunk 342 (rows 10230–10259), size=30
✅ Saved chunk 342 -> domain_to_cbd.csv
Processing chunk 343 (rows 10260–10289), size=30
✅ Saved chunk 343 -> domain_to_cbd.csv
Processing chunk 344 (rows 10290–10319), size=30
✅ Saved chunk 344 -> domain_to_cbd.csv
Processing chunk 345 (rows 10320–10349), size=30
✅ Saved chunk 345 -> domain_to_cbd.csv
Processing chunk 346 (rows 10350–10379), size=30
✅ Saved chunk 346 -> domain_to_cbd.csv
Processing chunk 347 (rows 10380–10409), size=30
✅ Saved chunk 347 -> domain_to_cbd.csv
Processing chunk 348 (rows 10410–10439), size=30
✅ Saved chunk 348 -> domain_to_cbd.csv
Processing chunk 349 (rows 10440–10469), size=30
✅ Saved chunk 349 -> domain_to_cbd.csv
Processing chunk 350 (rows 10470–10499), size=30
✅ Saved chunk 350 -> domain_to_cbd.csv
Processing chunk 351 (rows 10500–10529), size=30
✅ Saved chunk 351 -> domain_to_cbd.csv
Processing chunk 352 (rows 10530–10559), size=30
✅ Saved chunk 352 -> domain_to_c



✅ Saved chunk 413 -> domain_to_cbd.csv
Processing chunk 414 (rows 12390–12419), size=30
✅ Saved chunk 414 -> domain_to_cbd.csv
Processing chunk 415 (rows 12420–12449), size=30




✅ Saved chunk 415 -> domain_to_cbd.csv
Processing chunk 416 (rows 12450–12479), size=30
✅ Saved chunk 416 -> domain_to_cbd.csv
Processing chunk 417 (rows 12480–12509), size=30
✅ Saved chunk 417 -> domain_to_cbd.csv
Processing chunk 418 (rows 12510–12539), size=30
✅ Saved chunk 418 -> domain_to_cbd.csv
Processing chunk 419 (rows 12540–12569), size=30
✅ Saved chunk 419 -> domain_to_cbd.csv
Processing chunk 420 (rows 12570–12599), size=30
✅ Saved chunk 420 -> domain_to_cbd.csv
Processing chunk 421 (rows 12600–12617), size=18
✅ Saved chunk 421 -> domain_to_cbd.csv
