### Services & Amenities near Properties

In [6]:
import openrouteservice as ors
import pandas as pd
import numpy as np
import folium 
import json
import csv
import time
import requests

In [3]:
# Load domain data
# Reads the cleaned domain CSV into a DataFrame. The path is relative, so it
# resolves against the notebook's working directory.
domain_df = pd.read_csv("../../datasets/raw/cleaned/domain_cleaned.csv")
# Bare last expression: renders the (truncated) frame as the cell output.
domain_df

Unnamed: 0,sa2_code,sa2_name,suburb,postcode,weekly_rent,bond,address,lat,lon,bedrooms,...,ensuite,dishwasher,garden,gym,pets_allowed,gas,intercom,security_system,washing_machine,median_weekly_rent_sa2
0,213021344,Newport,SOUTH KINGSVILLE,3015,460.0,1994.0,3/53 Greene Street,-37.830982,144.87091,2,...,0,0,0,0,1,0,0,0,0,650.0
1,213021344,Newport,SOUTH KINGSVILLE,3015,400.0,1738.0,1/3 New Street,-37.826218,144.86755,2,...,0,0,0,0,1,0,0,0,1,650.0
2,213021343,Altona North,SOUTH KINGSVILLE,3015,795.0,3454.0,19/92 New Street,-37.831226,144.86632,3,...,1,1,0,0,1,1,0,0,1,670.0
3,213021344,Newport,SOUTH KINGSVILLE,3015,675.0,2933.0,3/14 Saltley Street,-37.827423,144.86768,3,...,0,1,0,0,0,0,0,0,0,650.0
4,213021344,Newport,SOUTH KINGSVILLE,3015,450.0,1955.0,4/2B Saltley Street,-37.826270,144.86790,2,...,0,0,0,0,0,0,0,0,0,650.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12613,208011169,Brighton (Vic.),BRIGHTON,3186,1575.0,6300.0,,-37.912884,144.99155,2,...,1,1,0,0,1,0,0,0,1,1600.0
12614,208011169,Brighton (Vic.),BRIGHTON,3186,2625.0,10500.0,2/71 Roslyn Street,-37.922750,145.00224,4,...,0,0,0,0,0,0,0,0,0,1600.0
12615,208011169,Brighton (Vic.),BRIGHTON,3186,2200.0,13200.0,23 Bay Street,-37.903280,144.98697,5,...,0,0,0,0,0,0,0,0,0,1600.0
12616,208011169,Brighton (Vic.),BRIGHTON,3186,1390.0,8340.0,2/7B Wilson Street,-37.909650,144.99810,3,...,0,0,0,0,0,0,0,0,0,1600.0


In [16]:
import requests
import pandas as pd
import time
import os
from geopy.distance import geodesic

# --- Configuration ---
# Overpass tag filters, keyed by the amenity category written to the output.
OSM_TAGS = dict(
    supermarket=['["shop"="supermarket"]'],
    train_station=['["railway"="station"]'],
)

AMENITY_TYPES = [*OSM_TAGS]
BATCH_SIZE = 100       # properties handled between two CSV checkpoints
SEARCH_RADIUS = 3000   # value passed to the Overpass `around:` filter
OUTPUT_FILE = '../../datasets/property/property_nearest_amenities.csv'

# --- Input data ---
domain_df = pd.read_csv("../../datasets/raw/cleaned/domain_cleaned.csv")

# --- Resume support ---
# If a previous run left an output file behind, rebuild the set of property
# coordinates it already covers so those properties are skipped this time.
if not os.path.exists(OUTPUT_FILE):
    processed_coords = set()
    rows = []
else:
    summary_df = pd.read_csv(OUTPUT_FILE)
    rows = summary_df.to_dict('records')
    processed_coords = {
        (p_lat, p_lon)
        for p_lat, p_lon in zip(summary_df['Property_Lat'], summary_df['Property_Lon'])
    }
    print(f"Resuming. {len(processed_coords)} properties already processed.")

# Function to query OSM and get nearest amenity
def get_nearest_coordinates(lat, lon, amenity_type, max_attempts=5):
    """Return the coordinates of the closest OSM amenity of the given type.

    For every Overpass tag filter registered under ``OSM_TAGS[amenity_type]``,
    queries nodes, ways and relations within ``SEARCH_RADIUS`` of the point
    and keeps the element with the smallest geodesic distance to it.

    Args:
        lat: Latitude of the property.
        lon: Longitude of the property.
        amenity_type: Key into ``OSM_TAGS`` selecting the tag filters to use.
        max_attempts: How many times each query is tried before giving up.

    Returns:
        ``(amenity_lat, amenity_lon)`` of the nearest match, or
        ``(None, None)`` when the queries succeeded but matched nothing.

    Raises:
        requests.exceptions.HTTPError: On an HTTP error status other than
            429 or 504 (not retried).
        requests.exceptions.RetryError: When a query still fails after
            ``max_attempts`` tries. Raised instead of returning
            ``(None, None)`` so that an API outage is never recorded as
            "no amenity nearby".
        KeyError: If ``amenity_type`` is not a key of ``OSM_TAGS``.
    """
    origin = (lat, lon)
    nearest = None
    min_dist = float('inf')

    for tag in OSM_TAGS[amenity_type]:
        query = f"""
        [out:json];
        (
          node{tag}(around:{SEARCH_RADIUS},{lat},{lon});
          way{tag}(around:{SEARCH_RADIUS},{lat},{lon});
          relation{tag}(around:{SEARCH_RADIUS},{lat},{lon});
        );
        out center;
        """
        last_error = None
        for attempt in range(max_attempts):
            try:
                response = requests.get(
                    'http://overpass-api.de/api/interpreter',
                    params={'data': query},
                    timeout=60
                )
                response.raise_for_status()
                elements = response.json().get('elements', [])
            except requests.exceptions.HTTPError as e:
                # Read the status from the exception itself rather than from
                # the enclosing `response` variable.
                status = e.response.status_code if e.response is not None else None
                if status not in (429, 504):
                    raise
                last_error = e
                wait = 5 * (attempt + 1)  # linear back-off
                print(f"Rate limit/timeout for {amenity_type} at ({lat},{lon}), retry in {wait}s...")
                time.sleep(wait)
                continue
            except requests.exceptions.RequestException as e:
                last_error = e
                print(f"Request error for {amenity_type} at ({lat},{lon}): {e}, retrying in 5s...")
                time.sleep(5)
                continue

            for a in elements:
                # Nodes carry lat/lon directly; ways and relations only expose
                # a position through the `center` produced by `out center`.
                if 'lat' in a and 'lon' in a:
                    a_lat, a_lon = a['lat'], a['lon']
                elif 'center' in a:
                    a_lat, a_lon = a['center']['lat'], a['center']['lon']
                else:
                    continue

                dist = geodesic(origin, (a_lat, a_lon)).meters
                if dist < min_dist:
                    min_dist = dist
                    nearest = (a_lat, a_lon)
            break
        else:
            # Every attempt failed: surface it instead of silently reporting
            # that no amenity exists near this property.
            raise requests.exceptions.RetryError(
                f"Overpass query for {amenity_type} at ({lat},{lon}) "
                f"failed after {max_attempts} attempts"
            ) from last_error

    return nearest if nearest else (None, None)

# Process properties in batches, checkpointing every batch to OUTPUT_FILE so
# an interrupted run can be resumed.

# Fixed column order: keeps appended chunks aligned with the header.
OUTPUT_COLUMNS = [
    'Property_Lat', 'Property_Lon', 'Address',
    'Amenity_Type', 'Amenity_Lat', 'Amenity_Lon',
]

# Create the output directory up front so the first checkpoint cannot fail
# after a whole batch of API calls has already been made.
output_dir = os.path.dirname(OUTPUT_FILE)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

for start in range(0, len(domain_df), BATCH_SIZE):
    batch = domain_df.iloc[start:start + BATCH_SIZE]
    print(f"\nProcessing properties {start + 1} to {start + len(batch)}")

    batch_rows = []
    for _, row in batch.iterrows():
        lat = row['lat']
        lon = row['lon']
        address = row.get('address', '')

        if (lat, lon) in processed_coords:
            continue

        property_rows = []
        for amenity_type in AMENITY_TYPES:
            try:
                amenity_lat, amenity_lon = get_nearest_coordinates(lat, lon, amenity_type)
            except Exception as e:
                print(f"Error processing ({lat},{lon}) for {amenity_type}: {e}")
                continue
            property_rows.append({
                'Property_Lat': lat,
                'Property_Lon': lon,
                'Address': address,
                'Amenity_Type': amenity_type,
                'Amenity_Lat': amenity_lat,
                'Amenity_Lon': amenity_lon
            })
            time.sleep(1)  # pause between successive API calls

        # Commit a property only when every amenity lookup succeeded. The
        # resume logic keys on coordinates present in the CSV, so writing a
        # partial result would make a later run skip the missing amenity.
        if len(property_rows) == len(AMENITY_TYPES):
            batch_rows.extend(property_rows)
            processed_coords.add((lat, lon))

    # Append batch to CSV. Skip empty batches: an empty frame written to a
    # not-yet-existing file would create it without a header row.
    if batch_rows:
        batch_df = pd.DataFrame(batch_rows, columns=OUTPUT_COLUMNS)
        write_header = (
            not os.path.exists(OUTPUT_FILE) or os.path.getsize(OUTPUT_FILE) == 0
        )
        batch_df.to_csv(OUTPUT_FILE, index=False, mode='a', header=write_header)

    print(f"Batch saved. Total properties processed so far: {len(processed_coords)}")

print("\nAll properties processed. Data saved to:", OUTPUT_FILE)



Processing properties 1 to 100
Rate limit/timeout for train_station at (-37.82627,144.8679), retry in 5s...
Rate limit/timeout for train_station at (-37.73087,144.95424), retry in 5s...
Rate limit/timeout for supermarket at (-36.22527,145.5591), retry in 5s...
Rate limit/timeout for supermarket at (-37.82033,144.71889), retry in 5s...
Rate limit/timeout for train_station at (-37.8311,144.71599), retry in 5s...
Batch saved. Total properties processed so far: 100

Processing properties 101 to 200
Rate limit/timeout for train_station at (-37.76404,144.67564), retry in 5s...
Batch saved. Total properties processed so far: 200

Processing properties 201 to 300
Rate limit/timeout for train_station at (-37.76833,144.69543), retry in 5s...
Rate limit/timeout for supermarket at (-37.90359,145.04881), retry in 5s...
Rate limit/timeout for supermarket at (-37.903347,145.04514), retry in 5s...
Rate limit/timeout for train_station at (-37.90227,145.04028), retry in 5s...
Rate limit/timeout for tra