Imports

In [83]:
from pathlib import Path
import geopandas as gpd
import folium
from folium.plugins import MarkerCluster
import pandas as pd
# Folder holding the shapefiles read by this notebook; adjust if needed
DATA = Path("..", "datasets", "VMFEAT")

In [84]:
# Read the raw feature-of-interest (FOI) point layer from the DATA folder.
foi_points = gpd.read_file(DATA / "FOI_POINT.shp")  

In [None]:
# Load the two property tables that are merged with the per-SA2 FOI counts further down.
all_properties = pd.read_csv("../datasets/all_properties_tidy_enriched.csv")
domain_properties = pd.read_csv("../datasets/domain_cleaned.csv")

# Clean and preprocess the FOI points and add an SA2 code to each point

In [86]:
# Columns of the FOI layer that this notebook discards.
cols_to_drop = [
    "UFI",
    "PFI",
    "FEATURE_ID",
    "PARENTFTID",
    "SUPER_PFI",
    "CRDATE_PFI",
    "CRDATE_UFI",
    "FEATURE_UF",
    "FEATURE_CR",
    "NAME_LABEL",
    "PARENTNAME",
    "VICNMSTATC",
    "CHILDEXIST",
    "AUTHORGC",
    "AUTHORGID",
    "AUTHORGVER",
    "VMADD_PFI",
    "VICNAMESID",
    "THEME1",
    "THEME2",
    "FEATSTATUS",
]

# Drop whichever of those columns are present (absent ones are ignored), then keep
# only the rows whose STATE is "VIC", compared case-insensitively.
foi_points_clean = (
    foi_points
    .drop(columns=cols_to_drop, errors="ignore")
    .loc[lambda frame: frame["STATE"].str.upper().eq("VIC")]
    .copy()
)

In [87]:
from shapely.geometry import MultiPoint
def to_point(g):
    """Return the first member of a non-empty MultiPoint; return anything else unchanged.

    Empty MultiPoints and geometries of any other type are passed through as-is.
    """
    if not isinstance(g, MultiPoint):
        return g
    members = g.geoms
    if len(members) == 0:
        return g
    return members[0]
# Collapse MultiPoint geometries to a single Point so each FOI has one location.
foi_points_clean["geometry"] = foi_points_clean.geometry.apply(to_point)

# Load ABS SA2 2021 polygons
sa2 = gpd.read_file(DATA / "SA2_2021_AUST_GDA2020.shp")

# Ensure CRS match
foi_points_clean = foi_points_clean.to_crs(sa2.crs)

# If this cell is re-run, the SA2 columns from the previous run are still present and
# the join would emit suffixed *_left / *_right copies. Remove them first.
sa2_cols = ["SA2_CODE21", "SA2_NAME21"]
foi_points_clean = foi_points_clean.drop(columns=sa2_cols, errors="ignore")

# Spatial join: assign SA2 to each FOI
foi_points_clean = gpd.sjoin(
    foi_points_clean,
    sa2[sa2_cols + ["geometry"]],
    how="left",
    predicate="intersects",
).drop(columns=["index_right"])

# "intersects" matches a point lying exactly on a shared boundary to every SA2 that
# touches it, which duplicates that FOI and inflates the per-SA2 counts computed later.
# Keep one row per FOI (the first match returned by the join). This relies on the
# index being unique before the join, which holds for a frame read from file and
# then filtered.
foi_points_clean = foi_points_clean[
    ~foi_points_clean.index.duplicated(keep="first")
].copy()

In [88]:
# FOIs that fall inside no SA2 polygon are assigned to the SA2 whose representative
# point is closest.
# NOTE(review): the nearest representative point is not always the nearest SA2
# polygon; consider joining against the polygons themselves if that matters.
sa2_cols = ["SA2_CODE21", "SA2_NAME21"]
needs = foi_points_clean["SA2_CODE21"].isna()
if needs.any():
    unmatched = foi_points_clean.loc[needs, ["geometry"]]

    # sjoin_nearest measures distance in CRS units. In a geographic CRS that is
    # degrees, not metres, so project both sides to a metric UTM CRS first.
    metric_crs = unmatched.estimate_utm_crs()

    # Skip SA2 rows without a usable geometry; they cannot be "nearest" to anything.
    has_geometry = sa2.geometry.notna() & ~sa2.geometry.is_empty
    sa2_pts = sa2.loc[has_geometry, sa2_cols + ["geometry"]].copy()
    sa2_pts["geometry"] = sa2_pts.geometry.representative_point()

    fix = gpd.sjoin_nearest(
        unmatched.to_crs(metric_crs),
        sa2_pts.to_crs(metric_crs),
        how="left",
        distance_col="dist_to_sa2_m",
    )

    # Equidistant candidates produce one output row per tie. Keep a single row per
    # FOI and restore the original row order so the positional assignment below
    # lines up exactly with the rows selected by `needs`.
    fix = fix[~fix.index.duplicated(keep="first")].reindex(unmatched.index)
    foi_points_clean.loc[needs, sa2_cols] = fix[sa2_cols].to_numpy()




In [89]:
# FEATSUBTYP values that make up each coarse FOI group.
education = [
    'primary school',
    'secondary school',
    'primary/secondary school',
    'university',
]

health = [
    'maternal/child health centre',
    'community health centre',
    'day procedure centre',
    'disability support centre',
    'general hospital',
    'general hospital (emergency)',
    'bush nursing hospital',
    'ambulance station',
]

tourist = ['tourist information centre', 'tourist attraction']

cultural = ['church', 'mosque', 'monastry', 'vihara (buddhist)', 'mandir (hindu)']


def assign_group(category):
    """Return the coarse group label for a feature sub-type.

    Groups are checked in the order education, health, tourist, cultural.
    A value listed in none of them is labelled 'others'.
    """
    labelled_groups = (
        ('education', education),
        ('health', health),
        ('tourist', tourist),
        ('cultural', cultural),
    )
    for label, members in labelled_groups:
        if category in members:
            return label
    return 'others'

# Label every FOI with its coarse group, derived element-wise from FEATSUBTYP.
foi_points_clean['group'] = foi_points_clean['FEATSUBTYP'].map(assign_group)

In [90]:
# Inspect the cleaned FOI table, which now carries the SA2 code/name and the `group` label.
foi_points_clean

Unnamed: 0,FTYPE,FEATSUBTYP,NAME,STATE,geometry,SA2_CODE21,SA2_NAME21,group
0,control point,survey monument,,VIC,POINT (143.52433 -38.84588),217031476,Otway,others
1,control point,survey monument,,VIC,POINT (146.16829 -36.72986),204021064,Benalla Surrounds,others
2,control point,survey monument,,VIC,POINT (145.1764 -37.09963),204011060,Seymour Surrounds,others
3,control point,survey monument,,VIC,POINT (144.12032 -37.4246),201021011,Daylesford,others
4,control point,survey monument,,VIC,POINT (144.52872 -36.06312),216011408,Lockington - Gunbower,others
...,...,...,...,...,...,...,...,...
50672,place of worship,church,,VIC,POINT (146.38005 -36.57062),204021067,Wangaratta Surrounds,cultural
50673,place of worship,church,APSLEY CATHOLIC CHURCH,VIC,POINT (141.08392 -36.96737),215011393,West Wimmera,cultural
50675,community space,camp ground,LODDON FLOODWAY - MIDDLE BEND CAMPING AREA,VIC,POINT (143.69958 -35.3812),215031405,Swan Hill Surrounds,others
50676,health facility,maternal/child health centre,DIGGERS REST MATERNAL AND CHILD HEALTH,VIC,POINT (144.70974 -37.62036),210041539,Diggers Rest,health


In [91]:
# Number of FOIs per SA2 (rows) and group (columns); absent combinations become 0.
# Rows are counted with size() rather than by counting a placeholder column, so a
# FOI whose FTYPE is missing is still included in the totals.
pivot_counts = (
    foi_points_clean
    .groupby(["SA2_CODE21", "group"])
    .size()
    .unstack(fill_value=0)
    .reset_index()
)

print(pivot_counts.head())


group SA2_CODE21  cultural  education  health  others  tourist
0      101041023         0          0       0       1        0
1      109031180         0          0       0       1        0
2      109031181         0          0       0       2        0
3      109031183         0          0       0       2        0
4      109031185         0          0       0       3        0


In [92]:
# Show the per-SA2 FOI counts, one column per group.
pivot_counts

group,SA2_CODE21,cultural,education,health,others,tourist
0,101041023,0,0,0,1,0
1,109031180,0,0,0,1,0
2,109031181,0,0,0,2,0
3,109031183,0,0,0,2,0
4,109031185,0,0,0,3,0
...,...,...,...,...,...,...
522,217041477,16,6,1,375,0
523,217041478,16,6,3,552,2
524,217041479,7,13,1,108,0
525,217041480,11,8,3,181,2


In [None]:
# Save the per-SA2 FOI counts to CSV in the current working directory, without the
# row index. This runs before the group columns are renamed in the next cell, so the
# file keeps the raw group names as column headers.
pivot_counts.to_csv("pivot_counts.csv", index=False)

In [None]:
# Cast the SA2 key to float (the property tables below are merged on a float
# SA2_CODE21) and give each group column an explicit "*_foi_count" name.
pivot_counts = (
    pivot_counts
    .astype({"SA2_CODE21": float})
    .rename(
        columns={
            "cultural": "cultural_foi_count",
            "education": "education_foi_count",
            "health": "health_foi_count",
            "others": "others_foi_count",
            "tourist": "tourist_foi_count",
        }
    )
)


In [None]:
# Attach the per-SA2 FOI counts to every row of all_properties.
foi_count_cols = [col for col in pivot_counts.columns if col != "SA2_CODE21"]
all_properties = (
    all_properties
    # Remove counts attached by a previous run of this cell; otherwise a re-run
    # would create duplicated *_x / *_y columns.
    .drop(columns=foi_count_cols, errors="ignore")
    .merge(
        pivot_counts,
        on="SA2_CODE21",
        how="left",   # keeps all rows from all_properties, even if no match
        validate="many_to_one",  # fail loudly if an SA2 appears twice in pivot_counts
    )
)
all_properties

Unnamed: 0,Suburb,date,bedrooms,property_type,Count,Median,Lat,Lng,SA2_CODE21,SA2_NAME21,LGA_CODE21,LGA_NAME21,cultural_foi_count,education_foi_count,health_foi_count,others_foi_count,tourist_foi_count
0,Albert Park-Middle Park-West St Kilda,2000-03-01,1.0,flat,352,165,-37.853484,144.970161,206051128.0,Albert Park,25900.0,Port Phillip,4.0,7.0,2.0,103.0,12.0
1,Albert Park-Middle Park-West St Kilda,2000-03-01,2.0,flat,292,250,-37.853484,144.970161,206051128.0,Albert Park,25900.0,Port Phillip,4.0,7.0,2.0,103.0,12.0
2,Albert Park-Middle Park-West St Kilda,2000-03-01,2.0,house,172,300,-37.853484,144.970161,206051128.0,Albert Park,25900.0,Port Phillip,4.0,7.0,2.0,103.0,12.0
3,Albert Park-Middle Park-West St Kilda,2000-03-01,3.0,flat,25,350,-37.853484,144.970161,206051128.0,Albert Park,25900.0,Port Phillip,4.0,7.0,2.0,103.0,12.0
4,Albert Park-Middle Park-West St Kilda,2000-03-01,3.0,house,115,390,-37.853484,144.970161,206051128.0,Albert Park,25900.0,Port Phillip,4.0,7.0,2.0,103.0,12.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
100170,Yarraville-Seddon,2025-03-01,2.0,house,182,600,-37.812809,144.884163,213031352.0,Yarraville,24330.0,Maribyrnong,2.0,4.0,2.0,43.0,0.0
100171,Yarraville-Seddon,2025-03-01,3.0,flat,28,698,-37.812809,144.884163,213031352.0,Yarraville,24330.0,Maribyrnong,2.0,4.0,2.0,43.0,0.0
100172,Yarraville-Seddon,2025-03-01,3.0,house,313,700,-37.812809,144.884163,213031352.0,Yarraville,24330.0,Maribyrnong,2.0,4.0,2.0,43.0,0.0
100173,Yarraville-Seddon,2025-03-01,4.0,house,51,860,-37.812809,144.884163,213031352.0,Yarraville,24330.0,Maribyrnong,2.0,4.0,2.0,43.0,0.0


In [97]:
# Align the SA2 key with pivot_counts: same column name ("SA2_CODE21") and float dtype.
# rename() ignores a missing "sa2_code", so re-running this cell after the column has
# already been renamed does not fail.
domain_properties = (
    domain_properties
    .rename(columns={"sa2_code": "SA2_CODE21"})
    .astype({"SA2_CODE21": float})
)

In [None]:
# Attach the per-SA2 FOI counts to every row of domain_properties.
foi_count_cols = [col for col in pivot_counts.columns if col != "SA2_CODE21"]
domain_properties = (
    domain_properties
    # Remove counts attached by a previous run of this cell; otherwise a re-run
    # would create duplicated *_x / *_y columns.
    .drop(columns=foi_count_cols, errors="ignore")
    .merge(
        pivot_counts,
        on="SA2_CODE21",
        how="left",   # keeps all rows from domain_properties, even if no match
        validate="many_to_one",  # fail loudly if an SA2 appears twice in pivot_counts
    )
)
domain_properties

Unnamed: 0,SA2_CODE21,sa2_name,suburb,postcode,weekly_rent,bond,address,lat,lon,bedrooms,...,gas,intercom,security_system,washing_machine,median_weekly_rent_sa2,cultural_foi_count,education_foi_count,health_foi_count,others_foi_count,tourist_foi_count
0,213021344.0,Newport,SOUTH KINGSVILLE,3015,460.0,1994.0,3/53 Greene Street,-37.830982,144.87091,2,...,0,0,0,0,650.0,2,6,0,71,0
1,213021344.0,Newport,SOUTH KINGSVILLE,3015,400.0,1738.0,1/3 New Street,-37.826218,144.86755,2,...,0,0,0,1,650.0,2,6,0,71,0
2,213021343.0,Altona North,SOUTH KINGSVILLE,3015,795.0,3454.0,19/92 New Street,-37.831226,144.86632,3,...,1,0,0,1,670.0,2,9,1,44,0
3,213021344.0,Newport,SOUTH KINGSVILLE,3015,675.0,2933.0,3/14 Saltley Street,-37.827423,144.86768,3,...,0,0,0,0,650.0,2,6,0,71,0
4,213021344.0,Newport,SOUTH KINGSVILLE,3015,450.0,1955.0,4/2B Saltley Street,-37.826270,144.86790,2,...,0,0,0,0,650.0,2,6,0,71,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12613,208011169.0,Brighton (Vic.),BRIGHTON,3186,1575.0,6300.0,,-37.912884,144.99155,2,...,0,0,0,1,1600.0,10,13,4,73,0
12614,208011169.0,Brighton (Vic.),BRIGHTON,3186,2625.0,10500.0,2/71 Roslyn Street,-37.922750,145.00224,4,...,0,0,0,0,1600.0,10,13,4,73,0
12615,208011169.0,Brighton (Vic.),BRIGHTON,3186,2200.0,13200.0,23 Bay Street,-37.903280,144.98697,5,...,0,0,0,0,1600.0,10,13,4,73,0
12616,208011169.0,Brighton (Vic.),BRIGHTON,3186,1390.0,8340.0,2/7B Wilson Street,-37.909650,144.99810,3,...,0,0,0,0,1600.0,10,13,4,73,0


In [None]:
# Write both property tables, now including the FOI count columns, to CSV files in the
# current working directory, without the row index.
all_properties.to_csv("all_properties_foi_count.csv", index=False)
domain_properties.to_csv("domain_properties_foi_count.csv", index=False)