In [1]:
import os
from urllib.request import urlretrieve
import zipfile

In [2]:
# Dowload population data
data_url = "https://www.abs.gov.au/statistics/people/population/regional-population/2022-23/32180_ERP_2023_SA2_GDA2020.zip"
urlretrieve(data_url, "../data/landing/population.zip")

('../data/landing/population.zip', <http.client.HTTPMessage at 0x111379850>)

In [3]:
# Extract the contents of the zip file
with zipfile.ZipFile("../data/landing/population.zip", 'r') as zip_ref:
    zip_ref.extractall("../data/population/")

In [4]:
import geopandas as gpd

# Load the GeoPackage file
gdf = gpd.read_file("../data/population/32180_ERP_2023_SA2_GDA2020.gpkg")

# Check the first few rows of the dataset to understand its structure
print(gdf.head(1))

   State_code_2021  State_name_2021  SA2_code_2021 SA2_name_2021  \
0                1  New South Wales      101021007     Braidwood   

   SA3_code_2021 SA3_name_2021  SA4_code_2021   SA4_name_2021 GCCSA_code_2021  \
0          10102    Queanbeyan            101  Capital Region           1RNSW   

  GCCSA_name_2021  ...  Births_2022_23  Deaths_2022_23  \
0     Rest of NSW  ...              44              41   

   Natural_increase_2022_23  Internal_arrivals_2022_23  \
0                         3                        316   

   Internal_departures_2022_23  Net_internal_migration_2022_23  \
0                          301                              15   

   Overseas_arrivals_2022_23  Overseas_departures_2022_23  \
0                         19                            7   

   Net_overseas_migration_2022_23  \
0                              12   

                                            geometry  
0  MULTIPOLYGON (((149.58424 -35.44426, 149.58432...  

[1 rows x 56 columns]


In [5]:
gdf = gdf[gdf["State_name_2021"] == "Victoria"]
gdf["SA2_name_2021"].unique()

array(['Alfredton', 'Ballarat', 'Buninyong', 'Delacombe', 'Smythes Creek',
       'Wendouree - Miners Rest', 'Ballarat East - Warrenheip',
       'Ballarat North - Invermay', 'Canadian - Mount Clear',
       'Sebastopol - Redan', 'Bacchus Marsh Surrounds',
       'Creswick - Clunes', 'Daylesford', 'Gordon (Vic.)', 'Avoca',
       'Beaufort', 'Golden Plains - North', 'Maryborough (Vic.)',
       'Maryborough Surrounds', 'Bendigo', 'California Gully - Eaglehawk',
       'East Bendigo - Kennington', 'Flora Hill - Spring Gully',
       'Kangaroo Flat - Golden Square', 'Maiden Gully', 'Strathfieldsaye',
       'White Hills - Ascot', 'Bendigo Surrounds - South', 'Castlemaine',
       'Castlemaine Surrounds', 'Heathcote', 'Kyneton', 'Woodend',
       'Bendigo Surrounds - North', 'Loddon', 'Bannockburn',
       'Golden Plains - South', 'Winchelsea', 'Belmont', 'Geelong',
       'Geelong West - Hamlyn Heights', 'Highton', 'Lara', 'Leopold',
       'Newcomb - Moolap', 'Newtown (Vic.)', 'North Ge

In [6]:
gdf.columns

Index(['State_code_2021', 'State_name_2021', 'SA2_code_2021', 'SA2_name_2021',
       'SA3_code_2021', 'SA3_name_2021', 'SA4_code_2021', 'SA4_name_2021',
       'GCCSA_code_2021', 'GCCSA_name_2021', 'ERP_2001', 'ERP_2002',
       'ERP_2003', 'ERP_2004', 'ERP_2005', 'ERP_2006', 'ERP_2007', 'ERP_2008',
       'ERP_2009', 'ERP_2010', 'ERP_2011', 'ERP_2012', 'ERP_2013', 'ERP_2014',
       'ERP_2015', 'ERP_2016', 'ERP_2017', 'ERP_2018', 'ERP_2019', 'ERP_2020',
       'ERP_2021', 'ERP_2022', 'ERP_2023', 'ERP_change_number_2022_23',
       'ERP_change_per_cent_2022_23', 'Area_km2',
       'Pop_density_2023_people_per_km2', 'Births_2021_22', 'Deaths_2021_22',
       'Natural_increase_2021_22', 'Internal_arrivals_2021_22',
       'Internal_departures_2021_22', 'Net_internal_migration_2021_22',
       'Overseas_arrivals_2021_22', 'Overseas_departures_2021_22',
       'Net_overseas_migration_2021_22', 'Births_2022_23', 'Deaths_2022_23',
       'Natural_increase_2022_23', 'Internal_arrivals_2022_2

In [7]:
# Calculate net migration
gdf["Net_migration_2021_22"] = gdf["Net_overseas_migration_2021_22"] + gdf["Net_internal_migration_2021_22"]
gdf["Net_migration_2022_23"] = gdf["Net_overseas_migration_2022_23"] + gdf["Net_internal_migration_2022_23"]

In [8]:
# Calculate ERP/km2 from 2001-2023
for year in range(2001, 2024):
    gdf[f"ERP_per_km2_{year}"] = gdf[f"ERP_{year}"] / gdf["Area_km2"]

In [9]:
# Calculate ERP increase each year from 2002-2023
for year in range(2002, 2024):
    erp_increase = gdf[f"ERP_{year}"] - gdf[f"ERP_{year - 1}"]
    gdf[f"ERP_increase_{year - 1}_{year - 2000}"] = erp_increase

In [10]:
gdf.columns

Index(['State_code_2021', 'State_name_2021', 'SA2_code_2021', 'SA2_name_2021',
       'SA3_code_2021', 'SA3_name_2021', 'SA4_code_2021', 'SA4_name_2021',
       'GCCSA_code_2021', 'GCCSA_name_2021',
       ...
       'ERP_increase_2013_14', 'ERP_increase_2014_15', 'ERP_increase_2015_16',
       'ERP_increase_2016_17', 'ERP_increase_2017_18', 'ERP_increase_2018_19',
       'ERP_increase_2019_20', 'ERP_increase_2020_21', 'ERP_increase_2021_22',
       'ERP_increase_2022_23'],
      dtype='object', length=103)

In [11]:
import folium
import branca.colormap as cm

In [49]:
# Create directories to store plots
os.makedirs("../plots/net_migrants/", exist_ok=True)
os.makedirs("../plots/erp/", exist_ok=True)
os.makedirs("../plots/area/", exist_ok=True)
os.makedirs("../plots/erp_per_km2/", exist_ok=True)
os.makedirs("../plots/erp_increase/", exist_ok=True)

In [13]:
def create_map(gdf, value_col, suburb_col, ouput_directory): 
    # Create a map centered on Victoria
    m = folium.Map(location=[-37.4713, 144.7852], zoom_start=7)

    # Create a color map using branca
    colormap = cm.LinearColormap(
        colors=['blue', 'white', 'yellow', 'orange', 'red'],
        vmin=gdf[value_col].min(),
        vmax=gdf[value_col].max(),
        caption=value_col
    )

     # Define a function to style the features
    def style_function(feature):
        return {
            'fillOpacity': 0.7,
            'weight': 0.5,
            'fillColor': colormap(feature['properties'][value_col]),
            'color': 'black'
        }

    # Add the GeoDataFrame to the map with Folium
    folium.GeoJson(
        gdf.to_json(),
        style_function=style_function,
        tooltip=folium.GeoJsonTooltip(
            fields=[suburb_col, value_col], 
            aliases=['Suburb Name', value_col]
        )
    ).add_to(m)

    # Add the colormap legend to the map
    colormap.add_to(m)

    # Save and display the map
    m.save(f"{ouput_directory}/suburbs_{value_col.lower()}.html")

In [50]:
# Plot suburbs' net migrations from 2021-2023
for col in ['Net_internal_migration_2021_22', 'Net_overseas_migration_2021_22', 'Net_migration_2021_22', 
'Net_internal_migration_2022_23', 'Net_overseas_migration_2022_23', 'Net_migration_2022_23']:
    create_map(gdf, col, 'SA2_name_2021', '../plots/net_migrants')

In [51]:
# Plot suburbs' ERP from 2001-2023    
for col in ['ERP_2001', 'ERP_2002',
       'ERP_2003', 'ERP_2004', 'ERP_2005', 'ERP_2006', 'ERP_2007', 'ERP_2008',
       'ERP_2009', 'ERP_2010', 'ERP_2011', 'ERP_2012', 'ERP_2013', 'ERP_2014',
       'ERP_2015', 'ERP_2016', 'ERP_2017', 'ERP_2018', 'ERP_2019', 'ERP_2020',
       'ERP_2021', 'ERP_2022', 'ERP_2023']:
    create_map(gdf, col, 'SA2_name_2021', '../plots/erp')

In [52]:
# Plot suburbs' area
create_map(gdf,'Area_km2', 'SA2_name_2021', '../plots/area')

In [53]:
# Plot suburbs' ERP/km2 from 2001-2023
for col in ['ERP_per_km2_2001', 'ERP_per_km2_2002',
       'ERP_per_km2_2003', 'ERP_per_km2_2004', 'ERP_per_km2_2005',
       'ERP_per_km2_2006', 'ERP_per_km2_2007', 'ERP_per_km2_2008',
       'ERP_per_km2_2009', 'ERP_per_km2_2010', 'ERP_per_km2_2011',
       'ERP_per_km2_2012', 'ERP_per_km2_2013', 'ERP_per_km2_2014',
       'ERP_per_km2_2015', 'ERP_per_km2_2016', 'ERP_per_km2_2017',
       'ERP_per_km2_2018', 'ERP_per_km2_2019', 'ERP_per_km2_2020',
       'ERP_per_km2_2021', 'ERP_per_km2_2022', 'ERP_per_km2_2023']:
    create_map(gdf, col, 'SA2_name_2021', '../plots/erp_per_km2')

In [54]:
# Plot suburbs' ERP increase rate from 2002-2023
for col in ['ERP_increase_2001_2',
       'ERP_increase_2002_3', 'ERP_increase_2003_4', 'ERP_increase_2004_5',
       'ERP_increase_2005_6', 'ERP_increase_2006_7', 'ERP_increase_2007_8',
       'ERP_increase_2008_9', 'ERP_increase_2009_10', 'ERP_increase_2010_11',
       'ERP_increase_2011_12', 'ERP_increase_2012_13', 'ERP_increase_2013_14',
       'ERP_increase_2014_15', 'ERP_increase_2015_16', 'ERP_increase_2016_17',
       'ERP_increase_2017_18', 'ERP_increase_2018_19', 'ERP_increase_2019_20',
       'ERP_increase_2020_21', 'ERP_increase_2021_22', 'ERP_increase_2022_23']:
    create_map(gdf, col, 'SA2_name_2021', '../plots/erp_increase')

In [19]:
# Select relevant columns
pop_df = gdf[['SA2_name_2021', 'Net_migration_2021_22',
       'Net_migration_2022_23', 'ERP_per_km2_2001', 'ERP_per_km2_2002',
       'ERP_per_km2_2003', 'ERP_per_km2_2004', 'ERP_per_km2_2005',
       'ERP_per_km2_2006', 'ERP_per_km2_2007', 'ERP_per_km2_2008',
       'ERP_per_km2_2009', 'ERP_per_km2_2010', 'ERP_per_km2_2011',
       'ERP_per_km2_2012', 'ERP_per_km2_2013', 'ERP_per_km2_2014',
       'ERP_per_km2_2015', 'ERP_per_km2_2016', 'ERP_per_km2_2017',
       'ERP_per_km2_2018', 'ERP_per_km2_2019', 'ERP_per_km2_2020',
       'ERP_per_km2_2021', 'ERP_per_km2_2022', 'ERP_per_km2_2023', 'ERP_increase_2001_2',
       'ERP_increase_2002_3', 'ERP_increase_2003_4', 'ERP_increase_2004_5',
       'ERP_increase_2005_6', 'ERP_increase_2006_7', 'ERP_increase_2007_8',
       'ERP_increase_2008_9', 'ERP_increase_2009_10', 'ERP_increase_2010_11',
       'ERP_increase_2011_12', 'ERP_increase_2012_13', 'ERP_increase_2013_14',
       'ERP_increase_2014_15', 'ERP_increase_2015_16', 'ERP_increase_2016_17',
       'ERP_increase_2017_18', 'ERP_increase_2018_19', 'ERP_increase_2019_20',
       'ERP_increase_2020_21', 'ERP_increase_2021_22', 'ERP_increase_2022_23', 'geometry']]

# Rename 'SA2_name_2021' column
pop_df.rename(columns={'SA2_name_2021': 'SA2_Name'}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pop_df.rename(columns={'SA2_name_2021': 'SA2_Name'}, inplace=True)


In [20]:
# Download income data
import pandas as pd
income_df = pd.read_csv('../data/curated/suburbs_income.csv')

In [21]:
# Merge income and population data
population_income_df = pop_df.merge(income_df, on='SA2_Name', how='inner')

In [22]:
population_income_df.columns

Index(['SA2_Name', 'Net_migration_2021_22', 'Net_migration_2022_23',
       'ERP_per_km2_2001', 'ERP_per_km2_2002', 'ERP_per_km2_2003',
       'ERP_per_km2_2004', 'ERP_per_km2_2005', 'ERP_per_km2_2006',
       'ERP_per_km2_2007', 'ERP_per_km2_2008', 'ERP_per_km2_2009',
       'ERP_per_km2_2010', 'ERP_per_km2_2011', 'ERP_per_km2_2012',
       'ERP_per_km2_2013', 'ERP_per_km2_2014', 'ERP_per_km2_2015',
       'ERP_per_km2_2016', 'ERP_per_km2_2017', 'ERP_per_km2_2018',
       'ERP_per_km2_2019', 'ERP_per_km2_2020', 'ERP_per_km2_2021',
       'ERP_per_km2_2022', 'ERP_per_km2_2023', 'ERP_increase_2001_2',
       'ERP_increase_2002_3', 'ERP_increase_2003_4', 'ERP_increase_2004_5',
       'ERP_increase_2005_6', 'ERP_increase_2006_7', 'ERP_increase_2007_8',
       'ERP_increase_2008_9', 'ERP_increase_2009_10', 'ERP_increase_2010_11',
       'ERP_increase_2011_12', 'ERP_increase_2012_13', 'ERP_increase_2013_14',
       'ERP_increase_2014_15', 'ERP_increase_2015_16', 'ERP_increase_2016_17',
    

In [23]:
# Create directories to store plots
os.makedirs("../plots/job/", exist_ok=True)
os.makedirs("../plots/income/", exist_ok=True)

In [24]:
# Plot suburbs' number of jobs from 2016-2021
for col in ['NUMBER_OF_JOBS_PERSONS_2016-17',
       'NUMBER_OF_JOBS_PERSONS_2017-18', 'NUMBER_OF_JOBS_PERSONS_2018-19',
       'NUMBER_OF_JOBS_PERSONS_2019-20', 'NUMBER_OF_JOBS_PERSONS_2020-21']:
    create_map(population_income_df, col, 'SA2_Name', '../plots/job')

In [25]:
# Plot suburbs' median income from 2016-2021
for col in ['MEDIAN_INCOME_PERSONS_2016-17', 'MEDIAN_INCOME_PERSONS_2017-18',
       'MEDIAN_INCOME_PERSONS_2018-19', 'MEDIAN_INCOME_PERSONS_2019-20',
       'MEDIAN_INCOME_PERSONS_2020-21']:
    create_map(population_income_df, col, 'SA2_Name', '../plots/income')

In [26]:
# Calculate centroids of all SA2
population_income_df['centroid'] = population_income_df.geometry.centroid


  population_income_df['centroid'] = population_income_df.geometry.centroid


In [27]:
# Download houses data
houses_df = pd.read_csv('../data/curated/houses_with_closest_groceries.csv')

In [28]:
import ast
from shapely.geometry import Point
# Function to convert geometry strings to Point objects
def string_to_point(geometry_str):
    # Convert the string to a dictionary
    geometry_dict = ast.literal_eval(geometry_str)
    # Extract latitude and longitude
    latitude = geometry_dict['latitude']
    longitude = geometry_dict['longitude']
    # Create and return a Point object
    return Point(longitude, latitude)

In [29]:
# Apply the function to the 'geometry' column to convert it to Point geometries
houses_df['geometry'] = houses_df['geometry'].apply(string_to_point)

In [30]:
# Convert the DataFrame to a GeoDataFrame
houses_df = gpd.GeoDataFrame(houses_df, geometry='geometry', crs=population_income_df.crs)

In [31]:
houses_df.columns

Index(['address', 'parking', 'type', 'num_schools', 'cost', 'suburb', 'beds',
       'baths', 'cost/(beds+baths)', 'geometry', 'closest_train_station_name',
       'closest_train_station_distance_km', 'closest_tram_station_name',
       'closest_tram_station_distance_km', 'closest_hospital_name',
       'closest_hospital_distance_km', 'closest_grocery_name',
       'closest_grocery_distance_km'],
      dtype='object')

In [32]:
# Handle Melbourne 3004 seperately
special_houses = houses_df[houses_df['suburb'] == 'Melbourne']
houses_df = houses_df[houses_df['suburb'] != 'Melbourne']

In [33]:
# Custom merge based on substring matching (suburb_name in SA2_Name)
def merge_substring(gdf1, sa2_series):
    matches = []
    for idx1, row1 in gdf1.iterrows():
        match_found = False
        for idx2, sa2_name in sa2_series.items():  # Iterate over Series
            if row1['suburb'] in sa2_name:  # Substring matching
                match_found = True
                # Append the matched row, including the SA2 name
                matches.append({**row1, 'SA2_Name': sa2_name})
        if not match_found:
            # Append the unmatched row with a None for 'SA2_Name'
            matches.append({**row1, 'SA2_Name': None})

    return gpd.GeoDataFrame(matches, geometry='geometry', crs=gdf1.crs)

In [34]:
# Perform the substring merge
matched_houses = merge_substring(houses_df, population_income_df['SA2_Name'])

In [35]:
# Handle unmatched houses seperately
unmatched_houses = matched_houses[matched_houses['SA2_Name'].isna()].drop('SA2_Name', axis=1)
matched_houses = matched_houses[~matched_houses['SA2_Name'].isna()]

In [36]:
# Combine with special houses from Melbourne 3004
special_houses = pd.concat([special_houses, unmatched_houses], ignore_index=True)

In [37]:
# Perform merge for matched houses to add the rest of the columns from population_income_df
matched_houses_sjoin = matched_houses.merge(population_income_df.drop('geometry', axis=1), on='SA2_Name', how='inner')

In [38]:
matched_houses_sjoin.columns

Index(['address', 'parking', 'type', 'num_schools', 'cost', 'suburb', 'beds',
       'baths', 'cost/(beds+baths)', 'geometry', 'closest_train_station_name',
       'closest_train_station_distance_km', 'closest_tram_station_name',
       'closest_tram_station_distance_km', 'closest_hospital_name',
       'closest_hospital_distance_km', 'closest_grocery_name',
       'closest_grocery_distance_km', 'SA2_Name', 'Net_migration_2021_22',
       'Net_migration_2022_23', 'ERP_per_km2_2001', 'ERP_per_km2_2002',
       'ERP_per_km2_2003', 'ERP_per_km2_2004', 'ERP_per_km2_2005',
       'ERP_per_km2_2006', 'ERP_per_km2_2007', 'ERP_per_km2_2008',
       'ERP_per_km2_2009', 'ERP_per_km2_2010', 'ERP_per_km2_2011',
       'ERP_per_km2_2012', 'ERP_per_km2_2013', 'ERP_per_km2_2014',
       'ERP_per_km2_2015', 'ERP_per_km2_2016', 'ERP_per_km2_2017',
       'ERP_per_km2_2018', 'ERP_per_km2_2019', 'ERP_per_km2_2020',
       'ERP_per_km2_2021', 'ERP_per_km2_2022', 'ERP_per_km2_2023',
       'ERP_increase_20

In [39]:
# Merge special houses using spatial join
special_houses_sjoin = gpd.sjoin(special_houses, population_income_df, how="left", predicate="within")

In [40]:
special_houses_sjoin.columns

Index(['address', 'parking', 'type', 'num_schools', 'cost', 'suburb', 'beds',
       'baths', 'cost/(beds+baths)', 'geometry', 'closest_train_station_name',
       'closest_train_station_distance_km', 'closest_tram_station_name',
       'closest_tram_station_distance_km', 'closest_hospital_name',
       'closest_hospital_distance_km', 'closest_grocery_name',
       'closest_grocery_distance_km', 'index_right', 'SA2_Name',
       'Net_migration_2021_22', 'Net_migration_2022_23', 'ERP_per_km2_2001',
       'ERP_per_km2_2002', 'ERP_per_km2_2003', 'ERP_per_km2_2004',
       'ERP_per_km2_2005', 'ERP_per_km2_2006', 'ERP_per_km2_2007',
       'ERP_per_km2_2008', 'ERP_per_km2_2009', 'ERP_per_km2_2010',
       'ERP_per_km2_2011', 'ERP_per_km2_2012', 'ERP_per_km2_2013',
       'ERP_per_km2_2014', 'ERP_per_km2_2015', 'ERP_per_km2_2016',
       'ERP_per_km2_2017', 'ERP_per_km2_2018', 'ERP_per_km2_2019',
       'ERP_per_km2_2020', 'ERP_per_km2_2021', 'ERP_per_km2_2022',
       'ERP_per_km2_2023', '

In [41]:
# Concatenate all houses
all_houses = pd.concat([matched_houses_sjoin, special_houses_sjoin.drop('index_right', axis=1)], ignore_index=True)

In [42]:
all_houses.columns

Index(['address', 'parking', 'type', 'num_schools', 'cost', 'suburb', 'beds',
       'baths', 'cost/(beds+baths)', 'geometry', 'closest_train_station_name',
       'closest_train_station_distance_km', 'closest_tram_station_name',
       'closest_tram_station_distance_km', 'closest_hospital_name',
       'closest_hospital_distance_km', 'closest_grocery_name',
       'closest_grocery_distance_km', 'SA2_Name', 'Net_migration_2021_22',
       'Net_migration_2022_23', 'ERP_per_km2_2001', 'ERP_per_km2_2002',
       'ERP_per_km2_2003', 'ERP_per_km2_2004', 'ERP_per_km2_2005',
       'ERP_per_km2_2006', 'ERP_per_km2_2007', 'ERP_per_km2_2008',
       'ERP_per_km2_2009', 'ERP_per_km2_2010', 'ERP_per_km2_2011',
       'ERP_per_km2_2012', 'ERP_per_km2_2013', 'ERP_per_km2_2014',
       'ERP_per_km2_2015', 'ERP_per_km2_2016', 'ERP_per_km2_2017',
       'ERP_per_km2_2018', 'ERP_per_km2_2019', 'ERP_per_km2_2020',
       'ERP_per_km2_2021', 'ERP_per_km2_2022', 'ERP_per_km2_2023',
       'ERP_increase_20

In [43]:
# Calculate the distance between the 'geometry' and the 'centroid' for each row
all_houses['distance_to_centroid'] = all_houses.apply(lambda row: row['geometry'].distance(row['centroid']), axis=1)

In [44]:
# Select the suburb with the minimum distance from its centroid to the house
houses_final = all_houses.loc[all_houses.groupby('address')['distance_to_centroid'].idxmin()]

In [45]:
houses_final.columns

Index(['address', 'parking', 'type', 'num_schools', 'cost', 'suburb', 'beds',
       'baths', 'cost/(beds+baths)', 'geometry', 'closest_train_station_name',
       'closest_train_station_distance_km', 'closest_tram_station_name',
       'closest_tram_station_distance_km', 'closest_hospital_name',
       'closest_hospital_distance_km', 'closest_grocery_name',
       'closest_grocery_distance_km', 'SA2_Name', 'Net_migration_2021_22',
       'Net_migration_2022_23', 'ERP_per_km2_2001', 'ERP_per_km2_2002',
       'ERP_per_km2_2003', 'ERP_per_km2_2004', 'ERP_per_km2_2005',
       'ERP_per_km2_2006', 'ERP_per_km2_2007', 'ERP_per_km2_2008',
       'ERP_per_km2_2009', 'ERP_per_km2_2010', 'ERP_per_km2_2011',
       'ERP_per_km2_2012', 'ERP_per_km2_2013', 'ERP_per_km2_2014',
       'ERP_per_km2_2015', 'ERP_per_km2_2016', 'ERP_per_km2_2017',
       'ERP_per_km2_2018', 'ERP_per_km2_2019', 'ERP_per_km2_2020',
       'ERP_per_km2_2021', 'ERP_per_km2_2022', 'ERP_per_km2_2023',
       'ERP_increase_20

In [46]:
houses_final[['suburb', 'SA2_Name', 'address']].to_csv('../data/curated/compare.csv', index=False)

In [47]:
houses_final.to_csv('../data/curated/houses_all_properties.csv', index=False)