In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
import json
import os
import requests
import io
import zipfile

In [2]:
# Read every per-state MB-<code>.csv file (all columns as strings) and stack
# them into one frame with a fresh 0..n-1 index.
# NOTE(review): codes 3 and 7 are absent from the list below -- confirm that is intentional.
dfMB = pd.concat(
    [
        pd.read_csv(f'data/blocks/MB-{state_code}.csv', dtype=str)
        for state_code in [1, 2, 4, 5, 6, 8, 9, 'Z']
    ],
    ignore_index=True,
)

In [3]:
# The CSVs are loaded with dtype=str, so the area column is text; cast it to
# float so it can be aggregated numerically.
dfMB = dfMB.astype({'AREA_ALBERS_SQKM': float})

In [4]:
# Inspect the column names available in the combined frame.
dfMB.columns

Index(['MB_CODE_2021', 'MB_CATEGORY_2021', 'CHANGE_FLAG_2021',
       'CHANGE_LABEL_2021', 'SA1_CODE_2021', 'SA2_CODE_2021', 'SA2_NAME_2021',
       'SA3_CODE_2021', 'SA3_NAME_2021', 'SA4_CODE_2021', 'SA4_NAME_2021',
       'GCCSA_CODE_2021', 'GCCSA_NAME_2021', 'STATE_CODE_2021',
       'STATE_NAME_2021', 'AUS_CODE_2021', 'AUS_NAME_2021', 'AREA_ALBERS_SQKM',
       'ASGS_LOCI_URI_2021', 'LGA_CODE_2023', 'LGA_NAME_2023', 'SAL_CODE_2021',
       'SAL_NAME_2021', 'POA_CODE_2021', 'POA_NAME_2021'],
      dtype='object')

In [5]:
# Number of rows per GCCSA name, most frequent first.
dfMB['GCCSA_NAME_2021'].value_counts()

GCCSA_NAME_2021
Greater Sydney                            60881
Greater Melbourne                         59483
Rest of NSW                               51848
Rest of Vic.                              29248
Greater Perth                             27112
Greater Adelaide                          19362
Rest of WA                                16186
Rest of Tas.                               9088
Rest of SA                                 9054
Australian Capital Territory               6660
Greater Hobart                             3928
Other Territories                           132
Migratory - Offshore - Shipping (WA)         23
Migratory - Offshore - Shipping (Tas.)       17
Migratory - Offshore - Shipping (SA)         14
Migratory - Offshore - Shipping (NSW)         8
Migratory - Offshore - Shipping (Vic.)        7
Migratory - Offshore - Shipping (OT)          3
Migratory - Offshore - Shipping (ACT)         1
No usual address (OT)                         1
No usual address (ACT)  

In [6]:

# Collapse dfMB to one row per (SAL code, POA code) pair, then rename the
# columns to shorter snake_case names.
#   'first'  -> a single representative value for the group
#   'unique' -> an array of the distinct values seen in the group
#   'sum'    -> total area of the group
df_suburbs = (
    dfMB
    .groupby(['SAL_CODE_2021', 'POA_CODE_2021'])
    .agg({
        'SAL_NAME_2021': 'first',
        'POA_NAME_2021': 'first',
        'GCCSA_NAME_2021': 'unique',
        'LGA_NAME_2023': 'unique',
        'STATE_CODE_2021': 'first',
        'STATE_NAME_2021': 'first',
        'AUS_CODE_2021': 'first',
        'AUS_NAME_2021': 'first',
        'AREA_ALBERS_SQKM': 'sum',
        'MB_CATEGORY_2021': 'unique',
    })
    # Bring the two grouping keys back as ordinary columns before renaming.
    .reset_index()
    .rename(columns={
        'SAL_CODE_2021': 'suburb_id',
        'POA_CODE_2021': 'postcode_id',
        'SAL_NAME_2021': 'suburb_name',
        'POA_NAME_2021': 'postcode',
        'GCCSA_NAME_2021': 'capital_city',
        'LGA_NAME_2023': 'lgas',
        'STATE_CODE_2021': 'state_id',
        'STATE_NAME_2021': 'state_name',
        'AUS_CODE_2021': 'country_id',
        'AUS_NAME_2021': 'country_name',
        'AREA_ALBERS_SQKM': 'area_sqkm',
        'MB_CATEGORY_2021': 'categories',
    })
)

In [7]:
# Temporary analyses
# Add three helper columns in one step:
#   lga         -> the single LGA name when a row has exactly one, otherwise the full array
#   lga_count   -> how many distinct LGA names the row has
#   gccsa_count -> how many distinct GCCSA names the row has
df_suburbs = df_suburbs.assign(
    lga=df_suburbs['lgas'].apply(lambda names: names[0] if len(names) == 1 else names),
    lga_count=df_suburbs['lgas'].apply(len),
    gccsa_count=df_suburbs['capital_city'].apply(len),
)
# Distribution of the number of LGAs per (suburb, postcode) row.
df_suburbs['lga_count'].value_counts()

lga_count
1    10888
2     1004
3       63
4        5
5        1
Name: count, dtype: int64

In [8]:
# Sorted, de-duplicated two-character prefixes of postcode_id for every row
# whose capital_city array contains 'Greater Melbourne'.
# Series.unique() can return an ExtensionArray (which has no in-place .sort())
# depending on the column dtype, so coerce to an object ndarray and sort with
# NumPy; the result is a sorted ndarray either way.
greater_melbourne_postcodes_2 = np.sort(
    np.asarray(
        df_suburbs.loc[
            df_suburbs['capital_city'].apply(lambda names: 'Greater Melbourne' in names),
            'postcode_id',
        ].str[:2].unique(),
        dtype=object,
    )
)

In [9]:
# Load one suburbs-2-<prefix>.geojson file per postcode prefix and stack them
# into a single GeoDataFrame with a fresh index.
# 20s - 30s
gdf: gpd.GeoDataFrame = gpd.GeoDataFrame(
    pd.concat(
        [
            gpd.read_file(f'data/suburbs/suburbs-2-{p2}.geojson')
            for p2 in greater_melbourne_postcodes_2
        ],
        ignore_index=True,
    )
)

In [10]:
# (suburb_id, postcode_id) pairs of every row whose capital_city array contains
# 'Greater Melbourne'.
# Select rows and columns in a single .loc call and take an explicit copy: the
# result gets a new column assigned later, and writing to a chained-selection
# slice of df_suburbs would raise SettingWithCopyWarning.
df_greater_melbourne_ids = df_suburbs.loc[
    df_suburbs['capital_city'].apply(lambda names: 'Greater Melbourne' in names),
    ['suburb_id', 'postcode_id'],
].copy()

In [11]:
# Add a constant True marker column used to identify matching rows after the merge.
# assign() returns a new frame instead of writing into a filtered slice, so this
# cell is warning-free and can be re-run safely.
df_greater_melbourne_ids = df_greater_melbourne_ids.assign(**{'Greater Melbourne': True})

In [12]:
# Left-join the marker column onto the geometries; rows with no matching
# (suburb_id, postcode_id) pair end up with a missing 'Greater Melbourne' value.
gdf = gdf.merge(
    df_greater_melbourne_ids,
    how='left',
    on=['suburb_id', 'postcode_id'],
)

In [13]:
# Keep only rows whose marker is True; rows left unmatched by the merge hold a
# missing value, which does not compare equal to True and is dropped.
gdf = gdf.loc[gdf['Greater Melbourne'].eq(True)]

In [14]:
# Remove the helper marker column now that filtering is done.
# Rebind instead of inplace=True: gdf is the result of a boolean filter, and
# mutating such a slice in place can raise SettingWithCopyWarning.
gdf = gdf.drop(columns=['Greater Melbourne'])

In [15]:
# Make sure the output directory exists; exist_ok=True makes this a no-op when it already does.
os.makedirs('data/custom', exist_ok=True)

In [16]:
# Write the filtered GeoDataFrame to disk as GeoJSON.
gdf.to_file('data/custom/suburbs-greater-melbourne.geojson', driver='GeoJSON')