# University-Suburb distance calculation

In [1]:
import os
import pandas as pd
from geopy.geocoders import GoogleV3  # for determining uni campus coordinates
from geopy.distance import geodesic  # for calculating distance between coordinates

import helper

# make all output interactive
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"


In [2]:
# Define universal constants
# ================================
DATA_PATH = os.path.join(os.getcwd(), "../data/")
RAW_DATA_PATH = os.path.join(DATA_PATH, "raw")
DERIVED_DATA_PATH = os.path.join(DATA_PATH, "derived")
CORR_DATA_PATH = os.path.join(DATA_PATH, "correspondence")


In [3]:
geolocator = GoogleV3(api_key=os.getenv("GMAPS_APIKEY"))

In order for the above `geolocator` code to work, VSCode must be opened from the terminal using the following code to ensure the environment variable for the Google Maps API key is set correctly and inherited by the session:

``` bash
> source ~/.bash-profile
> code .
```

In [4]:
CAMPUS_NAMES = [
    "Swinbourne University of Technology Hawthorn Campus",
    "Swinbourne University of Technology Croydon Campus",
    "Swinbourne University of Technology Wantirna Campus",
    "Deakin University Burwood Campus",
    "Deakin University Geelong Campus",
    "Deakin University Warrnambool Campus",
    "Federation University Australia Ballarat Campus",
    "Federation University Australia Churchill Campus",
    "Federation University Australia Berwick Campus",
    "Federation University Australia Wimmera Campus",
    "La Trobe University Melbourne Campus",
    "La Trobe University Bendigo Campus",
    "La Trobe University Shepparton Campus",
    "La Trobe University Wodonga Campus",
    "La Trobe University Mildura Campus",
    "Monash University Clayton Campus",
    "Monash University Caulfield Campus",
    "Monash University Peninsula Campus",
    "Monash University Parkville Campus",
    "RMIT University Melbourne Campus",
    "Swinburne University of Technology Hawthorne Campus",
    "Swinburne University of Technology Croydon Campus",
    "Swinburne University of Technology Wantirna Campus",
    "The University of Melbourne Parkville Campus",
    "The University of Melbourne Southbank Campus",
    "The University of Melbourne Burnley Campus",
    "The University of Melbourne Dookie Campus",
    "The University of Melbourne Creswick Campus",
    "The University of Melbourne Werribee Campus",
    "The University of Melbourne Shepparton Campus",
    "Victoria University Melbourne Campus",
    "Victoria University Footscray Campus",
    "Victoria University St Albans Campus",
    "Victoria University Sunshine Campus",
    "Victoria University Werribee Campus",
    "Australian Catholic University Ballarat Campus",
    "Australian Catholic University Melbourne Campus",
    "Torrens University Australia Melbourne Campus",
]

CAMPUSES_CODES = [
    "com_swinbourne_hawthorn",
    "com_swinbourne_croydon",
    "com_swinbourne_wantirna",
    "com_deakin_burwood",
    "com_deakin_geelong",
    "com_deakin_warrnambool",
    "com_federation_ballarat",
    "com_federation_churchill",
    "com_federation_berwick",
    "com_federation_wimmera",
    "com_latrobe_melbourne",
    "com_latrobe_bendigo",
    "com_latrobe_shepparton",
    "com_latrobe_wodonga",
    "com_latrobe_mildura",
    "com_monash_clayton",
    "com_monash_caulfield",
    "com_monash_peninsula",
    "com_monash_parkville",
    "com_rmit_melbourne",
    "com_swinburne_hawthorne",
    "com_swinburne_croydon",
    "com_swinburne_wantirna",
    "com_unimelb_parkville",
    "com_unimelb_southbank",
    "com_unimelb_burnley",
    "com_unimelb_dookie",
    "com_unimelb_creswick",
    "com_unimelb_werribee",
    "com_unimelb_shepparton",
    "com_vicuni_melbourne",
    "com_vicuni_footscray",
    "com_vicuni_stalbans",
    "com_vicuni_sunshine",
    "com_vicuni_werribee",
    "com_catholic_ballarat",
    "com_catholic_melbourne",
    "com_torrens_melbourne",
]


## Get campus coordinates

In [5]:
campus_coordinates = []

for name in CAMPUS_NAMES:
    point = geolocator.geocode(name).point
    tup = (point.latitude, point.longitude)
    campus_coordinates.append(tup)

campus_coordinates


[(-37.8221504, 145.0389546),
 (-37.801686, 145.285759),
 (-37.8734845, 145.2348976),
 (-37.8445153, 145.1122556),
 (-38.1438793, 144.3599533),
 (-38.3908717, 142.5384536),
 (-37.5621587, 143.8502556),
 (-38.311211, 146.429409),
 (-38.0404323, 145.3396478),
 (-36.3131369, 142.3613378),
 (-37.7207472, 145.047159),
 (-36.7790388, 144.3012214),
 (-36.3805003, 145.4063541),
 (-36.11140340000001, 146.8488051),
 (-34.205484, 142.1665775),
 (-37.9145125, 145.1349971),
 (-37.8773524, 145.0450003),
 (-38.1526488, 145.1360384),
 (-37.7840053, 144.9587458),
 (-37.8083332, 144.9639386),
 (-37.8221504, 145.0389546),
 (-37.801686, 145.285759),
 (-37.8734845, 145.2348976),
 (-37.7983459, 144.960974),
 (-37.8247391, 144.9701685),
 (-37.8282356, 145.0221405),
 (-37.7983459, 144.960974),
 (-37.41996, 143.9006805),
 (-37.889461, 144.6933006),
 (-36.36201090000001, 145.4064461),
 (-37.7937398, 144.8985171),
 (-37.8045059, 144.8984888),
 (-37.7522655, 144.7979192),
 (-37.7773774, 144.8352895),
 (-37.8944969

## Get distance from campus to suburb

We will be using the standard derived metadata file for the suburb coordinates, found in [`SuburbMetadata.csv`](../data/derived/SuburbMetadata.csv).

In [6]:
# All localities meta data
# ========================

# get data
suburb_metadata_df = helper.getSuburbsMetadata()
# convert to numpy array for faster operations
suburb_metadata = suburb_metadata_df[["locality", "coordinates"]].to_numpy()
suburb_metadata


array([['Melbourne', (-37.8152065, 144.963937)],
       ['East Melbourne', (-37.8161444, 144.9804594)],
       ['West Melbourne', (-37.8114504, 144.9253974)],
       ...,
       ['Wilsons Promontory', (-38.9572966, 146.28311)],
       ['Bringenbrong', (-36.1428573, 148.0518011)],
       ['Martins Creek', (-37.3709674, 148.6031889)]], dtype=object)

In [7]:
dist_matrix = []

for details in suburb_metadata:
    # prepare variables for locality details from list
    suburb_name = details[0]
    suburb_coordinates = details[1]

    for campus_code, campus_coord in zip(CAMPUSES_CODES, campus_coordinates):
        dist = geodesic(suburb_coordinates, campus_coord).km
        dist_matrix.append([
            suburb_name,
            campus_code,
            dist
        ])

dist_matrix

[['Melbourne', 'com_swinbourne_hawthorn', 6.649984473541059],
 ['Melbourne', 'com_swinbourne_croydon', 28.379475417800773],
 ['Melbourne', 'com_swinbourne_wantirna', 24.710974796885473],
 ['Melbourne', 'com_deakin_burwood', 13.456361465458011],
 ['Melbourne', 'com_deakin_geelong', 64.39456543829631],
 ['Melbourne', 'com_deakin_warrnambool', 222.11852549241067],
 ['Melbourne', 'com_federation_ballarat', 102.16499152436786],
 ['Melbourne', 'com_federation_churchill', 139.89216384604276],
 ['Melbourne', 'com_federation_berwick', 41.425241147081735],
 ['Melbourne', 'com_federation_wimmera', 285.2252691989631],
 ['Melbourne', 'com_latrobe_melbourne', 12.79396855142761],
 ['Melbourne', 'com_latrobe_bendigo', 129.13838945057185],
 ['Melbourne', 'com_latrobe_shepparton', 164.0078080736785],
 ['Melbourne', 'com_latrobe_wodonga', 252.82683335029523],
 ['Melbourne', 'com_latrobe_mildura', 473.25021200410777],
 ['Melbourne', 'com_monash_clayton', 18.656359403839378],
 ['Melbourne', 'com_monash_cau

## Cleaning

Round numbers and pivot for column attributes for each campus.

In [8]:
dist_df = pd.DataFrame(dist_matrix, columns=["locality", "campus", "distance_km"])

# round distances to nearest km
dist_df["distance_km"] = dist_df["distance_km"].round(0).astype(int)

# pivot table
dist_df = dist_df\
    .pivot(index="locality", columns="campus", values="distance_km")\
    .reset_index()

dist_df.shape
dist_df.head()

(3268, 39)

campus,locality,com_catholic_ballarat,com_catholic_melbourne,com_deakin_burwood,com_deakin_geelong,com_deakin_warrnambool,com_federation_ballarat,com_federation_berwick,com_federation_churchill,com_federation_wimmera,...,com_unimelb_dookie,com_unimelb_parkville,com_unimelb_shepparton,com_unimelb_southbank,com_unimelb_werribee,com_vicuni_footscray,com_vicuni_melbourne,com_vicuni_stalbans,com_vicuni_sunshine,com_vicuni_werribee
0,Abbeyard,267,183,175,249,404,267,172,150,401,...,184,184,140,185,209,189,188,194,193,209
1,Abbotsford,105,2,11,68,225,105,40,138,287,...,3,3,164,4,29,9,9,19,15,28
2,Aberfeldie,95,9,21,63,218,95,50,148,277,...,7,7,162,10,23,5,4,9,6,23
3,Aberfeldy,223,122,111,183,344,222,98,68,388,...,124,124,171,123,148,129,129,138,135,148
4,Acheron,168,88,83,154,306,167,92,133,316,...,89,89,103,90,113,93,93,97,96,113


## Save

In [13]:
dist_df.to_csv(os.path.join(DERIVED_DATA_PATH, "SuburbCampusDist.csv"), index=False)
