In [1]:
import sys, time
from datetime import datetime
import pandas as pd
import numpy as np

In [2]:
np.random.seed(2025)

In [3]:
TZTravelTimesLoc = "./INITFMCC_TZtoTZTT_Y2056_WFY_Funded_Op2_001.CSV"
concordanceLoc = "./CC_Concordance_2056.csv"
pertFileLoc = "./INITFMCC_PertFac_Y2056_WFY_Funded_Op2_001.DAT"

In [4]:
concordance = pd.read_csv(concordanceLoc)
concordance

Unnamed: 0,TZ,FA,Postcode
0,1,Melbourne (C) - Inner,3000
1,2,Melbourne (C) - Inner,3000
2,3,Melbourne (C) - Inner,3000
3,4,Melbourne (C) - Inner,3000
4,5,Melbourne (C) - Inner,3000
...,...,...,...
6728,6980,NSW,NSW
6729,6980,QLD,QLD
6730,6975,SA,SA
6731,6975,WA,WA


In [5]:
externals = set(['ACT','NSW','QLD','SA','WA','NT'])
concordance = concordance.loc[(concordance['Postcode'] < '4') |\
                              (concordance['Postcode'].isin(externals))]
concordance

Unnamed: 0,TZ,FA,Postcode
0,1,Melbourne (C) - Inner,3000
1,2,Melbourne (C) - Inner,3000
2,3,Melbourne (C) - Inner,3000
3,4,Melbourne (C) - Inner,3000
4,5,Melbourne (C) - Inner,3000
...,...,...,...
6728,6980,NSW,NSW
6729,6980,QLD,QLD
6730,6975,SA,SA
6731,6975,WA,WA


In [38]:
pd.read_csv(TZTravelTimesLoc, low_memory=False)

Unnamed: 0,From_TZ,To_TZ,Travel_Time
0,1.0,1.0,0.23
1,1.0,2.0,0.59
2,1.0,3.0,0.57
3,1.0,4.0,0.59
4,1.0,5.0,1.92
...,...,...,...
48734356,6981.0,6977.0,954.98
48734357,6981.0,6978.0,639.37
48734358,6981.0,6979.0,526.59
48734359,6981.0,6980.0,473.90


In [19]:
def getTravelTime(row):
    travelTimeArray = list(map(float, row['Travel Time'].split(';')))
    if len(travelTimeArray) < 10:
        percentileAdjustment = 0
    else:
        percentileAdjustment = min(2 * max(travelTimeArray), 50)
    return np.percentile(travelTimeArray, 100 - percentileAdjustment)

In [21]:
chunksize = 10 ** 6
postcodeTravelTimes = pd.DataFrame()

for TZTravelTimesCurrent in pd.read_csv(TZTravelTimesLoc, chunksize=chunksize):
    TZTravelTimesCurrent = TZTravelTimesCurrent.rename(columns={
        ' To_TZ': 'To_TZ',
        ' Travel_Time': 'Travel Time'
    })

    TZTravelTimesCurrent = TZTravelTimesCurrent.merge(
        concordance, left_on='From_TZ', right_on='TZ', how='left'
    ).filter(['From_TZ', 'Postcode', 'To_TZ', 'Travel Time']).rename(columns={'Postcode': 'From Postcode'})

    TZTravelTimesCurrent = TZTravelTimesCurrent.merge(
        concordance, left_on='To_TZ', right_on='TZ', how='left'
    ).filter(['From_TZ', 'From Postcode', 'To_TZ', 'Postcode', 'Travel Time']).rename(columns={'Postcode': 'To Postcode'})

    TZTravelTimesCurrent['Travel Time'] = TZTravelTimesCurrent['Travel Time'].astype('str')

    postcodeTravelTimesCurrent = TZTravelTimesCurrent.groupby(['From Postcode', 'To Postcode'])['Travel Time']\
        .apply(';'.join).reset_index()

    postcodeTravelTimesCurrent['Travel Time Max'] = postcodeTravelTimesCurrent\
        .apply(lambda row: getTravelTime(row), axis=1)

    postcodeTravelTimesCurrent = postcodeTravelTimesCurrent\
        .filter(['From Postcode', 'To Postcode', 'Travel Time Max'])\
        .rename(columns={'Travel Time Max': 'Travel Time'})

    postcodeTravelTimes = pd.concat([postcodeTravelTimes, postcodeTravelTimesCurrent], ignore_index=True)


In [24]:
postcodeTravelTimes

Unnamed: 0,From Postcode,To Postcode,Travel Time
0,3000,3000,4.600000
1,3000,3002,6.430452
2,3000,3003,6.651228
3,3000,3004,9.514688
4,3000,3005,6.794320
...,...,...,...
785234,WA,NSW,481.730000
785235,WA,NT,13.780000
785236,WA,QLD,481.730000
785237,WA,SA,13.780000


In [25]:
postcodeTravelTimes = postcodeTravelTimes.groupby(['From Postcode','To Postcode'])\
                        .agg({'Travel Time':'max'}).reset_index()

In [26]:
postcodeTravelTimes

Unnamed: 0,From Postcode,To Postcode,Travel Time
0,3000,3000,4.972154
1,3000,3002,7.253280
2,3000,3003,7.078512
3,3000,3004,10.341632
4,3000,3005,7.100000
...,...,...,...
385636,WA,NSW,481.730000
385637,WA,NT,13.780000
385638,WA,QLD,481.730000
385639,WA,SA,13.780000


In [27]:
postcodeTravelTimes.loc[postcodeTravelTimes['From Postcode'] ==\
                        postcodeTravelTimes['To Postcode'], 'Travel Time'] = 0

In [29]:
def getClosestTime(row, allTTs):
    routeMatches = allTTs.loc[(allTTs['From Postcode'] == row['From Postcode']) &\
                              (allTTs['Travel Time'] != 0)]
    return(min(routeMatches['Travel Time']))

postcodeTravelTimes.loc[postcodeTravelTimes['Travel Time'] == 0, 'Travel Time'] =\
        postcodeTravelTimes.loc[postcodeTravelTimes['Travel Time'] == 0]\
    .apply(lambda row: getClosestTime(row, postcodeTravelTimes), axis = 1)


In [31]:
with open(pertFileLoc, 'r') as pertFile:
    pertFactor = float(pertFile.readline())

In [39]:
pertFactor

4.0

In [33]:
postcodeTravelTimesPerturbed = postcodeTravelTimes

perturbations = np.random.rand(len(postcodeTravelTimesPerturbed))\
                    * pertFactor * 2 - pertFactor
postcodeTravelTimesPerturbed.loc[postcodeTravelTimesPerturbed['Travel Time'] + perturbations > 0,'Travel Time'] = postcodeTravelTimesPerturbed['Travel Time'] +\
                    perturbations


In [34]:
postcodeTravelTimesPerturbed

Unnamed: 0,From Postcode,To Postcode,Travel Time
0,3000,3000,3.163905
1,3000,3002,10.356094
2,3000,3003,10.539357
3,3000,3004,9.906177
4,3000,3005,6.205884
...,...,...,...
385636,WA,NSW,478.342453
385637,WA,NT,9.880946
385638,WA,QLD,479.196594
385639,WA,SA,17.472153


In [35]:
postcodeTravelTimes

Unnamed: 0,From Postcode,To Postcode,Travel Time
0,3000,3000,3.163905
1,3000,3002,10.356094
2,3000,3003,10.539357
3,3000,3004,9.906177
4,3000,3005,6.205884
...,...,...,...
385636,WA,NSW,478.342453
385637,WA,NT,9.880946
385638,WA,QLD,479.196594
385639,WA,SA,17.472153
