# Retired code below: too slow because of the large number of API calls

### Get minimum distance/time using OneMap API call

In [None]:
def route_api_call(routeType: str, start: str, end: str, metric: str, credentials : str,
                   date = '01-26-2023', time_start = '07:35:00', mode = 'TRANSIT', 
                   maxWalkDistance = 1000, numItineraries = 2, verbose=0, recursive_call=None,
                   timeout=30):
    '''
    Query the OneMap routing service for a single start/end pair and return one metric.

    ## Parameters
    routeType : str
        option between ['walk', 'drive', 'cycle', 'pt']
    start : str
        origin, inserted verbatim into the `start` query parameter
    end : str
        destination, inserted verbatim into the `end` query parameter
    metric : str
        'time' returns the travel time; any other value returns the distance
    credentials : str
        API token, sent as the `token` query parameter
    Below only applicable if routeType == 'pt'
        date : str MM-DD-YYYY
            default '01-26-2023'
        time_start : str HH:MM:SS
            default '07:35:00'
        mode : str
            choose between TRANSIT, BUS, RAIL
            default 'TRANSIT'
        maxWalkDistance : int
            max walking distance allowed, in meters
            default 1000
        numItineraries : int
            number of suggested routes (only the first itinerary is read)
            default 2
    verbose : int
        1 to print time and distance, 2 for the whole json response
        default 0
    recursive_call
        currently unused; kept so existing callers do not break
    timeout : float or tuple
        passed to requests.get so a stalled connection cannot block forever
        default 30

    ### Returns
        time (seconds) if metric == 'time', otherwise distance (metres).
        NOTE: for routeType == 'pt' no separate distance is computed; the
        "distance" value is the same number as the total time (sum of
        walkTime, transitTime and waitingTime of the first itinerary).

    ### Raises
        KeyError for an unsupported routeType (or a response missing the expected keys).
        requests exceptions for HTTP errors (raise_for_status) and timeouts.
    '''
    # No explicit rate limiting here: per the original author's note, server
    # latency (about 0.7s per call) already keeps us under 250 calls per minute.
    base_url = "https://developers.onemap.sg/privateapi/routingsvc/route"
    common_query = f"start={start}&end={end}&routeType={routeType}&token={credentials}"

    # Walk / drive / cycle share the same response shape
    if routeType in ('walk', 'drive', 'cycle'):
        response = requests.get(f"{base_url}?{common_query}", timeout=timeout)
        response.raise_for_status()
        data = response.json()
        route_summary = data['route_summary']
        time_taken = route_summary['total_time']
        distance = route_summary['total_distance']
        if verbose == 1:
            # Label the output after the mode actually requested
            label = {'walk': 'Walking', 'drive': 'Driving', 'cycle': 'Cycling'}[routeType]
            print(f'{label} time: {time_taken}')
            print(f'{label} distance: {distance}')

    # Public transport
    elif routeType == 'pt':
        pt_query = (f"&date={date}&time={time_start}&mode={mode}"
                    f"&maxWalkDistance={maxWalkDistance}&numItineraries={numItineraries}")
        response = requests.get(f"{base_url}?{common_query}{pt_query}", timeout=timeout)
        response.raise_for_status()
        data = response.json()

        # Only the first suggested itinerary is considered
        itinerary = data['plan']['itineraries'][0]
        summary = {key: itinerary[key] for key in ('walkTime', 'transitTime', 'waitingTime')}
        time_taken = sum(summary.values())
        # Existing behaviour kept: "distance" mirrors the total time for 'pt'
        distance = time_taken
        if verbose == 1:
            pt_walk_distance = itinerary['walkDistance']
            pprint(summary)
            print(f'Total public transport time: {time_taken}')
            print(f'Walk distance to public transport: {pt_walk_distance}')
    else:
        raise KeyError("Enter valid routeType, choose between 'walk','drive','cycle', 'pt'")

    # Dump the full response when asked for maximum verbosity
    if verbose == 2:
        pprint(data)

    return time_taken if metric == 'time' else distance

@timeit
@error_handler
def time_taken_to_station(geo_data_df, credentials, mrt_coordinates_dict=mrt_coordinates_dict,
                          n_nearest_stations=n_nearest_stations):
    '''
    Drive route_api_call() for one record: walking distance to the first
    listed station, followed by the public transport time to every listed station.

    geo_data_df is indexed with 'lat_long' and 'nearest_station_<i>' labels
    (iterate_function applies this function with axis=1, i.e. one row at a time).

    ### Returns a list: [walk distance to station 0, pt time to station 0, pt time to station 1, ...]
        Any falsy result from route_api_call() is stored as 0.
    '''
    origin = geo_data_df['lat_long']
    # One label per nearest station computed earlier (nearest_station_0, nearest_station_1, ...)
    station_labels = [f'nearest_station_{rank}' for rank in range(n_nearest_stations)]
    stations = geo_data_df[station_labels]

    results = []
    for rank, station_name in enumerate(stations):
        # Coordinates become a comma-separated string, e.g. "1.121231,102.123123"
        destination = ','.join(map(str, mrt_coordinates_dict[station_name]))

        # Walking distance is only requested for the first (closest) station
        if rank == 0:
            walk_distance = route_api_call('walk', origin, destination, 'distance', credentials)
            results.append(walk_distance or 0)

        # Public transport time is requested for every station
        pt_time = route_api_call('pt', origin, destination, 'time', credentials, numItineraries = 1)
        results.append(pt_time or 0)

    return results

Because of the large number of API calls, we split the data into batches and extract the results one batch at a time.

In [None]:
@error_handler
def split_df(geo_data_df: pd.DataFrame, interval: int=500):
    '''
    Cut geo_data_df into consecutive row slices of at most `interval` rows each.

    Prints how many slices were produced and returns them as a list, in order.
    '''
    total_rows = len(geo_data_df.index)
    splitted_df_list = [
        geo_data_df.iloc[offset:offset + interval, :]
        for offset in range(0, total_rows, interval)
    ]
    print(f'Number of dataframes split into: {len(splitted_df_list)}')
    return splitted_df_list

def iterate_function(splitted_df_list: list, results: list, func: "Callable[..., Any]", start: int, stop: int) -> None:
    '''
    Apply func row-wise to the batches whose position is in [start, stop) and
    append each batch's output to results (list) in place.

    ## Parameters
    splitted_df_list : list
        batches to process; each element must support .apply(func, ..., axis=1)
    results : list
        mutated in place, one appended element per processed batch
    func : callable
        called through .apply with the keyword arguments credentials and
        n_nearest_stations, both read from module scope
    start, stop : int
        half-open range of batch positions to process

    After every batch the user is prompted; answering 'n' (any case) stops early.
    Returns None.
    '''
    # The func annotation is a string on purpose: it is never evaluated, so this
    # definition does not depend on any typing name being imported.
    print(f'Writing to {id(results)} with {len(results)} elements already present')

    # Visit only the requested positions instead of scanning the whole list
    first = max(start, 0)
    last = min(stop, len(splitted_df_list))
    for index in range(first, last):
        splitted_df = splitted_df_list[index]
        time_distance = splitted_df.apply(func, credentials=credentials, n_nearest_stations=n_nearest_stations, axis=1)
        results.append(time_distance)
        cont = input(f'Done with index {index}, continue? Y/N \n')
        if cont.strip().lower() == 'n':
            break
    print(f'Length of updated results list: {len(results)}')

# Break the full dataset into batches (interval=400) so the API calls can be run batch by batch
splitted_df_list = split_df(geo_data_df, interval=400)

Run the code in batches, appending each batch's results to a list in place.

In [None]:
# get_token is defined elsewhere; presumably it reads the OneMap API token from this file - confirm
credentials=get_token("venv/onemap.json")
# Accumulator that iterate_function appends to in place (one element per processed batch)
time_distance_list = []
# Process every batch, from the first to the last
iterate_function(splitted_df_list, time_distance_list, time_taken_to_station, 0, len(splitted_df_list))

Put the DataFrame back together if all runs were successful.

In [None]:
# Every batch must have produced a result before the pieces are stitched together
if len(time_distance_list) != len(splitted_df_list):
    raise IndexError('Mismatch in length of starting and results list')

# Concatenate the per-batch results, then turn each record's values into one row
# (presumably each value is the list returned by time_taken_to_station - confirm)
time_distance = pd.DataFrame(pd.concat(time_distance_list).to_dict()).T
time_distance.columns = ['dist_to_station'] + [f'time_route_{x}' for x in range(n_nearest_stations)]
display(time_distance)

### Determine minimum time

In [None]:
# Temporary frame holding only the public transport time columns
temp_df = time_distance.drop(columns=['dist_to_station'])
# Row-wise minimum across the public transport times
min_pt_time = temp_df.min(axis='columns').rename('min_pt_time')
# geo_data_df is rebound to just these two columns; the unused columns
# ['lat_long', 'latitude', 'longitude', 'postal_code'] are left out
geo_data_df = pd.concat(
    [time_distance['dist_to_station'], min_pt_time],
    axis=1,
)
geo_data_df