In [41]:
import datetime
from pprint import pprint
from typing import Optional

from auxiliary_functions.api_calls_classes import *
from auxiliary_functions.parse_api_calls import *
from auxiliary_functions.useful_functions import *

# pandas / numpy are deliberately imported after the wildcard imports so that
# `pd` and `np` always refer to these modules, whatever the wildcards export.
import pandas as pd
import numpy as np

# Today's date formatted as dd-mm-YYYY; strftime already returns a string.
version = datetime.datetime.today().strftime('%d-%m-%Y')

## Setting the parameters for the entire process

In [42]:
# ISO-2 code of the country whose search logs are processed.
country_code = 'PT'
# Parsed search logs to geocode. The country suffix of the file name follows
# `country_code`, so changing the country above also changes the file that is read.
# NOTE(review): the date in the file name is fixed to the 27-10-2022 parsed data.
read_path = f'/workspace/main_folder/parsed_data/parsed_responses_27-10-2022_{country_code}.parquet'

## Execute...

In [43]:
# Destination CSV for the geocoded results: results folder + country code + run date.
save_path = (
    '/workspace/main_folder/results/'
    f'{country_code}_search_logs_with_coordinates_{version}.csv'
)

## Defining the API calls process

In [44]:
def apply_tt_api(x: pd.Series, api_instance, limit: int = 5, sleep: int = 1, debug: bool = False) -> dict:
    """Send the geocoding request for a single DataFrame row through the given API object.

    :param x: Row of the DataFrame to geocode. It must provide at least the 'searched_query' and 'country' entries.
    :type x: pd.Series
    :param api_instance: Object exposing ``call_api(params, call_type=...)``. Generally, api_instance = TomtomApi().
    :type api_instance: TomtomApi
    :param limit: Forwarded to the API object as 'limit' (max number of responses requested). Defaults to 5.
    :type limit: int, optional
    :param sleep: Forwarded to the API object as 'sleep' (presumably the pause, in seconds, between two calls). Defaults to 1.
    :type sleep: int, optional
    :param debug: When True, print the searched query before the call is made. Defaults to False.
    :type debug: bool, optional
    :return: Whatever ``api_instance.call_api`` returns for a 'geocode' call with this row's query.
    :rtype: dict
    """
    # Both entries are looked up before anything else so that a malformed row fails early.
    query_text, country_value = x['searched_query'], x['country']

    if debug:
        print(f'collected searched_query: {query_text}')

    request_params = {
        'address': query_text,
        'sleep': sleep,
        'limit': limit,
        'country': country_value,
    }
    return api_instance.call_api(request_params, call_type='geocode')

In [45]:
def full_function_make_api_calls(
    path_to_read: str, path_to_save: str, country_iso2: str, limit: int = 5, sleep: int = 1, threshold: float = 0.85, test_size: Optional[int] = None
) -> pd.DataFrame:
    """Read search logs from a parquet file, geocode every row with the TomTom API and return the parsed responses.

    Rows whose 'method_name' is 'search 2 nearbySearch' are skipped. The parsed responses can optionally be
    written to a CSV file.

    :param path_to_read: Path of the parquet file to read the search logs from.
    :type path_to_read: str
    :param path_to_save: Path of the CSV file to write the results to. Pass None or '' to skip saving. This argument has no default value.
    :type path_to_save: str or None
    :param country_iso2: Country in ISO-2 format. Used as the 'country' of every row when the file has no 'country' column; when the column exists, the per-row values are used instead.
    :type country_iso2: str
    :param limit: Max number of responses you want from the TT API, defaults to 5
    :type limit: int, optional
    :param sleep: Time to wait between queries to the API server, defaults to 1
    :type sleep: int, optional
    :param threshold: Minimum 'confidence' a parsed response needs in order to be kept, defaults to 0.85. It is also forwarded to ``api_calls_parser``. When None, no rows are dropped here.
    :type threshold: float, optional
    :param test_size: Number of rows to sample from the file (fixed random state), capped at the number of rows available. For testing purposes only; None or 0 (the default is None) processes the whole file.
    :type test_size: int or None
    :return: One row per parsed API response, filtered by ``threshold``. The columns are whatever ``api_calls_parser`` produces (a 'confidence' column is required for the filter). An empty DataFrame without columns is returned, and no file is written, when there is nothing to geocode.
    :rtype: pd.DataFrame
    """
    api = TomtomApi()

    # Read the file once; the optional sampling (test runs only) is applied afterwards.
    df = pd.read_parquet(path_to_read).reset_index(drop=True)
    if test_size:
        # Cap the sample so that a test_size larger than the file does not raise in DataFrame.sample.
        df = df.sample(min(test_size, len(df)), random_state=101)

    df = df[df['method_name'] != 'search 2 nearbySearch']

    # apply_tt_api takes the country from each row; fall back to the requested
    # country when the input file does not carry that column at all.
    if 'country' not in df.columns:
        df = df.assign(country=country_iso2)

    if df.empty:
        # DataFrame.apply(axis=1) on an empty frame does not yield a Series of parsed rows,
        # so the conversion below would fail; there is nothing to geocode or save anyway.
        return pd.DataFrame()

    def _geocode_row(row: pd.Series):
        """Call the API for one row and parse its response (row.name is the row's index label)."""
        response = apply_tt_api(row, api, limit=limit, sleep=sleep)
        return api_calls_parser(response, row.name, row['searched_query'], threshold)

    parsed_rows = df.apply(_geocode_row, axis=1)

    final_df = pd.DataFrame(parsed_rows.tolist())

    if threshold is not None:
        final_df = final_df[final_df['confidence'] >= threshold]

    if path_to_save not in ['', None]:
        final_df.to_csv(path_to_save, sep=',', index=False)

    return final_df

In [47]:
# Run the whole process with the parameters defined above. The three leading
# positional arguments are path_to_read, path_to_save and country_iso2.
results = full_function_make_api_calls(
    read_path, save_path, country_code,
    limit=1, sleep=1, threshold=0.85, test_size=None,
)

## Visualizing an example

Below is an example of what your results will look like after the entire process. Once you have run this final process, you may export the CSV file to your preferred folder for analysis. The path shown below is the one we used to create this notebook; if you remove that file or change its location, the example below will no longer work.

In [40]:
# Display the example results file (fixed, dated path used when this notebook was created).
pd.read_csv(
    '/workspace/main_folder/results/PT_search_logs_with_coordinates_27-10-2022.csv',
    sep=',',
)

Unnamed: 0,original_formatted_query,confidence,lat,lon,query_type,id,bounding_box,searched_query,match
0,rua goncalo velho 34 quarteira,1.0,37.06914,-8.10607,Point Address,55538,,"RUA GONCALO VELHO 34, QUARTEIRA",1
1,largo da revista militar 4 1500 909 lisboa,0.936287,38.75301,-9.18964,Street,13317,,"Largo da Revista Militar,4,1500-909,lisboa",1
2,praca de soror mariana alcoforado 4 beja,0.958894,38.00794,-7.8544,Point Address,67534,,"PRACA DE SOROR MARIANA ALCOFORADO 4, BEJA",1
3,rua do machado 62 lagoa,1.0,37.7472,-25.56574,Address Range,66840,,"RUA DO MACHADO 62, LAGOA",1
4,2470 315,1.0,31.95729,-94.66532,Address Range,9305,,2470-315,1
5,rua fernando namora pedroucos,1.0,41.18155,-8.59073,Street,79975,,Rua Fernando Namora Pedroucos,1
6,avenida da igreja 17 4560 145 penafiel,1.0,41.19527,-8.3159,Point Address,95404,,"avenida da Igreja ,17,4560-145,penafiel",1
7,padre manuel mario da silva 28 tabuadelo,0.957941,41.39827,-8.27945,Point Address,46332,,"PADRE MANUEL MARIO DA SILVA 28, TABUADELO",1
8,rua da infancia lt 65 muge,0.875935,39.09994,-8.70868,Point Address,38533,,"RUA DA INFANCIA LT 65, MUGE",1
9,hamerstraat bussum 1402 pt,1.0,52.26934,5.17041,Street,2964,,Hamerstraat Bussum 1402 PT,1
