In [1]:
from ortools.constraint_solver import routing_enums_pb2
from ortools.constraint_solver import pywrapcp
import networkx as nx
import random
import numpy as np
from typing import Dict, List, Tuple
import regex
import pandas as pd

In [162]:
# Define the problem data
distance_matrix = [
    [0, 5, 8, 3, 4, 5, 7, 6, 9, 11],
    [5, 0, 2, 5, 9, 10, 12, 11, 8, 6],
    [8, 2, 0, 8, 12, 13, 15, 14, 11, 9],
    [3, 5, 8, 0, 7, 8, 10, 9, 12, 14],
    [4, 9, 12, 7, 0, 1, 3, 2, 5, 7],
    [5, 10, 13, 8, 1, 0, 2, 1, 4, 6],
    [7, 12, 15, 10, 3, 2, 0, 1, 2, 4],
    [6, 11, 14, 9, 2, 1, 1, 0, 3, 5],
    [9, 8, 11, 12, 5, 4, 2, 3, 0, 2],
    [11, 6, 9, 14, 7, 6, 4, 5, 2, 0]
]

In [182]:
from ortools.constraint_solver import routing_enums_pb2
from ortools.constraint_solver import pywrapcp

class TSPSolver:
    def __init__(self, distance_matrix):
        self.distance_matrix = distance_matrix
        self.num_vehicles = 1
        self.depot = 0

    def solve(self,max_time=3):
        """Solve the TSP problem and return the solution."""
        # Create the routing index manager.
        manager = pywrapcp.RoutingIndexManager(len(self.distance_matrix),
                                               self.num_vehicles, self.depot)

        # Create Routing Model.
        routing = pywrapcp.RoutingModel(manager)

        # Create and register a transit callback.
        def distance_callback(from_index, to_index):
            """Returns the distance between the two nodes."""
            from_node = manager.IndexToNode(from_index)
            to_node = manager.IndexToNode(to_index)
            return self.distance_matrix[from_node][to_node]

        transit_callback_index = routing.RegisterTransitCallback(distance_callback)

        # Define cost of each arc.
        routing.SetArcCostEvaluatorOfAllVehicles(transit_callback_index)

        # Set the solver parameters.
        search_parameters = pywrapcp.DefaultRoutingSearchParameters()
        search_parameters.local_search_metaheuristic = (
            routing_enums_pb2.LocalSearchMetaheuristic.GUIDED_LOCAL_SEARCH)
        search_parameters.time_limit.seconds = max_time

        # Solve the problem.
        solution = routing.SolveWithParameters(search_parameters)

        # Return the solution.
        if solution:
            return self._get_solution(manager, routing, solution)

    @staticmethod
    def _get_solution(manager, routing, solution):
        """Return the solution as a dictionary."""
        index = routing.Start(0)
        route = [manager.IndexToNode(index)]
        distance = 0
        while not routing.IsEnd(index):
            previous_index = index
            index = solution.Value(routing.NextVar(index))
            route.append(manager.IndexToNode(index))
            distance += routing.GetArcCostForVehicle(previous_index, index, 0)
        return {
            "route": route,
            "distance": distance
        }

In [215]:
import numpy as np

class MatrixSplitter:
    def __init__(self, M, names, num_matrices):
        self.M = np.array(M)
        self.names = names
        self.num_matrices = num_matrices
        self.matrices = None
        self.name_to_matrix = None
        self.name_dict = {i: [] for i in range(0, self.num_matrices)}

    def split_matrix(self):
        n = len(self.M)
        total_distance = np.sum(self.M)
        min_sum_distances = float('inf')
        matrices = [np.zeros((0, 0)) for _ in range(self.num_matrices)]
        name_to_matrix = {}

        # Initialize division randomly
        division = np.random.randint(self.num_matrices, size=n)

        # Gradient descent loop
        for i in range(5000):
            # Calculate the sum of distances for each matrix
            matrices_distances = []
            for matrix_index in range(self.num_matrices):
                matrix_indices = np.where(division == matrix_index)[0]
                matrix_distance = np.sum(self.M[np.ix_(matrix_indices, matrix_indices)]) - np.sum(np.diag(self.M[np.ix_(matrix_indices, matrix_indices)]))
                matrices_distances.append(matrix_distance)

            # Calculate the cost function
            cost = total_distance - sum(matrices_distances)

            # Update the division by moving one node to a different matrix and calculate the new cost
            new_division = division.copy()
            for node_index in range(n):
                old_matrix_index = division[node_index]
                new_matrix_index = (old_matrix_index + 1) % self.num_matrices
                new_division[node_index] = new_matrix_index

                new_matrices_distances = []
                for matrix_index in range(self.num_matrices):
                    matrix_indices = np.where(new_division == matrix_index)[0]
                    matrix_distance = np.sum(self.M[np.ix_(matrix_indices, matrix_indices)]) - np.sum(np.diag(self.M[np.ix_(matrix_indices, matrix_indices)]))
                    new_matrices_distances.append(matrix_distance)

                new_cost = total_distance - sum(new_matrices_distances)

                if new_cost < cost:
                    division = new_division
                    cost = new_cost

            # Check if the sum of distances in all matrices is less than the minimum found so far
            if sum(matrices_distances) < min_sum_distances:
                min_sum_distances = sum(matrices_distances)

                # Create the new matrices based on the new indices assignments
                for matrix_index in range(self.num_matrices):
                    matrix_indices = np.where(division == matrix_index)[0]
                    matrices[matrix_index] = self.M[np.ix_(matrix_indices, matrix_indices)]

                    # Map names to matrices
                    self.name_dict[matrix_index] = [self.names[name_index] for name_index in matrix_indices]
                    for name_index in matrix_indices:
                        name_to_matrix[self.names[name_index]] = matrix_index

        self.matrices = [matrix.tolist() for matrix in matrices]
        self.name_to_matrix = name_to_matrix

        
        return self.matrices, self.name_to_matrix, self.name_dict


In [216]:
%%time
names = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J"]
nr_of_routes = 2
splitter = MatrixSplitter(distance_matrix,names,nr_of_routes)
matrixes,names,name_dict = splitter.split_matrix()

time_per_run = 3




CPU times: user 2.51 s, sys: 8.08 ms, total: 2.52 s
Wall time: 2.51 s


In [217]:
name_dict

{0: ['A', 'F', 'J'], 1: ['B', 'C', 'D', 'E', 'G', 'H', 'I']}

In [192]:
matrixes

[[[0, 9, 10, 12, 11, 8, 6],
  [9, 0, 1, 3, 2, 5, 7],
  [10, 1, 0, 2, 1, 4, 6],
  [12, 3, 2, 0, 1, 2, 4],
  [11, 2, 1, 1, 0, 3, 5],
  [8, 5, 4, 2, 3, 0, 2],
  [6, 7, 6, 4, 5, 2, 0]],
 [[0, 8, 3], [8, 0, 8], [3, 8, 0]]]

In [189]:
name_dict = {name: [] for name in names}
name_dict

{'A': [],
 'B': [],
 'C': [],
 'H': [],
 'I': [],
 'D': [],
 'E': [],
 'F': [],
 'G': [],
 'J': []}

In [186]:
matrixes

[[[0, 5, 8, 3], [5, 0, 2, 5], [8, 2, 0, 8], [3, 5, 8, 0]],
 [[0, 1, 3, 2, 5, 7],
  [1, 0, 2, 1, 4, 6],
  [3, 2, 0, 1, 2, 4],
  [2, 1, 1, 0, 3, 5],
  [5, 4, 2, 3, 0, 2],
  [7, 6, 4, 5, 2, 0]]]

In [187]:
import requests
import json

# Define the API endpoint
endpoint = 'https://maps.googleapis.com/maps/api/directions/json'

# Define the API parameters
params = {
    'origin': 'placeA',
    'destination': 'placeB',
    'mode': 'transit',
    'departure_time': 'now',
    'key': 'YOUR_API_KEY'
}

# Send the API request
response = requests.get(endpoint, params=params)

# Parse the JSON response
data = json.loads(response.text)

# Extract the relevant information from the response
best_route = data['routes'][0]['legs'][0]

# Print the relevant information
print(f"Distance: {best_route['distance']['text']}")
print(f"Duration: {best_route['duration']['text']}")
print("Steps:")
for step in best_route['steps']:
    print(f"{step['html_instructions']}")

{'A': 0,
 'B': 0,
 'C': 0,
 'H': 1,
 'I': 1,
 'D': 0,
 'E': 1,
 'F': 1,
 'G': 1,
 'J': 1}

In [279]:
import googlemaps

# Insira sua chave de API do Google Maps aqui
gmaps = googlemaps.Client(key='AIzaSyDV93ChQ2_YJYGaLKy1elcyexcVj7ggzK4')

# Lista de lugares em Lisboa
places = ["hotel tivoli avenida da liberdade","centro comercial colombo benfica","pcdiga benfica","praça do comércio", "baixa chiado", "praça marques de pombal", "parque do monsanto", "estadio da luz", "elevador de santa justa", "bairro alto"]

# Obter a matriz de distância entre os lugares
matrix = gmaps.distance_matrix(places, places, mode='transit')

In [280]:
matrix

{'destination_addresses': ['Av. da Liberdade 185, 1269-050 Lisboa, Portugal',
  'Av. Lusíada, 1500-392 Lisboa, Portugal',
  'R. João de Freitas Branco 34B, 1500-714 Lisboa, Portugal',
  '1100-148 Lisbon, Portugal',
  'Baixa-Chiado, 1200-443 Lisboa, Portugal',
  'Praça Marquês de Pombal, Lisboa, Portugal',
  'Parque Florestal de Monsanto, Lisboa, Portugal',
  'Av. Eusébio da Silva Ferreira, 1500-313 Lisboa, Portugal',
  'R. do Ouro, 1150-060 Lisboa, Portugal',
  'Bairro Alto, Lisbon, Portugal'],
 'origin_addresses': ['Av. da Liberdade 185, 1269-050 Lisboa, Portugal',
  'Av. Lusíada, 1500-392 Lisboa, Portugal',
  'R. João de Freitas Branco 34B, 1500-714 Lisboa, Portugal',
  '1100-148 Lisbon, Portugal',
  'Baixa-Chiado, 1200-443 Lisboa, Portugal',
  'Praça Marquês de Pombal, Lisboa, Portugal',
  'Parque Florestal de Monsanto, Lisboa, Portugal',
  'Av. Eusébio da Silva Ferreira, 1500-313 Lisboa, Portugal',
  'R. do Ouro, 1150-060 Lisboa, Portugal',
  'Bairro Alto, Lisbon, Portugal'],
 'row

In [281]:
import numpy as np

def response_to_matrix(response):
    """
    Converts the response from the Maps API to a distance matrix.
    
    Args:
    - response: A dictionary representing the response from the Maps API.
    
    Returns:
    - A NumPy array representing the distance matrix.
    """
    n = len(response['destination_addresses'])
    matrix = np.zeros((n, n))
    for i in range(n):
        for j in range(n):
            if i == j:
                matrix[i][j] = 0
            elif response['rows'][i]['elements'][j]['status'] == 'OK':
                matrix[i][j] = response['rows'][i]['elements'][j]['distance']['value']
            else:
                matrix[i][j] = np.inf
    return matrix

In [282]:
response_to_matrix(matrix)

array([[   0., 6115., 5613., 2484., 1351., 1067., 7045., 5815., 1556.,
        1456.],
       [6071.,    0., 1365., 8264., 7131., 5471., 1365.,  807., 7336.,
        7236.],
       [5586., 1382.,    0., 7780., 6647., 4986., 1624.,  404., 6852.,
        6751.],
       [2534., 8359., 7857.,    0., 1140., 3312., 8582., 8059., 1345.,
        2358.],
       [1394., 7219., 6717., 1132.,    0., 2172., 7144., 6919.,  194.,
         840.],
       [ 602., 5473., 4971., 3199., 2066.,    0., 6403., 5173., 2271.,
        2171.],
       [7028., 1658., 2128., 9222., 7144., 5179.,    0., 1960., 6950.,
        7136.],
       [5788.,  807.,  404., 7982., 6849., 5188., 2300.,    0., 7054.,
        6953.],
       [1607., 7432., 6930.,  969.,  194., 2385., 6950., 7132.,    0.,
         601.],
       [1458., 7283., 6781., 1662.,  503., 2236., 6873., 6983.,  691.,
           0.]])

In [251]:
distance_matrix = response_to_matrix(matrix)

In [252]:
import pickle
with open('/Users/rennanaraujo/routing_app/dist_matrix.pickle', 'wb') as f:
    pickle.dump(distance_matrix, f)

In [230]:
# Mostrar a matriz de distância
for i, row in enumerate(matrix):
    print(lugares[i])
    for j, element in enumerate(row['elements']):
        print(lugares[j] + ':', element['distance']['text'])
    print('\n')

praça do comércio
praça do comércio: 1 m
baixa chiado: 1.8 km


KeyError: 'distance'

In [271]:
import openrouteservice as ors

# list of places
places = ["hotel tivoli avenida da liberdade","centro comercial colombo benfica","pcdiga benfica","praça do comércio", "baixa chiado", "praça marques de pombal", "parque do monsanto", "estadio da luz", "elevador de santa justa", "bairro alto"]

# initialize the OpenRouteService client with the API key
client = ors.Client(key='5b3ce3597851110001cf62480b872cb8beb24ecca5c960be5fb43fdf')

# get the coordinates of the places
coordinates = []
for place in places:
    # use the geocoding service to get the coordinates of the place
    response = client.pelias_search("portugal, Lisbon, " + place, size=1)
    # add the coordinates to the list of coordinates
    coordinates.append(response['features'][0]['geometry']['coordinates'])

In [273]:
coordinates

[[-47.46636, -5.531327],
 [-92.904918, 17.981686],
 [-8.265089, 39.228504],
 [-42.157604, -11.258839],
 [-8.265089, 39.228504],
 [-8.653171, 40.638484],
 [-9.154441, 38.731298],
 [-8.265089, 39.228504],
 [-8.265089, 39.228504],
 [-7.489154, 37.670784]]

In [None]:
# initialize the distance matrix
distances_matrix = []

# calculate the distance between each pair of coordinates
for i in range(len(coordinates)):
    row = []
    for j in range(len(coordinates)):
        if i == j:
            # if it's the same point, the distance is 0
            row.append(0)
        else:
            # use the routing service to calculate the distance between the coordinates
            response = client.directions(coordinates[i], coordinates[j], profile='driving-car', format='geojson')
            # add the distance to the distance matrix
            row.append(response['routes'][0]['segments'][0]['distance'])
    # add the row to the distance matrix
    distances_matrix.append(row)

print(distances_matrix)

In [260]:
coordinates

[[-47.46636, -5.531327],
 [-9.203051, 38.751013],
 [-8.820913, 39.754138],
 [-42.157604, -11.258839],
 [-9.14059, 38.710601],
 [-8.653171, 40.638484],
 [-9.154441, 38.731298],
 [-9.185923, 38.754],
 [-8.834091, 41.70166],
 [-9.145386, 38.713183]]

In [267]:
# Get the distance matrix using openrouteservice
matrix = client.distance_matrix(coordinates,profile="foot-walking")

In [265]:
matrix

{'durations': [[0.0,
   None,
   None,
   1015205.88,
   None,
   None,
   None,
   None,
   None,
   None],
  [None, 0.0, None, None, None, None, None, None, None, None],
  [None, None, 0.0, None, None, None, None, None, None, None],
  [1015205.88, None, None, 0.0, None, None, None, None, None, None],
  [None, None, None, None, 0.0, None, None, None, None, None],
  [None, None, None, None, None, 0.0, None, None, None, None],
  [None, None, None, None, None, None, 0.0, None, None, None],
  [None, None, None, None, None, None, None, 0.0, None, None],
  [None, None, None, None, None, None, None, None, 0.0, None],
  [None, None, None, None, None, None, None, None, None, 0.0]],
 'destinations': [{'location': [-47.46636, -5.531327],
   'snapped_distance': 0.01},
  {'location': [-9.202947, 38.751192], 'snapped_distance': 21.86},
  {'location': [-8.820758, 39.754088], 'snapped_distance': 14.36},
  {'location': [-42.157604, -11.258839], 'snapped_distance': 0.01},
  {'location': [-9.140602, 38.

In [283]:
import requests

In [321]:
rs = requests.post("http://127.0.0.1:5000/trace_route",json={
    "places":["hotel tivoli avenida da liberdade","centro comercial colombo benfica","pcdiga benfica","praça do comércio", "baixa chiado", "praça marques de pombal", "parque do monsanto", "estadio da luz", "elevador de santa justa", "bairro alto"],
    "nr_of_routes":1,
    "max_total_time":6
})

In [322]:
rs.json()

[{'distance': 14297,
  'route_indexes': [0, 8, 3, 4, 9, 6, 1, 7, 2, 5, 0],
  'route_names': ['hotel tivoli avenida da liberdade',
   'elevador de santa justa',
   'praça do comércio',
   'baixa chiado',
   'bairro alto',
   'parque do monsanto',
   'centro comercial colombo benfica',
   'estadio da luz',
   'pcdiga benfica',
   'praça marques de pombal',
   'hotel tivoli avenida da liberdade']}]

In [313]:
places = ["hotel tivoli avenida da liberdade","centro comercial colombo benfica","pcdiga benfica","praça do comércio", "baixa chiado", "praça marques de pombal", "parque do monsanto", "estadio da luz", "elevador de santa justa", "bairro alto"]

In [298]:
places

['hotel tivoli avenida da liberdade',
 'centro comercial colombo benfica',
 'pcdiga benfica',
 'praça do comércio',
 'baixa chiado',
 'praça marques de pombal',
 'parque do monsanto',
 'estadio da luz',
 'elevador de santa justa',
 'bairro alto']

In [300]:
route = []
for i in rs.json()[0]["route_indexes"]:
    route.append(places[i])

In [302]:
route = 
route

['hotel tivoli avenida da liberdade',
 'elevador de santa justa',
 'praça do comércio',
 'baixa chiado',
 'bairro alto',
 'parque do monsanto',
 'centro comercial colombo benfica',
 'estadio da luz',
 'pcdiga benfica',
 'praça marques de pombal',
 'hotel tivoli avenida da liberdade']

In [375]:
import requests
import json
import pandas as pd
URL = 'https://maps.googleapis.com/maps/api/place/textsearch/json'

def generate_places_database(city):
    params = {'query': f'tourist spots in {city}', 'key': "AIzaSyDV93ChQ2_YJYGaLKy1elcyexcVj7ggzK4"}
    
    # Make a request to the Places API
    response = requests.get(URL, params=params)

    # Parse the JSON response
    data = json.loads(response.text)
    results = data['results']
    response = {}
    for result in results:
        name = result['name']
        address = result['formatted_address']
        lat = result['geometry']['location']['lat']
        lng = result['geometry']['location']['lng']
        response.update({name:{
            "address":address,
            "lat":lat,
            "lng":lng,
            "photo_references":[photo['photo_reference'] for photo in result["photos"] if len(photo) > 0]
                              }})
        
    df = pd.DataFrame.from_dict(response, orient='index')
    df = df.reset_index()
    df.columns = ["place_names","address","lat","lng","photo_references"]
    return df



In [383]:
t_spot = pd.read_parquet("data/touristic_spots.parquet")
t_spot

Unnamed: 0,city,place_names,address,lat,lng,photo_references
0,Lisbon,Belém Tower,"Av. Brasília, 1400-038 Lisboa, Portugal",38.691584,-9.215977,[AfLeUgOLAE8j2TALSlI7W8I4ZrxsYE6-mJa_2IkCE8CFC...
1,Lisbon,Oceanário de Lisboa,"Esplanada Dom Carlos I s/nº, 1990-005 Lisboa, ...",38.763543,-9.093742,[AfLeUgN1Ip3rQG0Hya-Y9M3oiqG0ElRut2KEUulXipHfv...
2,Lisbon,Castelo de S. Jorge,"R. de Santa Cruz do Castelo, 1100-129 Lisboa, ...",38.713909,-9.133476,[AfLeUgPY-7k2fwsevI4IuUHpohhAoFPpHI9QjbELt8L7s...
3,Lisbon,Arco da Rua Augusta,"R. Augusta 2, 1100-053 Lisboa, Portugal",38.708445,-9.136824,[AfLeUgO7b6NjeDApEyPPTXiZ_6BJmd8mNd4kA_v3ZIslD...
4,Lisbon,Padrão dos Descobrimentos,"Av. Brasília, 1400-038 Lisboa, Portugal",38.693597,-9.205711,[AfLeUgNUo7Y1gjaKmVtLHjRz9aqzuM8aN27j_obpNIYCD...
...,...,...,...,...,...,...
175,Coimbra,Chapel of São Miguel,"R. Dr. Guilherme Moreira 10, 3000-210 Coimbra,...",40.207416,-8.426452,[AfLeUgPtejx3CkMl4ZDwPEqS4HybprtnTm-joR1OWKlBQ...
176,Coimbra,Iron Gate,"R. de São Pedro 20, 3000-515 Coimbra, Portugal",40.207801,-8.425647,[AfLeUgNi5DZM00RRlttuVM4T5b3WfWcBuC0E31k10wD6R...
177,Coimbra,Jardim Botânico da Universidade de Coimbra,"CC Martim de Freitas, 3000-456 Coimbra, Portugal",40.205491,-8.420743,[AfLeUgM7gBpRpAd3CqLUAj7gYCn5xOclbeX7QjL7Sw3gE...
178,Coimbra,Sereia Garden,"Praça República, 3000-019 Coimbra, Portugal",40.209408,-8.418073,[AfLeUgMxWnC7XpmsXjddWdwgc_y7QdbDMQ_cWQZgQE_P9...


In [385]:
results

[{'business_status': 'OPERATIONAL',
  'formatted_address': 'Av. Brasília, 1400-038 Lisboa, Portugal',
  'geometry': {'location': {'lat': 38.6915837, 'lng': -9.215977299999999},
   'viewport': {'northeast': {'lat': 38.69409094999999,
     'lng': -9.211399349999999},
    'southwest': {'lat': 38.69074795, 'lng': -9.220365150000001}}},
  'icon': 'https://maps.gstatic.com/mapfiles/place_api/icons/v1/png_71/generic_business-71.png',
  'icon_background_color': '#13B5C7',
  'icon_mask_base_uri': 'https://maps.gstatic.com/mapfiles/place_api/icons/v2/generic_pinlet',
  'name': 'Belém Tower',
  'opening_hours': {'open_now': True},
  'photos': [{'height': 3468,
    'html_attributions': ['<a href="https://maps.google.com/maps/contrib/110421997385737949518">Luca Quattrocchi</a>'],
    'photo_reference': 'AfLeUgOvJmbQyonqC1nWnNRizGdnnUmVa2QKjQ6STNYcrxSv8YkvnCHBj6CBVUuUglxe2N5S53Qp_WzdjxB2bIq0gYJHzOZ9DbM6dXOmzGcpfOGUy2aZFIP9hmt8l65v-WFP1X4rVqouNpDCQjkh6AxyAYM1O2P9Z57KzN_mWIKpGar52xdG',
    'width': 46

In [358]:
import duckdb
import pickle

def save_to_pickle(data, file_path):
    con = duckdb.connect(':memory:')
    con.register('data', data)
    con.execute('CREATE TABLE results AS SELECT * FROM data')
    cursor = con.execute('SELECT * FROM results')
    rows = cursor.fetchall()
    results = {row[0]: {'address': row[1], 'lat': row[2], 'lng': row[3], 'photo_references': pickle.loads(row[4])} for row in rows}
    with open(file_path, 'wb') as f:
        pickle.dump(results, f)


In [377]:
d = generate_places_database("Lisbon")
d

Unnamed: 0,city,place_names,address,lat,lng,photo_references
0,Lisbon,Belém Tower,"Av. Brasília, 1400-038 Lisboa, Portugal",38.691584,-9.215977,[AfLeUgOW83jzI0EJYbyH3B9_MS0rjnyYblKyOj61nDUwr...
1,Lisbon,Arco da Rua Augusta,"R. Augusta 2, 1100-053 Lisboa, Portugal",38.708445,-9.136824,[AfLeUgP94haBzZ1TcQZB2Ih5UPpRUjwm0MtTpqfv2Izem...
2,Lisbon,Castelo de S. Jorge,"R. de Santa Cruz do Castelo, 1100-129 Lisboa, ...",38.713909,-9.133476,[AfLeUgMMk9BSB59-BaVU2cOF7rLvmuxfCtZQ2gIVsgL9j...
3,Lisbon,Oceanário de Lisboa,"Esplanada Dom Carlos I s/nº, 1990-005 Lisboa, ...",38.763543,-9.093742,[AfLeUgNwbwF4jaftNEVkF-BeQ6sGSEtqfW6UW0YPNTUxa...
4,Lisbon,Aqueduto das Águas Livres,"CC da Quintinha 6, 1070-225 Lisboa, Portugal",38.726667,-9.166481,[AfLeUgMSaSu1BFi0lToVrGoaZUY-KL5AyMcOLlKFJSfRv...
5,Lisbon,Padrão dos Descobrimentos,"Av. Brasília, 1400-038 Lisboa, Portugal",38.693597,-9.205711,[AfLeUgOFk9sgKqMgI8fxj_gBFllnOCtHahRExITpRoAKX...
6,Lisbon,Palace Fronteira,"Largo São Domingos de Benfica 01, 1500-554 Lis...",38.74013,-9.180357,[AfLeUgO8_R06KC1nJYItJh1Z85OOJViLZq8-hp65gGsNP...
7,Lisbon,Museu Nacional de Arte Antiga,"R. das Janelas Verdes, 1249-017 Lisboa, Portugal",38.704841,-9.161464,[AfLeUgNHHMZCaPh1j-tnFM1Aoghsg-j6qWFYpnjckPufN...
8,Lisbon,Jerónimos Monastery,"Praça do Império 1400-206 Lisboa, Portugal",38.697891,-9.206704,[AfLeUgOoMR50K0vmn1St1tflOfUs7Twr-enweic_mPXBI...
9,Lisbon,Miradouro de São Pedro de Alcântara,"R. de São Pedro de Alcântara, 1200-470 Lisboa,...",38.71523,-9.144171,[AfLeUgOpLx9xRqUtz7CClhmsqLKW0q7Ce5bblYu9XH0g5...


In [363]:
import pandas as pd

In [367]:
d

{'Belém Tower': {'address': 'Av. Brasília, 1400-038 Lisboa, Portugal',
  'lat': 38.6915837,
  'lng': -9.215977299999999,
  'photo_references': ['AfLeUgN36iHmIQ-ywfqU8jBI9hYvK5WZZ4yxRX3HqJHZu03k62h5rGSIegT-wmUTNYwIqCvaGcOeP7wvQAr9pZ_dubaOnLQ1pgm4c89hMUzA5ltjnES2RBS3cZOpAUu0W-sqknAbEdnDCIzy986OqXn1TSCXChHHDiqNVvihOWIsL-ktsD3l']},
 'Arco da Rua Augusta': {'address': 'R. Augusta 2, 1100-053 Lisboa, Portugal',
  'lat': 38.7084447,
  'lng': -9.136824299999999,
  'photo_references': ['AfLeUgOuO45GJXhvMBbv7WGitENqjw-BjlBFmZNfW69pfuQSEdRb3Ar323qq_uQv3QshecNwHMzg2tFDXj5MXWgF37TFH4XRlWbroIqBGaAKjkcQUGODhsZ9YbVNhhCtZgkwBpBQlDbEjiAlYCQEe-p7QtdmEzdIhNihyQsWPL0tRUBLCW7t']},
 'Castelo de S. Jorge': {'address': 'R. de Santa Cruz do Castelo, 1100-129 Lisboa, Portugal',
  'lat': 38.7139092,
  'lng': -9.1334762,
  'photo_references': ['AfLeUgMoroS0wkNL7ceyPAJ9vgjsm_mGpMMqIX0YpP2qz2K8yXRGN7LVrBl3xdIpJdXQZ-D87I2_ox7IFnoycpmlYEKr0B2Ib22xuEpC5FgoNgIIn76KwT03_V7WXk5ttPZmFMr-K35Ff669hu8mPsDOfhmJ-i-8luvjFLxm_lKL

In [372]:
df

Unnamed: 0,place_names,address,lat,lng,photo_references
0,Belém Tower,"Av. Brasília, 1400-038 Lisboa, Portugal",38.691584,-9.215977,[AfLeUgN36iHmIQ-ywfqU8jBI9hYvK5WZZ4yxRX3HqJHZu...
1,Arco da Rua Augusta,"R. Augusta 2, 1100-053 Lisboa, Portugal",38.708445,-9.136824,[AfLeUgOuO45GJXhvMBbv7WGitENqjw-BjlBFmZNfW69pf...
2,Castelo de S. Jorge,"R. de Santa Cruz do Castelo, 1100-129 Lisboa, ...",38.713909,-9.133476,[AfLeUgMoroS0wkNL7ceyPAJ9vgjsm_mGpMMqIX0YpP2qz...
3,Oceanário de Lisboa,"Esplanada Dom Carlos I s/nº, 1990-005 Lisboa, ...",38.763543,-9.093742,[AfLeUgNQkyuGwdA9fr3klyYqfRXLCB5YsjqGDqB6i7_P-...
4,Aqueduto das Águas Livres,"CC da Quintinha 6, 1070-225 Lisboa, Portugal",38.726667,-9.166481,[AfLeUgMZEy1sWYTNqyLXIDYstgivx9N1Z5dpgD1y_YLAP...
5,Padrão dos Descobrimentos,"Av. Brasília, 1400-038 Lisboa, Portugal",38.693597,-9.205711,[AfLeUgOmV2ewPC0MGi0QQE4nieZg9p4FTR14Rc4Qnr8SJ...
6,Palace Fronteira,"Largo São Domingos de Benfica 01, 1500-554 Lis...",38.74013,-9.180357,[AfLeUgOnhTObU1Sfu_h0vbY0Bx_SDzdpZgn1gS_78ipMg...
7,Museu Nacional de Arte Antiga,"R. das Janelas Verdes, 1249-017 Lisboa, Portugal",38.704841,-9.161464,[AfLeUgPcjbutHHd5FKsc2_tblhChsnPiXE-ARRyoyLw40...
8,Jerónimos Monastery,"Praça do Império 1400-206 Lisboa, Portugal",38.697891,-9.206704,[AfLeUgObxqgdSzvqLXLey27ngdiqQWg2tFq3wL7hPVkE9...
9,Miradouro de São Pedro de Alcântara,"R. de São Pedro de Alcântara, 1200-470 Lisboa,...",38.71523,-9.144171,[AfLeUgPXfQvrE1WCYMTe1gpqC2aH1GSko45cV_ccXa8cc...


In [326]:
import requests

# Set up the API request parameters
api_key = "AIzaSyDV93ChQ2_YJYGaLKy1elcyexcVj7ggzK4"
place_id = "ChIJ9QG0S3EzGQ0RwqIa7mEIsKk"
photo_reference = "AfLeUgPKWqJfmh1y1INhKMRT3A6vx4pq9UuDpupJ5kQpyYNg6j51f240O4GdSDEnpTEKdfkc4ABtbftrWxT26uWDgLbty4EheODjVIlI1djPsIfOfX6IWLoshrJsMXvNhcMxNH65wzxNIcCN8wv3SHCi8ODggiLhCMgdN2nWO3v3WWo907C3"
max_width = 400

# Make the API request to get the photo
photo_url = f"https://maps.googleapis.com/maps/api/place/photo?maxwidth={max_width}&photoreference={photo_reference}&key={api_key}"
response = requests.get(photo_url)

# Save the photo to a file
with open("photo.jpg", "wb") as f:
    f.write(response.content)


In [387]:
!pip install geopy

Collecting geopy
  Downloading geopy-2.3.0-py3-none-any.whl (119 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m119.8/119.8 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting geographiclib<3,>=1.52
  Downloading geographiclib-2.0-py3-none-any.whl (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.3/40.3 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: geographiclib, geopy
Successfully installed geographiclib-2.0 geopy-2.3.0

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.3.1[0m[39;49m -> [0m[32;49m23.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [397]:
from geopy.geocoders import Nominatim, options
from sklearn.metrics.pairwise import pairwise_distances
import pandas as pd

# Set the default user agent for all geocoder instances
options.default_user_agent = "my-application"

# Define the three places in Lisbon
places = ["baixa chiado", "praça marques de pombal", "praça do comercio"]

# Create a geolocator object
geolocator = Nominatim(user_agent="aaa")

# Locate the coordinates of each place
locations = [geolocator.geocode(place + ", Lisbon") for place in places]
coordinates = [(location.latitude, location.longitude) for location in locations]

# Calculate the distance matrix using the Euclidean distance metric
distances = pairwise_distances(coordinates, metric="euclidean")

# Convert the distance matrix to a pandas DataFrame with named rows and columns
df = pd.DataFrame(distances, columns=places, index=places)

# Convert to meters
df = df*100000


                         baixa chiado  praça marques de pombal  \
baixa chiado                 0.000000                 0.017552   
praça marques de pombal      0.017552                 0.000000   
praça do comercio            0.004269                 0.021443   

                         praça do comercio  
baixa chiado                      0.004269  
praça marques de pombal           0.021443  
praça do comercio                 0.000000  


In [422]:
t_spot.to_parquet("/Users/rennanaraujo/routing_app/data/country_tspots/portugal.parquet")

In [434]:
for city in t_spot.city.unique():
    temp_df = t_spot.loc[t_spot.city == city]
    temp_df.reset_index(inplace=True,drop=True)
    city_df = pd.DataFrame(pairwise_distances([(temp_df.lat[i],temp_df.lng[i]) for i in range(len(temp_df))],metric="euclidean"), columns=temp_df.place_names, index=temp_df.place_names)
    city_df = city_df * 100000
    city_df.to_parquet(f"/Users/rennanaraujo/routing_app/data/distance_matrices/portugal/{city}.parquet")

In [427]:
# Calculate the distance matrix using Euclidean distance
distances = pairwise_distances([(t_spot.lat[i],t_spot.lng[i]) for i in range(len(t_spot))], metric='euclidean')

# Rename the columns to include a unique identifier
place_names = [f'{name}_{i}' for i, name in enumerate(t_spot.place_names)]
lisbon = pd.DataFrame(distances, columns=place_names, index=place_names)

# Multiply the distances by 100000 (optional)
lisbon *= 100000
lisbon

Unnamed: 0,Belém Tower_0,Oceanário de Lisboa_1,Castelo de S. Jorge_2,Arco da Rua Augusta_3,Padrão dos Descobrimentos_4,Jerónimos Monastery_5,Museu Nacional de Arte Antiga_6,Miradouro de Santa Luzia_7,Praça do Comércio_8,Lisbon Cathedral_9,...,Barbican Gate_170,Paço das Escolas_171,Dom Dinis_172,Mosteiro de Santa Clara-a-Velha_173,Manga Cloister_174,Chapel of São Miguel_175,Iron Gate_176,Jardim Botânico da Universidade de Coimbra_177,Sereia Garden_178,Torre de Anto_179
Belém Tower_0,0.000000,14184.429357,8546.847051,8092.892394,1046.141646,1121.502203,5610.194716,8810.626583,8114.905543,8537.610401,...,170932.743111,170935.776842,171118.341033,170176.119356,171191.542303,170912.219299,170983.493355,171006.225176,171477.149676,171045.836565
Oceanário de Lisboa_1,14184.429357,0.000000,6357.995062,6994.287253,13202.178530,13065.514801,8962.330983,6338.156512,7058.125613,6624.720653,...,159095.108089,159083.088917,159254.542945,158345.141045,159354.375352,159061.118423,159129.780991,159126.979917,159594.845687,159207.653126
Castelo de S. Jorge_2,8546.847051,6357.995062,0.000000,640.862965,7503.673665,7495.920212,2942.044129,395.624394,722.723868,412.807988,...,165271.993472,165262.917053,165436.564735,164520.585368,165531.214765,165240.632931,165309.817175,165311.974172,165780.511166,165384.665967
Arco da Rua Augusta_3,8092.892394,6994.287253,640.862965,0.000000,7046.908266,7067.206797,2490.210457,738.151782,125.075196,447.580401,...,165909.048078,165900.104467,166073.849955,165157.576048,166168.266757,165877.806298,165947.013835,165949.391459,166417.957148,166021.726120
Padrão dos Descobrimentos_4,1046.141646,13202.178530,7503.673665,7046.908266,0.000000,440.679693,4565.347736,7765.280399,7068.764496,7491.764468,...,170283.260813,170284.766396,170466.219613,169527.234328,170542.133956,170261.367700,170332.383671,170352.519972,170823.180233,170396.310187
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Chapel of São Miguel_175,170912.219299,159061.118423,165240.632931,165877.806298,170261.367700,169925.276539,167271.486282,165300.868689,165962.999854,165566.969207,...,285.813941,40.572510,335.178030,840.909138,410.861983,0.000000,89.179895,602.490341,861.258769,306.885012
Iron Gate_176,170983.493355,159129.780991,165309.817175,165947.013835,170332.383671,169996.382801,167341.385082,165369.919630,166032.176340,165636.067359,...,342.520312,51.959390,250.180944,929.282380,411.485199,89.179895,0.000000,542.096601,774.309367,335.787152
Jardim Botânico da Universidade de Coimbra_177,171006.225176,159126.979917,165311.974172,165949.391459,170352.519972,170017.428455,167350.466582,165370.794246,166034.254353,165637.394020,...,883.362505,566.507812,349.558328,1300.022901,915.930327,602.490341,542.096601,0.000000,474.030661,872.588323
Sereia Garden_178,171477.149676,159594.845687,165780.511166,166417.957148,170823.180233,170488.182833,167819.881873,165839.159898,166502.780290,166105.820135,...,1082.530029,820.874672,526.820696,1675.599619,994.329122,861.258769,774.309367,474.030661,0.000000,1022.493424


In [428]:
lisbon = pd.DataFrame(pairwise_distances([(t_spot.lat[i],t_spot.lng[i]) for i in range(len(t_spot))],metric="euclidean"), columns=t_spot.place_names, index=t_spot.place_names)
lisbon = lisbon *100000
#lisbon.to_parquet("/Users/rennanaraujo/routing_app/data/distance_matrices/portugal.parquet")

Index(['Belém Tower', 'Oceanário de Lisboa', 'Castelo de S. Jorge',
       'Arco da Rua Augusta', 'Padrão dos Descobrimentos',
       'Jerónimos Monastery', 'Museu Nacional de Arte Antiga',
       'Miradouro de Santa Luzia', 'Praça do Comércio', 'Lisbon Cathedral',
       ...
       'Barbican Gate', 'Paço das Escolas', 'Dom Dinis',
       'Mosteiro de Santa Clara-a-Velha', 'Manga Cloister',
       'Chapel of São Miguel', 'Iron Gate',
       'Jardim Botânico da Universidade de Coimbra', 'Sereia Garden',
       'Torre de Anto'],
      dtype='object', name='place_names', length=180)

In [418]:
!pip install sqlite

[31mERROR: Could not find a version that satisfies the requirement sqlite (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for sqlite[0m[31m
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.3.1[0m[39;49m -> [0m[32;49m23.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [407]:
coordinates

[(38.7101063, -9.140142293300311),
 (38.7252743, -9.1489735),
 (38.70779335, -9.1365543153284)]

In [394]:
locations[0].address

'Baixa-Chiado, Rua do Crucifixo, Chiado, Santa Maria Maior, Lisboa, 1100-184, Portugal'

In [395]:
locations

[Location(Baixa-Chiado, Rua do Crucifixo, Chiado, Santa Maria Maior, Lisboa, 1100-184, Portugal, (38.7101063, -9.140142293300311, 0.0)),
 Location(Praça Marquês de Pombal, Coração de Jesus, Santo António, Lisboa, 1050-044, Portugal, (38.7252743, -9.1489735, 0.0)),
 Location(Praça do Comércio, São Nicolau, Santa Maria Maior, Lisboa, 1100-148, Portugal, (38.70779335, -9.1365543153284, 0.0))]

Unnamed: 0,baixa chiado,praça marques de pombal,praça do comercio
baixa chiado,0.0,1755.159354,426.88785
praça marques de pombal,1755.159354,0.0,2144.340832
praça do comercio,426.88785,2144.340832,0.0


# Using ChatGPT to get coordinates

In [15]:
import openai
import numpy as np
import pandas as pd
import time
from tqdm import tqdm
import backoff
import json
import re

key = "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"
openai.api_key = key


In [2]:
import json

# Open the JSON file
with open('data/europe_touristic_cities.json') as f:
    touristic_cities = json.load(f)

In [3]:
import backoff
@backoff.on_exception(backoff.expo, openai.error.RateLimitError)
def generate(city):
    completion = openai.ChatCompletion.create(
      model="gpt-3.5-turbo",
      messages=[
            {"role": "system","content": "You are a tour guide assistant. You should send only one JSON as a response, no additional text should be sent. The JSON should be valid in Python. "},
            {"role": "user", "content": f"Create a JSON with the 30 most touristy places in {city}. The JSON should also include the postal code, latitude, longitude and the english translation of the name of each place."}
        ]
    )
    return completion

In [4]:
import backoff
@backoff.on_exception(backoff.expo, openai.error.RateLimitError)
def fix_json(json):
    fixed = """{
    "titre": "Assistant RH",
    "competences_techniques": [
    "Gestion administrative du personnel",
    "Connaissances du droit du travail",
    "Connaissance des processus de paie",
    "Maitrise des outils de paie",
    "Connaissance des procédures disciplinaires",
    "Maitrise des outils informatiques de gestion RH"
    ],
    "competences_non_techniques": [
    "Organisation",
    "Rigueur",
    "Discrétion",
    "Capacité d'analyse",
    "Sens de la communication",
    "Esprit d'équipe",
    "Capacité à travailler en environnement multiculturel"
    ],
    "salaire": null,
    "experience": 24
    }"""
    
    completion = openai.ChatCompletion.create(
      model="gpt-3.5-turbo",
      messages=[
            {"role": "system","content": "You are JSON fixer assistant. You should fix the json returning the a valid python json in your message and nothing else."},
            {"role": "user", "content": """fix this json: {\n    "titre": "Assistant RH",\n    "competences_techniques": [\n        "Gestion administrative du personnel",\n        "Connaissances du droit du travail",\n        "Connaissance des processus de paie",\n        "Maitrise des outils de paie",\n        "Connaissance des procédures disciplinaires",\n        "Maitrise des outils informatiques de gestion RH",\n    ],\n    "competences_non_techniques": [\n        "Organisation",\n        "Rigueur",\n        "Discrétion",\n        "Capacité d\'analyse",\n        "Sens de la communication",\n        "Esprit d\'équipe",\n        "Capacité à travailler en environnement multiculturel"\n    ],\n    "salaire": null,\n    "experience": 24\n}"""},
            {"role": "assistant", "content": fixed},
            {"role": "user", "content": f"""fix this json: {json}"""}
        ]
    )
    return completion

In [5]:
def extract_json(content):
    """
    Extracts the first JSON string from a response dictionary.

    Args:
        response (dict): A dictionary representing a response to some query.

    Returns:
        str or None: The first JSON string found in the response, or None if none is found.
    """
    #match = re.search(r'\{(?:[^{}]*(?:\{[^{}]*\})?[^{}]*,?)*\}', content)
    #match = re.search(r'\{(?:[^{}]|(?0))*\}', content)
    pattern = regex.compile(r'\{(?:[^{}]|(?R))*\}')

    match = pattern.search(content)

    if match:
        return match.group(0)
    else:
        return None

def fix_json_result(response):
    content = response["choices"][0]["message"]["content"]
    try:
        json_str = extract_json(content)
        if json_str is not None:
            rs = json.loads(json_str)
        else:
            rs = None
    except ValueError:
        try:
            print("trying to fix")
            fixed = fix_json(json_str)
            json_str = extract_json(fixed)
            if json_str is not None:
                rs = json.loads(json_str)
            else:
                rs = None
        except ValueError:
            rs = None
    return rs

In [6]:
df = pd.DataFrame(touristic_cities)
df.head()

Unnamed: 0,Albania,Andorra,Austria,Belarus,Belgium,Bosnia and Herzegovina,Bulgaria,Croatia,Cyprus,Czech Republic,...,Russia,San Marino,Serbia,Slovakia,Slovenia,Spain,Sweden,Switzerland,Ukraine,United Kingdom
0,Tirana,Andorra la Vella,Vienna,Minsk,Brussels,Sarajevo,Sofia,Dubrovnik,Nicosia,Prague,...,Moscow,San Marino,Belgrade,Bratislava,Ljubljana,Barcelona,Stockholm,Zurich,Kiev,London
1,Berat,Encamp,Salzburg,Brest,Bruges,Mostar,Plovdiv,Split,Limassol,Cesky Krumlov,...,St. Petersburg,Serravalle,Novi Sad,Kosice,Bled,Madrid,Gothenburg,Geneva,Lviv,Edinburgh
2,Gjirokastra,Ordino,Innsbruck,Gomel,Antwerp,Medjugorje,Varna,Zagreb,Paphos,Karlovy Vary,...,Novosibirsk,Borgo Maggiore,Nis,Banska Bystrica,Maribor,Seville,Malmö,Bern,Odessa,Bath
3,Kruja,La Massana,Graz,Vitebsk,Ghent,Bihac,Burgas,Pula,Ayia Napa,Brno,...,Yekaterinburg,Domagnano,Kragujevac,Nitra,Piran,Valencia,Uppsala,Lucerne,Kharkiv,Oxford
4,Butrint,Canillo,Hallstatt,Grodno,Mechelen,Jajce,Ruse,Rovinj,Larnaca,Olomouc,...,Kazan,Faetano,Subotica,Presov,Ptuj,Granada,Lund,Interlaken,Dnipro,Cambridge


In [9]:
import os
import regex
def check_directory(directory):
    if not os.path.exists(directory):
        os.makedirs(directory)

In [510]:
check_directory("data/coordinates/Portugal")

In [17]:
results = {}
for country in df.columns:
    print(country)
    check_directory(f"data/coordinates/{country}")
    for i in tqdm(range(len(df))):
        city = df.loc[i,country]
        file_path = os.path.join(f"data/coordinates/{country}", f"{city}.parquet")
        if not os.path.isfile(file_path):     
            response = generate(city)
            try:
                temp_df = pd.DataFrame.from_records(fix_json_result(response))
                temp_df.to_parquet(f"data/coordinates/{country}/{city}.parquet")
            except Exception as e:
                print(e)

Albania


100%|███████████████████████████████████████| 10/10 [00:00<00:00, 7385.64it/s]


Andorra


100%|██████████████████████████████████████| 10/10 [00:00<00:00, 22121.86it/s]


Austria


100%|██████████████████████████████████████| 10/10 [00:00<00:00, 22168.63it/s]


Belarus


 90%|█████████████████████████████████████▊    | 9/10 [00:05<00:00,  1.62it/s]


APIError: Internal server error {
    "error": {
        "message": "Internal server error",
        "type": "auth_subrequest_error",
        "param": null,
        "code": "internal_error"
    }
}
 500 {'error': {'message': 'Internal server error', 'type': 'auth_subrequest_error', 'param': None, 'code': 'internal_error'}} {'Date': 'Fri, 10 Mar 2023 17:18:12 GMT', 'Content-Type': 'application/json; charset=utf-8', 'Content-Length': '166', 'Connection': 'keep-alive', 'Vary': 'Origin', 'X-Request-Id': '1ff302b96bb242978be9f1ba1705bc28', 'Strict-Transport-Security': 'max-age=15724800; includeSubDomains'}

In [14]:
df = pd.read_parquet("data/touristic_spots.parquet")
df['id'] = range(1000001, 1000001 + len(df))
df.to_parquet("data/touristic_spots.parquet")

In [15]:
df

Unnamed: 0,city,country,place_names,address,lat,lng,photo_references,id
0,Lisbon,Portugal,Belém Tower,"Av. Brasília, 1400-038 Lisboa, Portugal",38.691584,-9.215977,[AfLeUgNIQTLp4BihQkjBE4OKFUuUzrgORnifvFmGvb322...,1000001
1,Lisbon,Portugal,Oceanário de Lisboa,"Esplanada Dom Carlos I s/nº, 1990-005 Lisboa, ...",38.763543,-9.093742,[AfLeUgOQwSZjRi2Rj8QGqxNFn6XT9vVBp9FhZ1QWBCzjE...,1000002
2,Lisbon,Portugal,Castelo de S. Jorge,"R. de Santa Cruz do Castelo, 1100-129 Lisboa, ...",38.713909,-9.133476,[AfLeUgPdI01KB6gX572L3p8kFxvkT_g_8QqGbF9cHHTGD...,1000003
3,Lisbon,Portugal,Arco da Rua Augusta,"R. Augusta 2, 1100-053 Lisboa, Portugal",38.708445,-9.136824,[AfLeUgNkdoxjQjehTK9L0N9eD_gJCViGY6-DqIOZyBZST...,1000004
4,Lisbon,Portugal,Padrão dos Descobrimentos,"Av. Brasília, 1400-038 Lisboa, Portugal",38.693597,-9.205711,[AfLeUgOsCMdtzk0JzNfS8GUKQSX9g2vb3co_mzMnOHICv...,1000005
...,...,...,...,...,...,...,...,...
1595,Nafplio,Greece,The Statue of Otto,"Vasileos Konstantinou 28, Nafplio 211 00, Greece",37.565815,22.799406,[AfLeUgOI1xTf688v0rGSvQLxAVrljenqHLcfYATAJKyGU...,1001596
1596,Nafplio,Greece,Πλατεία Τριών Ναυάρχων,"Nafplion 211 00, Greece",37.565880,22.799660,[AfLeUgOsr7h3C3kujPjjqFVWJL2wXnyt3LYxoWivIEofd...,1001597
1597,Nafplio,Greece,"Nauplia History and Tour at Nafplion, Greece (...","HQ8V+HHX, Nafplio, Greece",37.566488,22.793984,[],1001598
1598,Nafplio,Greece,Kolokotronis Park,"Dervenakion 3, Nafplio 211 00, Greece",37.565927,22.802196,[AfLeUgPoyW6u4JVHGq7-vuK9Jf5iiNMBC6IsipIjI-vik...,1001599


In [84]:
geneva = df.loc[df.city == "Geneva"]

In [35]:
import math

def calculate_haversine(lat1, lon1, lat2, lon2):
    r = 6371 # raio médio da Terra em km
    d_lat = math.radians(lat2 - lat1)
    d_lon = math.radians(lon2 - lon1)
    a = math.sin(d_lat / 2)**2 + \
        math.cos(math.radians(lat1)) * \
        math.cos(math.radians(lat2)) * \
        math.sin(d_lon / 2)**2
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    return r * c * 1000 # retorna a distância em metros


In [36]:
def gen_dist_matrix(df):
    distances = {}

    for nome1 in df['place_names']:
        # cria uma lista vazia para armazenar as distâncias
        distances[nome1] = []
        # itera novamente sobre cada lugar
        for nome2 in df['place_names']:
            # calcula a distância haversine entre os lugares
            distance = calculate_haversine(df.loc[df['place_names'] == nome1, 'lat'].iloc[0],
                                           df.loc[df['place_names'] == nome1, 'lng'].iloc[0],
                                           df.loc[df['place_names'] == nome2, 'lat'].iloc[0],
                                           df.loc[df['place_names'] == nome2, 'lng'].iloc[0])
            # adiciona a distância à lista
            distances[nome1].append(distance)

    # cria um dataframe com as distâncias
    distances_df = pd.DataFrame(distances, index=df['place_names'])
    return distances_df

In [37]:
distances_df = gen_dist_matrix(geneva)

In [38]:
# Cria um dicionário para armazenar as distâncias
distances = {}

# Itera sobre cada lugar
for i, row in geneva.iterrows():
    origem = (row['lat'], row['lng'])
    distances[row['place_names']] = []
    # Itera novamente sobre cada lugar
    for j, row2 in geneva.iterrows():
        destino = (row2['lat'], row2['lng'])
        # Chama a API Directions do Google Maps para obter a distância em metros
        directions_result = gmaps.directions(origem, destino, mode="driving")
        distance = directions_result[0]['legs'][0]['distance']['value']
        distances[row['place_names']].append(distance)

# Cria um dataframe com as distâncias
distances_df = pd.DataFrame(distances, index=df['place_names'])

# Salva o dataframe em um arquivo Parquet
distances_df.to_parquet('distances.parquet')

Unnamed: 0_level_0,St Pierre Cathedral,The Geneva Water Fountain,Brunswick Monument,The Flower Clock,Place du Bourg-de-Four,Palais des Nations,Parc de La Grange,Jardin Anglais,Villa La Grange,Patek Philippe Museum,Phare des Pâquis,Geneva Botanical Garden and Greenhouse,Musée Ariana,International Museum of the Red Cross and Red Crescent,Statue de Gandhi,Promenade de la Treille,Tavel House,Free Walk Geneva,Monument à l'impératrice Sissi,Musée d'Art et d'Histoire
place_names,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
St Pierre Cathedral,0.0,898.904516,816.438832,390.396229,109.84622,2894.119573,1578.690723,419.68056,1469.644482,847.957057,1183.254198,2943.105465,2803.839232,3059.964605,2808.159878,190.277094,112.773052,590.439269,910.588529,309.176236
The Geneva Water Fountain,898.904516,0.0,545.7185,518.97761,946.871994,2439.815992,895.742325,479.291623,800.752838,1687.164339,303.20139,2349.706479,2399.044564,2654.769217,2438.582094,1072.677631,949.377214,1010.581405,448.31462,950.135793
Brunswick Monument,816.438832,545.7185,0.0,499.280978,912.777309,2115.600583,1439.207461,536.618968,1346.032122,1389.938972,638.743621,2131.005809,2042.868394,2301.555059,2062.184513,918.326827,798.101045,587.445232,153.030731,1031.149376
The Flower Clock,390.396229,518.97761,499.280978,0.0,459.314077,2614.881312,1295.570579,71.820808,1188.319091,1173.825959,794.417212,2618.937863,2540.897806,2799.461201,2557.839909,555.089791,430.685555,602.817107,554.868098,538.268892
Place du Bourg-de-Four,109.84622,946.871994,912.777309,459.314077,0.0,3000.060282,1571.852632,474.354615,1463.202487,868.727617,1239.644969,3042.627628,2911.164475,3167.563489,2916.340252,225.633442,204.774914,696.928322,998.857586,212.302757
Palais des Nations,2894.119573,2439.815992,2115.600583,2614.881312,3000.060282,0.0,3045.306784,2649.267133,3007.164823,3131.511962,2235.54888,505.00259,177.463587,257.534943,283.204404,2939.545381,2842.578605,2407.709942,2085.862553,3142.045655
Parc de La Grange,1578.690723,895.742325,1439.207461,1295.570579,1571.852632,3045.306784,0.0,1229.026823,109.057374,2426.400965,895.616102,2826.119418,3046.111587,3286.82103,3108.611176,1768.870309,1668.72469,1873.356803,1325.349535,1457.077647
Jardin Anglais,419.68056,479.291623,536.618968,71.820808,474.354615,2649.267133,1229.026823,0.0,1121.367133,1227.382063,766.142148,2640.642886,2579.15413,2837.880665,2598.750561,595.354465,474.682958,674.61672,574.466144,524.139573
Villa La Grange,1469.644482,800.752838,1346.032122,1188.319091,1463.202487,3007.164823,109.057374,1121.367133,0.0,2317.344225,825.333018,2804.459458,3002.044574,3245.663946,3061.371519,1659.830591,1559.713236,1768.959306,1236.906757,1350.402755
Patek Philippe Museum,847.957057,1687.164339,1389.938972,1173.825959,868.727617,3131.511962,2426.400965,1227.382063,2317.344225,0.0,1935.069245,3307.713906,3002.752058,3239.316835,2974.298979,658.178957,760.304276,825.069409,1532.847189,1038.241661


In [106]:
def gen_dist_matrix(df, client):
    # Cria um dicionário para armazenar as distâncias
    distances = {}

    # Itera sobre cada lugar
    for i, row in df.iterrows():
        origem = (row['lat'], row['lng'])
        distances[row['place_names']] = {}
        # Itera novamente sobre cada lugar
        for j, row2 in df.iterrows():
            destino = (row2['lat'], row2['lng'])
            # Chama a API Directions do Google Maps para obter a distância em metros
            directions_result = client.directions(origem, destino, mode="driving")
            if len(directions_result) > 0:
                distance = directions_result[0]['legs'][0]['distance']['value']
                distances[row['place_names']][str(row2['id'])] = distance
    # Cria um dataframe com as distâncias
    distances_df = pd.DataFrame(distances)
    distances_df.columns = distances_df.index
    return distances_df


In [95]:
distances_df = gen_dist_matrix(geneva,client)

In [96]:
distances_df

Unnamed: 0,1001301,1001302,1001303,1001304,1001305,1001306,1001307,1001308,1001309,1001310,1001311,1001312,1001313,1001314,1001315,1001316,1001317,1001318,1001319,1001320
1001301,0,1624,2118,1841,129,5259,3072,1434,2382,1819,2555,4923,5286,5865,5120,1334,1434,1581,2258,1395
1001302,2359,0,1193,916,2488,4334,1780,509,724,2389,1630,3998,4361,4940,4195,2196,2296,1994,1333,1196
1001303,2852,1174,0,560,2981,3134,2729,660,1932,2964,430,2798,3161,3740,2995,2689,2789,1249,133,1771
1001304,2298,620,681,0,2427,3822,2175,106,1378,2410,1118,3486,3849,4428,3683,2135,2235,1482,821,1217
1001305,1368,1495,1989,1712,0,5130,2943,1305,2253,1690,2426,4794,5157,5736,4991,1205,1305,1452,2129,1266
1001306,4435,4253,3072,3639,4564,0,5808,3739,5011,5014,2642,2405,156,1019,579,4272,4372,3333,2929,4850
1001307,4010,1617,2844,2567,4139,5985,0,2160,1482,4040,3281,5649,6012,6591,5846,3847,3947,3645,2984,2847
1001308,2187,509,679,402,2316,3820,2064,0,1267,2299,1116,3484,3847,4426,3681,2024,2124,1480,819,1106
1001309,3081,688,1915,1638,3210,5056,1034,1231,0,3111,2352,4720,5083,5662,4917,2918,3018,2716,2055,1918
1001310,1231,2327,2007,2211,1360,4437,3651,2311,3192,0,2396,4101,4464,4648,3902,1068,1168,1413,2098,1745


In [7]:
df.loc[df.city == "Lisbon"]

Unnamed: 0,city,country,place_names,address,lat,lng,photo_references
0,Lisbon,Portugal,Belém Tower,"Av. Brasília, 1400-038 Lisboa, Portugal",38.691584,-9.215977,[AfLeUgNIQTLp4BihQkjBE4OKFUuUzrgORnifvFmGvb322...
1,Lisbon,Portugal,Oceanário de Lisboa,"Esplanada Dom Carlos I s/nº, 1990-005 Lisboa, ...",38.763543,-9.093742,[AfLeUgOQwSZjRi2Rj8QGqxNFn6XT9vVBp9FhZ1QWBCzjE...
2,Lisbon,Portugal,Castelo de S. Jorge,"R. de Santa Cruz do Castelo, 1100-129 Lisboa, ...",38.713909,-9.133476,[AfLeUgPdI01KB6gX572L3p8kFxvkT_g_8QqGbF9cHHTGD...
3,Lisbon,Portugal,Arco da Rua Augusta,"R. Augusta 2, 1100-053 Lisboa, Portugal",38.708445,-9.136824,[AfLeUgNkdoxjQjehTK9L0N9eD_gJCViGY6-DqIOZyBZST...
4,Lisbon,Portugal,Padrão dos Descobrimentos,"Av. Brasília, 1400-038 Lisboa, Portugal",38.693597,-9.205711,[AfLeUgOsCMdtzk0JzNfS8GUKQSX9g2vb3co_mzMnOHICv...
5,Lisbon,Portugal,Museu Nacional de Arte Antiga,"R. das Janelas Verdes, 1249-017 Lisboa, Portugal",38.704841,-9.161464,[AfLeUgMOU3aXB83FhaIQWbEzDrRA72WUl-C34-DuZIEU_...
6,Lisbon,Portugal,Jerónimos Monastery,"Praça do Império 1400-206 Lisboa, Portugal",38.697891,-9.206704,[AfLeUgOaBttlGy8f820dZS611KcUIBpcxV1gFMHwKg0PK...
7,Lisbon,Portugal,Miradouro de Santa Luzia,"Largo Santa Luzia, 1100-487 Lisboa, Portugal",38.711696,-9.130197,[AfLeUgPPlXFzJlFELQHpNHEjRXuLgHUCdFaQULYsNCwdr...
8,Lisbon,Portugal,Lisbon Cathedral,"Largo da Sé, 1100-585 Lisboa, Portugal",38.709879,-9.132584,[AfLeUgPmv4HCtBdvJXUYX0gvF8P6K83eSvN_FIGKcSKL_...
9,Lisbon,Portugal,Santa Justa Lift,"R. do Ouro, 1150-060 Lisboa, Portugal",38.71213,-9.13943,[AfLeUgNGw96d5z0YPD3w9nIxxvjIfCf_dAxKERWb4yVwB...


In [107]:
# Define a chave da API do Google Cloud Platform
client = googlemaps.Client(key='AIzaSyDV93ChQ2_YJYGaLKy1elcyexcVj7ggzK4')

# Salva o dataframe em um arquivo Parquet
#distances_df.to_parquet('distances.parquet')

for country in df.country.unique():
    temp_df = df.loc[df.country == country]
    temp_df.reset_index(inplace=True,drop=True)
    for i,city in enumerate(temp_df.city.unique()):
        print(country,city)
        if not os.path.isfile(f"/Users/rennanaraujo/routing_app/data/distance_matrices/{country}/{city}.parquet"):
            city_df = temp_df.loc[temp_df.city == city]
            city_df.reset_index(inplace=True,drop=True)
            distances_df = gen_dist_matrix(city_df,client)
            directory = os.path.dirname(f"/Users/rennanaraujo/routing_app/data/distance_matrices/{country}/")
            if not os.path.exists(directory):
                os.makedirs(directory)
            distances_df.to_parquet(f"/Users/rennanaraujo/routing_app/data/distance_matrices/{country}/{city}.parquet")


Portugal Lisbon
Portugal Porto
Portugal Funchal
Portugal Coimbra
Portugal Sintra
Portugal Albufeira
Portugal Évora
Portugal Braga
Spain Barcelona
Spain Madrid
Spain Seville
Spain Valencia
Spain Granada
Spain Bilbao
Spain San Sebastian
Spain Malaga
France Paris
France Nice
France Lyon
France Cannes
France Bordeaux
France Marseille
France Toulouse
France Strasbourg
Germany Berlin
Germany Munich
Germany Frankfurt
Germany Hamburg
Germany Cologne
Germany Dresden
Germany Nuremberg
Germany Heidelberg
United Kingdom London
United Kingdom Edinburgh
United Kingdom Manchester
United Kingdom Bath
United Kingdom Oxford
United Kingdom York
United Kingdom Cambridge
United Kingdom Glasgow
Italy Rome
Italy Florence
Italy Venice
Italy Milan
Italy Naples
Italy Pisa
Italy Verona
Italy Bologna
Austria Vienna
Austria Salzburg
Austria Innsbruck
Austria Graz
Austria Linz
Austria Hallstatt
Austria Klagenfurt
Austria Zell am See
Netherlands Amsterdam
Netherlands Rotterdam
Netherlands Utrecht
Netherlands The Hag

In [42]:
import os
directory = os.path.dirname(f"/Users/rennanaraujo/routing_app/data/distance_matrices/{country}/")


In [13]:
df.loc[df.id == 1000963]

Unnamed: 0,city,country,place_names,address,lat,lng,photo_references,id
962,Vienna,Austria,Hundertwasser House,"Kegelgasse 36-38, 1030 Wien, Austria",48.207337,16.394294,[AfLeUgOcrKHq9agsG8MiFqzQ4a338yhBBTbEW-WTJxVF0...,1000963


In [9]:
import pandas as pd
t = pd.read_parquet("/Users/rennanaraujo/routing_app/data/distance_matrices/Austria/Vienna.parquet")
t

Unnamed: 0,1000961,1000962,1000963,1000964,1000965,1000966,1000967,1000968,1000969,1000970,1000971,1000972,1000973,1000974,1000975,1000976,1000977,1000978,1000979,1000980
1000961,0,2575,3341,4760,8757,6287,5108,1054,2745,2575,931,1157,954,1233,1825,1440,486,1600,1747,1397
1000962,2939,0,6303,4717,11899,6241,5065,1873,2702,0,2478,2517,2620,3399,1782,2259,2845,2419,2156,2757
1000963,4947,4058,0,2254,6251,8817,2602,3985,2557,4058,4591,4630,4733,4271,3222,4371,4958,4531,4269,4870
1000964,4887,3998,1580,0,5439,8757,799,3925,2497,3998,4531,4570,4673,4211,3162,4311,4898,4471,4209,4810
1000965,9277,8388,5970,5019,0,12773,4697,8315,6887,8388,8921,8960,9063,8601,7552,8701,9288,8861,8599,9200
1000966,6075,6534,7958,9255,13429,0,9603,5549,7240,6540,5613,5652,5756,6534,6320,5935,5981,6095,5892,5892
1000967,5023,4134,1716,251,5575,8893,0,4061,2633,4134,4667,4706,4809,4347,3298,4447,5034,4607,4345,4946
1000968,2062,2115,2881,4300,8297,5827,4648,0,2285,2115,1601,135,1743,2522,1365,366,1968,525,1287,375
1000969,3309,4423,3677,3594,6022,8016,3942,3820,0,4423,3324,3923,3720,2633,3587,4206,3944,4366,4513,4163
1000970,2939,0,6309,4717,11905,6247,5065,1873,2702,0,2478,2517,2620,3399,1782,2259,2845,2419,2156,2757


In [45]:
os.path.exists(directory)


True

In [44]:
os.makedirs(directory)