## Obtencion de dataset

In [1]:
import pandas as pd, numpy as np
import requests
import json
import time
import itertools

In [2]:
api_key = 'AIzaSyBh6lLBmeM09G5Ela3HatQTxzJe1ubdloU'
place_name = 'spain'
place_name = place_name.lower()
place_name = '+'.join(place_name.split(' '))
place_name

'spain'

In [3]:
text_search_url = 'https://maps.googleapis.com/maps/api/place/textsearch/json?query={}&key={}'
text_search_next_page_url = 'https://maps.googleapis.com/maps/api/place/textsearch/json?key={}&pagetoken={}'
distance_matrix_url = 'https://maps.googleapis.com/maps/api/distancematrix/json?origins={}&destinations={}&key={}'
place_details_url = 'https://maps.googleapis.com/maps/api/place/details/json?place_id={}&key={}'

url = 'https://maps.googleapis.com/maps/api/place/textsearch/json?query=what+to+see+{}&key={}'.format(
    place_name,
    api_key,
)

labels = ['place_id','name','latitud','longitud','rating','types', 'open_hours']
data = []

while True:
    response_text = requests.get(url).text
    # print(response_text)
    response = json.loads(response_text)
    places = response['results']
    print(len(places), 'places')
    for place in places:
        name = place['name']
        place_id = place ['place_id']
        latitud = place['geometry']['location']['lat']
        longitud = place['geometry']['location']['lng']
        rating = None
        if 'rating' in place:
            rating = place['rating']
        types = str(place['types'])
        
        details_url = place_details_url.format(
            place_id,
            api_key,
        )
        details_response = json.loads(requests.get(details_url).text)

        open_hours = {}
        try:
            open_hours_lst = details_response['result']['opening_hours']['periods']
            for open_hours_raw in open_hours_lst:
                open_hours[open_hours_raw['open']['day']] = {
                    'open': open_hours_raw['open']['time'],
                    'close': open_hours_raw['close']['time']
                }
        except:
            open_hours = None

        data.append([place_id, name, latitud, longitud, rating, types, open_hours])
    time.sleep(2)
    if 'next_page_token' not in response:
        break
    else:
        next_page_token = response['next_page_token']
        url = 'https://maps.googleapis.com/maps/api/place/textsearch/json?key='+str(api_key)+'&pagetoken='+str(next_page_token)
df = pd.DataFrame.from_records(data, columns=labels)
print(len(df.index))


20 places
20 places
20 places
60


In [4]:
df.head()

Unnamed: 0,place_id,name,latitud,longitud,rating,types,open_hours
0,ChIJD7G2bqduEg0ROdrTdOj1Jok,Plaza de España,37.377196,-5.986893,4.8,"['tourist_attraction', 'point_of_interest', 'e...","{0: {'open': '0800', 'close': '2200'}, 1: {'op..."
1,ChIJO7l_l7f8cQ0Rf6IhEu_RjYA,Alhambra,37.176078,-3.588141,4.8,"['tourist_attraction', 'museum', 'point_of_int...","{0: {'open': '0830', 'close': '2000'}, 1: {'op..."
2,ChIJk_s92NyipBIRUMnDG8Kq2Js,La Sagrada Familia,41.40363,2.174356,4.7,"['tourist_attraction', 'church', 'place_of_wor...","{0: {'open': '0900', 'close': '2000'}, 1: {'op..."
3,ChIJYUFLSe2ipBIRD04uni940kA,Casa Batlló,41.391728,2.164949,4.6,"['tourist_attraction', 'point_of_interest', 'e...","{0: {'open': '0830', 'close': '2015'}, 1: {'op..."
4,ChIJZX56134oQg0RHBgRAVQ8pkk,Plaza Mayor,40.415511,-3.707401,4.6,"['tourist_attraction', 'point_of_interest', 'e...",


## Limpieza de los datos

Eliminamos todos los NaN y todos los lugares en donde la puntuación es 0

In [5]:
df.dropna(inplace=True)
df = df[df['rating'] != 0.0]
df.head()

Unnamed: 0,place_id,name,latitud,longitud,rating,types,open_hours
0,ChIJD7G2bqduEg0ROdrTdOj1Jok,Plaza de España,37.377196,-5.986893,4.8,"['tourist_attraction', 'point_of_interest', 'e...","{0: {'open': '0800', 'close': '2200'}, 1: {'op..."
1,ChIJO7l_l7f8cQ0Rf6IhEu_RjYA,Alhambra,37.176078,-3.588141,4.8,"['tourist_attraction', 'museum', 'point_of_int...","{0: {'open': '0830', 'close': '2000'}, 1: {'op..."
2,ChIJk_s92NyipBIRUMnDG8Kq2Js,La Sagrada Familia,41.40363,2.174356,4.7,"['tourist_attraction', 'church', 'place_of_wor...","{0: {'open': '0900', 'close': '2000'}, 1: {'op..."
3,ChIJYUFLSe2ipBIRD04uni940kA,Casa Batlló,41.391728,2.164949,4.6,"['tourist_attraction', 'point_of_interest', 'e...","{0: {'open': '0830', 'close': '2015'}, 1: {'op..."
5,ChIJwamkfX4oQg0RUUjO1nnsfy4,Royal Palace of Madrid,40.417955,-3.714312,4.6,"['tourist_attraction', 'point_of_interest', 'e...","{0: {'open': '1000', 'close': '1600'}, 1: {'op..."


Se eliminan los tipo de lugares que no nos interesan, como atraccion turistica (tourist_attraction) o punto de interes (point_of_interest) para conservar unicamente la categoria real del lugar

In [6]:
import ast

non_useful_types = ['tourist_attraction', 'point_of_interest']

def clean_types(list_):
    type_list = ast.literal_eval(list_)
    type_list = [x for x in type_list if x not in non_useful_types]
    return type_list[0]

In [7]:
df_improved = df.copy()
df_improved['types'] = df_improved['types'].apply(clean_types)
df_improved.head(25)

Unnamed: 0,place_id,name,latitud,longitud,rating,types,open_hours
0,ChIJD7G2bqduEg0ROdrTdOj1Jok,Plaza de España,37.377196,-5.986893,4.8,establishment,"{0: {'open': '0800', 'close': '2200'}, 1: {'op..."
1,ChIJO7l_l7f8cQ0Rf6IhEu_RjYA,Alhambra,37.176078,-3.588141,4.8,museum,"{0: {'open': '0830', 'close': '2000'}, 1: {'op..."
2,ChIJk_s92NyipBIRUMnDG8Kq2Js,La Sagrada Familia,41.40363,2.174356,4.7,church,"{0: {'open': '0900', 'close': '2000'}, 1: {'op..."
3,ChIJYUFLSe2ipBIRD04uni940kA,Casa Batlló,41.391728,2.164949,4.6,establishment,"{0: {'open': '0830', 'close': '2015'}, 1: {'op..."
5,ChIJwamkfX4oQg0RUUjO1nnsfy4,Royal Palace of Madrid,40.417955,-3.714312,4.6,establishment,"{0: {'open': '1000', 'close': '1600'}, 1: {'op..."
7,ChIJ7aLYZp0oQg0RWoitk33wlBA,Museo Nacional del Prado,40.413782,-3.692127,4.7,museum,"{0: {'open': '1000', 'close': '1900'}, 1: {'op..."
8,ChIJq0HUUq6ipBIRWM6qGqALmok,Park Güell,41.414495,2.152694,4.4,park,"{0: {'open': '0930', 'close': '1930'}, 1: {'op..."
9,ChIJS6JBjBlsEg0Rh_7Brr92qbo,Royal Alcázar of Seville,37.383052,-5.990226,4.7,park,"{0: {'open': '0930', 'close': '1700'}, 1: {'op..."
10,ChIJv-yiGoMoQg0Rj1LLgnhKk1o,Thyssen-Bornemisza Museum,40.416041,-3.694925,4.6,museum,"{0: {'open': '1000', 'close': '1900'}, 1: {'op..."
12,ChIJvQc62ygmQg0Rcb-6WdEUmDA,Museo Nacional Centro de Arte Reina Sofía,40.407912,-3.694557,4.5,museum,"{0: {'open': '1000', 'close': '1430'}, 1: {'op..."


Eliminemos tambien las agencias de viaje y super mercados pues no son lugares en que los viajeros suelan estar interesados

In [8]:
uninteresting_places = ['supermarket', 'travel_agency', 'airport']
df_improved = df_improved[~df_improved['types'].isin(uninteresting_places)]
df_improved.head(25)

Unnamed: 0,place_id,name,latitud,longitud,rating,types,open_hours
0,ChIJD7G2bqduEg0ROdrTdOj1Jok,Plaza de España,37.377196,-5.986893,4.8,establishment,"{0: {'open': '0800', 'close': '2200'}, 1: {'op..."
1,ChIJO7l_l7f8cQ0Rf6IhEu_RjYA,Alhambra,37.176078,-3.588141,4.8,museum,"{0: {'open': '0830', 'close': '2000'}, 1: {'op..."
2,ChIJk_s92NyipBIRUMnDG8Kq2Js,La Sagrada Familia,41.40363,2.174356,4.7,church,"{0: {'open': '0900', 'close': '2000'}, 1: {'op..."
3,ChIJYUFLSe2ipBIRD04uni940kA,Casa Batlló,41.391728,2.164949,4.6,establishment,"{0: {'open': '0830', 'close': '2015'}, 1: {'op..."
5,ChIJwamkfX4oQg0RUUjO1nnsfy4,Royal Palace of Madrid,40.417955,-3.714312,4.6,establishment,"{0: {'open': '1000', 'close': '1600'}, 1: {'op..."
7,ChIJ7aLYZp0oQg0RWoitk33wlBA,Museo Nacional del Prado,40.413782,-3.692127,4.7,museum,"{0: {'open': '1000', 'close': '1900'}, 1: {'op..."
8,ChIJq0HUUq6ipBIRWM6qGqALmok,Park Güell,41.414495,2.152694,4.4,park,"{0: {'open': '0930', 'close': '1930'}, 1: {'op..."
9,ChIJS6JBjBlsEg0Rh_7Brr92qbo,Royal Alcázar of Seville,37.383052,-5.990226,4.7,park,"{0: {'open': '0930', 'close': '1700'}, 1: {'op..."
10,ChIJv-yiGoMoQg0Rj1LLgnhKk1o,Thyssen-Bornemisza Museum,40.416041,-3.694925,4.6,museum,"{0: {'open': '1000', 'close': '1900'}, 1: {'op..."
12,ChIJvQc62ygmQg0Rcb-6WdEUmDA,Museo Nacional Centro de Arte Reina Sofía,40.407912,-3.694557,4.5,museum,"{0: {'open': '1000', 'close': '1430'}, 1: {'op..."


In [9]:
df_improved = df_improved.reset_index()

In [10]:
df_improved.head()

Unnamed: 0,index,place_id,name,latitud,longitud,rating,types,open_hours
0,0,ChIJD7G2bqduEg0ROdrTdOj1Jok,Plaza de España,37.377196,-5.986893,4.8,establishment,"{0: {'open': '0800', 'close': '2200'}, 1: {'op..."
1,1,ChIJO7l_l7f8cQ0Rf6IhEu_RjYA,Alhambra,37.176078,-3.588141,4.8,museum,"{0: {'open': '0830', 'close': '2000'}, 1: {'op..."
2,2,ChIJk_s92NyipBIRUMnDG8Kq2Js,La Sagrada Familia,41.40363,2.174356,4.7,church,"{0: {'open': '0900', 'close': '2000'}, 1: {'op..."
3,3,ChIJYUFLSe2ipBIRD04uni940kA,Casa Batlló,41.391728,2.164949,4.6,establishment,"{0: {'open': '0830', 'close': '2015'}, 1: {'op..."
4,5,ChIJwamkfX4oQg0RUUjO1nnsfy4,Royal Palace of Madrid,40.417955,-3.714312,4.6,establishment,"{0: {'open': '1000', 'close': '1600'}, 1: {'op..."


In [11]:
len(df_improved.index)

42

In [12]:
df_improved

Unnamed: 0,index,place_id,name,latitud,longitud,rating,types,open_hours
0,0,ChIJD7G2bqduEg0ROdrTdOj1Jok,Plaza de España,37.377196,-5.986893,4.8,establishment,"{0: {'open': '0800', 'close': '2200'}, 1: {'op..."
1,1,ChIJO7l_l7f8cQ0Rf6IhEu_RjYA,Alhambra,37.176078,-3.588141,4.8,museum,"{0: {'open': '0830', 'close': '2000'}, 1: {'op..."
2,2,ChIJk_s92NyipBIRUMnDG8Kq2Js,La Sagrada Familia,41.40363,2.174356,4.7,church,"{0: {'open': '0900', 'close': '2000'}, 1: {'op..."
3,3,ChIJYUFLSe2ipBIRD04uni940kA,Casa Batlló,41.391728,2.164949,4.6,establishment,"{0: {'open': '0830', 'close': '2015'}, 1: {'op..."
4,5,ChIJwamkfX4oQg0RUUjO1nnsfy4,Royal Palace of Madrid,40.417955,-3.714312,4.6,establishment,"{0: {'open': '1000', 'close': '1600'}, 1: {'op..."
5,7,ChIJ7aLYZp0oQg0RWoitk33wlBA,Museo Nacional del Prado,40.413782,-3.692127,4.7,museum,"{0: {'open': '1000', 'close': '1900'}, 1: {'op..."
6,8,ChIJq0HUUq6ipBIRWM6qGqALmok,Park Güell,41.414495,2.152694,4.4,park,"{0: {'open': '0930', 'close': '1930'}, 1: {'op..."
7,9,ChIJS6JBjBlsEg0Rh_7Brr92qbo,Royal Alcázar of Seville,37.383052,-5.990226,4.7,park,"{0: {'open': '0930', 'close': '1700'}, 1: {'op..."
8,10,ChIJv-yiGoMoQg0Rj1LLgnhKk1o,Thyssen-Bornemisza Museum,40.416041,-3.694925,4.6,museum,"{0: {'open': '1000', 'close': '1900'}, 1: {'op..."
9,12,ChIJvQc62ygmQg0Rcb-6WdEUmDA,Museo Nacional Centro de Arte Reina Sofía,40.407912,-3.694557,4.5,museum,"{0: {'open': '1000', 'close': '1430'}, 1: {'op..."


In [13]:
city_file = place_name+'_places.csv'
df_improved.to_csv(city_file)


# Distancias

In [14]:
df_places = pd.read_csv("{}_places.csv".format(place_name))
df_places = df_places.drop(['Unnamed: 0'], axis=1)
df_places.head()

Unnamed: 0,index,place_id,name,latitud,longitud,rating,types,open_hours
0,0,ChIJD7G2bqduEg0ROdrTdOj1Jok,Plaza de España,37.377196,-5.986893,4.8,establishment,"{0: {'open': '0800', 'close': '2200'}, 1: {'op..."
1,1,ChIJO7l_l7f8cQ0Rf6IhEu_RjYA,Alhambra,37.176078,-3.588141,4.8,museum,"{0: {'open': '0830', 'close': '2000'}, 1: {'op..."
2,2,ChIJk_s92NyipBIRUMnDG8Kq2Js,La Sagrada Familia,41.40363,2.174356,4.7,church,"{0: {'open': '0900', 'close': '2000'}, 1: {'op..."
3,3,ChIJYUFLSe2ipBIRD04uni940kA,Casa Batlló,41.391728,2.164949,4.6,establishment,"{0: {'open': '0830', 'close': '2015'}, 1: {'op..."
4,5,ChIJwamkfX4oQg0RUUjO1nnsfy4,Royal Palace of Madrid,40.417955,-3.714312,4.6,establishment,"{0: {'open': '1000', 'close': '1600'}, 1: {'op..."


In [15]:
places = df_places['place_id'].values
n = 10
places_by_25 = [places[i * n:(i + 1) * n] for i in range((len(places) + n - 1) // n )] 

subsets = []
for subset in itertools.combinations_with_replacement(places_by_25, 2):
    subsets.append((subset[0], subset[1]))
    if not set(subset[0]) == set(subset[1]):
        subsets.append((subset[1], subset[0]))

responses = []

for subset in subsets:
    origins = "|".join(['place_id:' + s for s in subset[0]])
    destinations = "|".join(['place_id:' + s for s in subset[1]])
    url = 'https://maps.googleapis.com/maps/api/distancematrix/json?origins={}&destinations={}&key={}'.format(
        origins,
        destinations,
        api_key
    )
    # print(url)
    response_text = requests.get(url).text
    # print(response_text)
    response = json.loads(response_text)
    responses.append((response, (origins, destinations)))
    time.sleep(2)
responses

[({'destination_addresses': ['Av. Isabel la Católica, 41004 Sevilla, Spain',
    'C. Real de la Alhambra, s/n, 18009 Granada, Spain',
    'C/ de Mallorca, 401, 08013 Barcelona, Spain',
    'Pg. de Gràcia, 43, 08007 Barcelona, Spain',
    'C. de Bailén, s/n, 28071 Madrid, Spain',
    'C. de Ruiz de Alarcón, 23, 28014 Madrid, Spain',
    'Park Güell, 08024 Barcelona, Spain',
    'Royal Alcázar of Seville, Patio de Banderas, s/n, 41004 Sevilla, Spain',
    'P.º del Prado, 8, 28014 Madrid, Spain',
    'C. de Sta. Isabel, 52, 28012 Madrid, Spain'],
   'origin_addresses': ['Av. Isabel la Católica, 41004 Sevilla, Spain',
    'C. Real de la Alhambra, s/n, 18009 Granada, Spain',
    'C/ de Mallorca, 401, 08013 Barcelona, Spain',
    'Pg. de Gràcia, 43, 08007 Barcelona, Spain',
    'C. de Bailén, s/n, 28071 Madrid, Spain',
    'C. de Ruiz de Alarcón, 23, 28014 Madrid, Spain',
    'Park Güell, 08024 Barcelona, Spain',
    'Royal Alcázar of Seville, Patio de Banderas, s/n, 41004 Sevilla, Spain',
 

In [16]:
df_distance_matrix = np.full((len(df_places), len(df_places)), 0)
df_distance_matrix = pd.DataFrame(df_distance_matrix, index=df_places['place_id'].values, columns=df_places['place_id'].values)
for response_bundle in responses:
    response = response_bundle[0]
    origins  = response_bundle[1][0]
    origins = origins.replace('place_id:', '')
    origins = origins.split('|')
    destinations  = response_bundle[1][1]
    destinations = destinations.replace('place_id:', '')
    destinations = destinations.split('|')
    for origin, row in zip(origins, response['rows']):
        for destination, place in zip(destinations, row['elements']):
            if place['status'] == "OK":
                df_distance_matrix[origin][destination] = place['duration']['value']
            else:
                df_distance_matrix[origin][destination] = None
    

In [17]:
df_distance_matrix.head()

Unnamed: 0,ChIJD7G2bqduEg0ROdrTdOj1Jok,ChIJO7l_l7f8cQ0Rf6IhEu_RjYA,ChIJk_s92NyipBIRUMnDG8Kq2Js,ChIJYUFLSe2ipBIRD04uni940kA,ChIJwamkfX4oQg0RUUjO1nnsfy4,ChIJ7aLYZp0oQg0RWoitk33wlBA,ChIJq0HUUq6ipBIRWM6qGqALmok,ChIJS6JBjBlsEg0Rh_7Brr92qbo,ChIJv-yiGoMoQg0Rj1LLgnhKk1o,ChIJvQc62ygmQg0Rcb-6WdEUmDA,...,ChIJySfBCJooQg0Ry8Qhx3Mk3zs,ChIJHQS2xgqIQQ0RgyGlQ3AakYk,ChIJv3ERb-IoQg0Rs2mh59vAhRY,ChIJ35PCcoQoQg0RGhxNE3_2Iak,ChIJZY540HsoQg0Rrq2nqutjg6I,ChIJaQbyBhMoQg0RnrxRkEi3nxQ,ChIJeQ--ZogoQg0RpbSSHRBuj8U,ChIJ1VA7UXcoQg0RI0MvNRtWfiI,ChIJrWpuK2QoQg0RNy5SsBRnZK0,ChIJ4-4wsokoQg0Rg8n_sK3Jiy0
ChIJD7G2bqduEg0ROdrTdOj1Jok,0,10075,35825,35437,18239,18095,35567,561,18064,18132,...,18258,17714,18629,18176,18489,18165,18563,18147,18336,18534
ChIJO7l_l7f8cQ0Rf6IhEu_RjYA,10017,0,31597,31209,15250,15068,31339,10228,15037,15114,...,15147,15203,15399,15149,15615,15242,15610,15158,15461,15508
ChIJk_s92NyipBIRUMnDG8Kq2Js,35837,31507,0,553,22508,22023,466,36022,22044,22274,...,21944,22528,21978,22015,22482,22468,22382,22416,22470,22259
ChIJYUFLSe2ipBIRD04uni940kA,35557,31227,534,0,22502,22017,823,35742,22038,22268,...,21938,22522,21971,22009,22476,22462,22376,22410,22464,22253
ChIJwamkfX4oQg0RUUjO1nnsfy4,18373,15104,22863,22661,0,633,22563,18115,597,626,...,791,805,1264,709,481,514,796,210,635,844


In [18]:
matrix_distance_file = place_name+'_distance_matrix.csv'
df_distance_matrix.to_csv(matrix_distance_file)

# Aeropuerto

In [19]:
df_distance_matrix = pd.read_csv("{}_distance_matrix.csv".format(place_name), index_col=0)
df_distance_matrix.head()

Unnamed: 0,ChIJD7G2bqduEg0ROdrTdOj1Jok,ChIJO7l_l7f8cQ0Rf6IhEu_RjYA,ChIJk_s92NyipBIRUMnDG8Kq2Js,ChIJYUFLSe2ipBIRD04uni940kA,ChIJwamkfX4oQg0RUUjO1nnsfy4,ChIJ7aLYZp0oQg0RWoitk33wlBA,ChIJq0HUUq6ipBIRWM6qGqALmok,ChIJS6JBjBlsEg0Rh_7Brr92qbo,ChIJv-yiGoMoQg0Rj1LLgnhKk1o,ChIJvQc62ygmQg0Rcb-6WdEUmDA,...,ChIJySfBCJooQg0Ry8Qhx3Mk3zs,ChIJHQS2xgqIQQ0RgyGlQ3AakYk,ChIJv3ERb-IoQg0Rs2mh59vAhRY,ChIJ35PCcoQoQg0RGhxNE3_2Iak,ChIJZY540HsoQg0Rrq2nqutjg6I,ChIJaQbyBhMoQg0RnrxRkEi3nxQ,ChIJeQ--ZogoQg0RpbSSHRBuj8U,ChIJ1VA7UXcoQg0RI0MvNRtWfiI,ChIJrWpuK2QoQg0RNy5SsBRnZK0,ChIJ4-4wsokoQg0Rg8n_sK3Jiy0
ChIJD7G2bqduEg0ROdrTdOj1Jok,0,10075,35825,35437,18239,18095,35567,561,18064,18132,...,18258,17714,18629,18176,18489,18165,18563,18147,18336,18534
ChIJO7l_l7f8cQ0Rf6IhEu_RjYA,10017,0,31597,31209,15250,15068,31339,10228,15037,15114,...,15147,15203,15399,15149,15615,15242,15610,15158,15461,15508
ChIJk_s92NyipBIRUMnDG8Kq2Js,35837,31507,0,553,22508,22023,466,36022,22044,22274,...,21944,22528,21978,22015,22482,22468,22382,22416,22470,22259
ChIJYUFLSe2ipBIRD04uni940kA,35557,31227,534,0,22502,22017,823,35742,22038,22268,...,21938,22522,21971,22009,22476,22462,22376,22410,22464,22253
ChIJwamkfX4oQg0RUUjO1nnsfy4,18373,15104,22863,22661,0,633,22563,18115,597,626,...,791,805,1264,709,481,514,796,210,635,844


In [20]:
df_places = pd.read_csv("{}_places.csv".format(place_name), index_col=0)
df_places.head()

Unnamed: 0,index,place_id,name,latitud,longitud,rating,types,open_hours
0,0,ChIJD7G2bqduEg0ROdrTdOj1Jok,Plaza de España,37.377196,-5.986893,4.8,establishment,"{0: {'open': '0800', 'close': '2200'}, 1: {'op..."
1,1,ChIJO7l_l7f8cQ0Rf6IhEu_RjYA,Alhambra,37.176078,-3.588141,4.8,museum,"{0: {'open': '0830', 'close': '2000'}, 1: {'op..."
2,2,ChIJk_s92NyipBIRUMnDG8Kq2Js,La Sagrada Familia,41.40363,2.174356,4.7,church,"{0: {'open': '0900', 'close': '2000'}, 1: {'op..."
3,3,ChIJYUFLSe2ipBIRD04uni940kA,Casa Batlló,41.391728,2.164949,4.6,establishment,"{0: {'open': '0830', 'close': '2015'}, 1: {'op..."
4,5,ChIJwamkfX4oQg0RUUjO1nnsfy4,Royal Palace of Madrid,40.417955,-3.714312,4.6,establishment,"{0: {'open': '1000', 'close': '1600'}, 1: {'op..."


In [21]:
url = 'https://maps.googleapis.com/maps/api/place/textsearch/json?query=airport+{}&key={}'.format(
    place_name,
    api_key,
)

labels = ['place_id','name','latitud','longitud','rating','types', 'city']
data = []

while True:
    response_text = requests.get(url).text
    # print(response_text)
    response = json.loads(response_text)
    places = response['results']
    print(len(places), 'places')
    for place in places:
        name = place['name']
        place_id = place ['place_id']
        latitud = place['geometry']['location']['lat']
        longitud = place['geometry']['location']['lng']
        city = place['formatted_address'].split(',')
        city =''.join([i for i in city[-2] if not i.isdigit()])
        city = city.strip()
        rating = None
        if 'rating' in place:
            rating = place['rating']
        types = str(place['types'])
        # vicinity = place['vicinity']
        data.append([place_id, name, latitud, longitud, rating, types, city])
    time.sleep(2)
    if 'next_page_token' not in response:
        break
    else:
        next_page_token = response['next_page_token']
        url = 'https://maps.googleapis.com/maps/api/place/textsearch/json?key='+str(api_key)+'&pagetoken='+str(next_page_token)
df_airports = pd.DataFrame.from_records(data, columns=labels)
print(len(df_airports.index))


20 places
7 places
27


In [22]:
df_airports.head()

Unnamed: 0,place_id,name,latitud,longitud,rating,types,city
0,ChIJAQAAANAxQg0R786FD-old24,Adolfo Suárez Madrid–Barajas Airport,40.498332,-3.567598,4.0,"['airport', 'point_of_interest', 'establishment']",Madrid
1,ChIJpY58hGSepBIR15tv-0LpK_M,Josep Tarradellas Barcelona-El Prat Airport,41.297445,2.083294,3.9,"['airport', 'point_of_interest', 'establishment']",Barcelona
2,ChIJpTC6dhj6cg0Rmj7eRy-ttVc,Málaga-Costa del Sol Airport,36.677128,-4.491568,3.9,"['airport', 'point_of_interest', 'establishment']",Málaga
3,ChIJM9oQTcZKYg0R8b65Jd5Fstw,Alicante Airport,38.285093,-0.562498,4.3,"['airport', 'point_of_interest', 'establishment']",Alicante
4,ChIJ8zXb9S-UlxIRVvm3aWXaMXA,Palma de Mallorca Airport,39.551741,2.736165,4.2,"['airport', 'point_of_interest', 'establishment']",Balearic Islands


In [23]:
airports_file = place_name+'_airports.csv'
df_airports.to_csv(airports_file)


In [24]:
df_airports = pd.read_csv("{}_airports.csv".format(place_name), index_col=0)
df_airports

Unnamed: 0,place_id,name,latitud,longitud,rating,types,city
0,ChIJAQAAANAxQg0R786FD-old24,Adolfo Suárez Madrid–Barajas Airport,40.498332,-3.567598,4.0,"['airport', 'point_of_interest', 'establishment']",Madrid
1,ChIJpY58hGSepBIR15tv-0LpK_M,Josep Tarradellas Barcelona-El Prat Airport,41.297445,2.083294,3.9,"['airport', 'point_of_interest', 'establishment']",Barcelona
2,ChIJpTC6dhj6cg0Rmj7eRy-ttVc,Málaga-Costa del Sol Airport,36.677128,-4.491568,3.9,"['airport', 'point_of_interest', 'establishment']",Málaga
3,ChIJM9oQTcZKYg0R8b65Jd5Fstw,Alicante Airport,38.285093,-0.562498,4.3,"['airport', 'point_of_interest', 'establishment']",Alicante
4,ChIJ8zXb9S-UlxIRVvm3aWXaMXA,Palma de Mallorca Airport,39.551741,2.736165,4.2,"['airport', 'point_of_interest', 'establishment']",Balearic Islands
5,ChIJHS1mw2aiQAwR1IWfFX7PS4I,Gran Canaria Airport,27.928922,-15.387404,4.2,"['airport', 'point_of_interest', 'establishment']",Las Palmas
6,ChIJZ1rMYJ9aYA0RggGxqv-3wkw,Valencia Airport,39.489233,-0.478026,4.0,"['airport', 'point_of_interest', 'establishment']",Valencia
7,ChIJtQc1pJmfagwRATBZlraFkIo,Tenerife South Airport,28.046757,-16.57253,4.0,"['airport', 'point_of_interest', 'establishment']",Santa Cruz de Tenerife
8,ChIJVZouaa4Bcg0Rc1VJsULO08s,Federico García Lorca Granada Airport,37.187313,-3.778043,4.0,"['airport', 'point_of_interest', 'establishment']",Granada
9,ChIJr7HMJfVFmRIRC1WADVXEsiY,Ibiza Airport,38.874859,1.371223,4.1,"['airport', 'point_of_interest', 'establishment']",Balearic Islands


In [25]:
airport = df_airports.iloc[0]
airport['place_id']

'ChIJAQAAANAxQg0R786FD-old24'

In [26]:
df_distance_matrix[airport['place_id']] = df_distance_matrix.apply(lambda _: None, axis=1)
df_distance_matrix.head()

Unnamed: 0,ChIJD7G2bqduEg0ROdrTdOj1Jok,ChIJO7l_l7f8cQ0Rf6IhEu_RjYA,ChIJk_s92NyipBIRUMnDG8Kq2Js,ChIJYUFLSe2ipBIRD04uni940kA,ChIJwamkfX4oQg0RUUjO1nnsfy4,ChIJ7aLYZp0oQg0RWoitk33wlBA,ChIJq0HUUq6ipBIRWM6qGqALmok,ChIJS6JBjBlsEg0Rh_7Brr92qbo,ChIJv-yiGoMoQg0Rj1LLgnhKk1o,ChIJvQc62ygmQg0Rcb-6WdEUmDA,...,ChIJHQS2xgqIQQ0RgyGlQ3AakYk,ChIJv3ERb-IoQg0Rs2mh59vAhRY,ChIJ35PCcoQoQg0RGhxNE3_2Iak,ChIJZY540HsoQg0Rrq2nqutjg6I,ChIJaQbyBhMoQg0RnrxRkEi3nxQ,ChIJeQ--ZogoQg0RpbSSHRBuj8U,ChIJ1VA7UXcoQg0RI0MvNRtWfiI,ChIJrWpuK2QoQg0RNy5SsBRnZK0,ChIJ4-4wsokoQg0Rg8n_sK3Jiy0,ChIJAQAAANAxQg0R786FD-old24
ChIJD7G2bqduEg0ROdrTdOj1Jok,0,10075,35825,35437,18239,18095,35567,561,18064,18132,...,17714,18629,18176,18489,18165,18563,18147,18336,18534,
ChIJO7l_l7f8cQ0Rf6IhEu_RjYA,10017,0,31597,31209,15250,15068,31339,10228,15037,15114,...,15203,15399,15149,15615,15242,15610,15158,15461,15508,
ChIJk_s92NyipBIRUMnDG8Kq2Js,35837,31507,0,553,22508,22023,466,36022,22044,22274,...,22528,21978,22015,22482,22468,22382,22416,22470,22259,
ChIJYUFLSe2ipBIRD04uni940kA,35557,31227,534,0,22502,22017,823,35742,22038,22268,...,22522,21971,22009,22476,22462,22376,22410,22464,22253,
ChIJwamkfX4oQg0RUUjO1nnsfy4,18373,15104,22863,22661,0,633,22563,18115,597,626,...,805,1264,709,481,514,796,210,635,844,


In [27]:
df_distance_matrix = pd.read_csv("{}_distance_matrix.csv".format(place_name), index_col=0)
df_distance_matrix.head()
df_airport_origin_matrix = pd.DataFrame(df_distance_matrix, columns=df_distance_matrix.columns.values, index=[airport['place_id']])
df_airport_origin_matrix

Unnamed: 0,ChIJD7G2bqduEg0ROdrTdOj1Jok,ChIJO7l_l7f8cQ0Rf6IhEu_RjYA,ChIJk_s92NyipBIRUMnDG8Kq2Js,ChIJYUFLSe2ipBIRD04uni940kA,ChIJwamkfX4oQg0RUUjO1nnsfy4,ChIJ7aLYZp0oQg0RWoitk33wlBA,ChIJq0HUUq6ipBIRWM6qGqALmok,ChIJS6JBjBlsEg0Rh_7Brr92qbo,ChIJv-yiGoMoQg0Rj1LLgnhKk1o,ChIJvQc62ygmQg0Rcb-6WdEUmDA,...,ChIJySfBCJooQg0Ry8Qhx3Mk3zs,ChIJHQS2xgqIQQ0RgyGlQ3AakYk,ChIJv3ERb-IoQg0Rs2mh59vAhRY,ChIJ35PCcoQoQg0RGhxNE3_2Iak,ChIJZY540HsoQg0Rrq2nqutjg6I,ChIJaQbyBhMoQg0RnrxRkEi3nxQ,ChIJeQ--ZogoQg0RpbSSHRBuj8U,ChIJ1VA7UXcoQg0RI0MvNRtWfiI,ChIJrWpuK2QoQg0RNy5SsBRnZK0,ChIJ4-4wsokoQg0Rg8n_sK3Jiy0
ChIJAQAAANAxQg0R786FD-old24,,,,,,,,,,,...,,,,,,,,,,


In [28]:
places = df_places['place_id'].values
n = 10
places_subsets = [places[i * n:(i + 1) * n] for i in range((len(places) + n - 1) // n )] 


for subset in places_subsets:
    airport_id = 'place_id:{}'.format(airport['place_id'])
    destinations = "|".join(['place_id:' + s for s in subset])
    url = 'https://maps.googleapis.com/maps/api/distancematrix/json?origins={}&destinations={}&key={}'.format(
        airport,
        destinations,
        api_key
    )
    response_text = requests.get(url).text
    response = json.loads(response_text)
    
    destinations_ids = destinations.replace('place_id:', '')
    destinations_ids = destinations_ids.split('|')
    for destination, place in zip(destinations_ids, response['rows'][0]['elements']):
        if place['status'] == "OK":
            df_airport_origin_matrix.loc[[airport['place_id']], destination] = place['duration']['value']

            # df_airport_origin_matrix[destination][airport['place_id']] = place['duration']['value']
        else:
            df_airport_origin_matrix.loc[[airport['place_id']], destination] = None
            # df_airport_origin_matrix[destination][airport['place_id']] = None
    time.sleep(2)

    origins = destinations
    url = 'https://maps.googleapis.com/maps/api/distancematrix/json?origins={}&destinations={}&key={}'.format(
        origins,
        airport,
        api_key
    )
    response_text = requests.get(url).text
    response = json.loads(response_text)
    
    origins_ids = origins.replace('place_id:', '')
    origins_ids = origins_ids.split('|')

    for origin, place in zip(origins_ids, response['rows']):
        place_info = place['elements'][0]
        if place_info['status'] == "OK":
            df_distance_matrix.loc[origin, airport['place_id']] = place_info['duration']['value']
        else:
            df_distance_matrix.loc[origin, airport['place_id']] = None
    time.sleep(2)
df_distance_matrix = df_distance_matrix.append(df_airport_origin_matrix)

In [29]:
df_distance_matrix.loc[airport['place_id'], airport['place_id']] = 0

In [30]:
df_distance_matrix

Unnamed: 0,ChIJD7G2bqduEg0ROdrTdOj1Jok,ChIJO7l_l7f8cQ0Rf6IhEu_RjYA,ChIJk_s92NyipBIRUMnDG8Kq2Js,ChIJYUFLSe2ipBIRD04uni940kA,ChIJwamkfX4oQg0RUUjO1nnsfy4,ChIJ7aLYZp0oQg0RWoitk33wlBA,ChIJq0HUUq6ipBIRWM6qGqALmok,ChIJS6JBjBlsEg0Rh_7Brr92qbo,ChIJv-yiGoMoQg0Rj1LLgnhKk1o,ChIJvQc62ygmQg0Rcb-6WdEUmDA,...,ChIJHQS2xgqIQQ0RgyGlQ3AakYk,ChIJv3ERb-IoQg0Rs2mh59vAhRY,ChIJ35PCcoQoQg0RGhxNE3_2Iak,ChIJZY540HsoQg0Rrq2nqutjg6I,ChIJaQbyBhMoQg0RnrxRkEi3nxQ,ChIJeQ--ZogoQg0RpbSSHRBuj8U,ChIJ1VA7UXcoQg0RI0MvNRtWfiI,ChIJrWpuK2QoQg0RNy5SsBRnZK0,ChIJ4-4wsokoQg0Rg8n_sK3Jiy0,ChIJAQAAANAxQg0R786FD-old24
ChIJD7G2bqduEg0ROdrTdOj1Jok,0.0,10075.0,35825.0,35437.0,18239.0,18095.0,35567.0,561.0,18064.0,18132.0,...,17714.0,18629.0,18176.0,18489.0,18165.0,18563.0,18147.0,18336.0,18534.0,18501.0
ChIJO7l_l7f8cQ0Rf6IhEu_RjYA,10017.0,0.0,31597.0,31209.0,15250.0,15068.0,31339.0,10228.0,15037.0,15114.0,...,15203.0,15399.0,15149.0,15615.0,15242.0,15610.0,15158.0,15461.0,15508.0,14998.0
ChIJk_s92NyipBIRUMnDG8Kq2Js,35837.0,31507.0,0.0,553.0,22508.0,22023.0,466.0,36022.0,22044.0,22274.0,...,22528.0,21978.0,22015.0,22482.0,22468.0,22382.0,22416.0,22470.0,22259.0,21738.0
ChIJYUFLSe2ipBIRD04uni940kA,35557.0,31227.0,534.0,0.0,22502.0,22017.0,823.0,35742.0,22038.0,22268.0,...,22522.0,21971.0,22009.0,22476.0,22462.0,22376.0,22410.0,22464.0,22253.0,21536.0
ChIJwamkfX4oQg0RUUjO1nnsfy4,18373.0,15104.0,22863.0,22661.0,0.0,633.0,22563.0,18115.0,597.0,626.0,...,805.0,1264.0,709.0,481.0,514.0,796.0,210.0,635.0,844.0,1279.0
ChIJ7aLYZp0oQg0RWoitk33wlBA,18409.0,15053.0,22596.0,22394.0,827.0,0.0,22296.0,18152.0,81.0,404.0,...,1000.0,827.0,193.0,659.0,1013.0,654.0,815.0,792.0,552.0,807.0
ChIJq0HUUq6ipBIRWM6qGqALmok,35644.0,31313.0,658.0,1006.0,22314.0,21830.0,0.0,35828.0,21850.0,22080.0,...,22334.0,21784.0,21822.0,22288.0,22274.0,22189.0,22223.0,22276.0,22066.0,21437.0
ChIJS6JBjBlsEg0Rh_7Brr92qbo,568.0,10101.0,35916.0,35528.0,18406.0,18262.0,35658.0,0.0,18231.0,18299.0,...,17881.0,18796.0,18343.0,18656.0,18332.0,18730.0,18314.0,18503.0,18701.0,18243.0
ChIJv-yiGoMoQg0Rj1LLgnhKk1o,18699.0,15319.0,22614.0,22412.0,869.0,186.0,22314.0,18441.0,0.0,569.0,...,1132.0,745.0,111.0,578.0,932.0,573.0,734.0,710.0,470.0,828.0
ChIJvQc62ygmQg0Rcb-6WdEUmDA,18457.0,15101.0,22622.0,22420.0,740.0,211.0,22321.0,18199.0,180.0,0.0,...,1048.0,925.0,291.0,758.0,1009.0,753.0,710.0,890.0,650.0,1032.0


In [31]:
matrix_distance_file = place_name+'_distance_matrix.csv'
df_distance_matrix.to_csv(matrix_distance_file)