In [35]:
!pip install python-dotenv



In [36]:
import dotenv
import os
import json
import requests
from pprint import pprint

dotenv.load_dotenv()

True

In [41]:
# ['*'] for all the details or from ['name', 'id', 'types', 'nationalPhoneNumber', 'internationalPhoneNumber', 'formattedAddress', 'addressComponents', 'plusCode', 'location', 'viewport', 'rating', 'googleMapsUri', 'websiteUri', 'regularOpeningHours', 'utcOffsetMinutes', 'adrFormatAddress', 'businessStatus', 'priceLevel', 'userRatingCount', 'iconMaskBaseUri', 'iconBackgroundColor', 'displayName', 'primaryTypeDisplayName', 'takeout', 'delivery', 'dineIn', 'curbsidePickup', 'servesBreakfast', 'servesLunch', 'servesDinner', 'servesBeer', 'servesWine', 'servesVegetarianFood', 'currentOpeningHours', 'primaryType', 'shortFormattedAddress', 'editorialSummary', 'reviews', 'photos', 'outdoorSeating', 'menuForChildren', 'servesDessert', 'servesCoffee', 'goodForChildren', 'restroom', 'goodForGroups', 'paymentOptions', 'parkingOptions', 'accessibilityOptions']
PLACE_DETAILS_FIELDS = [
    '*',
    # 'name', 'id', 'types',
    # 'formattedAddress','addressComponents',
    # 'location', 'rating', 
]
DATA_PATH = 'data/'

if not os.path.exists(os.path.join(DATA_PATH,'places')):
    os.makedirs(os.path.join(DATA_PATH,'places'))

In [13]:
def fetch_places(data, fields: list = ['places.id']):
    headers = {
            "Content-Type": "application/json",
            "X-Goog-Api-Key": os.getenv("GMAPS_API_KEY"),
            "X-Goog-FieldMask": ",".join(fields)
    }
    res = requests.post("https://places.googleapis.com/v1/places:searchNearby", json=data, headers=headers)
    if res.status_code == 200:
        return res.json()
    else:
        raise Exception(f"Error while fetching places with status {res.status_code}: {res.text}")

In [27]:
def fetch_place_details(place_id, fields: list = PLACE_DETAILS_FIELDS):
    headers = {
            "Content-Type": "application/json",
            "X-Goog-Api-Key": os.getenv("GMAPS_API_KEY"),
            "X-Goog-FieldMask": ",".join(fields)
    }
    res = requests.get("https://places.googleapis.com/v1/places/"+place_id, headers=headers)
    if res.status_code == 200:
        return res.json()
    else:
        raise Exception(f"Error while fetching place details with status {res.status_code}: {res.text}")

In [42]:
def save_place(place, prefix=''):
    path = os.path.join(DATA_PATH, 'places', prefix+place['id']+'.json')
    with open(path, 'w') as f:
        json.dump(place, f)
    return path

In [52]:
def get_places(*place_params, detail_fields: list = PLACE_DETAILS_FIELDS, save_prefix='', ignore_if_exists=True):
    places_ids = []
    for pparams in place_params:
        places = fetch_places(pparams)
        places_ids.extend(map(lambda p:p['id'], places['places']))
        
    print(places_ids)
    places_ids = list(set(places_ids))
    
    for place_id in places_ids:
        print(os.path.exists(os.path.join(DATA_PATH, 'places', save_prefix+place_id+'.json')))
        if ignore_if_exists and os.path.exists(os.path.join(DATA_PATH, 'places', save_prefix+place_id+'.json')):
            yield (place_id, None, None)
            continue

        try:
            place = fetch_place_details(place_id, detail_fields)
        except Exception as e:
            yield (place_id, e, None)
            continue

        try:
            save_path = save_place(place, save_prefix)
            yield (place_id, place, save_path)
        except Exception as e:
            yield (place_id, place, e)


In [54]:
padova = {
        "latitude": 45.416668,
        "longitude": 11.866667
        }

place_params1 = {
  "includedTypes": ["restaurant"],
  "locationRestriction": {
    "circle": {
      "center": padova,
      "radius": 1000
    }
  }
}
place_params2 = {
  "includedTypes": ["restaurant"],
  "locationRestriction": {
    "circle": {
      "center": padova,
      "radius": 9000
    }
  }
}

places = get_places(place_params1, place_params2, save_prefix='restaurant-', ignore_if_exists=True)
for p in places:
    print(p)

['ChIJndkoTl3afkcRDto-rckMt7o', 'ChIJn0ipN6fbfkcRANpZ-fvlgl0', 'ChIJ2cAMchzbfkcRQ1IHPKq7ZbQ', 'ChIJBVlgsTnafkcRJeoTzoYd0XE', 'ChIJlyxYRbDbfkcRORBCU2FGMDo', 'ChIJv5RfikXafkcRGeLQgpoFDdI', 'ChIJez3mtJLbfkcRkFmBI3AbBgo', 'ChIJ-y2_lD7afkcRfuWr-pW2IEU', 'ChIJX1jA6FvbfkcRk6FcFeIO7ZI', 'ChIJz4aHi_jbfkcRoPrHfub6BLU', 'ChIJX1F9NETafkcRak9FtEBDeG8', 'ChIJbRsCFkXafkcRFV0UzbORs2A', 'ChIJgahnhkTafkcRjOS0yqc05k0', 'ChIJ08t8WjjafkcRIaydrYfr9Yc', 'ChIJMTPrr3jbfkcRdXuAZ2qdAx8', 'ChIJuU6dlkXafkcRLJtARyhyfYI', 'ChIJpdXyXmvafkcR-xt27nsrGVk', 'ChIJC628jEXafkcRb38-THnIM0E', 'ChIJ9-ODzDjbfkcRxzIVcHHfFF4', 'ChIJyeqWM0TafkcR86G_L77oiAI', 'ChIJ2x4rCOzafkcRLZ0HUZrw_SE', 'ChIJE49DguzQfkcREnGf6-fZF_A', 'ChIJrzxAcjTZfkcR-_oZ1mU6F9I', 'ChIJ81IpJZTZfkcR-hM8h-BN7aw', 'ChIJAUJTpNHafkcRAsAh6Mm3n28', 'ChIJLa3j_rDbfkcRApbTngSgAJM', 'ChIJ-7hJkUrYfkcRNckmhLp6J4w', 'ChIJtbw9Ow_afkcRVPmYPV7w85Y', 'ChIJndkoTl3afkcRDto-rckMt7o', 'ChIJIXkQY5_rgUcRXE1-C0FCeR0', 'ChIJWdpV5_7RfkcRI93ap-aXK90', 'ChIJkUI1If7afkcR0E2W3YadCTU', 'ChIJfz

In [39]:
places

{'places': [{'id': 'ChIJndkoTl3afkcRDto-rckMt7o'},
  {'id': 'ChIJn0ipN6fbfkcRANpZ-fvlgl0'},
  {'id': 'ChIJ2cAMchzbfkcRQ1IHPKq7ZbQ'},
  {'id': 'ChIJBVlgsTnafkcRJeoTzoYd0XE'},
  {'id': 'ChIJlyxYRbDbfkcRORBCU2FGMDo'},
  {'id': 'ChIJv5RfikXafkcRGeLQgpoFDdI'},
  {'id': 'ChIJez3mtJLbfkcRkFmBI3AbBgo'},
  {'id': 'ChIJ-y2_lD7afkcRfuWr-pW2IEU'},
  {'id': 'ChIJX1jA6FvbfkcRk6FcFeIO7ZI'},
  {'id': 'ChIJz4aHi_jbfkcRoPrHfub6BLU'},
  {'id': 'ChIJX1F9NETafkcRak9FtEBDeG8'},
  {'id': 'ChIJbRsCFkXafkcRFV0UzbORs2A'},
  {'id': 'ChIJgahnhkTafkcRjOS0yqc05k0'},
  {'id': 'ChIJ08t8WjjafkcRIaydrYfr9Yc'},
  {'id': 'ChIJMTPrr3jbfkcRdXuAZ2qdAx8'},
  {'id': 'ChIJuU6dlkXafkcRLJtARyhyfYI'},
  {'id': 'ChIJpdXyXmvafkcR-xt27nsrGVk'},
  {'id': 'ChIJC628jEXafkcRb38-THnIM0E'},
  {'id': 'ChIJ9-ODzDjbfkcRxzIVcHHfFF4'},
  {'id': 'ChIJyeqWM0TafkcR86G_L77oiAI'}]}

In [25]:
print(place.keys())
place
['name', 'id', 'types', 'formattedAddress', 'addressComponents', 'location', 'rating', ]

dict_keys(['name', 'id', 'types', 'nationalPhoneNumber', 'internationalPhoneNumber', 'formattedAddress', 'addressComponents', 'plusCode', 'location', 'viewport', 'rating', 'googleMapsUri', 'websiteUri', 'regularOpeningHours', 'utcOffsetMinutes', 'adrFormatAddress', 'businessStatus', 'priceLevel', 'userRatingCount', 'iconMaskBaseUri', 'iconBackgroundColor', 'displayName', 'primaryTypeDisplayName', 'takeout', 'delivery', 'dineIn', 'curbsidePickup', 'servesBreakfast', 'servesLunch', 'servesDinner', 'servesBeer', 'servesWine', 'servesVegetarianFood', 'currentOpeningHours', 'primaryType', 'shortFormattedAddress', 'editorialSummary', 'reviews', 'photos', 'outdoorSeating', 'menuForChildren', 'servesDessert', 'servesCoffee', 'goodForChildren', 'restroom', 'goodForGroups', 'paymentOptions', 'parkingOptions', 'accessibilityOptions'])


{'name': 'places/ChIJndkoTl3afkcRDto-rckMt7o',
 'id': 'ChIJndkoTl3afkcRDto-rckMt7o',
 'types': ['fast_food_restaurant',
  'hamburger_restaurant',
  'american_restaurant',
  'restaurant',
  'food',
  'point_of_interest',
  'establishment'],
 'nationalPhoneNumber': '049 654930',
 'internationalPhoneNumber': '+39 049 654930',
 'formattedAddress': 'Piazzale della Stazione, 5, 35131 Padova PD, Italy',
 'addressComponents': [{'longText': '5',
   'shortText': '5',
   'types': ['street_number'],
   'languageCode': 'en-US'},
  {'longText': 'Piazzale della Stazione',
   'shortText': 'Piazzale della Stazione',
   'types': ['route'],
   'languageCode': 'it'},
  {'longText': 'Padova',
   'shortText': 'Padova',
   'types': ['locality', 'political'],
   'languageCode': 'it'},
  {'longText': 'Padova',
   'shortText': 'Padova',
   'types': ['administrative_area_level_3', 'political'],
   'languageCode': 'it'},
  {'longText': 'Provincia di Padova',
   'shortText': 'PD',
   'types': ['administrative_area

In [5]:
def save_raw_results(results, idx: int):
    name_segs = {k:v for k,v in sorted(SEARCH_PARAMS.items(), key=lambda item: item[0], reverse=True)}
    
    name = '_'.join(map(str,name_segs.values())) + "-" + str(idx) +'.json'
    with open(os.path.join(RAW_DATASETS_PATH, name), 'w') as f:
        json.dump(results, f)


## Fetch RAW results

In [41]:
# Get the latitude and longitude of Padova
padova = gmaps.geocode("Padova")[0]["geometry"]["location"]

# Get all the places in Padova
results_0 = gmaps.places_nearby(location=padova, **SEARCH_PARAMS)

# save_raw_results(results_0, 0)

{'html_attributions': [], 'next_page_token': 'AUGGfZnuFHK1aHnCruWvYOieLdgXJQIeT4hZfTn0Vqi3gCUcmsmK9O8N0iUHDsI6WKKWvSvC3wuXotIN6hkuIYTuJu4g_8a1VgAywsT5fGojYvCz-T3kGEIHcqj4X16lxSHf-hVBDYPLU-hVzKRYEfIb7-jNrxtoAd6GF1psyq3YiiR6iW1UNzmjUGtZZAipZeQaTdppt6oMq-YsexmlTL5j8I9EARx0s2h6nuvzQzNPFFB4BzLxYfK9CUs0kjsrp8DMOKKKuGuz4PJhfNfz5QuMQaotk9BTQl8JAVNDHN2JrVy32Gz7hk6vob36MB9cIV1zH1ukLhZgIaZrnA3uQwc2o_BzZaJTbGJba1zAvgZmMA5WVQdatqeemikTwJUv95S3JwYNPSpPehLGCSXpEIeLAFRNTZjzMKDHJLzHsaeZZIcBmA9CRfuq3AEW', 'results': [{'business_status': 'OPERATIONAL', 'geometry': {'location': {'lat': 45.4025612, 'lng': 11.8798475}, 'viewport': {'northeast': {'lat': 45.4038911802915, 'lng': 11.8810891302915}, 'southwest': {'lat': 45.4011932197085, 'lng': 11.8783911697085}}}, 'icon': 'https://maps.gstatic.com/mapfiles/place_api/icons/v1/png_71/restaurant-71.png', 'icon_background_color': '#FF9E67', 'icon_mask_base_uri': 'https://maps.gstatic.com/mapfiles/place_api/icons/v2/restaurant_pinlet', 'name': 'Hotel Al Santo', 'op

In [42]:
pprint(results_0)

{'html_attributions': [],
 'next_page_token': 'AUGGfZnuFHK1aHnCruWvYOieLdgXJQIeT4hZfTn0Vqi3gCUcmsmK9O8N0iUHDsI6WKKWvSvC3wuXotIN6hkuIYTuJu4g_8a1VgAywsT5fGojYvCz-T3kGEIHcqj4X16lxSHf-hVBDYPLU-hVzKRYEfIb7-jNrxtoAd6GF1psyq3YiiR6iW1UNzmjUGtZZAipZeQaTdppt6oMq-YsexmlTL5j8I9EARx0s2h6nuvzQzNPFFB4BzLxYfK9CUs0kjsrp8DMOKKKuGuz4PJhfNfz5QuMQaotk9BTQl8JAVNDHN2JrVy32Gz7hk6vob36MB9cIV1zH1ukLhZgIaZrnA3uQwc2o_BzZaJTbGJba1zAvgZmMA5WVQdatqeemikTwJUv95S3JwYNPSpPehLGCSXpEIeLAFRNTZjzMKDHJLzHsaeZZIcBmA9CRfuq3AEW',
 'results': [{'business_status': 'OPERATIONAL',
              'geometry': {'location': {'lat': 45.4025612, 'lng': 11.8798475},
                           'viewport': {'northeast': {'lat': 45.4038911802915,
                                                      'lng': 11.8810891302915},
                                        'southwest': {'lat': 45.4011932197085,
                                                      'lng': 11.8783911697085}}},
              'icon': 'https://maps.gstatic.com/mapfiles/pl

In [6]:
idx = 1
while results_0.get("next_page_token", None) and idx < MAX_RESULT_PAGES:
    page_token = results_0["next_page_token"]
    results = gmaps.places(page_token=page_token)
    results['page_token'] = page_token

    save_raw_results(results, idx)

    idx += 1


NameError: name 'results_0' is not defined

## Read RAW results

In [7]:
def get_all_results():
    for p in os.scandir(RAW_DATASETS_PATH):
        if p.name.startswith('review'): continue

        with open(os.path.join(RAW_DATASETS_PATH, p.name)) as f:
            try:
                yield from json.load(f)['results']
            except Exception as e:
                pass


In [10]:
all_results = list(get_all_results())
print("Num of all results:", len(all_results))

Num of all results: 1000


In [27]:
def is_reviews_exist(reviews):
    name = 'reviews-' + reviews['result']['place_id'] + '.json'
    return os.path.exists(os.path.join(RAW_DATASETS_PATH, 'reviews', name))

In [28]:
def save_raw_reviews(reviews):
    name = 'reviews-' + reviews['result']['place_id'] + '.json'
    with open(os.path.join(RAW_DATASETS_PATH, 'reviews', name), 'w') as f:
        json.dump(reviews, f)

## Fetch RAW reviews

In [31]:
all_reviews = []
for res in all_results:
    # print("res:",res)
    # print("res['place_id']:", res['place_id'])
    # restaurant = gmaps.place(res['place_id'])
    # print("restaurant:", restaurant)
    # print("reviews['result']['place_id']:", restaurant['result']['place_id'])
    # print('is_reviews_exist(restaurant):', is_reviews_exist(restaurant))
    # break
    print('fetching place id:', res['place_id'], end='')
    restaurant = gmaps.place(res['place_id'])
    all_reviews.append(restaurant)
    
    print(' ... done => ', end='')
    if is_reviews_exist(restaurant): 
        print('ALREADY EXISTS')
        continue
    save_raw_reviews(restaurant)
    print('SAVED')


fetching place id: ChIJPTRpx3jbfkcRx5Gv5a-P484 ... done => SAVED
fetching place id: ChIJeeU78IjZfkcReoht8Oi1dQs ... done => SAVED
fetching place id: ChIJn0ipN6fbfkcRANpZ-fvlgl0 ... done => SAVED
fetching place id: ChIJpThkkazbfkcR_ZAfff-TNO4 ... done => SAVED
fetching place id: ChIJsyFieFfafkcRN_O0j_MdLPk ... done => SAVED
fetching place id: ChIJpW5wYu_QfkcRn9XltKPMj1w ... done => SAVED
fetching place id: ChIJLUDgA1DafkcR-dtY0yPNtyA ... done => SAVED
fetching place id: ChIJ42w6oiTFfkcR29FnNZ0Qu3w ... done => SAVED
fetching place id: ChIJL7_PE1fafkcRgSh4yy4s7hY ... done => SAVED
fetching place id: ChIJM40mU7LQfkcRPGT8E6UbtrI ... done => SAVED
fetching place id: ChIJAQtKXRbXfkcR7Rqeuk3FXhc ... done => SAVED
fetching place id: ChIJMStIPpTefkcR6vzlMHX-Kuc ... done => SAVED
fetching place id: ChIJP39wIffafkcR_HAseeSiBZ4 ... done => SAVED
fetching place id: ChIJVRdlLHfbfkcR0Itrg7bUo4k ... done => SAVED
fetching place id: ChIJ9c0txpHDfkcRP-WyUmxf8SQ ... done => SAVED
fetching place id: ChIJs1

KeyboardInterrupt: 

In [40]:
len(set(map(lambda rev:rev['result']['place_id'],all_reviews)))

20

In [23]:
pprint(restaurant)

{'html_attributions': [],
 'result': {'address_components': [{'long_name': '741',
                                    'short_name': '741',
                                    'types': ['street_number']},
                                   {'long_name': 'Riva del Vin',
                                    'short_name': 'Riva del Vin',
                                    'types': ['route']},
                                   {'long_name': 'Venezia',
                                    'short_name': 'Venezia',
                                    'types': ['locality', 'political']},
                                   {'long_name': 'Venezia',
                                    'short_name': 'Venezia',
                                    'types': ['administrative_area_level_3',
                                              'political']},
                                   {'long_name': 'Città Metropolitana di '
                                                 'Venezia',
                    

In [28]:
list(map(lambda rest:rest['results'],results))

TypeError: string indices must be integers, not 'str'