In [33]:
# Downloads restaurants data from GooglePlaces within a given rectangle.
# This requires an Google API key.

# We want to obtain all restaurants within a given quadrangle.  
# To overcome the limit of 20 (or 3x20) results, we do multiple queries for several locations 
# (specified by a given step size), then merging and de-duplicating the results


In [18]:
from datetime import datetime
import numpy as np
import pandas as pd


In [19]:
def read_file_content(filename):
    """ Given a filename,
        returns the content of this file
    """
    try:
        with open(filename, 'r') as f:
            return f.read()
    except FileNotFoundError:
        print("'%s' file not found" % filename)

In [20]:
# Makes use of python-google_places, a Python wrapper around the Google Places API
# https://github.com/slimkrazy/python-google-places

import googlemaps
from googleplaces import GooglePlaces, types, lang
import gmaps

# Open Google Places API key file and read keys
# The file is assumed to contain only one single line with the API key
# Configure GooglePlaces and gmaps
PLACES_API_KEY = read_file_content("./API Keys/GOOGLE_PLACES_API_KEY.txt")
google_places = GooglePlaces(PLACES_API_KEY)
gmaps.configure(PLACES_API_KEY)


In [21]:
# Search center = Stuttgart, Germany
lat = 48.783333
lng = 9.183333

In [22]:
# First, try a nearby search around the given search center with search radius of 20km
# This returns only up to 20 results (or 3x20 results maximum by making use of the next_page_token).
# The results are ranked by prominence.
# Places search documentation: 
# https://developers.google.com/maps/documentation/places/web-service/search#PlaceSearchResults

# Nearby search via python-google-places
query_result = google_places.nearby_search(
        lat_lng={'lat': lat, 'lng': lng}, 
        radius=20000,
        types=[types.TYPE_RESTAURANT] or [types.TYPE_CAFE] or [type.TYPE_BAR] or [type.TYPE_CASINO])

for place in query_result.places:
    print (place.name)


Hotel Royal
Der Zauberlehrling
Alte Kanzlei
Oggi
Enchilada Stuttgart
5 Bar Gourmetrestaurant
Valle
Sky Beach Stuttgart
BLOCK HOUSE Eberhardstraße
Arche Weinstube
VAPIANO Stuttgart Bolzstrasse
Weinstube Fröhlich
Paulaner am alten Postplatz
Prince of India
Hotel-Restaurant Köhler
Ambiente Africa
MAREDO Steakhaus Stuttgart
BLOCK HOUSE Arnulf-Klett-Platz
CUBE Restaurant
Amici


In [23]:
# Now, explore nearby search with rankby=distance, 
# which will give us all restaurants around the search center, instead of the most prominent ones.
# The number of results is still restricted to 20 (or 3x20)

query_result = google_places.nearby_search(
        lat_lng={'lat': lat, 'lng': lng}, 
        rankby='distance',
        types=[types.TYPE_RESTAURANT] or [types.TYPE_CAFE] or [type.TYPE_BAR] or [type.TYPE_CASINO])

for place in query_result.places:
    print (place.name)


YORMA'S
NORDSEE Stuttgart Klett-Passage
dean&david
Crêpes
LE CROBAG
Die Zirbelstube
McDonald's
LE CROBAG
Weinwirtschaft | Weingut Franz Keller
minmin City Stuttgart
Burger King
Eiscafé Santin Kö1
ALM DELUXE
BLOCK HOUSE Arnulf-Klett-Platz
Ratskeller Helga's Bistro
Sky Beach Stuttgart
Starbucks
Galeria Restaurant
GALERIA Restaurant Leonhard’s
minmin - Stuttgart HBF - Poke Bowl


In [24]:
# We want to obtain all restaurants around our given search center (Stuttgart)
# To overcome the limit of 20 / 3x20 results, we do multiple queries for several locations, then merging the results

# Define a quadrangle within we want to search
# and a step size which defines the distance between the search centers

step = -0.02789 # ~2km

start_lat = 48.864401
end_lat = 48.687051
start_long= 9.415365
end_long = 8.954283


In [25]:
# Show search centers as markers on Google Maps

marker_locations=[]
for i in np.arange(start_lat, end_lat, step):
    for j in np.arange(start_long, end_long, step):
        marker_locations.append((i, j))

print("Number of markers: {}".format(len(marker_locations)))   

fig = gmaps.figure()
symbols = gmaps.symbol_layer(marker_locations, fill_color='red', stroke_color='red')
fig.add_layer(symbols)
fig


Number of markers: 119


Figure(layout=FigureLayout(height='420px'))

In [26]:
# For each marker location, make a nearby request with rankby=distance. 
# Thus we get a list of googleplaces.GooglePlacesSearchResult instances

query_results=[]
for marker in marker_locations:
    query_result = google_places.nearby_search(
        lat_lng={'lat': marker[0], 'lng': marker[1]},
        rankby='distance', 
        types=[types.TYPE_RESTAURANT] or [types.TYPE_CAFE] or [type.TYPE_BAR] or [type.TYPE_CASINO])
    query_results.append(query_result)


In [27]:
# Relevant fields we can obtain from Google Places with a basic request, without querying Places Details:
# id, name, geolocation, type

# Iterate the query_result list, 
# and for each result store the id, name, geolocation and type in lists to create a DataFrame from
id_list, restaurant_list, lat_list, lng_list, types_list = [], [], [], [], []
for query_result in query_results:
    for place in query_result.places:
        id_list.append(place.place_id)
        restaurant_list.append(place.name)
        lat_list.append(float(place.geo_location['lat']))
        lng_list.append(float(place.geo_location['lng']))
        types_list.append(place.types)
        
assert(len(id_list) == len(restaurant_list) == len(lat_list) == len(lng_list) == len(types_list))
print("Number of restaurants: {}".format(len(restaurant_list)))
print("Number of uniqe restaurants: {}".format(len(set(restaurant_list))))

Number of restaurants: 2380
Number of uniqe restaurants: 1646


In [29]:
# Show restaurant locations on Google Maps

locations=[]
for i in range(len(lat_list)):
    locations.append([lat_list[i], lng_list[i]])
    
fig = gmaps.figure()
symbols = gmaps.symbol_layer(locations, fill_color='red', stroke_color='red')
fig.add_layer(symbols)
fig


Figure(layout=FigureLayout(height='420px'))

In [30]:
# Create initial DataFrame
restaurant_df = pd.DataFrame(
    {'id': id_list,
     'name': restaurant_list,
     'latitude': lat_list,
     'longitude': lng_list,
     'types': types_list
    })

restaurant_df.head()

Unnamed: 0,id,name,latitude,longitude,types
0,ChIJJaT2_CW2mUcRlrbbqIJmeKU,Gaststätte Talaue,48.86867,9.42593,"[restaurant, food, point_of_interest, establis..."
1,ChIJO3jAlCW2mUcRT8h9Ik6kQ1Q,TV Brikmannsweiler Vereinsheim,48.869112,9.4275,"[restaurant, food, point_of_interest, establis..."
2,ChIJwREn7zG2mUcRXmk62UVnVkc,Landhaus Heubach,48.863919,9.431504,"[restaurant, food, point_of_interest, establis..."
3,ChIJN7rvnMnJmUcRy8a3wEWV3UE,Schwabenalm Winnenden,48.866326,9.398349,"[restaurant, food, point_of_interest, establis..."
4,ChIJ78c1rMnJmUcRUgz2MZN3k3M,Gaststätte Tennisclub Winnenden,48.867407,9.398605,"[restaurant, food, point_of_interest, establis..."


In [31]:
# Iterate over all types lists in restaurant_df.types 
# and produce one single set of types that occur in this restaurant_df
import itertools
set(list(itertools.chain.from_iterable([l for l in restaurant_df.types.values])))

{'atm',
 'bakery',
 'bar',
 'bowling_alley',
 'cafe',
 'car_rental',
 'car_wash',
 'casino',
 'convenience_store',
 'establishment',
 'finance',
 'food',
 'furniture_store',
 'gas_station',
 'grocery_or_supermarket',
 'gym',
 'health',
 'home_goods_store',
 'liquor_store',
 'lodging',
 'meal_delivery',
 'meal_takeaway',
 'mosque',
 'movie_theater',
 'night_club',
 'park',
 'place_of_worship',
 'point_of_interest',
 'restaurant',
 'school',
 'spa',
 'store',
 'supermarket',
 'tourist_attraction'}

In [35]:
# "Unstack" the type information for the following (most relevant) types:
# 'restaurant', 'cafe', 'bar', 'lodgin', 'meal_delivery', 'meal_takeaway',

col_names= ['restaurant', 'cafe', 'bar', 'lodging', 'meal_delivery', 'meal_takeaway']
for col in col_names: # if the 'types' column contains 'col', sets the corresponding column to "1"
    restaurant_df[col] = restaurant_df['types'].apply(lambda x : int(col in x))
    
restaurant_df.head()

Unnamed: 0,id,name,latitude,longitude,types,restaurant,cafe,bar,lodging,meal_delivery,meal_takeaway
0,ChIJJaT2_CW2mUcRlrbbqIJmeKU,Gaststätte Talaue,48.86867,9.42593,"[restaurant, food, point_of_interest, establis...",1,0,0,0,0,0
1,ChIJO3jAlCW2mUcRT8h9Ik6kQ1Q,TV Brikmannsweiler Vereinsheim,48.869112,9.4275,"[restaurant, food, point_of_interest, establis...",1,0,0,0,0,0
2,ChIJwREn7zG2mUcRXmk62UVnVkc,Landhaus Heubach,48.863919,9.431504,"[restaurant, food, point_of_interest, establis...",1,0,0,0,0,0
3,ChIJN7rvnMnJmUcRy8a3wEWV3UE,Schwabenalm Winnenden,48.866326,9.398349,"[restaurant, food, point_of_interest, establis...",1,0,0,0,0,0
4,ChIJ78c1rMnJmUcRUgz2MZN3k3M,Gaststätte Tennisclub Winnenden,48.867407,9.398605,"[restaurant, food, point_of_interest, establis...",1,0,0,0,0,0


In [32]:
# Remove duplicates
restaurant_df.drop_duplicates(subset=['id'], inplace=True)

In [37]:
# Write to CSV file and Excel file
restaurant_df.to_csv('restaurants_step=' + str(step) + '.csv', index=False)
restaurant_df.to_excel('restaurants_step=' + str(step) + '.xlsx', index=False)

In [248]:
# Use Wextractor to download all ratings for each restaurant

import requests
import json

WEXTRACTOR_API_KEY = read_file_content("./API Keys/WEXTRACTOR_API_KEY.txt")

# Each offset step returns 10 reveiws
# For now, set the limit just to 1 to spare API requests
offset_limit = 1

reviews_per_restaurant = dict()

# For each restaurant, ...
# until the offset limit is reached
restaurant_ids = restaurant_df.id.values
for restaurant_id in (restaurant_ids):
    
    offset = 0
    resp_list = []
    
    
    while (offset < offset_limit):
        print (offset)
        offset += 1
        # Create REST query as described here: https://wextractor.com/docs 
        query = ('https://wextractor.com/api/v1/reviews?' +
             'id=' + restaurant_id + 
             '&auth_token=' + WEXTRACTOR_API_KEY + 
             '&offset=' + str(offset) +
             '&sort=relevancy' + 
             'hl=de')# Set 'host language' to German, 
                    # otherwise English reviews will be preferred, and sorting by relevancy will not apply

        resp = requests.get(query)
        if resp.status_code != 200:
            # This means something went wrong
            raise ApiError('GET /tasks/ {}'.format(resp.status_code))
        resp_list.append(resp)

    # For each restaurant, save the response list
    reviews_per_restaurant[restaurant_id] = resp_list



ChIJJaT2_CW2mUcRlrbbqIJmeKU
0
ChIJO3jAlCW2mUcRT8h9Ik6kQ1Q
0
ChIJwREn7zG2mUcRXmk62UVnVkc
0
ChIJN7rvnMnJmUcRy8a3wEWV3UE
0
ChIJ78c1rMnJmUcRUgz2MZN3k3M
0
ChIJVVVVVTC2mUcRjEPUKU0azWI
0
ChIJVVVVVTC2mUcR8BOg5Ox7-bo
0
ChIJWROc0QLsC0ERGOunmWFV1Yo
0
ChIJn_gq_iC2mUcRh_1Hzl3XYC8
0
ChIJI5MiqMbJmUcRXkeRxuKSkEU
0
ChIJRxC5pMbJmUcRxiw3nW_1fM4
0
ChIJp1ZNpcbJmUcRDP4odDNgjdA
0
ChIJ7wgbpcbJmUcRE1Bqnrq4zUU
0
ChIJOUg9l8PJmUcRbbPYsva1qIY
0
ChIJk3VvP8jJmUcRZpHvYq-gi1g
0
ChIJGbn9tMbJmUcRqTWttNDO08E
0
ChIJmxcuKsHJmUcR0lbq85alkc8
0
ChIJC87HPMHJmUcRMaYQSmfObWw
0
ChIJzVYyKMHJmUcRt8I3u37kXeg
0
ChIJm63Q0hHJmUcR7r_0EU_NjJE
0
ChIJLWAtprPJmUcRZEAdSR3oIT4
0
ChIJFeS6ybPJmUcRhJRGFhPUlZs
0
ChIJoWwvlrfJmUcRp6lWFaQx1pw
0
ChIJjZPxY7rJmUcRmQnTe0bIz8Y
0
ChIJyYtk6irJmUcRu7xqS4f14wo
0
ChIJa0EzH7rJmUcR6XjbqPPd9Vg
0
ChIJeZYwH7rJmUcRtUXpwlMdCZc
0
ChIJ8Uk__6_JmUcRe02miBiP2Ls
0
ChIJZ0OB5rrJmUcRT-aOq0OxgPU
0
ChIJGTRv2rrJmUcRhM_Pg7N5rAk
0
ChIJPTi7abnJmUcRzpJiuZUsgF8
0
ChIJfb-maLnJmUcRg9u96slI8sU
0
ChIJzwC5RrrJmUcRP2vihk2RDPQ
0
ChIJS0lSZr

NameError: name 'ApiError' is not defined

In [259]:
# Create reviews_df DataFrame and fill it while iterating the reviews_per_restaurant dict
reviews_df = pd.DataFrame(columns=['restaurant_id', 'rating', 'text', 'reviewer'])
for restaurant_id, response_list in reviews_per_restaurant.items():
    print('-----------------------------')
    print(restaurant_id)
    for resp in response_list:
        resp_json = resp.json()
        review_list = resp_json['reviews']
        new_row = pd.Series(data={'restaurant_id':restaurant_id, 
                                  'rating':review['rating'], 
                                  'text':review['text'], 
                                  'reviewer':review['reviewer']}, name='x')
        reviews_df = reviews_df.append(new_row, ignore_index=False) #TODO replace 'append'?
        for review in review_list:
            print(review['rating'])
            print(review['reviewer'])
            print(review['text'])

-----------------------------
ChIJJaT2_CW2mUcRlrbbqIJmeKU
4
Concezio Riccardo D'Andrea
Sie essen gut und die Umgebung ist entspannend
5
Jörg Mohnke
Super Preise,  super Qualität
5
Radojka Vejnovic
Zum Empfehlen👍👍
3
Jörg S.
Wir wollten was bestellen, weil ich a) Hunger hatte und b) gerne die lokalen Restaurants unterstützen möchte. Also, Hörer in die eine Hand und mit der anderen gewählt. Schon ging der Besitzer an den Apparat und fragte nach meinen Befindlichkeiten. Ich nannte ihm mein Anliegen und wollte meine Bestellung durchgeben. Harsch wurde ich unterbrochen, dass ich meine zeitlichen Präferenzen nun mal sogar nicht zur internen Planung passten. Entweder jetzt oder wieder in 2 Stunden. Ich  ja gar nicht klagen, aber der Ton macht halt die Musik und die kakophonischen Klänge, waren nun leider nicht das erste Mal. Sicher mein letzter Versuch hier etwas zu essen.
5
Nico K

5
Günther Luithle
Heute war das Essen wieder super zubereitet,
sehr gut , ausgezeichnet.
Prompte Lieferung, wie 

5
Rainer Fuchs
Wir waren zum Mittagstisch dort. Habe Spaghetti mit Knoblauch gegessen, war sehr lecker. Bedienung war freundlich und schnell. Komme gerne wieder.
4
Helmut Denzinger

5
Andreas R.
Kurz und knapp...die Pizzen sind einfach lecker! Das Lokal gemütlich und die Servicekraft war freundlich und auf zack.
5
Marcel Strobel

5
Erika Meyer
Sehr lecker 😋 und ganz fixe Bedienung.  Und kein Problem bei extra Wünschen.  Immer wieder gerne da.
5
Marcel Taylor
Pizza und Salat waren Super!
4
Andrea Fiore

-----------------------------
ChIJPTi7abnJmUcRzpJiuZUsgF8
5
Hikmet Cakir

4
Devestator21

5
Marlena Kurowski

5
STERNLE 05
Super Essen,  freundlicher Service
5
Heiko Seng
Bin sehr zufrieden gewesen, es hat fantastisch geschmeckt !!!
5
Bernhard Beck
Super lecker....
1
Mario Kunkić

5
Angelika Zographopulos

5
Unlock123

4
Antonia Bäuerle

-----------------------------
ChIJfb-maLnJmUcRg9u96slI8sU
-----------------------------
ChIJzwC5RrrJmUcRP2vihk2RDPQ
5
Daniel Pertschi
Immer sehr lecker.

In [260]:
reviews_df.head()

Unnamed: 0,restaurant_id,rating,text,reviewer
x,ChIJJaT2_CW2mUcRlrbbqIJmeKU,5,,Martin Roth
x,ChIJO3jAlCW2mUcRT8h9Ik6kQ1Q,5,Hervorragende Köstlichkeiten! In einer sehr fr...,Ole Figelius
x,ChIJwREn7zG2mUcRXmk62UVnVkc,5,"Sehr leckeres Essen, freundliche und schnelle ...",NrOne 1
x,ChIJN7rvnMnJmUcRy8a3wEWV3UE,4,Gute Gelegenheit spontan zu übernachten // WLA...,JOH BORM
x,ChIJ78c1rMnJmUcRUgz2MZN3k3M,4,,Bettina Zecha


In [6]:
# Write to CSV file and Excel file
reviews_df.to_csv('restaurants_step=' + str(step) + '_offset=' + offset + '.csv', index=False)
reviews_df.to_excel('restaurants_step=' + str(step) + '_offset=' + offset + '.xlsx', index=False)

Unnamed: 0.1,Unnamed: 0,id,name,latitude,longitude,types,restaurant,cafe,bar,lodging,meal_delivery,meal_takeaway
0,"0,ChIJJaT2_CW2mUcRlrbbqIJmeKU,Gaststätte Talau...",,,,,,,,,,,
1,"1,ChIJO3jAlCW2mUcRT8h9Ik6kQ1Q,TV Brikmannsweil...",,,,,,,,,,,
2,"2,ChIJwREn7zG2mUcRXmk62UVnVkc,Landhaus Heubach...",,,,,,,,,,,
3,"3,ChIJN7rvnMnJmUcRy8a3wEWV3UE,Schwabenalm Winn...",,,,,,,,,,,
4,"4,ChIJ78c1rMnJmUcRUgz2MZN3k3M,Gaststätte Tenni...",,,,,,,,,,,
