In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load restaurant data from file
# The resulting frame's `id` column is read further down to drive the
# per-restaurant review download.
restaurant_df = pd.read_csv('reviews_step=-0.02789_offset=1.csv')

In [4]:
def read_file_content(filename):
    """ Read a text file and hand back everything in it.

        The text is returned exactly as stored (nothing is stripped).
        If the file does not exist, a notice is printed and None is
        returned instead of an exception being raised.
    """
    try:
        with open(filename, 'r') as handle:
            content = handle.read()
    except FileNotFoundError:
        # Best-effort: report the missing file and signal it with None
        print("'%s' file not found" % filename)
        return None
    return content

In [5]:
# Open the Wextractor API key file and read the key
# The file is assumed to contain only one single line with the API key.
# Surrounding whitespace (typically the trailing newline of that line) is
# stripped so it cannot end up inside the auth token of a request URL.
WEXTRACTOR_API_KEY = read_file_content("./API Keys/WEXTRACTOR_API_KEY.txt")
if WEXTRACTOR_API_KEY is not None:  # None means the key file was not found
    WEXTRACTOR_API_KEY = WEXTRACTOR_API_KEY.strip()

In [None]:
# Use Wextractor to download all ratings for each restaurant
#
# Result: reviews_per_restaurant maps each restaurant id to the list of
# `requests.Response` objects fetched for it (one per offset step).

import requests
import json

# Reviews endpoint as described here: https://wextractor.com/docs
reviews_url = 'https://wextractor.com/api/v1/reviews'

WEXTRACTOR_API_KEY = read_file_content("./API Keys/WEXTRACTOR_API_KEY.txt")
# Fail early with a clear message instead of sending unauthenticated requests;
# read_file_content returns None when the key file is missing.
if not WEXTRACTOR_API_KEY or not WEXTRACTOR_API_KEY.strip():
    raise RuntimeError("Wextractor API key is missing or empty")
# Drop the trailing newline of the one-line key file
WEXTRACTOR_API_KEY = WEXTRACTOR_API_KEY.strip()

# Each offset step returns 10 reviews
# For now, set the limit just to 1 to spare API requests
# NOTE(review): the offset is advanced by 1 per step; confirm against the
# Wextractor docs whether the API expects a page index or a review count.
offset_limit = 1

reviews_per_restaurant = dict()

# For each restaurant, request one response per offset step
# until the offset limit is reached
restaurant_ids = restaurant_df.id.values
for restaurant_id in restaurant_ids:

    offset = 0
    resp_list = []

    while offset < offset_limit:
        print(offset)
        # Let requests assemble and URL-encode the query string; this also
        # guarantees every parameter is separated by '&'.
        resp = requests.get(
            reviews_url,
            params={
                'id': str(restaurant_id),
                'auth_token': WEXTRACTOR_API_KEY,
                'offset': offset,
                'sort': 'relevancy',
                # Set 'host language' to German,
                # otherwise English reviews will be preferred, and sorting by relevancy will not apply
                'hl': 'de',
            },
            timeout=30,  # do not hang forever on a stalled connection
        )
        if resp.status_code != 200:
            # This means something went wrong. The message deliberately omits
            # the full URL so the auth token is not leaked into the output.
            raise requests.HTTPError(
                'GET {} for id={} returned status {}'.format(
                    reviews_url, restaurant_id, resp.status_code),
                response=resp)
        resp_list.append(resp)
        # Advance only after the request so the first step (offset 0) is fetched
        offset += 1

    # For each restaurant, save the response list
    reviews_per_restaurant[restaurant_id] = resp_list



In [None]:
# Create reviews_df DataFrame while iterating the reviews_per_restaurant dict.
# One row is produced per review. Rows are collected in a plain list first and
# the frame is built once at the end (DataFrame.append no longer exists in
# pandas 2.x, and growing a frame row by row is quadratic).
review_columns = ['restaurant_id', 'rating', 'text', 'reviewer']
review_records = []
for restaurant_id, response_list in reviews_per_restaurant.items():
    print('-----------------------------')
    print(restaurant_id)
    for resp in response_list:
        resp_json = resp.json()
        review_list = resp_json['reviews']
        for review in review_list:
            print(review['rating'])
            print(review['reviewer'])
            print(review['text'])
            review_records.append({'restaurant_id': restaurant_id,
                                   'rating': review['rating'],
                                   'text': review['text'],
                                   'reviewer': review['reviewer']})

# Passing `columns` keeps the expected schema even when no review was found
reviews_df = pd.DataFrame(review_records, columns=review_columns)

In [None]:
# Preview the first rows of reviews_df
reviews_df.head()

In [None]:
# Write to CSV file and Excel file
# `offset` is the integer loop counter left over from the download cell, so it
# has to be converted before being concatenated into the file name.
# NOTE(review): `step` is not defined in any cell visible in this notebook;
# it must be set before running this cell - confirm where it comes from.
output_stem = 'restaurants_step=' + str(step) + '_offset=' + str(offset)
reviews_df.to_csv(output_stem + '.csv', index=False)
reviews_df.to_excel(output_stem + '.xlsx', index=False)