In [17]:
# standard libraries
import requests
import os
import pandas as pd
import numpy as np
import json
import time
import json

# custom functions
from api_functions.google_nearby_search import gPlaceSearch

In [26]:
# Load the cleaned NYC neighborhood geo data; the CSV's first column is used as the index.
nyc = pd.read_csv(
    filepath_or_buffer='../data/data/nyc_geo_data_cleaned.csv',
    index_col=0,
)

In [5]:
# Fetch a single sample response (row 50, 500 m radius, restaurants) so the
# payload structure can be studied before writing the parsing code.
sample_latitude = str(nyc['latitude'][50])
sample_longitude = str(nyc['longitude'][50])
test_response = gPlaceSearch(sample_latitude, sample_longitude, 500, 'restaurant')

In [6]:
# Display the raw sample response to inspect its nesting (a list of pages, each with a 'results' list).
test_response

[{'html_attributions': [],
  'results': [{'business_status': 'OPERATIONAL',
    'geometry': {'location': {'lat': 40.5965038, 'lng': -73.9787432},
     'viewport': {'northeast': {'lat': 40.59779132989272,
       'lng': -73.97738587010728},
      'southwest': {'lat': 40.59509167010728, 'lng': -73.98008552989272}}},
    'icon': 'https://maps.gstatic.com/mapfiles/place_api/icons/v1/png_71/restaurant-71.png',
    'icon_background_color': '#FF9E67',
    'icon_mask_base_uri': 'https://maps.gstatic.com/mapfiles/place_api/icons/v2/restaurant_pinlet',
    'name': 'Cafe On The Hill',
    'opening_hours': {'open_now': True},
    'photos': [{'height': 4032,
      'html_attributions': ['<a href="https://maps.google.com/maps/contrib/107024899880853954783">A Google User</a>'],
      'photo_reference': 'Aap_uEDd0AxYBlVK1i2pigxQt30lD_NNdY34eJzyccT9ZDiSZSNJrR8dpq-j9qfz9anDcYRANsryIOT4yxljZC-ONezv25zKvF4kYb2szIdoAZlHmdk4zFXntDyTkL7M02HiyAQZ9uSkcAFSfPFHdFJA9zSC8-iuCHjEQJkK-pnHQOPPDbU2',
      'width': 3024

In [7]:
# Count the venues returned on each page of the sample response.
row_restaurant_counts = [len(page['results']) for page in test_response]

row_restaurant_counts

[19]

In [8]:
# Collect the review count of every venue, flattening across all pages.
row_review_counts = [
    venue['user_ratings_total']
    for page in test_response
    for venue in page['results']
]

print(row_review_counts)

[42, 38, 74, 89, 11, 66, 24, 134, 8425, 76, 214, 71, 58, 3, 562, 266, 50, 185, 223]


In [9]:
# Collect the rating of every venue, flattening across all pages.
row_ratings = [
    venue['rating']
    for page in test_response
    for venue in page['results']
]

print(row_ratings)

[4.5, 3.8, 4.2, 4.2, 5, 3.3, 4, 3.8, 4.6, 3.9, 4.3, 4, 4.3, 5, 4.5, 4.3, 3.6, 4.3, 3.8]


In [10]:
# Collect the price level of every venue; venues that carry no
# 'price_level' key are recorded with the placeholder '?'.
price_levels = [
    venue.get('price_level', '?')
    for page in test_response
    for venue in page['results']
]

print(price_levels)

[2, '?', 1, '?', '?', 1, '?', '?', 2, '?', '?', 1, '?', '?', 2, 1, 1, 1, 1]


In [11]:
# Let's make the dict of results that's a bit more compact, and in a function

def googleEnrich(df=None, radius=500, query='restaurant', search_fn=None):
    """
    Run one nearby search per row of df and collect per-venue statistics,
    keyed by the row's id.

    df (dataframe): must include columns 'latitude' AND 'longitude' AND an 'id' column.
        When omitted, the notebook-level `nyc` dataframe is used (looked up
        at call time rather than frozen when the function is defined).
    radius (int): distance in meters to query for
    query (string): query string; also used as the prefix of the result keys
    search_fn (callable): called as search_fn(lat_str, lng_str, radius, query)
        and expected to return an iterable of pages, each a dict holding a
        'results' list of venue dicts. Defaults to gPlaceSearch.

    returns a dictionary keyed by row id; each value is a dict with the keys:
    <query>_counts - venue counts per page
    <query>_review_counts - review counts per venue
    <query>_ratings - ratings per venue
    <query>_price_levels - price level per venue

    Any per-venue field missing from a result is stored as '?', so the three
    per-venue lists always have the same length and stay index-aligned.
    """
    missing = '?'  # placeholder for a field absent from a venue record

    if df is None:
        df = nyc
    if search_fn is None:
        search_fn = gPlaceSearch

    google_dict = {}

    # go through every row
    for row in df.itertuples():
        print(row.id)
        # get data from API (coordinates are passed as strings)
        response = search_fn(str(row.latitude), str(row.longitude), radius, query)

        query_counts = []
        query_review_counts = []
        query_ratings = []
        price_levels = []

        # single pass over the pages: count venues and pull out their fields
        for page in response:
            results = page['results']
            query_counts.append(len(results))
            for result in results:
                # venues without reviews carry neither key; indexing directly
                # would raise KeyError and abort the whole crawl
                query_review_counts.append(result.get('user_ratings_total', missing))
                query_ratings.append(result.get('rating', missing))
                price_levels.append(result.get('price_level', missing))

        # store lists into dict
        google_dict[row.id] = {
            query + '_counts': query_counts,
            query + '_review_counts': query_review_counts,
            query + '_ratings': query_ratings,
            query + '_price_levels': price_levels
        }
    return google_dict

In [12]:
# Run the enrichment over every neighborhood (500 m radius, restaurants) and keep the result.
google_dict = googleEnrich(df=nyc, radius=500, query='restaurant')

nyu_2451_34572.1
nyu_2451_34572.2
nyu_2451_34572.3
nyu_2451_34572.4
nyu_2451_34572.5
nyu_2451_34572.6
nyu_2451_34572.7
nyu_2451_34572.8
nyu_2451_34572.9
nyu_2451_34572.10
nyu_2451_34572.11
nyu_2451_34572.12
nyu_2451_34572.13
nyu_2451_34572.14
nyu_2451_34572.15
nyu_2451_34572.16
nyu_2451_34572.17
nyu_2451_34572.18
nyu_2451_34572.19
nyu_2451_34572.20
nyu_2451_34572.21
nyu_2451_34572.22
nyu_2451_34572.23
nyu_2451_34572.24
nyu_2451_34572.25
nyu_2451_34572.26
nyu_2451_34572.27
nyu_2451_34572.28
nyu_2451_34572.29
nyu_2451_34572.30
nyu_2451_34572.31
nyu_2451_34572.32
nyu_2451_34572.33
nyu_2451_34572.34
nyu_2451_34572.35
nyu_2451_34572.36
nyu_2451_34572.37
nyu_2451_34572.38
nyu_2451_34572.39
nyu_2451_34572.40
nyu_2451_34572.41
nyu_2451_34572.42
nyu_2451_34572.43
nyu_2451_34572.44
nyu_2451_34572.45
nyu_2451_34572.46
nyu_2451_34572.47
nyu_2451_34572.48
nyu_2451_34572.49
nyu_2451_34572.50
nyu_2451_34572.51
nyu_2451_34572.52
nyu_2451_34572.53
nyu_2451_34572.54
nyu_2451_34572.55
nyu_2451_34572.56
n

In [25]:
# Persist the collected results as JSON so the crawl does not have to be repeated.
serialized_results = json.dumps(google_dict)
with open('google_restaurants.json', mode='w') as out_file:
    out_file.write(serialized_results)

In [None]:
# NOTE(review): stray, unexecuted cell. It only evaluates the bare module name
# (displaying the module's repr) and looks like it can be deleted.
json