In [1]:
# standard libraries
import requests
import os
import pandas as pd
import numpy as np
import json
import time

# custom functions
from api_functions.google_nearby_search import gPlaceSearch

In [2]:
# Get the neighborhood data

nyc = pd.read_csv('../data/nyc_geo_cleaned.csv', index_col=0)

In [3]:
nyc.shape

(306, 5)

In [7]:
nyc

Unnamed: 0,id,neighborhood,borough,longitude,latitude
0,nyu_2451_34572.1,Wakefield,Bronx,-73.847201,40.894705
1,nyu_2451_34572.2,Co-op City,Bronx,-73.829939,40.874294
2,nyu_2451_34572.3,Eastchester,Bronx,-73.827806,40.887556
3,nyu_2451_34572.4,Fieldston,Bronx,-73.905643,40.895437
4,nyu_2451_34572.5,Riverdale,Bronx,-73.912585,40.890834
...,...,...,...,...,...
301,nyu_2451_34572.302,Hudson Yards,Manhattan,-74.000111,40.756658
302,nyu_2451_34572.303,Hammels,Queens,-73.805530,40.587338
303,nyu_2451_34572.304,Bayswater,Queens,-73.765968,40.611322
304,nyu_2451_34572.305,Queensbridge,Queens,-73.945631,40.756091


In [5]:
# ## test the gPlaceSearch() function

restaurants = {}

for row in nyc.iterrows():
    response = gPlaceSearch(str(row['latitude']), str(row['longitude']))


TypeError: 'method' object is not iterable

In [None]:
# store response to figure out parsing
test_response = gPlaceSearch(str(nyc['latitude'][50]), str(nyc['longitude'][50]), 500, 'restaurant')

In [None]:
test_response

[{'html_attributions': [],
  'results': [{'business_status': 'OPERATIONAL',
    'geometry': {'location': {'lat': 40.59624669999999, 'lng': -73.9783571},
     'viewport': {'northeast': {'lat': 40.59759977989273,
       'lng': -73.97704967010728},
      'southwest': {'lat': 40.59490012010728, 'lng': -73.97974932989273}}},
    'icon': 'https://maps.gstatic.com/mapfiles/place_api/icons/v1/png_71/restaurant-71.png',
    'icon_background_color': '#FF9E67',
    'icon_mask_base_uri': 'https://maps.gstatic.com/mapfiles/place_api/icons/v2/restaurant_pinlet',
    'name': 'Paraiso',
    'opening_hours': {'open_now': True},
    'photos': [{'height': 3024,
      'html_attributions': ['<a href="https://maps.google.com/maps/contrib/102629022838675911100">Cindy Serrano</a>'],
      'photo_reference': 'Aap_uEAy5hBGwb0G2pNJrvxVV7HEhrJpcCAAwPg1VgeHbdoryLKgP-EPJEY6rwDHtKTEu_0O1AAoRz10PA-diGBu1g3bkKzd5gPjVpkoZRlplsoQ2pP89sm2ALWkqo9ze7hzpMbFtihhjE6AIoev4oAcGDYBJomUNnqhnHiWBwQzWsHWCS-D',
      'width': 4032}]

In [None]:
# Let's figure out how to count the # of responses
row_restaurant_counts = []
for page in test_response:
    row_restaurant_counts.append(len(page['results']))

row_restaurant_counts

# row_restaurant_counts = [len(page['results']) for page in test_response]
# for page in test_response:
#     print(len(page['results']))



[19]

In [None]:
# next, a list of review counts

row_review_counts = []
for page in test_response:
    # print(page['results'])
    for result in page['results']:
        row_review_counts.append(result['user_ratings_total'])

print(row_review_counts)

[76, 266, 562, 13, 185, 50, 38, 24, 223, 31, 58, 3, 74, 96, 129, 66, 8425, 788, 214]


In [None]:
# next, a list of ratings

row_ratings = []
for page in test_response:
    # print(page['results'])
    for result in page['results']:
        row_ratings.append(result['rating'])

print(row_ratings)

[3.9, 4.3, 4.5, 4.8, 4.3, 3.6, 3.8, 4, 3.8, 3.6, 3.7, 5, 4.2, 3.7, 4.3, 3.3, 4.6, 4.1, 4.3]


In [None]:
# next, a list of price_level

price_levels = []
for page in test_response:
    # print(page['results'])
    for result in page['results']:
        try:
            price_levels.append(result['price_level'])
        except KeyError:
            price_levels.append('?')



print(price_levels)

['?', 1, 2, '?', 1, 1, '?', '?', 1, 1, 2, '?', 1, '?', 1, 1, 2, 2, '?']


In [39]:
# Let's make the dict of results that's a bit more compact, and in a function

def googleEnrich(df=nyc, radius=500, query='restaurant'):
    """
    Takes a dataframe and returns an enriched copy of the df

    df (dataframe): must include columns 'latitude' AND 'longitude' AND an 'id' column
    radius (int): distance in meters to query for
    query (string): query string

    returns a dictionary sorted by neighborhood id
    """
    google_dict = {}

    # go through every row
    for row in df.itertuples():
        print(row.id)
        # get data from API
        response = gPlaceSearch(str(row.latitude), str(row.longitude), radius, query)

        # parse data and assign data to temporary lists
        query_counts = [len(page['results']) for page in response]

        query_review_counts = []
        query_ratings = []
        price_levels = []

        for page in response:
            for result in page['results']:
                query_review_counts.append(result['user_ratings_total'])
                query_ratings.append(result['rating'])

                # price_levels have null values, check and replace
                try:
                   price_levels.append(result['price_level'])
                except KeyError:
                   price_levels.append('?')

        #         query_review_counts = [result['user_ratings_total'] for result in page['results']]
        #         query_ratings = [result['rating'] for result in page]
        # # for result in page['results']:
        # #     row_ratings.append(result['rating'])

        # store lists into dict
        google_dict[row.id] = {
            query + '_counts': query_counts,
            query + '_review_counts': query_review_counts,
            query + '_ratings': query_ratings,
            query + '_price_levels': price_levels
        }
    return google_dict

In [40]:
# call the function and store in a variable
google_dict = googleEnrich()

nyu_2451_34572.1
nyu_2451_34572.2
nyu_2451_34572.3
nyu_2451_34572.4
nyu_2451_34572.5
nyu_2451_34572.6
nyu_2451_34572.7
nyu_2451_34572.8
nyu_2451_34572.9


KeyboardInterrupt: 

In [34]:
google_dict

NameError: name 'google_dict' is not defined