In [44]:
# Dependencies
import json
import requests
import pandas as pd
import logging
import time

logger = logging.getLogger("root")
logger.setLevel(logging.DEBUG)
# create console handler
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
logger.addHandler(ch)

# Google developer API key
from config import g_key

In [33]:
# import dataset
wine_review_df = pd.read_csv("winemag-data-130k-v2.csv")
wine_review_df['winery_search'] = wine_review_df['winery'] + " winery" + " " + wine_review_df['country']
wine_review_df.head()

Unnamed: 0.1,Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery,winery_search
0,0,Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,87,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia,Nicosia winery Italy
1,1,Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,Douro,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos,Quinta dos Avidagos winery Portugal
2,2,US,"Tart and snappy, the flavors of lime flesh and...",,87,14.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm,Rainstorm winery US
3,3,US,"Pineapple rind, lemon pith and orange blossom ...",Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,,Alexander Peartree,,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian,St. Julian winery US
4,4,US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks,Sweet Cheeks winery US


In [59]:
#create list of winery names to search with google maps function
winery_list = wine_review_df.winery_search.unique()
len(winery_list)

16935

In [50]:
return_full_results = False

def get_googlemaps_results(winery, api_key=None, return_full_response=False):
    
    params = {"key": api_key, "input": winery, "inputtype": "textquery", "fields": "geometry"}

    # Set Geocoding url
    base_url = "https://maps.googleapis.com/maps/api/place/findplacefromtext/json"
        
    # Ping google for the results:
    response = requests.get(base_url, params=params)
    results = response.json()
    
    # if there are no results or an error, return empty results.

    if len(results['candidates']) == 0:
        output = {
            "latitude": None,
            "longitude": None,
        }
    else:    
        answer = results['candidates'][0]
        output = {
            "latitude": answer.get('geometry').get('location').get('lat'),
            "longitude": answer.get('geometry').get('location').get('lng'),
        }
        
    # Append other details:    
    output['input_string'] = winery
    output['number_of_results'] = len(results['candidates'])
    output['status'] = results.get('status')
    if return_full_response is True:
        output['response'] = results
    
    return output

In [51]:
test_result = get_googlemaps_results("Quinta dos Avidagos winery", g_key, return_full_results)
test_result

{'latitude': 41.183402,
 'longitude': -7.757935499999999,
 'input_string': 'Quinta dos Avidagos winery',
 'number_of_results': 1,
 'status': 'OK'}

In [52]:
test_winery_list = winery_list[0:10]
test_winery_list

array(['Nicosia winery Italy', 'Quinta dos Avidagos winery Portugal',
       'Rainstorm winery US', 'St. Julian winery US',
       'Sweet Cheeks winery US', 'Tandem winery Spain',
       'Terre di Giurfo winery Italy', 'Trimbach winery France',
       'Heinz Eifel winery Germany', 'Jean-Baptiste Adam winery France'],
      dtype=object)

In [57]:
#create empty list that we will populate with for loop
winery_locations = []

#set output file name
output_filename = 'final_winereviews_data.csv'

# Go through each address in turn
for winery in test_winery_list:
    # While the address geocoding is not finished:
    geocoded = False
    while geocoded is not True:
        # Geocode the address with google
        try:
            geocode_result = get_googlemaps_results(winery, g_key, return_full_response=return_full_results)
        except Exception as e:
            logger.exception(e)
            logger.error("Major error with {}".format(winery))
            logger.error("Skipping!")
            geocoded = True
            
        # If we're over the API limit, backoff for a while and try again later.
        if geocode_result['status'] == 'OVER_QUERY_LIMIT':
            logger.info("Hit Query Limit! Backing off for a bit.")
            time.sleep(BACKOFF_TIME * 60) # sleep for 30 minutes
            geocoded = False
        else:
            # If we're ok with API use, save the results
            # Note that the results might be empty / non-ok - log this
            if geocode_result['status'] != 'OK':
                logger.warning("Error geocoding {}: {}".format(winery, geocode_result['status']))
            logger.debug("Geocoded: {}: {}".format(winery, geocode_result['status']))
            winery_locations.append(geocode_result)           
            geocoded = True

    # Print status every 100 addresses
    if len(winery_locations) % 100 == 0:
    	logger.info("Completed {} of {} address".format(len(winery_locations), len(test_winery_list)))
            
    # Every 500 addresses, save progress to file(in case of a failure so you have something!)
    if len(winery_locations) % 500 == 0:
        pd.DataFrame(winery_locations).to_csv("{}_bak".format(output_filename))

# All done
logger.info("Finished geocoding all wineries")
# Write the full results to csv using the pandas library.
pd.DataFrame(winery_locations).to_csv(output_filename)

Geocoded: Nicosia winery Italy: OK
Geocoded: Nicosia winery Italy: OK
Geocoded: Nicosia winery Italy: OK
Completed 1 of 10 address
Completed 1 of 10 address
Completed 1 of 10 address
Geocoded: Quinta dos Avidagos winery Portugal: OK
Geocoded: Quinta dos Avidagos winery Portugal: OK
Geocoded: Quinta dos Avidagos winery Portugal: OK
Completed 2 of 10 address
Completed 2 of 10 address
Completed 2 of 10 address
Error geocoding Rainstorm winery US: ZERO_RESULTS
Error geocoding Rainstorm winery US: ZERO_RESULTS
Error geocoding Rainstorm winery US: ZERO_RESULTS
Geocoded: Rainstorm winery US: ZERO_RESULTS
Geocoded: Rainstorm winery US: ZERO_RESULTS
Geocoded: Rainstorm winery US: ZERO_RESULTS
Completed 3 of 10 address
Completed 3 of 10 address
Completed 3 of 10 address
Geocoded: St. Julian winery US: OK
Geocoded: St. Julian winery US: OK
Geocoded: St. Julian winery US: OK
Completed 4 of 10 address
Completed 4 of 10 address
Completed 4 of 10 address
Geocoded: Sweet Cheeks winery US: OK
Geocoded: