In [None]:
import pandas as pd
import requests
import logging
import time
import re
import string
import pickle
import os

In [None]:
# Google Maps API key placeholder; replace with your own key before running.
API_KEY = "YOUR_API_KEY"

In [None]:
# When True, the complete JSON payload returned by Google is attached to every
# output record (passed to get_google_results as return_full_response).
RETURN_FULL_RESULTS = True

# Switch for country biasing to increase accuracy (not referenced by the
# cells shown in this notebook section).
BIAS = False

# Minutes to wait before pinging Google again once the API limit has been hit.
BACKOFF_TIME = 30

In [None]:
# Notebook-wide logger; DEBUG so every per-address message is emitted.
logger = logging.getLogger("root")
logger.setLevel(logging.DEBUG)

# Re-running this cell would otherwise attach one more console handler to the
# same logger each time, printing every message several times. The handler is
# therefore given a name, and any handler left by a previous run is removed
# before a fresh one is installed.
_CONSOLE_HANDLER_NAME = "geocoder-console"
for _stale in [h for h in logger.handlers if h.get_name() == _CONSOLE_HANDLER_NAME]:
    logger.removeHandler(_stale)

# create console handler
ch = logging.StreamHandler()
ch.set_name(_CONSOLE_HANDLER_NAME)
ch.setLevel(logging.DEBUG)
logger.addHandler(ch)

In [None]:
# Free-text place names to geocode, one entry per lookup.
input_list = [
    'lille',
    'los angeles',
    'jakarta',
]

In [None]:
# Build the one-column frame of addresses to geocode. Naming the column at
# construction time (instead of assigning `.columns` afterwards) also works for
# an empty input_list, which previously raised a length-mismatch error because
# the frame had zero columns.
locations = pd.DataFrame({'ADDRESS': input_list})
# Notebook display: preview the first rows.
locations.head()

In [None]:
# Folder for output files (placeholder; set to a real directory).
output_folder = "YOUR_OUTPUT_FOLDER"

In [None]:
def get_google_results(address, api_key=None, return_full_response=False, timeout=10):
    """
    Get geocode results from Google Maps Geocoding API.

    Note that in the case of multiple google geocode results, this function returns details of the FIRST result.

    @param address: String address as accurate as possible. For Example "18 Grafton Street, Dublin, Ireland"
    @param api_key: String API key if present from google.
                    If supplied, requests will use your allowance from the Google API. If not, you
                    will be limited to the free usage of 2500 requests per day.
                    NOTE(review): that keyless quota reflects when this was written - confirm
                    against the current Google documentation.
    @param return_full_response: Boolean to indicate if you'd like to return the full response from google. This
                    is useful if you'd like additional location details for storage or parsing later.
    @param timeout: Seconds to wait for Google before giving up, so a stalled connection cannot
                    hang the caller forever.
    @return: dict with the keys formatted_address, latitude, longitude, accuracy, google_place_id,
                    type, postcode (all None when there is no match), plus input_string,
                    number_of_results, status and - when return_full_response is True - response.
    @raise requests.exceptions.RequestException: on connection problems or timeouts.
    @raise ValueError: if the response body is not valid JSON.
    """
    # Let requests build the query string: formatting the address straight into
    # the URL breaks on characters such as '&', '#', '+' or '='.
    query = {"address": address}
    if api_key is not None:
        query["key"] = api_key

    # Ping google for the results:
    response = requests.get(
        "https://maps.googleapis.com/maps/api/geocode/json",
        params=query,
        timeout=timeout,
    )
    # Results will be in JSON format - convert to dict using requests functionality
    results = response.json()

    # A payload without a 'results' key is treated the same as one with no matches.
    matches = results.get('results') or []

    # if there's no results or an error, return empty results.
    if not matches:
        output = {
            "formatted_address": None,
            "latitude": None,
            "longitude": None,
            "accuracy": None,
            "google_place_id": None,
            "type": None,
            "postcode": None,
        }
    else:
        answer = matches[0]
        # Missing sub-objects degrade to None values instead of raising.
        geometry = answer.get('geometry') or {}
        coordinates = geometry.get('location') or {}
        postcodes = [
            component['long_name']
            for component in (answer.get('address_components') or [])
            if 'postal_code' in (component.get('types') or [])
            and component.get('long_name')
        ]
        output = {
            "formatted_address": answer.get('formatted_address'),
            "latitude": coordinates.get('lat'),
            "longitude": coordinates.get('lng'),
            "accuracy": geometry.get('location_type'),
            "google_place_id": answer.get("place_id"),
            "type": ",".join(answer.get('types') or []),
            "postcode": ",".join(postcodes),
        }

    # Append some other details:
    output['input_string'] = address
    output['number_of_results'] = len(matches)
    output['status'] = results.get('status')
    if return_full_response is True:
        output['response'] = results

    return output

In [None]:
# Sanity check before the long run: one known lookup shows whether the API key
# is valid and internet access works.
test_result = get_google_results("London, England", API_KEY, RETURN_FULL_RESULTS)
status_ok = test_result['status'] == 'OK'
address_ok = test_result['formatted_address'] == 'London, UK'
if not (status_ok and address_ok):
    logger.warning("There was an error when testing the Google Geocoder.")
    raise ConnectionError('Problem with test results from Google Geocode - check your API key and internet connection.')

In [None]:
# Notebook display: show the result of the test lookup for manual inspection.
test_result

In [None]:
# Addresses already present in the results file from earlier runs; the main
# loop skips these so an interrupted run can be resumed.
geocoded_addresses = []
results_path = 'YOUR_OUTPUT_FOLDER/geocoding_results.txt'
if os.path.exists(results_path):
    with open(results_path, 'r') as file:
        for line in file:
            # Each record is "<address>;-;<result>". Splitting on the FIRST
            # delimiter only keeps a record readable even when the stored
            # result text itself happens to contain ';-;'.
            address, delimiter, _ = line.strip().partition(';-;')
            if delimiter:
                geocoded_addresses.append(address)
            else:
                # Not a record (no delimiter at all): show it for inspection.
                print(line)

In [None]:
# Notebook display: number of addresses found in the existing results file.
len(geocoded_addresses)

In [None]:
# Notebook display: list the addresses read from the existing results file.
geocoded_addresses

In [None]:
# Notebook display: (rows, columns) of the input address frame.
locations.shape

In [None]:
# Notebook display: preview the first rows of the input address frame.
locations.head()

In [None]:
# Notebook display: show the already-geocoded addresses again before the run.
geocoded_addresses

In [None]:
# Notebook display: is the input address with index label 0 already among the
# addresses read from the results file?
locations['ADDRESS'][0] in geocoded_addresses

In [None]:
# Create a list to hold results
results = []

# Build the lookup once: set membership is O(1) per address, whereas testing
# against the list rescans it for every row.
already_geocoded = set(geocoded_addresses)

# Go through each address in turn
for _, location in locations.iterrows():
    address = location['ADDRESS']
    if address in already_geocoded:
        continue

    # Stays None until a usable response has been received for THIS address,
    # so a failed request can never fall through to the previous address's
    # result (or to an undefined name on the very first address).
    geocode_result = None
    while geocode_result is None:
        # Geocode the address with google
        try:
            candidate = get_google_results(address,
                                           API_KEY,
                                           return_full_response=RETURN_FULL_RESULTS)
        except Exception as e:
            logger.exception(e)
            logger.error("Major error with {}".format(address))
            logger.error("Skipping!")
            break

        # If we're over the API limit, backoff for a while and try again later.
        if candidate['status'] == 'OVER_QUERY_LIMIT':
            logger.info("Hit Query Limit! Backing off for a bit.")
            time.sleep(BACKOFF_TIME * 60)  # BACKOFF_TIME is expressed in minutes
            continue

        geocode_result = candidate

    if geocode_result is None:
        # The request itself failed. Nothing is recorded for this address, so
        # it is not marked as done in the results file and will be attempted
        # again on the next run.
        continue

    # If we're ok with API use, save the results
    # Note that the results might be empty / non-ok - log this
    if geocode_result['status'] != 'OK':
        logger.warning("Error geocoding {}: {}".format(address, geocode_result['status']))
    logger.debug("Geocoded: {}: {}".format(address, geocode_result['status']))
    results.append(geocode_result)

    # Save the geocoding result to the text file straight away, so progress
    # survives an interruption.
    with open('YOUR_OUTPUT_FOLDER/geocoding_results.txt', 'a') as file:
        file.write(f"{address};-;{geocode_result}\n")

    # Print status every 100 addresses
    if len(results) % 100 == 0:
        logger.info("Completed {} of {} address".format(len(results), len(locations)))

# All done
logger.info("Finished geocoding all addresses")