In [1]:
import pandas as pd
import requests
import logging
import time
#request cache

In [2]:
key = '<GOOGLE_API_KEY>'

In [14]:
logger = logging.getLogger("root")
logger.setLevel(logging.DEBUG)
# create console handler
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
logger.addHandler(ch)

In [16]:
API_KEY = '<GOOGLE_API_KEY>'
BACKOFF_TIME = 1
output_filename = 'output/output.csv'
input_filename = 'output/addresses.csv'
address_column_name = 'address'
RETURN_FULL_RESULTS = True

In [17]:
data = pd.read_csv(input_filename, encoding='utf8')

if address_column_name not in data.columns:
    raise ValueError("Missing Address column in input data")

addresses = data[address_column_name].tolist()
addresses = (data[address_column_name]).tolist()

In [18]:
def get_google_results(address, api_key='AIzaSyC72T3bzuR5O9vVB-91FecXNr5nKa8rfdY', return_full_response=True):
    geocode_url = "https://maps.googleapis.com/maps/api/geocode/json?address={}".format(address)
    if api_key is not None:
        geocode_url = geocode_url + '&key={}'.format(api_key)
        
    results = requests.get(geocode_url)
    results = results.json()
    
    if len(results['results']) == 0:
        output = {
            "formatted_address" : None,
            "latitude": None,
            "longitude": None,
            "accuracy": None,
            "google_place_id": None,
            "type": None,
            "postcode": None
        }
    else:    
        answer = results['results'][0]
        output = {
            "formatted_address" : answer.get('formatted_address'),
            "latitude": answer.get('geometry').get('location').get('lat'),
            "longitude": answer.get('geometry').get('location').get('lng'),
            "accuracy": answer.get('geometry').get('location_type'),
            "google_place_id": answer.get("place_id"),
            "type": ",".join(answer.get('types')),
            "postcode": ",".join([x['long_name'] for x in answer.get('address_components') 
                                  if 'postal_code' in x.get('types')])
        }
         
    output['input_string'] = address
    output['number_of_results'] = len(results['results'])
    output['status'] = results.get('status')
    if return_full_response is True:
        output['response'] = results
    
    return output

results = []
for address in addresses:
    geocoded = False
    while geocoded is not True:
        try:
            geocode_result = get_google_results(address, API_KEY, return_full_response=RETURN_FULL_RESULTS)
        except Exception as e:
            logger.exception(e)
            logger.error("Major error with {}".format(address))
            logger.error("Skipping!")
            geocoded = True
            
        if geocode_result['status'] == 'OVER_QUERY_LIMIT':
            logger.info("Hit Query Limit! Backing off for a bit.")
            time.sleep(BACKOFF_TIME * 60) # sleep for 30 minutes
            geocoded = False
        else:
            if geocode_result['status'] != 'OK':
                logger.warning("Error geocoding {}: {}".format(address, geocode_result['status']))
            logger.debug("Geocoded: {}: {}".format(address, geocode_result['status']))
            results.append(geocode_result)           
            geocoded = True

    if len(results) % 500 == 0:
        logger.info("Completed {} of {} address".format(len(results), len(addresses)))
            
    if len(results) % 500 == 0:
        pd.DataFrame(results).to_csv("{}_bak".format(output_filename))

logger.info("Finished geocoding all addresses")
pd.DataFrame(results).to_csv(output_filename, encoding='utf8')

Geocoded: 2450 Ashby Avenue, Berkeley, CA 94705: OK
Geocoded: 2450 Ashby Avenue, Berkeley, CA 94705: OK
Geocoded: 2001 Dwight Way, Berkeley, CA 94704: OK
Geocoded: 2001 Dwight Way, Berkeley, CA 94704: OK
Geocoded: 2920 Telegraph Ave, Berkeley CA 94705: OK
Geocoded: 2920 Telegraph Ave, Berkeley CA 94705: OK
Geocoded: 411 Grand Ave, Oakland, CA 94610: OK
Geocoded: 411 Grand Ave, Oakland, CA 94610: OK
Geocoded: 3918 Fallon Road, Dublin, CA 94568: OK
Geocoded: 3918 Fallon Road, Dublin, CA 94568: OK
Geocoded: 20103 Lake Chabot Road, Castro Valley, CA 94546: OK
Geocoded: 20103 Lake Chabot Road, Castro Valley, CA 94546: OK
Geocoded: 39199 Liberty Street, Building B, Fremont, CA 94538: OK
Geocoded: 39199 Liberty Street, Building B, Fremont, CA 94538: OK
Geocoded: 28270 Huntwood Ave, Hayward, CA 94544: OK
Geocoded: 28270 Huntwood Ave, Hayward, CA 94544: OK
Geocoded: 3100 San Pablo Ave, Berkeley, CA 94702: OK
Geocoded: 3100 San Pablo Ave, Berkeley, CA 94702: OK
Geocoded: 5860 Owens Dr, Pleasanto

Geocoded: 13300 Van Nuys Boulevard, Pacoima, CA 91331: OK
Geocoded: 13300 Van Nuys Boulevard, Pacoima, CA 91331: OK
Geocoded: 11500 Brookshire Ave, Downey, CA 90241: OK
Geocoded: 11500 Brookshire Ave, Downey, CA 90241: OK
Geocoded: 1798 N. Garey Avenue, Pomona, CA 91767: OK
Geocoded: 1798 N. Garey Avenue, Pomona, CA 91767: OK
Geocoded: 15031 Rildi St, Mission Hills, CA 91345: OK
Geocoded: 15031 Rildi St, Mission Hills, CA 91345: OK
Geocoded: 1300 W 7th St, San Pedro, CA 90732: OK
Geocoded: 1300 W 7th St, San Pedro, CA 90732: OK
Geocoded: 4101 Torrance Blvd, Torrance, CA 90503: OK
Geocoded: 4101 Torrance Blvd, Torrance, CA 90503: OK
Geocoded: 2121 Santa Monica Blvd, Santa Monica, CA 90404: OK
Geocoded: 2121 Santa Monica Blvd, Santa Monica, CA 90404: OK
Geocoded: 18321 Clark St, Tarzana, CA 91356: OK
Geocoded: 18321 Clark St, Tarzana, CA 91356: OK
Geocoded: 7 West Foothill Blvd, Arcadia, CA 91006: OK
Geocoded: 7 West Foothill Blvd, Arcadia, CA 91006: OK
Geocoded: 16111 Plummer Street, No

Geocoded: 5342 Dudley Blvd., McClellan, CA 95652: OK
Geocoded: 10535 Hospital Way, Mather, CA 95655: OK
Geocoded: 10535 Hospital Way, Mather, CA 95655: OK
Geocoded: 1600 Exposition Blvd, Sacramento, CA 95815: OK
Geocoded: 1600 Exposition Blvd, Sacramento, CA 95815: OK
Geocoded: 820 E Mountain View St, Barstow, CA 92311: OK
Geocoded: 820 E Mountain View St, Barstow, CA 92311: OK
Geocoded: 8599 Haven Ave., Suite 102, Rancho Cucamonga, CA 91730: OK
Geocoded: 8599 Haven Ave., Suite 102, Rancho Cucamonga, CA 91730: OK
Geocoded: 999 San Bernardino Rd, Upland, CA 91786: OK
Geocoded: 999 San Bernardino Rd, Upland, CA 91786: OK
Geocoded: 26001 Redlands Blvd., Redlands, CA 92373: OK
Geocoded: 26001 Redlands Blvd., Redlands, CA 92373: OK
Geocoded: 11201 Benton Street, Loma Linda, CA 92357: OK
Geocoded: 11201 Benton Street, Loma Linda, CA 92357: OK
Geocoded: 12138 Industrial Boulevard, Victorville,, CA 92395: OK
Geocoded: 12138 Industrial Boulevard, Victorville,, CA 92395: OK
Geocoded: 865 3rd Ave

Geocoded: 4151 Foothill Rd, Santa Barbara, CA 93110: OK
Geocoded: 4440 Calle Real, Santa Barbara, CA 93110: OK
Geocoded: 4440 Calle Real, Santa Barbara, CA 93110: OK
Geocoded: 1550 East Main Street, Santa Maria, CA 93454: OK
Geocoded: 1550 East Main Street, Santa Maria, CA 93454: OK
Geocoded: 795 Willow Road, Menlo Park, CA 94025: OK
Geocoded: 795 Willow Road, Menlo Park, CA 94025: OK
Error geocoding 4735 Hamilton Ave #80, San Jose, CA 95130: REQUEST_DENIED
Error geocoding 4735 Hamilton Ave #80, San Jose, CA 95130: REQUEST_DENIED
Geocoded: 4735 Hamilton Ave #80, San Jose, CA 95130: REQUEST_DENIED
Geocoded: 4735 Hamilton Ave #80, San Jose, CA 95130: REQUEST_DENIED
Geocoded: 4150 N 1st St, San Jose, CA 95134: OK
Geocoded: 4150 N 1st St, San Jose, CA 95134: OK
Geocoded: 1150 West El Camino Real, Mountain View, CA 94040: OK
Geocoded: 1150 West El Camino Real, Mountain View, CA 94040: OK
Geocoded: 2855 Stevens Creek Blvd, Santa Clara, CA 95050: OK
Geocoded: 2855 Stevens Creek Blvd, Santa Cl