In [3]:
import pandas as pd
# Load the people table; the path is relative to the notebook's working directory.
people_csv_path = '../output/people.csv'
df = pd.read_csv(people_csv_path)

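First, we will piece together a full street address for each person.
We build the address by concatenating the 'House Number:' and 'Street:' columns (rows missing either one are dropped) and appending ' Ithaca, NY 14850'.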

In [4]:
# Keep only the rows that have both a street and a house number.
# .copy() gives has_addr its own data, so the column assignment below does not
# write into a slice of df (which is what triggers SettingWithCopyWarning).
has_addr = df.dropna(subset=['Street:', 'House Number:']).copy()

# Cast to int before str so the number has no fractional part in the address
# (presumably the column loads as float because of the missing values - confirm).
has_addr['House Number:'] = has_addr['House Number:'].astype(int).astype(str)

# "<house number> <street> Ithaca, NY 14850"; the result keeps df's row index
# and the Series name 'House Number:'.
addr = has_addr['House Number:'].str.cat(has_addr['Street:'], sep=' ') + ' Ithaca, NY 14850'
print(addr)

18     119 E Buffalo Street Ithaca, NY 14850
19             110 N Aurora Ithaca, NY 14850
38     119 E Buffalo Street Ithaca, NY 14850
39             110 N Aurora Ithaca, NY 14850
40             110 N Aurora Ithaca, NY 14850
                       ...                  
149         319 Dryden Road Ithaca, NY 14850
150         319 Dryden Road Ithaca, NY 14850
151       301 Bryant Avenue Ithaca, NY 14850
152       301 Bryant Avenue Ithaca, NY 14850
153       301 Bryant Avenue Ithaca, NY 14850
Name: House Number:, Length: 66, dtype: object


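Next, we will call the Google Maps Geocoding API to look up each address. Note that although the variables below are named `place_ids`, what we keep from each result is its latitude/longitude coordinates.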

In [5]:
%reload_ext dotenv
%dotenv
import googlemaps
import os

# The API key comes from the GMAPS_API_KEY environment variable so it is never
# hard-coded in the notebook; os.getenv yields None when the variable is unset.
gmaps_api_key = os.getenv('GMAPS_API_KEY')
gmaps = googlemaps.Client(key=gmaps_api_key)

# Materialize (row index, full address) pairs.
# Series.iteritems() was removed in pandas 2.0; Series.items() is the
# equivalent that works on both old and new pandas versions.
addrs = list(addr.items())
print(addrs[0])

(18, '119 E Buffalo Street Ithaca, NY 14850')


In [6]:
# Geocode a single address first to inspect the shape of the API response.
# The row label is unpacked into `row_label` rather than `id`, so the builtin
# id() is not shadowed for the rest of the kernel session.
row_label, full_addr = addrs[0]
print(full_addr)
geocode_result = gmaps.geocode(full_addr)
print(geocode_result)

119 E Buffalo Street Ithaca, NY 14850
[{'address_components': [{'long_name': '119', 'short_name': '119', 'types': ['street_number']}, {'long_name': 'East Buffalo Street', 'short_name': 'E Buffalo St', 'types': ['route']}, {'long_name': 'Downtown', 'short_name': 'Downtown', 'types': ['neighborhood', 'political']}, {'long_name': 'Ithaca', 'short_name': 'Ithaca', 'types': ['locality', 'political']}, {'long_name': 'Tompkins County', 'short_name': 'Tompkins County', 'types': ['administrative_area_level_2', 'political']}, {'long_name': 'New York', 'short_name': 'NY', 'types': ['administrative_area_level_1', 'political']}, {'long_name': 'United States', 'short_name': 'US', 'types': ['country', 'political']}, {'long_name': '14850', 'short_name': '14850', 'types': ['postal_code']}, {'long_name': '4287', 'short_name': '4287', 'types': ['postal_code_suffix']}], 'formatted_address': '119 E Buffalo St, Ithaca, NY 14850, USA', 'geometry': {'location': {'lat': 42.4411844, 'lng': -76.4978987}, 'locati

In [32]:
# The first entry of the response carries the coordinates under geometry -> location.
first_match = geocode_result[0]
print(first_match['geometry']['location'])

{'lat': 42.4411844, 'lng': -76.4978987}


In [34]:
def get_coordinates(addr):
    """Geocode one address and format its coordinates as a string.

    Args:
        addr: Full street address string passed to ``gmaps.geocode``.

    Returns:
        str: ``'{ lat: <lat>, lng: <lng> }'`` built from the first geocoding
        result's ``geometry.location``.

    Raises:
        IndexError: If the API returns no results for ``addr``.
    """
    results = gmaps.geocode(addr)
    if not results:
        # Same exception type that indexing the empty result list would raise,
        # but with a message naming the address that could not be resolved.
        raise IndexError('no geocoding results for address: {!r}'.format(addr))
    location = results[0]['geometry']['location']
    return '{' + ' lat: {}, lng: {} '.format(location['lat'], location['lng']) + '}'


# Many rows share the same address, so geocode each distinct address once and
# fan the results back out to every row instead of repeating the API call.
coords_by_addr = {unique_addr: get_coordinates(unique_addr) for unique_addr in addr.unique()}

# NOTE: despite the name, place_ids holds coordinate strings, not Google place IDs.
# The Series keeps the name 'House Number:', which becomes the DataFrame/CSV column name.
place_ids = addr.map(coords_by_addr)
place_ids_df = pd.DataFrame(place_ids)
place_ids_df.to_csv('../output/place_ids.csv')

In [37]:
# Print the coordinates as a JavaScript-style array literal, one entry per line.
# The output is not strict JSON: keys are unquoted and every entry, including
# the last, is followed by a comma.
place_ids_list = list(place_ids)
print('[')
# Loop variable is `coords` rather than `id` so the builtin id() is not left
# shadowed by a string after this cell runs.
for coords in place_ids_list:
    print(" {},".format(coords))
print(']')

[
 { lat: 42.4411844, lng: -76.4978987 },
 { lat: 42.4400126, lng: -76.4959011 },
 { lat: 42.4411844, lng: -76.4978987 },
 { lat: 42.4400126, lng: -76.4959011 },
 { lat: 42.4400126, lng: -76.4959011 },
 { lat: 42.4400126, lng: -76.4959011 },
 { lat: 42.4400126, lng: -76.4959011 },
 { lat: 42.4401429, lng: -76.4949048 },
 { lat: 42.4401429, lng: -76.4949048 },
 { lat: 42.4401429, lng: -76.4949048 },
 { lat: 42.4401429, lng: -76.4949048 },
 { lat: 42.4401429, lng: -76.4949048 },
 { lat: 42.4401429, lng: -76.4949048 },
 { lat: 42.4401429, lng: -76.4949048 },
 { lat: 42.4401429, lng: -76.4949048 },
 { lat: 42.4401429, lng: -76.4949048 },
 { lat: 42.4401429, lng: -76.4949048 },
 { lat: 42.4401429, lng: -76.4949048 },
 { lat: 42.4355762, lng: -76.4838034 },
 { lat: 42.4419942, lng: -76.4873186 },
 { lat: 42.4419942, lng: -76.4873186 },
 { lat: 42.4414247, lng: -76.4864379 },
 { lat: 42.439359, lng: -76.4861075 },
 { lat: 42.43996610000001, lng: -76.4826739 },
 { lat: 42.4408551, lng: -76.483

In [36]:
# Count the distinct coordinate strings. The column is called 'House Number:'
# because the geocoded Series kept the name of the Series it was mapped from.
distinct_coords = place_ids_df['House Number:'].unique()
print(len(distinct_coords))

32
