# Geocode

Map the standardized dataset.

Import Python tools

In [5]:
import os
import time
import json
import pandas as pd
from googlegeocoder import GoogleGeocoder
from geojson import Feature, Point, FeatureCollection
from googlegeocoder import GeocoderResult, AddressComponent, Geometry, Coordinates, Bounds

Read in standardized data

In [6]:
# Load the standardized records; "ward" is kept as text and "date" is parsed as a date
df = pd.read_csv("output/trees.csv", dtype={"ward": str}, parse_dates=["date"])

Pull out unique addresses

In [7]:
# Each distinct value of the address column becomes one row
unique_addresses = df["address"].unique()
address_df = pd.DataFrame(unique_addresses, columns=["address"])

In [8]:
# Plain Python list of the unique addresses
address_list = list(address_df["address"])

Initialize the Google geocoder

In [9]:
# The API key is read from the environment (None if the variable is unset)
api_key = os.getenv("GOOGLE_GEOCODER_API_KEY")
geocoder = GoogleGeocoder(api_key)

Load any addresses that have been previously geocoded.

In [10]:
# Start from the results saved by earlier runs. The file is opened in a
# `with` block so the handle is closed as soon as it has been parsed.
# On a first run there is no cache file yet, so begin with an empty cache
# instead of failing.
try:
    with open("output/geocoding.json", "r") as cache_file:
        geocoding_cache = json.load(cache_file)
except FileNotFoundError:
    geocoding_cache = {}

Filter down to addresses that haven't already been geocoded.

In [11]:
# Drop addresses that already have a cache entry, and drop null addresses
unmapped_list = [
    a
    for a in address_list
    if not (a in geocoding_cache or pd.isnull(a))
]

Loop through them, geocode them and add them to the cache.

In [12]:
# Geocode every address that is not in the cache yet.
for address in unmapped_list:
    try:
        search = geocoder.get(f"{address}, Chicago, IL")
    except Exception as exc:
        # Catch Exception rather than using a bare except, so Ctrl-C
        # (KeyboardInterrupt) can still stop a long run. A failed address is
        # reported and left out of the cache, so a later run will retry it.
        print(f"Could not geocode {address!r}: {exc}")
    else:
        geocoding_cache[address] = search
    # Pause after every request, including failed ones, so a burst of errors
    # does not turn into a burst of unthrottled requests.
    time.sleep(0.1)

Write out the results

In [13]:
class ComplexEncoder(json.JSONEncoder):
    """
    JSON encoder that also accepts the googlegeocoder result classes
    """
    def default(self, obj):
        """
        Return the attribute dict of a geocoder object; defer anything else to the base encoder
        """
        geocoder_types = (GeocoderResult, AddressComponent, Geometry, Coordinates, Bounds)
        if not isinstance(obj, geocoder_types):
            # Not one of ours: let the stock encoder handle it
            return super().default(obj)
        return obj.__dict__

In [14]:
# Save the cache back to disk. The `with` block closes (and so flushes) the
# file before anything reads it again, without relying on garbage collection
# to release the handle.
with open("output/geocoding.json", "w") as cache_file:
    json.dump(
        geocoding_cache,
        cache_file,
        cls=ComplexEncoder,
        indent=2
    )

Convert the results to GeoJSON

In [15]:
def to_geojson(address, obj):
    """
    Build a GeoJSON point feature for an address from one geocoder result
    """
    location = obj['geometry']['location']
    # Longitude is passed first, then latitude
    point = Point((location['lng'], location['lat']))
    return Feature(geometry=point, properties={'address': address})

In [16]:
# Re-read the cache from disk so every entry is plain JSON data (dicts and
# lists). The `with` block closes the file handle once it has been parsed.
with open("output/geocoding.json", "r") as cache_file:
    geocoding_cache = json.load(cache_file)

In [17]:
# One feature per cached address, taken from the first result in its list
feature_list = [
    to_geojson(address, obj_list[0])
    for address, obj_list in geocoding_cache.items()
]

In [18]:
# Wrap the individual features in a single FeatureCollection
feature_collex = FeatureCollection(feature_list)

Write out the GeoJSON

In [19]:
# Write the feature collection to disk. The `with` block guarantees the file
# is flushed and closed once the dump finishes.
with open("output/geocoding.geojson", "w") as geojson_file:
    json.dump(
        feature_collex,
        geojson_file,
        indent=2
    )