In [1]:
"""
A filesystem cache of previous queries to Geocod.io, so that we don't waste too many queries
(I only get 2500 free queries per day).
Assumes that the project root (or wherever you started your python shell/Jupyter notebook from) has
a file "geocodio_api_key.json" with the contents:
    {
        "key": "API_KEY_HERE"
    }
"""
import json
import os
from pathlib import Path
from typing import Dict, Iterable, List
from geocodio import GeocodioClient
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

# JSON file mapping each address string to the Geocod.io response dict stored for it.
# The path is relative to the current working directory, and the file is meant to be
# kept committed in the repo.
CACHE_PATH = Path('./data/geocode_cache.json')

def get_client():
    """
    Build a GeocodioClient from the API key stored on disk.

    Reads "./geocodio_api_key.json" (relative to the current working directory),
    parses it as JSON and passes the value under "key" to GeocodioClient.
    """
    key_file = Path('./geocodio_api_key.json')
    credentials = json.loads(key_file.read_text())
    return GeocodioClient(credentials['key'])

def load_cache() -> Dict[str, dict]:
    """
    Read the on-disk geocode cache.

    Returns the parsed JSON content of CACHE_PATH, or an empty dict when the
    cache file does not exist yet.
    """
    if CACHE_PATH.exists():
        return json.loads(CACHE_PATH.read_text())
    return {}

def overwrite_cache(cache: Dict[str, dict]) -> None:
    """
    Replace the on-disk cache at CACHE_PATH with ``cache``, serialized as JSON.

    The data is first written to a sibling "<name>.tmp" file and then moved over
    CACHE_PATH with os.replace. Opening CACHE_PATH directly with 'w' truncates it
    immediately, so a failure part-way through json.dump (e.g. a value that is not
    JSON-serializable, or an interrupted kernel) would destroy every previously
    cached response.

    Raises whatever json.dump or the filesystem calls raise; in that case the
    existing cache file is left untouched.
    """
    # Make sure the target directory exists so the first-ever write succeeds.
    CACHE_PATH.parent.mkdir(parents=True, exist_ok=True)

    tmp_path = CACHE_PATH.with_name(CACHE_PATH.name + '.tmp')
    with tmp_path.open('w') as f:
        json.dump(cache, f)

    # Swap the fully written file into place in a single step.
    os.replace(tmp_path, CACHE_PATH)

def lookup(addresses: Iterable[str]) -> List[dict]:
    """
    Geocode ``addresses``, consulting the filesystem cache before calling the API.

    Only addresses that are not already keys of the cache are sent to Geocod.io
    (each distinct address at most once); their responses are stored in the cache
    and the cache file is rewritten. When every address is already cached, no API
    client is created and the cache file is not touched.

    Please don't run this function in parallel, because the cache isn't thread-safe.
    Since it takes an Iterable, it's easy to use this with a Pandas series:
        df['geocode_results'] = geocode_cache.lookup(df['address'])

    Parameters:
        addresses: any iterable of address strings, including one-shot iterators
            such as generators.

    Returns:
        One cached response dict per input address, in input order (duplicates
        in the input yield the same dict repeated).
    """
    # Materialize once: the input is walked twice (to find cache misses and to
    # build the result), and a generator would be empty on the second pass.
    addresses = list(addresses)

    cache = load_cache()

    # dict.fromkeys de-duplicates while keeping first-seen order, so the batch
    # sent to the API is deterministic (a plain set has arbitrary order).
    addresses_to_lookup = [
        address for address in dict.fromkeys(addresses) if address not in cache
    ]

    if addresses_to_lookup:
        api_results = get_client().geocode(addresses_to_lookup)
        # Responses are paired with the requested addresses by position.
        for address, response in zip(addresses_to_lookup, api_results):
            cache[address] = dict(response)

        overwrite_cache(cache)

    return [cache[address] for address in addresses]



In [2]:
results = lookup(['137  Fair Oaks St, Mountain View, CA'])
# shapely Points take (x, y), which for geographic coordinates is
# (longitude, latitude) -- the same order geocode_result_to_point uses.
points = [
    Point(r['results'][0]['location']['lng'], r['results'][0]['location']['lat'])
    for r in results
]

In [3]:
# Load the CSV into a DataFrame; later cells read and rewrite its 'Address' column.
newer_permits = pd.read_csv('./data/APRs.csv')

In [4]:
# Title-case each street address and append the city/state so the geocoder
# receives a full address. Rows that already end with the suffix are skipped,
# which keeps this cell idempotent: re-running it no longer produces
# "..., Mountain View, Ca, Mountain View, CA" (and the cache misses that follow).
city_suffix = ', Mountain View, CA'
needs_suffix = ~newer_permits['Address'].str.endswith(city_suffix, na=False)
newer_permits.loc[needs_suffix, 'Address'] = (
    newer_permits.loc[needs_suffix, 'Address'].str.title() + city_suffix
)

In [5]:
# Display the 'Address' column to inspect the strings that will be geocoded.
newer_permits['Address']

0             137  Fair Oaks St, Mountain View, CA
1           285 Carmelita Drive, Mountain View, CA
2                423  Loreto St, Mountain View, CA
3             125  Fair Oaks St, Mountain View, CA
4      545 Mountain View Avenue, Mountain View, CA
                          ...                     
615             333  Apricot Ln, Mountain View, CA
616         257 Calderon Avenue, Mountain View, CA
617     315 Sierra Vista Avenue, Mountain View, CA
618             351  Martens Av, Mountain View, CA
619           135  Fair Oaks St, Mountain View, CA
Name: Address, Length: 620, dtype: object

In [6]:
# Geocode every address via the cache-backed lookup; the result list has one
# response dict per row, in the same order as the 'Address' column.
all_results = lookup(newer_permits['Address'])

In [17]:
def geocode_results_to_geoseries(georesults, df):
    """
    Turn a sequence of geocode results into a GeoSeries indexed like ``df``.

    Each element of ``georesults`` is converted with geocode_result_to_point and
    the resulting values are wrapped in a GeoSeries whose index is ``df.index``.
    Presumably ``georesults`` holds one entry per row of ``df`` -- verify at the
    call site.
    """
    points = list(map(geocode_result_to_point, georesults))
    return gpd.GeoSeries(points, index=df.index)

In [18]:
def geocode_result_to_point(georesult):
    """
    Convert one geocode response dict into a shapely Point.

    Parameters:
        georesult: a dict that may contain a 'results' list; the first entry's
            'location' is expected to hold 'lng' and 'lat'.

    Returns:
        Point(lng, lat) built from the first result, or None when 'results' is
        missing or empty.
    """
    if not georesult.get('results'):
        # None is the missing-geometry marker for a GeoSeries. The previous
        # `np.nan` raised NameError here because numpy is never imported.
        return None
    loc = georesult['results'][0]['location']
    # shapely uses (x, y) order, i.e. (longitude, latitude).
    return Point(loc['lng'], loc['lat'])

In [19]:
# Attach the geocoded points as the geometry column and rebind `newer_permits`
# to the resulting GeoDataFrame, declaring the coordinates as EPSG:4326.
permit_geometry = geocode_results_to_geoseries(all_results, newer_permits)
newer_permits = gpd.GeoDataFrame(newer_permits, geometry=permit_geometry, crs='EPSG:4326')

In [20]:
# Write the GeoDataFrame to disk using the GeoJSON driver.
newer_permits.to_file("./data/newer_permits.json", driver="GeoJSON")