# Package setup

In [39]:
import pandas as pd
import geopandas as gpd
import numpy as np
import requests
import json
from requests.utils import requote_uri
import asyncio
from unsync import unsync

# Define a function to look up each address via CAMS

In [2]:
def search_address_cams(address, timeout=30):
  """Geocode one address through the CAMS lookup cloud function.

  Args:
    address: Address string; sent as the ``address`` field of the JSON
      request body.
    timeout: Seconds to wait for the service before giving up.

  Returns:
    Tuple ``(address, found_address, lat, lon, dist)``: the input address
    followed by the ``address``, ``lat``, ``lon`` and ``dist`` fields of the
    first row in the response.

  Raises:
    requests.HTTPError: The service answered with a 4xx/5xx status.
    requests.Timeout: No answer arrived within ``timeout`` seconds.
    IndexError: The response contained no rows for this address.
  """
  carto_url = 'https://us-central1-cartodb-gcp-solutions-eng-team.cloudfunctions.net/getAddress'
  payload = {'address': address}
  headers = {'content-type':'application/json'}
  # Without a timeout, requests waits indefinitely on an unresponsive server.
  res = requests.post(carto_url, data=json.dumps(payload), headers=headers, timeout=timeout)
  # Fail on HTTP errors here instead of on a confusing JSON/KeyError below.
  res.raise_for_status()
  rows = json.loads(res.text)['rows']
  if not rows:
    raise IndexError(f'CAMS returned no match for address {address!r}')
  data = rows[0]
  return (address, data['address'], data['lat'], data['lon'], data['dist'])

# Define a function to look up each address via CARTO (HERE)

In [48]:
def search_address_carto(address, city, state='CA', country='United States', timeout=30):
  """Geocode one street address with CARTO's ``cdb_geocode_street_point``.

  The SQL function is executed through the SQL API of the ``tnederlof``
  CARTO account and the resulting point is parsed from WKT.

  Args:
    address: Street address to geocode.
    city: City passed as the second argument of the SQL function.
    state: State passed as the third argument.
    country: Country passed as the fourth argument.
    timeout: Seconds to wait for the service before giving up.

  Returns:
    Tuple ``(address, y, x)``: the input address followed by the y and x
    coordinates of the returned point (callers label them lat and lon).

  Raises:
    requests.HTTPError: The service answered with a 4xx/5xx status.
    requests.Timeout: No answer arrived within ``timeout`` seconds.
    IndexError: The response contained no rows.
  """
  # NOTE(review): credential committed in source -- rotate it and load it
  # from configuration instead of keeping it in the notebook.
  api_key = '551d05a437c7f6f81640f7fc87d9a2c62afc9f08'
  # Double embedded single quotes so values such as "O'Brien St" stay inside
  # their SQL string literal instead of terminating it.
  sql_args = ', '.join(
    "'" + str(value).replace("'", "''") + "'"
    for value in (address, city, state, country)
  )
  query = f"select ST_AsText(cdb_geocode_street_point) as geom from cdb_geocode_street_point({sql_args})"
  # Let requests build the query string: it percent-encodes '&', '#' and '+',
  # which would otherwise truncate or split the q parameter.
  res = requests.get(
    'https://tnederlof.carto.com/api/v2/sql',
    params={'q': query, 'api_key': api_key},
    timeout=timeout,
  )
  res.raise_for_status()
  rows = json.loads(res.text)['rows']
  if not rows:
    raise IndexError(f'CARTO returned no rows for address {address!r}')
  geo_result = gpd.GeoSeries.from_wkt([rows[0]['geom']])
  return (address, geo_result.y[0], geo_result.x[0])

 

# Look up 3 test addresses

### Using CAMS search
Note that a lower `dist` value is better here: it indicates a closer text match between the input address and the address found.

In [44]:
# Sample inputs for the single-address CAMS lookup.
# The last entry ends with a tab character (spelled \t here so it is
# visible); that is why the recorded output shows "\t" in input_address.
test_addresses = [
  '10020 Loftus Dr Apt G, El Monte, CA, 91731',
  '10047 1/4 Samoa Ave, Tujunga, CA, 91042',
  '1008 S Kern Ave, Los Angeles, CA, 90022\t',
]
# One request per address, issued sequentially; each returned tuple is a row.
df_result_cams = pd.DataFrame(
  list(map(search_address_cams, test_addresses)),
  columns=['input_address', 'found_address', 'lat', 'lon', 'dist'],
)
df_result_cams.head()

Unnamed: 0,input_address,found_address,lat,lon,dist
0,"10020 Loftus Dr Apt G, El Monte, CA, 91731","10020 Loftus Drive, El Monte, CA 91731",34.074073,-118.052863,0.25
1,"10047 1/4 Samoa Ave, Tujunga, CA, 91042","10047 Samoa Avenue, Tujunga, CA 91042",34.251551,-118.287433,0.2
2,"1008 S Kern Ave, Los Angeles, CA, 90022\t","1008 South Kern Avenue, Los Angeles, CA 90022",34.020141,-118.166141,0.244444


### Using CARTO (HERE)

In [49]:
# Sample inputs for the single-address CARTO lookup.
# The last address ends with a tab character (spelled \t here so it is
# visible); that is why the recorded output shows "\t" in input_address.
test_addresses = [
  '10020 Loftus Dr Apt G, El Monte, CA, 91731',
  '10047 1/4 Samoa Ave, Tujunga, CA, 91042',
  '1008 S Kern Ave, Los Angeles, CA, 90022\t',
]
# City for each address, in the same order as test_addresses.
test_cities = ['El Monte', 'Tujunga', 'Los Angeles']
# Addresses and cities are paired positionally; one request per pair.
df_result_here = pd.DataFrame(
  list(map(search_address_carto, test_addresses, test_cities)),
  columns=['input_address', 'lat', 'lon'],
)
df_result_here.head()

Unnamed: 0,input_address,lat,lon
0,"10020 Loftus Dr Apt G, El Monte, CA, 91731",34.07409,-118.05303
1,"10047 1/4 Samoa Ave, Tujunga, CA, 91042",34.26007,-118.28748
2,"1008 S Kern Ave, Los Angeles, CA, 90022\t",34.02009,-118.16604


# Batch Geocoding

### CAMS

In [52]:
@unsync
def search_address_cams_bulk(address, timeout=30):
  """Geocode one address through the CAMS lookup cloud function.

  Decorated with ``@unsync``, so the caller obtains the tuple described
  below by calling ``.result()`` on what this function returns.

  Args:
    address: Address string; sent as the ``address`` field of the JSON
      request body.
    timeout: Seconds to wait for the service before giving up.

  Returns:
    Tuple ``(address, found_address, lat, lon, dist)``: the input address
    followed by the ``address``, ``lat``, ``lon`` and ``dist`` fields of the
    first row in the response.

  Raises:
    requests.HTTPError: The service answered with a 4xx/5xx status.
    requests.Timeout: No answer arrived within ``timeout`` seconds.
    IndexError: The response contained no rows for this address.
  """
  carto_url = 'https://us-central1-cartodb-gcp-solutions-eng-team.cloudfunctions.net/getAddress'
  payload = {'address': address}
  headers = {'content-type':'application/json'}
  # A request without a timeout can hang forever, which would stall the
  # caller waiting on .result().
  res = requests.post(carto_url, data=json.dumps(payload), headers=headers, timeout=timeout)
  # Fail on HTTP errors here instead of on a confusing JSON/KeyError below.
  res.raise_for_status()
  rows = json.loads(res.text)['rows']
  if not rows:
    raise IndexError(f'CAMS returned no match for address {address!r}')
  data = rows[0]
  return (address, data['address'], data['lat'], data['lon'], data['dist'])

In [53]:
# Issue every CAMS lookup first, then collect the results in input order.
tasks = list(map(search_address_cams_bulk, test_addresses))
df_result_cams_bulk = pd.DataFrame(
  [pending.result() for pending in tasks],
  columns=['input_address', 'found_address', 'lat', 'lon', 'dist'],
)
df_result_cams_bulk.head(5)

Unnamed: 0,input_address,found_address,lat,lon,dist
0,"10020 Loftus Dr Apt G, El Monte, CA, 91731","10020 Loftus Drive, El Monte, CA 91731",34.074073,-118.052863,0.25
1,"10047 1/4 Samoa Ave, Tujunga, CA, 91042","10047 Samoa Avenue, Tujunga, CA 91042",34.251551,-118.287433,0.2
2,"1008 S Kern Ave, Los Angeles, CA, 90022\t","1008 South Kern Avenue, Los Angeles, CA 90022",34.020141,-118.166141,0.244444


### CARTO (HERE)

In [54]:
@unsync
def search_address_carto_bulk(address, city, state='CA', country='United States', timeout=30):
  """Geocode one street address with CARTO's ``cdb_geocode_street_point``.

  Decorated with ``@unsync``, so the caller obtains the tuple described
  below by calling ``.result()`` on what this function returns. The SQL
  function is executed through the SQL API of the ``tnederlof`` CARTO
  account and the resulting point is parsed from WKT.

  Args:
    address: Street address to geocode.
    city: City passed as the second argument of the SQL function.
    state: State passed as the third argument.
    country: Country passed as the fourth argument.
    timeout: Seconds to wait for the service before giving up.

  Returns:
    Tuple ``(address, y, x)``: the input address followed by the y and x
    coordinates of the returned point (callers label them lat and lon).

  Raises:
    requests.HTTPError: The service answered with a 4xx/5xx status.
    requests.Timeout: No answer arrived within ``timeout`` seconds.
    IndexError: The response contained no rows.
  """
  # NOTE(review): credential committed in source -- rotate it and load it
  # from configuration instead of keeping it in the notebook.
  api_key = '551d05a437c7f6f81640f7fc87d9a2c62afc9f08'
  # Double embedded single quotes so values such as "O'Brien St" stay inside
  # their SQL string literal instead of terminating it.
  sql_args = ', '.join(
    "'" + str(value).replace("'", "''") + "'"
    for value in (address, city, state, country)
  )
  query = f"select ST_AsText(cdb_geocode_street_point) as geom from cdb_geocode_street_point({sql_args})"
  # Let requests build the query string: it percent-encodes '&', '#' and '+',
  # which would otherwise truncate or split the q parameter.
  res = requests.get(
    'https://tnederlof.carto.com/api/v2/sql',
    params={'q': query, 'api_key': api_key},
    timeout=timeout,
  )
  res.raise_for_status()
  rows = json.loads(res.text)['rows']
  if not rows:
    raise IndexError(f'CARTO returned no rows for address {address!r}')
  geo_result = gpd.GeoSeries.from_wkt([rows[0]['geom']])
  return (address, geo_result.y[0], geo_result.x[0])
 

In [57]:
# Sample inputs for the batched CARTO lookup.
# The last address ends with a tab character (spelled \t here so it is
# visible); that is why the recorded output shows "\t" in input_address.
test_addresses = [
  '10020 Loftus Dr Apt G, El Monte, CA, 91731',
  '10047 1/4 Samoa Ave, Tujunga, CA, 91042',
  '1008 S Kern Ave, Los Angeles, CA, 90022\t',
]
# City for each address, in the same order as test_addresses.
test_cities = ['El Monte', 'Tujunga', 'Los Angeles']
# Issue every lookup first (addresses and cities paired positionally),
# then collect the results in input order.
carto_tasks = list(map(search_address_carto_bulk, test_addresses, test_cities))
df_result_here_bulk = pd.DataFrame(
  [pending.result() for pending in carto_tasks],
  columns=['input_address', 'lat', 'lon'],
)
df_result_here_bulk.head()

Unnamed: 0,input_address,lat,lon
0,"10020 Loftus Dr Apt G, El Monte, CA, 91731",34.07409,-118.05303
1,"10047 1/4 Samoa Ave, Tujunga, CA, 91042",34.26007,-118.28748
2,"1008 S Kern Ave, Los Angeles, CA, 90022\t",34.02009,-118.16604
