# Get Latitude and Longitude for a given address

### 1. Install Docker (https://hub.docker.com/editions/community/docker-ce-desktop-mac)

In [1]:
import pandas as pd

# Load the BART station list; the preview in the next cell shows `address` and `station_name` columns.
bartstations = pd.read_csv('bartstations.csv')

In [2]:
# Preview the first rows of the station table.
bartstations.head()

Unnamed: 0,address,station_name
0,"1245 Broadway, Oakland, CA 94612",12th St. Oakland City Center
1,"2000 Mission Street, San Francisco, CA 94110",16th St. Mission (SF)
2,"1900 Broadway, Oakland, CA 94612",19th St. Oakland
3,"2800 Mission Street, San Francisco, CA 94110",24th St. Mission (SF)
4,"3100 Adeline Street, Berkeley, CA 94703",Ashby (Berkeley)


### 2. Process the file that requires geocoding. The Docker image is downloaded the first time the command runs and is reused on later runs.

References https://hub.docker.com/r/degauss/dist_to_major_roadway/

In [3]:
%%bash

# Run the degauss/geocoder image on the `address` column of bartstations.csv.
# The current directory is mounted at /tmp inside the container, and the
# container is removed automatically when it exits.
docker run --rm --volume "${PWD}:/tmp" degauss/geocoder bartstations.csv address

Process is interrupted.


In [80]:
# Load the geocoded station file (presumably written by the geocoder run above — confirm);
# the next cell reads its `address`, `lat` and `lon` columns.
df = pd.read_csv('bartstations_geocoded.csv')

In [82]:
# Show each address next to its latitude/longitude for the first rows.
df[['address','lat','lon']].head()

Unnamed: 0,address,lat,lon
0,"10 Union Square, Union City, CA 94587",37.586275,-122.01732
1,"11 Camino Pablo, Orinda, CA 94563",37.877681,-122.181858
2,"1150 Market Street, San Francisco, CA 94102",37.779522,-122.413811
3,"1151 Huntington Avenue, San Bruno, CA 94066",37.63856,-122.416501
4,"1245 Broadway, Oakland, CA 94612",37.803316,-122.272008


# Another method to perform the same geocoding.

#### Using DataScienceToolKit to analyze latitude and longitude in python.

#### Setup instructions are for Mac, may work for other OS too.

####  1.	Download and install Oracle VirtualBox (https://www.virtualbox.org/)

####  2.	Download and install Vagrant (http://www.vagrantup.com/)

####  3. Download the dstk_0.51.box VM image (https://goo.gl/XPiaza). It is a 22 GB file, so be patient. If you encounter issues, refer to https://github.com/petewarden/dstk/issues/57

#### 4.	Setup python 2.7 virtual environment. (https://uoa-eresearch.github.io/eresearch-cookbook/recipe/2014/11/20/conda/)

#### 5. Bring up the VM using the downloaded box file. Instructions are on this page: http://www.datasciencetoolkit.org/developerdocs#vagrant

    vagrant box add dstk dstk_0.51.box  
    vagrant init
    vagrant up
    Access the application from http://localhost:8080 or vagrant ssh

#### 6.	Download and Install python library dstk
    python setup.py build 
    python setup.py install

#### 7. Start using dstk from Python.

References
https://github.com/dlab-geo/geocoding-workshop-sp2015/blob/master/intro-geocoding.ipynb

# Warning: 
## After installation I noticed that VirtualBox was trying to access photos and contacts, so I disabled the VM. I am not sure whether there are any security issues.

In [4]:
import json
from pprint import pprint

import pandas as pd
import requests
from dstk import DSTK

# Create the DSTK client. Importing the class directly means the name `dstk`
# is only ever bound to the client instance, instead of first naming the
# module and then being overwritten by the instance.
dstk = DSTK()

# Keep the lookup as the last expression of the cell so the notebook displays
# the geocoding result instead of silently discarding it.
dstk.street2coordinates('2543 Graystone Place, Simi Valley, CA 93065')

In [2]:
try:
    from urllib.parse import quote_plus  # Python 3
except ImportError:
    from urllib import quote_plus  # Python 2


def single_address(address, api='dstk', timeout=30):
    """Geocode a single address through the DSTK server on localhost:8080.

    Parameters
    ----------
    address : str
        Free-form street address to look up.
    api : {'dstk', 'google'}, optional
        Which endpoint to call: 'dstk' uses ``/street2coordinates/``,
        'google' uses the Google-style ``/maps/api/geocode/json`` route.
    timeout : float, optional
        Seconds passed to ``requests.get`` so a stalled server cannot hang
        the notebook indefinitely.

    Returns
    -------
    The decoded JSON body of the response.

    Raises
    ------
    AssertionError
        If ``api`` is not one of the supported values or ``address`` is not
        a ``str``.
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    """
    # Validation stays as assertions so the exception type seen by existing
    # callers does not change.
    assert api in ('dstk', 'google'), "api must be 'dstk' or 'google'"
    assert isinstance(address, str), 'address must be a str'

    # /street2coordinates
    dstk_dstk = 'http://localhost:8080/street2coordinates/'

    # Google-style
    dstk_google = 'http://localhost:8080/maps/api/geocode/json?sensor=false&address='

    url_prefix = dstk_dstk if api == 'dstk' else dstk_google

    # Percent-encode the address instead of only swapping spaces for '+':
    # characters such as '&', '#', '?', '%' or a literal '+' would otherwise
    # truncate or alter the request. Commas are left as-is so URLs for
    # ordinary street addresses are unchanged.
    url = url_prefix + quote_plus(address, safe=',')

    response = requests.get(url, timeout=timeout)
    # Surface HTTP failures explicitly rather than trying to parse an error page.
    response.raise_for_status()
    return response.json()

In [5]:
# Geocode the address through the default DSTK-style endpoint and pretty-print the reply.
google_hq = single_address(address='1600 Amphitheatre Pkwy, Mountain View, CA', api='dstk')
pprint(google_hq)

{u'1600 Amphitheatre Pkwy, Mountain View, CA': {u'confidence': 0.902,
                                                u'country_code': u'US',
                                                u'country_code3': u'USA',
                                                u'country_name': u'United States',
                                                u'fips_county': u'06085',
                                                u'latitude': 37.423471,
                                                u'locality': u'Mountain View',
                                                u'longitude': -122.086546,
                                                u'region': u'CA',
                                                u'street_address': u'1600 Amphitheatre Pkwy',
                                                u'street_name': u'Amphitheatre Pkwy',
                                                u'street_number': u'1600'}}


In [6]:
# Look up the same address through the Google-style endpoint and pretty-print the reply.
google = single_address(address='1600 Amphitheatre Pkwy, Mountain View, CA', api='google')
pprint(google)

{u'results': [{u'address_components': [{u'long_name': u'1600',
                                        u'short_name': u'1600',
                                        u'types': [u'street_number']},
                                       {u'long_name': u'Amphitheatre Pkwy',
                                        u'short_name': u'Amphitheatre Pkwy',
                                        u'types': [u'route']},
                                       {u'long_name': u'Mountain View',
                                        u'short_name': u'Mountain View',
                                        u'types': [u'locality',
                                                   u'political']},
                                       {u'long_name': u'CA',
                                        u'short_name': u'CA',
                                        u'types': [u'administrative_area_level_1',
                                                   u'political']},
                                     

In [7]:
%%bash

# POST the address as the request body to the street2coordinates endpoint on localhost.
curl --data "1600 Amphitheatre Pkwy, Mountain View, CA" http://localhost:8080/street2coordinates

{
  "1600 Amphitheatre Pkwy, Mountain View, CA": {
    "country_code3": "USA",
    "latitude": 37.423471,
    "country_name": "United States",
    "longitude": -122.086546,
    "street_address": "1600 Amphitheatre Pkwy",
    "region": "CA",
    "confidence": 0.902,
    "street_number": "1600",
    "locality": "Mountain View",
    "street_name": "Amphitheatre Pkwy",
    "fips_county": "06085",
    "country_code": "US"
  }
}

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0100   467  100   426  100    41  42600   4100 --:--:-- --:--:-- --:--:-- 51888


In [11]:
%%bash
# POST one address to street2coordinates and write the reply to coordinates.json.
curl --output coordinates.json \
     --data "1245 Broadway, Oakland, CA 94612" \
     http://localhost:8080/street2coordinates

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0100   423  100   391  100    32   6109    500 --:--:-- --:--:-- --:--:--  6609


In [17]:
%%bash
# Print the contents of coordinates.json to inspect the saved geocoder reply.
cat coordinates.json

{
  "200 Ygnacio Valley Road, Walnut Creek, CA 94596": {
    "country_code3": "USA",
    "latitude": 37.904526,
    "country_name": "United States",
    "longitude": -122.068036,
    "street_address": "200 Ygnacio Valley Rd",
    "region": "CA",
    "confidence": 0.902,
    "street_number": "200",
    "locality": "Walnut Creek",
    "street_name": "Ygnacio Valley Rd",
    "fips_county": "06013",
    "country_code": "US"
  },
  "1451 7th Street, Oakland, CA 94607": {
    "country_code3": "USA",
    "latitude": 37.805352,
    "country_name": "United States",
    "longitude": -122.294959,
    "street_address": "1451 7th St",
    "region": "CA",
    "confidence": 0.902,
    "street_number": "1451",
    "locality": "Oakland",
    "street_name": "7th St",
    "fips_county": "06001",
    "country_code": "US"
  },
  "1900 Broadway, Oakland, CA 94612": {
    "country_code3": "USA",
    "latitude": 37.807795,
    "country_name": "United States",
    "longitude": -122.269113,
    "street_address"

In [59]:
%%bash
# Send the contents of bartaddresses.txt as the POST body and save the reply to coordinates.json.
curl --data @bartaddresses.txt --output coordinates.json \
     http://localhost:8080/street2coordinates


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0 37 20151   31  5642  100  1940   8522   2930  0:00:02 --:--:--  0:00:02 11435100 20151  100 18211  100  1940  27384   2917 --:--:-- --:--:-- --:--:-- 30256


In [60]:
# Read the saved geocoder reply and transpose it, so that each address becomes a row
# and the per-address fields (latitude, longitude, ...) become columns.
json_data = pd.read_json('coordinates.json').T

In [61]:
# Move the address labels out of the index into a regular column named `address`.
json_data = json_data.reset_index().rename(columns={'index': 'address'})

In [62]:
# Lower-case the addresses so they compare equal to the lower-cased station
# addresses used as the merge key further down.
json_data['address'] = json_data['address'].str.lower()


In [83]:
# Preview the geocoded rows.
json_data.head()

Unnamed: 0,address,confidence,country_code,country_code3,country_name,fips_county,latitude,locality,longitude,region,street_address,street_name,street_number
0,"10 union square, union city, ca 94587",1.0,US,USA,United States,6001,37.5863,Union City,-122.017,CA,10 Union Sq,Union Sq,10
1,"11 camino pablo, orinda, ca 94563",0.878,US,USA,United States,6013,37.8777,Orinda,-122.182,CA,11 Cam Pablo,Cam Pablo,11
2,"1150 market street, san francisco, ca 94102",1.0,US,USA,United States,6075,37.7791,San Francisco,-122.414,CA,1150 Market St,Market St,1150
3,"1151 huntington avenue, san bruno, ca 94066",1.0,US,USA,United States,6081,37.6386,San Bruno,-122.417,CA,1151 Huntington Ave,Huntington Ave,1151
4,"1245 broadway, oakland, ca 94612",1.0,US,USA,United States,6001,37.8033,Oakland,-122.272,CA,1245 Broadway,Broadway,1245


In [64]:
# Reload the station list so it can be joined with the geocoded coordinates.
stations = pd.read_csv('bartstations.csv')

In [65]:
# Lower-case the addresses to match the normalisation applied to json_data['address'].
stations['address'] = stations['address'].str.lower()

In [66]:
# Join coordinates to stations on the normalised address; an inner join keeps
# only addresses that appear in both tables.
bart = pd.merge(json_data, stations, how='inner', on='address')

In [84]:
# Show the merged address/coordinate columns for the first rows.
bart[['address','latitude','longitude']].head()

Unnamed: 0,address,latitude,longitude
0,"10 union square, union city, ca 94587",37.5863,-122.017
1,"11 camino pablo, orinda, ca 94563",37.8777,-122.182
2,"1150 market street, san francisco, ca 94102",37.7791,-122.414
3,"1151 huntington avenue, san bruno, ca 94066",37.6386,-122.417
4,"1245 broadway, oakland, ca 94612",37.8033,-122.272


In [85]:
import numpy as np

# Query point as (longitude, latitude), the same column order the KDTree is built with below.
omaha_point = np.array([-95.995102, 41.257160])

In [87]:
import pandas as pd
from sklearn.neighbors import KDTree

In [88]:
# Gazetteer file read straight from the census.gov zip archive: tab-separated,
# keeping only columns 0, 5 and 6, with GEOID read as text.
url = 'http://www2.census.gov/geo/docs/maps-data/data/gazetteer/2016_Gazetteer/2016_Gaz_zcta_national.zip'
df_locations = pd.read_csv(
    url,
    sep='\t',
    usecols=[0, 5, 6],
    dtype={'GEOID' : 'str'},
).rename(columns=lambda name: name.strip())  # header names carry stray whitespace
print(df_locations.shape[0])
df_locations.head()

33144


Unnamed: 0,GEOID,INTPTLAT,INTPTLONG
0,601,18.180555,-66.749961
1,602,18.361945,-67.175597
2,603,18.455183,-67.119887
3,606,18.158345,-66.932911
4,610,18.295366,-67.125135


In [89]:
# Build a nearest-neighbour index over the gazetteer points, in (longitude, latitude) column order.
# NOTE(review): the tree is built on raw degree values, so "nearest" is presumably
# measured in degrees rather than distance on the ground — confirm this approximation is acceptable.
kdt = KDTree(df_locations[['INTPTLONG', 'INTPTLAT']])

In [90]:
# The tree is queried with a 2-D array of points, so wrap the single
# (longitude, latitude) point in a leading axis.
omaha_point_kdt = np.expand_dims(omaha_point, axis=0)

nearest_point_index = kdt.query(omaha_point_kdt, k=1, return_distance=False)
# The query returns row positions in the data the tree was built from, not
# index labels, so select positionally with .iloc. The previous .loc lookup
# only gave the right row while df_locations kept its default 0..n-1 index.
print(df_locations['GEOID'].iloc[nearest_point_index[0]])

23609    68132
Name: GEOID, dtype: object


In [94]:
import geopandas as gpd
from shapely.geometry import Point

ImportError: No module named geopandas

In [95]:
%%bash
pip install geopandas shapely

Collecting geopandas
  Using cached https://files.pythonhosted.org/packages/24/11/d77c157c16909bd77557d00798b05a5b6615ed60acb5900fbe6a65d35e93/geopandas-0.4.0-py2.py3-none-any.whl
Collecting shapely
  Using cached https://files.pythonhosted.org/packages/73/44/feb112a6fe682bfbdbe707d470fb46a958bb3923c2d4ea74acbbd852936d/Shapely-1.6.4.post2-cp27-cp27m-macosx_10_9_x86_64.whl
Collecting fiona (from geopandas)
  Using cached https://files.pythonhosted.org/packages/af/51/72e16c1506c8f71cf57db355b17e8eeca0ea1455a0aca09d11cf3d40b2af/Fiona-1.8.4-cp27-cp27m-macosx_10_6_intel.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl
Collecting pyproj (from geopandas)
  Using cached https://files.pythonhosted.org/packages/29/72/5c1888c4948a0c7b736d10e0f0f69966e7c0874a660222ed0a2c2c6daa9f/pyproj-1.9.5.1.tar.gz
    Complete output from command python setup.py egg_info:
    xcrun: error: active developer path ("/Applications/Xcode.app/Contents/Developer") does not exist
    Use 

Command "python setup.py egg_info" failed with error code 1 in /private/var/folders/nn/chv0mpnn4x95mr6fj3y6h5y40000gp/T/pip-install-moz0Hn/pyproj/
