# Collecting Reverse Geocoded Addresses
    ** Written by Stephn Scherrer 5 April 2020 **
    ** Last modified 6 April 2020 **

This notebook is used to collect intermediate data consisting of a dataframe of evenly gridded lon/lat coordinates and the nearest corrosponding address. This data will be used in an additional notebook to query google's transit API and get transit time information for each address.

In [1]:
## Workspace setup
import os
import time
import numpy as np
import pandas as pd
import geopandas as gpd
import geopy
from geopy.geocoders import Nominatim

"""
from geopy.extra.rate_limiter import RateLimiter
import matplotlib.pyplot as plt
# import plotly_express as px
import tqdm
from tqdm._tqdm_notebook import tqdm_notebook
"""

'\nfrom geopy.extra.rate_limiter import RateLimiter\nimport matplotlib.pyplot as plt\n# import plotly_express as px\nimport tqdm\nfrom tqdm._tqdm_notebook import tqdm_notebook\n'

In [2]:
#### Workspace Setup
## Establish Directory Heirarchy
projDir = os.path.dirname(os.getcwd())
dataDir = os.path.join(projDir, 'Data')

In [3]:
#### Defining Utility Functions
def composeAddress(location):
    """
    A function to create a string address from location information collected using reverse geocoding
    inputs: 
    """
    components = ['road', 'city', 'state', 'postcode', 'country_code']
    address = ''
    for component in components:
        try: 
            address = address + (location.raw['address'][component])
            if component != components[-1]:
                address = address + ', '
        except: 
            print (component)
    return address

In [13]:
#### Defining global parameters
### Destination Address
destCoord = [37.7911, -122.3961] # Lat, Lon in Decimal Degrees

### Maximum Walking Time 
## Will be used to define resolution of geocoded points
maxWalk = 1.5 # minutes

### Miles from destination
## Used to determine which locations to get addresses
minRad = 7.0 # Miles

In [14]:
### Calculating grid resolution
## Given the maximum walking time, determine grid resolution. 
## Assume a walking speed of 3 mph
walking_speed = 3.0 # miles per hour
resDist = (float(maxWalk) / 60.0) * walking_speed * 2 # in miles. Multiply by 2 because you can get to the intermediate point of any two locations in half the distance to traverse between the two
resDeg = resDist / (60 * 1.15)
# print(resDeg)

### Creating Grid of Lat Lon positions
startLat = destCoord[0] - (1.15 / 60) * minRad
endLat = destCoord[0] + (1.15 / 60) * minRad
startLon = destCoord[1] - (1.15 / 60) * minRad
endLon = destCoord[1] + (1.15 / 60) * minRad

### Construct arrays of longitude and latitude points
lats = np.sort(np.append(np.linspace(start = startLat, stop = endLat, num = (minRad / resDist)), destCoord[0]))
lons = np.sort(np.append(np.linspace(start = startLon, stop = endLon, num = (minRad / resDist)), destCoord[1]))

### Get all permutations of lat and lons
latLonDF = pd.DataFrame()
## Looping through all lats and lons and appending to dataframe
counter = 0 # Used to pass an index value for pd.DataFrame
for i in range(len(lats)):
    for j in range(len(lons)):
        latLonDF = pd.concat([latLonDF,  pd.DataFrame({'lat': lats[i], 'lon': lons[j], 'address': None}, index = [counter])])
        counter += 1 # used to pass index value to pd.DataFrame above

### Calculating cost of Query
print ('Running this Analysis will cost $' + str(round((latLonDF.shape[0] + 1) * 0.01, 2)))

Running this Analysis will cost $22.1


In [15]:
## Creating a geocoder
locator = Nominatim(user_agent = "myGeocoder")

## Loop through our latitude and longitudes
for i in range(latLonDF.shape[0]):
    coordinates = str(latLonDF.loc[i, 'lat']) + ', ' + str(latLonDF.loc[i, 'lon'])
    location = locator.reverse(coordinates)
    latLonDF.loc[i, 'address'] = composeAddress(location)
    time.sleep(1)

road
city
postcode
road
city
postcode
road
city
postcode
road
city
road
city
road
city
road
city
city
city
city
city
city
city
city
city
city
city
city
city
city
city
city
city
city
city
city
road
city
road
city
road
postcode
road
postcode
road
postcode
road
postcode
road
postcode
road
postcode
road
postcode
road
postcode
road
postcode
road
postcode
road
postcode
road
postcode
road
postcode
road
postcode
road
postcode
road
postcode
road
postcode
road
postcode
road
postcode
road
city
postcode
road
city
postcode
road
city
postcode
road
city
road
city
road
city
city
city
city
city
city
city
city
city
city
city
city
city
road
city
road
city
city
city
city
city
city
city
city
city
city
road
postcode
road
postcode
road
postcode
road
postcode
road
postcode
road
postcode
road
postcode
road
postcode
road
postcode
road
postcode
road
postcode
road
postcode
road
postcode
road
postcode
road
postcode
road
postcode
road
postcode
road
postcode
road
city
postcode
road
city
postcode
road
city
postcode
r

GeocoderTimedOut: Service timed out

In [16]:
### Save dataframe as intermediate Data
latLonDF.to_csv(os.path.join(dataDir, 'interim/lat_lon_address_grid.csv'))