# Geocoding raw address data for the Who Owns Hoboken? app

This notebook uses geopy's Nominatim geocoder to append latitude and longitude to the raw property data compiled for the Who Owns Hoboken? app.

In [51]:
## Import libraries
import pandas as pd
from geopy import Nominatim
from geopy.extra.rate_limiter import RateLimiter
## Turn off pandas' chained-assignment (SettingWithCopy) warning for this session.
## NOTE(review): the cells visible here assign columns directly on `data`, which
## should not trigger that warning -- confirm whether this is still needed.
pd.options.mode.chained_assignment = None

In [58]:
## Load the raw property records from CSV and preview the first rows
raw_data_path = './raw_data.csv'
data = pd.read_csv(raw_data_path)
data.head()

Unnamed: 0,Block,Lot,Qual,Class,Property Location,Building name,LLC/Owner,LLC or private individual,Total Units,Company,...,Building Class,Prior Block,Prior Lot,Prior Qual,Updated,Additional Lots,Rent Control,Building Desc,units2,Sale Date
0,75.0,1.0,C004D,2,601 MONROE ST,,"OCONNOR, JAMES P JR",Individual,2.0,James O'Connor,...,,,,,,,YES,2BED,1.0,0000-00-00
1,219.0,8.0,C0002,2,815 WASHINGTON ST,,"OCONNOR, JAMES & BERNADETTE",Individual,2.0,James O'Connor,...,,,,,,,YES,2BR,1.0,8/8/11
2,207.0,25.0,,2,808 WASHINGTON ST,,808 WASHINGTON ST LLC,LLC,4.0,,...,49.0,,,,12/27/19,,YES,4B-4U-H-BA,4.0,8/16/13
3,180.0,6.0,,2,511 PARK AVE,,"RAD, MOHAMMAD TRUSTEE OF JOODI",Trust,2.0,Rad,...,49.0,,,,12/27/19,,YES,3B-2U-FX-H,2.0,5/12/14
4,88.0,1.0,,4C,800 MADISON/801 MONROE ST,Avalon,DSF IV HOBOKEN OWNER LLC % AVALON B,LLC,220.0,AvalonBay,...,,,,,,,YES,6B-220U-4C-G,220.0,


In [59]:
## Summarize column names, non-null counts, and dtypes of the loaded data
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2773 entries, 0 to 2772
Data columns (total 24 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Block                      2773 non-null   float64
 1   Lot                        2773 non-null   float64
 2   Qual                       1067 non-null   object 
 3   Class                      2773 non-null   object 
 4   Property Location          2773 non-null   object 
 5   Building name              60 non-null     object 
 6   LLC/Owner                  2773 non-null   object 
 7   LLC or private individual  2773 non-null   object 
 8   Total Units                2772 non-null   float64
 9   Company                    1012 non-null   object 
 10  Owner's Mailing Address    2773 non-null   object 
 11  City/State/Zip             2773 non-null   object 
 12  Sq. Ft.                    2411 non-null   float64
 13  Yr. Built                  2052 non-null   float

In [60]:
## Build the full address string used for the Nominatim lookup by appending
## the city, state, and ZIP code to each property's street address
city_suffix = ', Hoboken, NJ, 07030'
street_addresses = data['Property Location']
data['search_address'] = street_addresses + city_suffix
data.head()

Unnamed: 0,Block,Lot,Qual,Class,Property Location,Building name,LLC/Owner,LLC or private individual,Total Units,Company,...,Prior Block,Prior Lot,Prior Qual,Updated,Additional Lots,Rent Control,Building Desc,units2,Sale Date,search_address
0,75.0,1.0,C004D,2,601 MONROE ST,,"OCONNOR, JAMES P JR",Individual,2.0,James O'Connor,...,,,,,,YES,2BED,1.0,0000-00-00,"601 MONROE ST, Hoboken, NJ, 07030"
1,219.0,8.0,C0002,2,815 WASHINGTON ST,,"OCONNOR, JAMES & BERNADETTE",Individual,2.0,James O'Connor,...,,,,,,YES,2BR,1.0,8/8/11,"815 WASHINGTON ST, Hoboken, NJ, 07030"
2,207.0,25.0,,2,808 WASHINGTON ST,,808 WASHINGTON ST LLC,LLC,4.0,,...,,,,12/27/19,,YES,4B-4U-H-BA,4.0,8/16/13,"808 WASHINGTON ST, Hoboken, NJ, 07030"
3,180.0,6.0,,2,511 PARK AVE,,"RAD, MOHAMMAD TRUSTEE OF JOODI",Trust,2.0,Rad,...,,,,12/27/19,,YES,3B-2U-FX-H,2.0,5/12/14,"511 PARK AVE, Hoboken, NJ, 07030"
4,88.0,1.0,,4C,800 MADISON/801 MONROE ST,Avalon,DSF IV HOBOKEN OWNER LLC % AVALON B,LLC,220.0,AvalonBay,...,,,,,,YES,6B-220U-4C-G,220.0,,"800 MADISON/801 MONROE ST, Hoboken, NJ, 07030"


In [61]:
## Define Nominatim API instance.
## geopy's default request timeout is 1 second, which the public Nominatim
## server can exceed; a lookup that times out can end up as a missing
## location downstream, so allow more time per request.
locator = Nominatim(user_agent='who_owns_hoboken', timeout=10)

In [None]:
## Call the API once per distinct address and store the result for every row
## in the 'location' column (None where no result was obtained).
## Requests are throttled to at most one per second, so rows that share an
## address reuse a single lookup instead of repeating an identical request.
geocode = RateLimiter(locator.geocode, min_delay_seconds=1)
unique_addresses = data['search_address'].unique()
location_by_address = {address: geocode(address) for address in unique_addresses}
data['location'] = data['search_address'].apply(location_by_address.get)

In [52]:
## Pull the coordinate tuple out of each geocoding result;
## rows without a result keep None.
def _point_or_none(loc):
    """Return the result's point as a tuple, or None when there is no result."""
    if not loc:
        return None
    return tuple(loc.point)


data['point'] = data['location'].apply(_point_or_none)

In [56]:
## Turn lat/long information into new columns in the dataset.
## Rows that have no point (no geocoding result) are padded with NaN so every
## row contributes exactly three values. Without the padding, the shape of the
## intermediate frame is inferred from the first row, and the three-column
## assignment fails when that row has no point.
coordinate_columns = ['latitude', 'longitude', 'altitude']
missing_point = (float('nan'),) * len(coordinate_columns)
point_rows = [
    point if isinstance(point, tuple) else missing_point
    for point in data['point']
]
data[coordinate_columns] = pd.DataFrame(
    point_rows, index=data.index, columns=coordinate_columns
)

Next steps:

1. Drop unnecessary columns
2. Output as GeoJSON