In [1]:
# Import Python dependencies
import os
import warnings

import pandas as pd
from geopy.extra.rate_limiter import RateLimiter
from geopy.geocoders import GoogleV3

In [25]:
# Load the Total Sales CSV export into a dataframe and preview the first rows
df = pd.read_csv(filepath_or_buffer="data/Total_Sales.csv")
df.head(n=5)

Unnamed: 0,PARCEL,Neighborhood,OWNER 1 FIRST NAME,OWNER 1 LAST NAME,OWNER 2 FIRST NAME,OWNER 2 LAST NAME,SITUS HOUSE NUMBER,SITUS STREET NAME,SITUS STREET ADDRESS,SITUS UNIT NUMBER,...,STORIES NO.,PROPERTY TAX,LMS-1ST MTG AMOUNT,LMS-1ST MTG TYPE,LMS-LENDER,LMS-TITLE COMPANY,ASSESSED TOTAL VALUE,ASSESSED LAND VALUE,ASSESSED IMPROVEMENT VALUE,SCHOOL DISTRICT 1
0,5089002004,Beverly Grove,Astrid,Meghrigian,,,,,,,...,,,,,,,,,,
1,5086013015,Beverly Grove,,6000 Sv Holdings Llc,,,,,,,...,,,,,,,,,,
2,5512016010,Beverly Grove,Scott,Strumwasser,,,5971.0,3Rd St,5971 W 3Rd St,,...,,,,,,,,,,
3,5511028030,Beverly Grove,,Hart Third Street Llc,,,8038.0,3Rd St,8038 W 3Rd St,,...,,,,,,,,,,
4,5511029032,Beverly Grove,,Triple Heights Llc,,,8124.0,3Rd St,8124 W 3Rd St,,...,,,,,,,,,,


In [26]:
# Narrow the total sales data down to the parcel, neighborhood and address columns
address_columns = [
    "PARCEL",
    "Neighborhood",
    "SITUS HOUSE NUMBER",
    "SITUS STREET NAME",
    "SITUS STREET ADDRESS",
    "SITUS CITY",
    "SITUS STATE",
    "SITUS ZIP CODE",
]
address_df = df.loc[:, address_columns]

In [27]:
# Discard every row with a missing value in any selected column, so that
# incomplete addresses do not cause issues during geolocation
address_df = address_df.dropna(axis="index", how="any")

In [28]:
# Cast the ZIP code column from float to int (returns a new frame with
# only that column converted)
address_df = address_df.astype({"SITUS ZIP CODE": int})

In [29]:
# Assemble one address string per row in the form
# "<street address>, <city>, <state> <zip code>"
state_and_zip = address_df["SITUS STATE"] + " " + address_df["SITUS ZIP CODE"].astype(str)
address_df["FULL ADDRESS"] = (
    address_df["SITUS STREET ADDRESS"]
    + ", "
    + address_df["SITUS CITY"]
    + ", "
    + state_and_zip
)
address_df.head()

Unnamed: 0,PARCEL,Neighborhood,SITUS HOUSE NUMBER,SITUS STREET NAME,SITUS STREET ADDRESS,SITUS CITY,SITUS STATE,SITUS ZIP CODE,FULL ADDRESS
2,5512016010,Beverly Grove,5971,3Rd St,5971 W 3Rd St,Los Angeles,CA,90036,"5971 W 3Rd St, Los Angeles, CA 90036"
3,5511028030,Beverly Grove,8038,3Rd St,8038 W 3Rd St,Los Angeles,CA,90048,"8038 W 3Rd St, Los Angeles, CA 90048"
4,5511029032,Beverly Grove,8124,3Rd St,8124 W 3Rd St,Los Angeles,CA,90048,"8124 W 3Rd St, Los Angeles, CA 90048"
5,5511031038,Beverly Grove,8304,3Rd St,8304 W 3Rd St,Los Angeles,CA,90048,"8304 W 3Rd St, Los Angeles, CA 90048"
6,5511014019,Beverly Grove,8401,3Rd St,8401 W 3Rd St,Los Angeles,CA,90048,"8401 W 3Rd St, Los Angeles, CA 90048"


In [7]:
# Split the sales by neighborhood to make smaller dataframes for the geolocator.
# Each subset is taken as an explicit copy: a "location" column is added to
# these frames later, and writing to an independent frame (rather than to a
# filtered slice of address_df) is what avoids pandas' SettingWithCopyWarning.
beverly_grove_df = address_df[address_df["Neighborhood"] == "Beverly Grove"].copy()
hollywood_df = address_df[address_df["Neighborhood"] == "Hollywood"].copy()
hollywood_hills_df = address_df[address_df["Neighborhood"] == "Hollywood Hills"].copy()
hollywood_hills_east_df = address_df[address_df["Neighborhood"] == "Hollywood Hills East"].copy()
los_feliz_df = address_df[address_df["Neighborhood"] == "Los Feliz"].copy()
west_hollywood_df = address_df[address_df["Neighborhood"] == "West Hollywood"].copy()

In [30]:
# Hancock Park subset, taken as an explicit copy so that adding a column to it
# later writes to an independent frame instead of a filtered slice of address_df
hancock_park_df = address_df[address_df["Neighborhood"] == "Hancock Park"].copy()

In [8]:
# Read the Google API key from the GOOGLE_API_KEY environment variable so the
# secret is never stored in the notebook. Set it before starting Jupyter,
# e.g. `export GOOGLE_API_KEY=<your key>`.
g_key = os.environ.get("GOOGLE_API_KEY", "").strip()
if not g_key:
    # Surface the problem here rather than at the first geocoding request
    warnings.warn("GOOGLE_API_KEY is not set; set it and re-run this cell before geocoding.")

In [9]:
# Create the GoogleV3 geocoder client, passing it the API key
locator = GoogleV3(
    api_key=g_key,
)

In [10]:
# Wrap the geocoder's lookup in a rate limiter with a one-second minimum delay
# between calls, so the service does not reject the requests
geocode = RateLimiter(
    locator.geocode,
    min_delay_seconds=1,
)

In [11]:
# Turn off pandas' chained-assignment check for the rest of the session
pd.set_option("mode.chained_assignment", None)

In [12]:
# Geocode every Beverly Grove address with the rate-limited geocoder and
# store the result in a new "location" column
beverly_grove_df = beverly_grove_df.assign(
    location=beverly_grove_df["FULL ADDRESS"].apply(geocode)
)

In [31]:
# Geocode every Hancock Park address into a "location" column
hancock_park_df = hancock_park_df.assign(
    location=hancock_park_df["FULL ADDRESS"].apply(geocode)
)

In [14]:
# Geocode every Hollywood address into a "location" column
hollywood_df = hollywood_df.assign(
    location=hollywood_df["FULL ADDRESS"].apply(geocode)
)

In [16]:
# Geocode every Hollywood Hills address into a "location" column
hollywood_hills_df = hollywood_hills_df.assign(
    location=hollywood_hills_df["FULL ADDRESS"].apply(geocode)
)

In [17]:
# Geocode every Hollywood Hills East address into a "location" column
hollywood_hills_east_df = hollywood_hills_east_df.assign(
    location=hollywood_hills_east_df["FULL ADDRESS"].apply(geocode)
)

In [18]:
# Geocode every Los Feliz address into a "location" column
los_feliz_df = los_feliz_df.assign(
    location=los_feliz_df["FULL ADDRESS"].apply(geocode)
)

In [19]:
# Geocode every West Hollywood address into a "location" column
west_hollywood_df = west_hollywood_df.assign(
    location=west_hollywood_df["FULL ADDRESS"].apply(geocode)
)

In [32]:
# Stack the per-neighborhood dataframes back into a single dataframe
frames = [
    beverly_grove_df,
    hancock_park_df,
    hollywood_df,
    hollywood_hills_df,
    hollywood_hills_east_df,
    los_feliz_df,
    west_hollywood_df,
]
final_address_df = pd.concat(objs=frames)

In [33]:
# Create the point (latitude, longitude, altitude) tuple from the location column
def point_or_none(loc):
    """Return ``loc.point`` as a plain tuple, or None when ``loc`` is falsy."""
    if not loc:
        return None
    return tuple(loc.point)


final_address_df['point'] = final_address_df['location'].apply(point_or_none)

In [34]:
# Split point column into latitude, longitude and altitude columns.
# Rows without a geocoding result hold None instead of a 3-tuple. Pad those to
# a NaN triple and name the columns explicitly so the intermediate frame always
# has exactly three columns -- including when the first row is missing or the
# frame is empty, cases where building it from the raw list would not.
missing_point = (float("nan"), float("nan"), float("nan"))
coordinates = pd.DataFrame(
    [
        point if isinstance(point, tuple) else missing_point
        for point in final_address_df['point']
    ],
    index=final_address_df.index,
    columns=['latitude', 'longitude', 'altitude'],
)
final_address_df[['latitude', 'longitude', 'altitude']] = coordinates

In [35]:
# Preview the first rows of the geocoded result
final_address_df.head(n=5)

Unnamed: 0,PARCEL,Neighborhood,SITUS HOUSE NUMBER,SITUS STREET NAME,SITUS STREET ADDRESS,SITUS CITY,SITUS STATE,SITUS ZIP CODE,FULL ADDRESS,location,point,latitude,longitude,altitude
4960,5512016010,Beverly Grove,5971,3Rd St,5971 W 3Rd St,Los Angeles,CA,90036,"5971 W 3Rd St, Los Angeles, CA 90036","(5971 W 3rd St, Los Angeles, CA 90036, USA, (3...","(34.0701147, -118.3505768, 0.0)",34.070115,-118.350577,0.0
4961,5511028030,Beverly Grove,8038,3Rd St,8038 W 3Rd St,Los Angeles,CA,90048,"8038 W 3Rd St, Los Angeles, CA 90048","(8038 W 3rd St, Los Angeles, CA 90048, USA, (3...","(34.0718417, -118.365178, 0.0)",34.071842,-118.365178,0.0
4962,5511029032,Beverly Grove,8124,3Rd St,8124 W 3Rd St,Los Angeles,CA,90048,"8124 W 3Rd St, Los Angeles, CA 90048","(8124 W 3rd St, Los Angeles, CA 90048, USA, (3...","(34.0720263, -118.3667876, 0.0)",34.072026,-118.366788,0.0
4963,5511031038,Beverly Grove,8304,3Rd St,8304 W 3Rd St,Los Angeles,CA,90048,"8304 W 3Rd St, Los Angeles, CA 90048","(8304 W 3rd St, Los Angeles, CA 90048, USA, (3...","(34.07256479999999, -118.370363, 0.0)",34.072565,-118.370363,0.0
4964,5511014019,Beverly Grove,8401,3Rd St,8401 W 3Rd St,Los Angeles,CA,90048,"8401 W 3Rd St, Los Angeles, CA 90048","(8401 W 3rd St, Los Angeles, CA 90048, USA, (3...","(34.0731793, -118.3732953, 0.0)",34.073179,-118.373295,0.0


In [36]:
# Write the geocoded addresses to address_loc.csv (relative to the working directory)
final_address_df.to_csv(path_or_buf='address_loc.csv')