In [1]:
# Import Python dependencies
import os

import pandas as pd
from geopy.extra.rate_limiter import RateLimiter
from geopy.geocoders import GoogleV3

In [57]:
# Load data/missing_address.csv into a DataFrame and preview its first five rows
df = pd.read_csv(filepath_or_buffer="data/missing_address.csv")
df.head(n=5)

Unnamed: 0,Order,Parcel,Neighborhood,House Type,Address,Lat,Lng,Did it sell?,Prediction,Actual,Prediction Result
0,14,4333013059,Beverly Grove,Condo,"811 S Bedford St #101, Los Angeles, CA 90035",,,0,1,0.0,Prediction Incorrect - Not Sold
1,24,4332021069,Beverly Grove,Condo,"1046 S Bedford St #1, Los Angeles, CA 90035",,,0,1,0.0,Prediction Incorrect - Not Sold
2,28,4335003031,Beverly Grove,Condo,"146 N Almont Dr #2, West Hollywood, CA 90048",,,0,1,0.0,Prediction Incorrect - Not Sold
3,35,4335014047,Beverly Grove,Condo,"8963 Burton Way #101, Los Angeles, CA 90048",,,0,1,0.0,Prediction Incorrect - Not Sold
4,77,4335008231,Beverly Grove,Condo,"100 S Doheny Dr #101, Los Angeles, CA 90048",,,0,1,0.0,Prediction Incorrect - Not Sold


In [58]:
# Keep only the identifying columns needed for geocoding: parcel, neighborhood and address
address_df = df[["Parcel", "Neighborhood", "Address"]]

In [59]:
# Preview the first five rows of the trimmed address table
address_df.head(n=5)

Unnamed: 0,Parcel,Neighborhood,Address
0,4333013059,Beverly Grove,"811 S Bedford St #101, Los Angeles, CA 90035"
1,4332021069,Beverly Grove,"1046 S Bedford St #1, Los Angeles, CA 90035"
2,4335003031,Beverly Grove,"146 N Almont Dr #2, West Hollywood, CA 90048"
3,4335014047,Beverly Grove,"8963 Burton Way #101, Los Angeles, CA 90048"
4,4335008231,Beverly Grove,"100 S Doheny Dr #101, Los Angeles, CA 90048"


In [60]:
# Split the addresses by neighborhood to make smaller dataframes for the geolocator.
# Each subset is an explicit copy because a "location" column is added to it later;
# writing to a plain filtered slice of address_df is what triggers pandas'
# chained-assignment warning.
# NOTE: rows whose Neighborhood is not one of the seven names below end up in no subset.
beverly_grove_df = address_df[address_df["Neighborhood"] == "Beverly Grove"].copy()
hancock_park_df = address_df[address_df["Neighborhood"] == "Hancock Park"].copy()
hollywood_df = address_df[address_df["Neighborhood"] == "Hollywood"].copy()
hollywood_hills_df = address_df[address_df["Neighborhood"] == "Hollywood Hills"].copy()
hollywood_hills_east_df = address_df[address_df["Neighborhood"] == "Hollywood Hills East"].copy()
los_feliz_df = address_df[address_df["Neighborhood"] == "Los Feliz"].copy()
west_hollywood_df = address_df[address_df["Neighborhood"] == "West Hollywood"].copy()

In [61]:
# Read the Google API key from the GOOGLE_API_KEY environment variable so the
# secret is never stored in the notebook. Export it before starting the kernel.
g_key = os.environ.get("GOOGLE_API_KEY")
if not g_key:
    print("GOOGLE_API_KEY is not set; export it before re-running the geocoding cells.")

In [62]:
# Build the Google geocoding client, authenticated with the key held in g_key
locator = GoogleV3(api_key=g_key)

In [63]:
# Wrap the geocoder so consecutive calls are spaced at least one second apart,
# which keeps the service from rejecting requests
geocode = RateLimiter(func=locator.geocode, min_delay_seconds=1)

In [64]:
# Turn off pandas' chained-assignment check for the rest of the session
pd.set_option("mode.chained_assignment", None)

In [65]:
# Create the location column by geocoding every address.
# .assign returns a new frame, so the filtered slice is never written to in place.
beverly_grove_df = beverly_grove_df.assign(location=beverly_grove_df["Address"].apply(geocode))

In [66]:
# Geocode every Hancock Park address into a new "location" column.
# .assign returns a new frame, so the filtered slice is never written to in place.
hancock_park_df = hancock_park_df.assign(location=hancock_park_df["Address"].apply(geocode))

In [67]:
# Geocode every Hollywood address into a new "location" column.
# .assign returns a new frame, so the filtered slice is never written to in place.
hollywood_df = hollywood_df.assign(location=hollywood_df["Address"].apply(geocode))

In [68]:
# Geocode every Hollywood Hills address into a new "location" column.
# .assign returns a new frame, so the filtered slice is never written to in place.
hollywood_hills_df = hollywood_hills_df.assign(location=hollywood_hills_df["Address"].apply(geocode))

In [69]:
# Geocode every Hollywood Hills East address into a new "location" column.
# .assign returns a new frame, so the filtered slice is never written to in place.
hollywood_hills_east_df = hollywood_hills_east_df.assign(location=hollywood_hills_east_df["Address"].apply(geocode))

In [70]:
# Geocode every Los Feliz address into a new "location" column.
# .assign returns a new frame, so the filtered slice is never written to in place.
los_feliz_df = los_feliz_df.assign(location=los_feliz_df["Address"].apply(geocode))

In [71]:
# Geocode every West Hollywood address into a new "location" column.
# .assign returns a new frame, so the filtered slice is never written to in place.
west_hollywood_df = west_hollywood_df.assign(location=west_hollywood_df["Address"].apply(geocode))

In [72]:
# Stack the per-neighborhood frames back into a single table (original row labels are kept)
frames = [
    beverly_grove_df,
    hancock_park_df,
    hollywood_df,
    hollywood_hills_df,
    hollywood_hills_east_df,
    los_feliz_df,
    west_hollywood_df,
]
final_address_df = pd.concat(objs=frames)

In [73]:
# Turn each geocoding result into a tuple built from its .point attribute;
# rows with no result (falsy location) get None instead
final_address_df['point'] = final_address_df['location'].map(
    lambda loc: None if not loc else tuple(loc.point)
)

In [74]:
# Split point column into latitude, longitude and altitude columns.
# Rows without a point (address could not be geocoded) are padded to a triple of
# None so every row has three fields. Without this, pandas cannot build three
# columns when the first row is missing or the frame is empty, and the
# multi-column assignment fails. Column names are passed explicitly for the
# same reason.
final_address_df[['latitude', 'longitude', 'altitude']] = pd.DataFrame(
    [pt if isinstance(pt, tuple) else (None, None, None) for pt in final_address_df['point']],
    index=final_address_df.index,
    columns=['latitude', 'longitude', 'altitude'],
)

In [75]:
# Preview the first five geocoded rows
final_address_df.head(n=5)

Unnamed: 0,Parcel,Neighborhood,Address,location,point,latitude,longitude,altitude
0,4333013059,Beverly Grove,"811 S Bedford St #101, Los Angeles, CA 90035","(811 S Bedford St APT 101, Los Angeles, CA 900...","(34.0626041, -118.3806122, 0.0)",34.062604,-118.380612,0.0
1,4332021069,Beverly Grove,"1046 S Bedford St #1, Los Angeles, CA 90035","(1046 S Bedford St APT 1, Los Angeles, CA 9003...","(34.0579256, -118.3800809, 0.0)",34.057926,-118.380081,0.0
2,4335003031,Beverly Grove,"146 N Almont Dr #2, West Hollywood, CA 90048","(146 N Almont Dr APT 2, West Hollywood, CA 900...","(34.076612, -118.3874706, 0.0)",34.076612,-118.387471,0.0
3,4335014047,Beverly Grove,"8963 Burton Way #101, Los Angeles, CA 90048","(8963 Burton Way APT 101, Los Angeles, CA 9004...","(34.0726158, -118.3872768, 0.0)",34.072616,-118.387277,0.0
4,4335008231,Beverly Grove,"100 S Doheny Dr #101, Los Angeles, CA 90048","(100 S Doheny Dr #101, Los Angeles, CA 90048, ...","(34.0746665, -118.3892218, 0.0)",34.074666,-118.389222,0.0


In [76]:
# Write the geocoded table (row index included) to missing_address_loc.csv in the working directory
final_address_df.to_csv(path_or_buf='missing_address_loc.csv')