In [1]:
import pandas as pd
import folium
import geopandas as gpd
from shapely.geometry import Point, MultiPolygon, Polygon

### Add the neighborhood information for the houses

In [2]:
#Load the data
houses = pd.read_csv('Data/houses.csv')
houses.head(2)

Unnamed: 0,latitude,longitude,address,property_type,home_size,lot_size,year_built,parcel_number,zoning,sex_offenders,...,url,bedrooms,bathrooms,date,sale_price,observation_date,year,cpi,sale_price_cpi,age
0,34.190532,-118.145081,2694 Raymond Ave,Single Family Residence,1199.0,7207.0,1950.0,5835035008,LCR175,5,...,https://www.realtytrac.com/property/ca/altaden...,3.0,2.0,2020-11-01,191.143369,2020-11-01,2020,260.4,13.823924,70.0
1,33.899958,-117.990815,14520 Dunnet Ave,Single Family Residence,1254.0,6033.0,1957.0,8065036010,LMR1,2,...,https://www.realtytrac.com/property/ca/la-mira...,3.0,3.0,2020-11-01,180.557648,2020-11-01,2020,260.4,13.435637,63.0


In [3]:
hoods = gpd.read_file('Los Angeles Neighborhood Map.geojson')
hoods['polygon'] = hoods['geometry'].apply(lambda x: Polygon(x[0]))
hoods.head(2)

Unnamed: 0,external_i,name,location,latitude,slug_1,sqmi,display_na,set,slug,longitude,name_1,kind,type,geometry,polygon
0,acton,Acton,POINT(34.497355239240846 -118.16981019229348),-118.16981019229348,,39.3391089485,Acton L.A. County Neighborhood (Current),L.A. County Neighborhoods (Current),acton,34.49735523924085,,L.A. County Neighborhood (Current),unincorporated-area,"MULTIPOLYGON (((-118.20262 34.53899, -118.1894...","POLYGON ((-118.20262 34.53899, -118.18947 34.5..."
1,adams-normandie,Adams-Normandie,POINT(34.031461499124156 -118.30020800000011),-118.30020800000013,,0.805350187789,Adams-Normandie L.A. County Neighborhood (Curr...,L.A. County Neighborhoods (Current),adams-normandie,34.03146149912416,,L.A. County Neighborhood (Current),segment-of-a-city,"MULTIPOLYGON (((-118.30901 34.03741, -118.3004...","POLYGON ((-118.30901 34.03741, -118.30041 34.0..."


In [4]:
#Turn the lat, long info into a geometry point
gdf = gpd.GeoDataFrame(houses, geometry=[Point(xy) for xy in zip(houses.longitude, houses.latitude)])
gdf.head(2)

Unnamed: 0,latitude,longitude,address,property_type,home_size,lot_size,year_built,parcel_number,zoning,sex_offenders,...,bedrooms,bathrooms,date,sale_price,observation_date,year,cpi,sale_price_cpi,age,geometry
0,34.190532,-118.145081,2694 Raymond Ave,Single Family Residence,1199.0,7207.0,1950.0,5835035008,LCR175,5,...,3.0,2.0,2020-11-01,191.143369,2020-11-01,2020,260.4,13.823924,70.0,POINT (-118.14508 34.19053)
1,33.899958,-117.990815,14520 Dunnet Ave,Single Family Residence,1254.0,6033.0,1957.0,8065036010,LMR1,2,...,3.0,3.0,2020-11-01,180.557648,2020-11-01,2020,260.4,13.435637,63.0,POINT (-117.99081 33.89996)


In [6]:
#This function will classify every house and label it an appropriate neighborhood
def find_neighborhood(coordinates):
    for index, hood in hoods.iterrows():
        if hood['polygon'].contains(coordinates):
            return hood['name']
        else:
            continue 

gdf['neighborhood'] = gdf['geometry'].apply(find_neighborhood)
gdf.head()

Unnamed: 0,latitude,longitude,address,property_type,home_size,lot_size,year_built,parcel_number,zoning,sex_offenders,...,bathrooms,date,sale_price,observation_date,year,cpi,sale_price_cpi,age,geometry,neighborhood
0,34.190532,-118.145081,2694 Raymond Ave,Single Family Residence,1199.0,7207.0,1950.0,5835035008,LCR175,5,...,2.0,2020-11-01,191.143369,2020-11-01,2020,260.4,13.823924,70.0,POINT (-118.14508 34.19053),Altadena
1,33.899958,-117.990815,14520 Dunnet Ave,Single Family Residence,1254.0,6033.0,1957.0,8065036010,LMR1,2,...,3.0,2020-11-01,180.557648,2020-11-01,2020,260.4,13.435637,63.0,POINT (-117.99081 33.89996),La Mirada
2,34.219352,-118.205306,5195 La Canada Blvd,Single Family Residence,2075.0,16153.0,1955.0,5811014021,LFR120,0,...,2.0,2020-11-01,197.083809,2020-11-01,2020,260.4,14.037117,65.0,POINT (-118.20531 34.21935),La Canada Flintridge
3,33.894516,-118.245489,1365 W 152nd St,Single Family Residence,912.0,5241.0,1911.0,6142024036,CORL,22,...,1.0,2020-11-01,169.442364,2020-11-01,2020,260.4,13.015466,109.0,POINT (-118.24549 33.89452),Compton
4,34.16245,-117.901663,27 Rock Springs Way,Single Family Residence,2580.0,6360.0,2003.0,8684049004,AZRA,2,...,3.0,2020-11-01,184.752441,2020-11-01,2020,260.4,13.59083,17.0,POINT (-117.90166 34.16245),Azusa


In [7]:
print(f'There are  {gdf.neighborhood.isnull().sum()} out of {gdf.shape[0]} missing classifications.')
gdf.drop('geometry', axis=1, inplace=True)
gdf['neighborhood'].fillna('Missing', inplace=True)
gdf.to_csv('Data/houses_neighborhood_info.csv', index=False)
gdf.head()

There are  555 out of 5947 missing classifications.


Unnamed: 0,latitude,longitude,address,property_type,home_size,lot_size,year_built,parcel_number,zoning,sex_offenders,...,bedrooms,bathrooms,date,sale_price,observation_date,year,cpi,sale_price_cpi,age,neighborhood
0,34.190532,-118.145081,2694 Raymond Ave,Single Family Residence,1199.0,7207.0,1950.0,5835035008,LCR175,5,...,3.0,2.0,2020-11-01,191.143369,2020-11-01,2020,260.4,13.823924,70.0,Altadena
1,33.899958,-117.990815,14520 Dunnet Ave,Single Family Residence,1254.0,6033.0,1957.0,8065036010,LMR1,2,...,3.0,3.0,2020-11-01,180.557648,2020-11-01,2020,260.4,13.435637,63.0,La Mirada
2,34.219352,-118.205306,5195 La Canada Blvd,Single Family Residence,2075.0,16153.0,1955.0,5811014021,LFR120,0,...,3.0,2.0,2020-11-01,197.083809,2020-11-01,2020,260.4,14.037117,65.0,La Canada Flintridge
3,33.894516,-118.245489,1365 W 152nd St,Single Family Residence,912.0,5241.0,1911.0,6142024036,CORL,22,...,2.0,1.0,2020-11-01,169.442364,2020-11-01,2020,260.4,13.015466,109.0,Compton
4,34.16245,-117.901663,27 Rock Springs Way,Single Family Residence,2580.0,6360.0,2003.0,8684049004,AZRA,2,...,4.0,3.0,2020-11-01,184.752441,2020-11-01,2020,260.4,13.59083,17.0,Azusa


### Create a DataFrame for the Basic Search component in the app

In [17]:
#Create a dataframe that will be used by the app
#Sale Price per neighborhood

avg = gdf.groupby('neighborhood')[['home_size', 'lot_size', 'sex_offenders', 
                                   'enviornmental_hazards', 'age', 'sale_price']].mean()

avg['sale_price'] = avg['sale_price'].apply(lambda x: '${:,.2f}'.format(x))

avg.to_csv('Data/Neighborhoods_final.csv')