In [1]:
import pandas as pd
import folium
import geopandas as gpd
from shapely.geometry import Point, MultiPolygon, Polygon

### Add the neighborhood information for the houses

In [47]:
#Load the data
houses = pd.read_csv('Data/houses.csv')
houses.head(2)

Unnamed: 0,latitude,longitude,address,property_type,home_size,lot_size,year_built,parcel_number,zoning,sex_offenders,...,natural_disasters,school_quality,url,bedrooms,bathrooms,date,sale_price,observation_date,cpi,age
0,33.854152,-118.386173,808 N Maria Ave,Single Family Residence,3434.0,6636.0,2004.0,7502006011,RBR-1,0,...,1,Excellent,https://www.realtytrac.com/property/ca/redondo...,4.0,3.0,2020-11-20,2475000.0,2020-11-01,260.4,16.0
1,34.046556,-118.36607,1623 S Ogden Dr,Single Family Residence,1858.0,6375.0,1930.0,5068025003,LAR1,10,...,1,Average,https://www.realtytrac.com/property/ca/los-ang...,3.0,2.0,2020-11-20,1095000.0,2020-11-01,260.4,90.0


In [48]:
hoods = gpd.read_file('Los Angeles Neighborhood Map.geojson')
hoods['polygon'] = hoods['geometry'].apply(lambda x: Polygon(x[0]))
hoods.head(2)

Unnamed: 0,external_i,name,location,latitude,slug_1,sqmi,display_na,set,slug,longitude,name_1,kind,type,geometry,polygon
0,acton,Acton,POINT(34.497355239240846 -118.16981019229348),-118.16981019229348,,39.3391089485,Acton L.A. County Neighborhood (Current),L.A. County Neighborhoods (Current),acton,34.49735523924085,,L.A. County Neighborhood (Current),unincorporated-area,"MULTIPOLYGON (((-118.20262 34.53899, -118.1894...","POLYGON ((-118.20262 34.53899, -118.18947 34.5..."
1,adams-normandie,Adams-Normandie,POINT(34.031461499124156 -118.30020800000011),-118.30020800000013,,0.805350187789,Adams-Normandie L.A. County Neighborhood (Curr...,L.A. County Neighborhoods (Current),adams-normandie,34.03146149912416,,L.A. County Neighborhood (Current),segment-of-a-city,"MULTIPOLYGON (((-118.30901 34.03741, -118.3004...","POLYGON ((-118.30901 34.03741, -118.30041 34.0..."


In [49]:
#Turn the lat, long info into a geometry point
gdf = gpd.GeoDataFrame(houses, geometry=[Point(xy) for xy in zip(houses.longitude, houses.latitude)])
gdf.head(2)

Unnamed: 0,latitude,longitude,address,property_type,home_size,lot_size,year_built,parcel_number,zoning,sex_offenders,...,school_quality,url,bedrooms,bathrooms,date,sale_price,observation_date,cpi,age,geometry
0,33.854152,-118.386173,808 N Maria Ave,Single Family Residence,3434.0,6636.0,2004.0,7502006011,RBR-1,0,...,Excellent,https://www.realtytrac.com/property/ca/redondo...,4.0,3.0,2020-11-20,2475000.0,2020-11-01,260.4,16.0,POINT (-118.38617 33.85415)
1,34.046556,-118.36607,1623 S Ogden Dr,Single Family Residence,1858.0,6375.0,1930.0,5068025003,LAR1,10,...,Average,https://www.realtytrac.com/property/ca/los-ang...,3.0,2.0,2020-11-20,1095000.0,2020-11-01,260.4,90.0,POINT (-118.36607 34.04656)


In [35]:
def find_neighborhood(coordinates):
    for index, hood in hoods.iterrows():
        if hood['polygon'].contains(coordinates):
            return hood['name']
        else:
            continue
            
            

In [36]:
gdf['neighborhood'] = gdf['geometry'].apply(find_neighborhood)
gdf.head()

Unnamed: 0,latitude,longitude,address,property_type,home_size,lot_size,year_built,parcel_number,zoning,sex_offenders,...,url,bedrooms,bathrooms,date,sale_price,observation_date,cpi,age,geometry,neighborhood
0,33.854152,-118.386173,808 N Maria Ave,Single Family Residence,3434.0,6636.0,2004.0,7502006011,RBR-1,0,...,https://www.realtytrac.com/property/ca/redondo...,4.0,3.0,2020-11-20,2475000.0,2020-11-01,260.4,16.0,POINT (-118.38617 33.85415),Redondo Beach
1,34.046556,-118.36607,1623 S Ogden Dr,Single Family Residence,1858.0,6375.0,1930.0,5068025003,LAR1,10,...,https://www.realtytrac.com/property/ca/los-ang...,3.0,2.0,2020-11-20,1095000.0,2020-11-01,260.4,90.0,POINT (-118.36607 34.04656),Mid-City
2,33.891158,-118.190228,15613 S Williams Ave,Single Family Residence,910.0,4323.0,1947.0,6181033028,LCR1YY,11,...,https://www.realtytrac.com/property/ca/compton...,2.0,1.0,2020-11-20,425000.0,2020-11-01,260.4,73.0,POINT (-118.19023 33.89116),
3,33.810718,-118.111017,3004 Snowden Ave,Single Family Residence,2022.0,6277.0,1981.0,7191027001,LBR1N,5,...,https://www.realtytrac.com/property/ca/long-be...,3.0,3.0,2020-11-20,755000.0,2020-11-01,260.4,39.0,POINT (-118.11102 33.81072),Long Beach
4,34.119959,-118.02513,185 E Norman Ave,Single Family Residence,4010.0,11301.0,2012.0,5789001017,ARR1YY,0,...,https://www.realtytrac.com/property/ca/arcadia...,5.0,6.0,2020-11-20,2100000.0,2020-11-01,260.4,8.0,POINT (-118.02513 34.11996),Arcadia


In [None]:
print(f'There are  {gdf.neighborhood.isnull().sum()} out of {gdf.shape[0]} missing classifications.')
gdf.drop('geometry', axis=1, inplace=True)
gdf['neighborhood'].fillna('Missing', inplace=True)
gdf.to_csv('Data/houses_neighborhood_info.csv', index=False)
gdf.head()

### Create a DataFrame for the Basic Search component in the app

In [None]:
#Create a dataframe for the markers in the map with Average
#Sale Price per neighborhood

neighborhoods = pd.read_csv('Data/la_neighborhoods.csv')
houses = pd.read_csv('Data/houses_neighborhood_info.csv')

#Determine the average price per neighborhood
avg = houses.groupby('neighborhood')['sale_price'].mean()
avg = pd.DataFrame(avg).reset_index()

df = pd.merge(neighborhoods, avg, on='neighborhood')
df['sale_price'] = df['sale_price'].apply(lambda x: '${:,.2f}'.format(x))

df.to_csv('Data/Neighborhoods_final.csv', index=False)