In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import geopandas
import folium
from sklearn import preprocessing
from folium.plugins import HeatMap

In [2]:
# df_nyc = pd.get_dummies(df_nyc, columns = ['room_type', 'neighbourhood'])
# df_nyc = df_nyc[df_nyc['availability_365'] != 0]

In [3]:
# reading and cleaning data
df_nyc = pd.read_csv('airbnb_nyc.csv')

df_nyc = df_nyc.drop(columns = ['calculated_host_listings_count', 'last_review', 'reviews_per_month', 'host_name'])

df_nyc.dropna(inplace = True)
df_nyc = df_nyc.reset_index()

In [4]:
percentile_05 = df_nyc['price'].quantile(.05)
percentile_95 = df_nyc['price'].quantile(.95)

df_nyc = df_nyc[(percentile_05 <= df_nyc['price']) & (df_nyc['price'] <= percentile_95)]
df_nyc

Unnamed: 0,index,id,name,host_id,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,availability_365
0,0,2539,Clean & quiet apt home by the park,2787,Brooklyn,Kensington,40.64749,-73.97237,Private room,149,1,9,365
1,1,2595,Skylit Midtown Castle,2845,Manhattan,Midtown,40.75362,-73.98377,Entire home/apt,225,1,45,355
2,2,3647,THE VILLAGE OF HARLEM....NEW YORK !,4632,Manhattan,Harlem,40.80902,-73.94190,Private room,150,3,0,365
3,3,3831,Cozy Entire Floor of Brownstone,4869,Brooklyn,Clinton Hill,40.68514,-73.95976,Entire home/apt,89,1,270,194
4,4,5022,Entire Apt: Spacious Studio/Loft by central park,7192,Manhattan,East Harlem,40.79851,-73.94399,Entire home/apt,80,10,9,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
48874,48890,36484665,Charming one bedroom - newly renovated rowhouse,8232441,Brooklyn,Bedford-Stuyvesant,40.67853,-73.94995,Private room,70,2,0,9
48875,48891,36485057,Affordable room in Bushwick/East Williamsburg,6570630,Brooklyn,Bushwick,40.70184,-73.93317,Private room,40,4,0,36
48876,48892,36485431,Sunny Studio at Historical Neighborhood,23492952,Manhattan,Harlem,40.81475,-73.94867,Entire home/apt,115,10,0,27
48877,48893,36485609,43rd St. Time Square-cozy single bed,30985759,Manhattan,Hell's Kitchen,40.75751,-73.99112,Shared room,55,1,0,2


In [6]:
# gdf_nyc.set_geometry('geometry')

In [7]:
#gdf_nyc.plot(column = 'price', legend = True, markersize = 3)

In [8]:
# base = nyc.plot(color = 'white', edgecolor = 'black')
# gdf_nyc.plot(ax = base, column = 'price', marker = '^', markersize = 5, legend = True)

In [9]:
neighborhoods = geopandas.read_file('ZillowNeighborhoods-NY.shp')

In [10]:
nyc = pd.DataFrame()

boroughs = ['Queens', 'Kings', 'New York', 'Bronx', 'Richmond']

for borough in boroughs:
    nyc = pd.concat([nyc, neighborhoods[neighborhoods['County'] == borough]])
    
nyc = nyc.reset_index(drop = True)
nyc = nyc[['Name', 'geometry']].copy()
nyc

Unnamed: 0,Name,geometry
0,Rego Park,"POLYGON ((-73.85630 40.72303, -73.85654 40.722..."
1,Belle Harbor,"POLYGON ((-73.85618 40.56811, -73.85628 40.568..."
2,Howard Beach,"POLYGON ((-73.85609 40.65037, -73.85675 40.651..."
3,Breezy Point,"POLYGON ((-73.90963 40.56553, -73.91017 40.564..."
4,Neponsit,"POLYGON ((-73.85991 40.56701, -73.86071 40.566..."
...,...,...
273,Richmond Town,"POLYGON ((-74.12087 40.57049, -74.12191 40.569..."
274,Meiers Corners,"POLYGON ((-74.13093 40.61271, -74.13093 40.612..."
275,Bloomfield,"POLYGON ((-74.16888 40.62113, -74.16885 40.621..."
276,Richmond Valley,"POLYGON ((-74.22507 40.52073, -74.22380 40.518..."


In [11]:
# collecting averages for neighborhood
avg_price = df_nyc.groupby('neighbourhood').mean()['price']
max_price = df_nyc.groupby('neighbourhood').max()['price']

# avg_price
nyc = nyc.merge(avg_price, left_on = 'Name', right_on = 'neighbourhood')
nyc = nyc.merge(max_price, left_on = 'Name', right_on = 'neighbourhood')
# nyc
# neigh.first()

In [12]:
nyc = nyc.rename(columns = {'price_x': 'avg_price', 'price_y': 'max_price'})
nyc = nyc.round({'avg_price': 2, 'max_price': 2})
nyc

Unnamed: 0,Name,geometry,avg_price,max_price
0,Rego Park,"POLYGON ((-73.85630 40.72303, -73.85654 40.722...",91.88,300
1,Belle Harbor,"POLYGON ((-73.85618 40.56811, -73.85628 40.568...",171.50,350
2,Howard Beach,"POLYGON ((-73.85609 40.65037, -73.85675 40.651...",115.40,250
3,Breezy Point,"POLYGON ((-73.90963 40.56553, -73.91017 40.564...",213.33,250
4,Neponsit,"POLYGON ((-73.85991 40.56701, -73.86071 40.566...",274.67,350
...,...,...,...,...
180,West Brighton,"POLYGON ((-74.10905 40.64646, -74.10884 40.645...",83.12,200
181,Castleton Corners,"POLYGON ((-74.12047 40.62204, -74.12012 40.621...",139.75,299
182,Arden Heights,"POLYGON ((-74.18997 40.55472, -74.18768 40.554...",67.25,83
183,Eltingville,"POLYGON ((-74.16266 40.56059, -74.16234 40.559...",141.67,299


In [13]:
# gdf_nyc.to_crs('EPSG:4326')

# df_nyc = df_nyc.merge(nyc, left_on = 'neighbourhood', right_on = 'Name')
# df_nyc = df_nyc.drop(columns=['Name', 'index'])
# points = geopandas.sjoin(left_df=gdf_nyc, right_df=nyc, how='left')
# points

In [19]:
# df_nyc = df_nyc.set_geometry('geometry')
airbnbs = []
for idx, row in df_nyc.iterrows():
    airbnbs.append((row['latitude'], row['longitude']))
                   
airbnbs

[(40.64749, -73.97237),
 (40.75362, -73.98377),
 (40.80902, -73.9419),
 (40.68514, -73.95976),
 (40.79851, -73.94399),
 (40.74767, -73.975),
 (40.68688, -73.95596),
 (40.76489, -73.98493),
 (40.80178, -73.96723),
 (40.71344, -73.99037),
 (40.80316, -73.96545),
 (40.76076, -73.98867),
 (40.66829, -73.98779),
 (40.79826, -73.96113),
 (40.7353, -74.00525),
 (40.70837, -73.95352),
 (40.69169, -73.97185),
 (40.74192, -73.99501),
 (40.67592, -73.94694),
 (40.79685, -73.94872),
 (40.71842, -73.95718),
 (40.68069, -73.97706),
 (40.67989, -73.97798),
 (40.68001, -73.97865),
 (40.68371, -73.94028),
 (40.65599, -73.97519),
 (40.86754, -73.92639),
 (40.76715, -73.98533),
 (40.86482, -73.92106),
 (40.7292, -73.98542),
 (40.82245, -73.95104),
 (40.81305, -73.95466),
 (40.72219, -73.93762),
 (40.8213, -73.95318),
 (40.6831, -73.95473),
 (40.66869, -73.9878),
 (40.70186, -73.92745),
 (40.63702, -73.96327),
 (40.71401, -73.98917),
 (40.7229, -73.98199),
 (40.66278, -73.97966),
 (40.69673, -73.97584),
 

In [15]:
# joined = geopandas.sjoin(left_df=gdf_nyc, right_df=nyc, how='left')

In [37]:
neighborhoods = geopandas.read_file('ZillowNeighborhoods-NY.shp')

In [38]:
nyc = pd.DataFrame()

boroughs = ['Queens', 'Kings', 'New York', 'Bronx', 'Richmond']

for borough in boroughs:
    nyc = pd.concat([nyc, neighborhoods[neighborhoods['County'] == borough]])
    
nyc = nyc.reset_index(drop = True)
nyc = nyc[['Name', 'geometry']].copy()
nyc

Unnamed: 0,Name,geometry
0,Rego Park,"POLYGON ((-73.85630 40.72303, -73.85654 40.722..."
1,Belle Harbor,"POLYGON ((-73.85618 40.56811, -73.85628 40.568..."
2,Howard Beach,"POLYGON ((-73.85609 40.65037, -73.85675 40.651..."
3,Breezy Point,"POLYGON ((-73.90963 40.56553, -73.91017 40.564..."
4,Neponsit,"POLYGON ((-73.85991 40.56701, -73.86071 40.566..."
...,...,...
273,Richmond Town,"POLYGON ((-74.12087 40.57049, -74.12191 40.569..."
274,Meiers Corners,"POLYGON ((-74.13093 40.61271, -74.13093 40.612..."
275,Bloomfield,"POLYGON ((-74.16888 40.62113, -74.16885 40.621..."
276,Richmond Valley,"POLYGON ((-74.22507 40.52073, -74.22380 40.518..."


In [44]:
neighborhood_map = {'Allerton': 'Williamsbridge', 'Battery Park City': 'Battery Park', 'Bay Terrace, Staten Island': 'Bay Terrace',
                   'Bayswater': 'Far Rockaway', 'Bedford-Stuyvesant': 'Bedford Stuyvesant', 'Bronxdale': 'Van Nest',
                   "Bull's Head": 'Bulls Head', 'Civic Center': 'Lower East Side', 'Claremont Village': 'South Bronx',
                   'Columbia St': 'Columbia Street Waterfront District', 'Concord': 'Grasmere - Concord',
                   'Concourse Village': 'Concourse', 'Cypress Hills': 'East New York', 'Ditmars Steinway': 'Astoria',
                   'Douglaston': 'Douglaston-Little Neck', 'Downtown Brooklyn': 'Downtown', 'East Morrisania': 'South Bronx',
                   'Edenwald': 'Eastchester', 'Edgemere': 'Far Rockaway', "Hell's Kitchen": 'Clinton', 'Highbridge': 'High Bridge',
                   'Howland Hook': 'Port Ivory', 'Kips Bay': 'Gramercy', 'Little Neck': 'Douglaston-Little Neck',
                   'Long Island City': 'Hunters Point', 'Mariners Harbor': "Mariner's Harbor", 'Morrisania': 'South Bronx',
                   'Mount Eden': 'Concourse', 'Mount Hope': 'Tremont', 'Nolita': 'Little Italy', 'Olinville': 'Williamsbridge',
                   'Prospect-Lefferts Gardens': 'Prospect Lefferts Gardens', 'Richmondtown': 'Richmond Town', 
                    'Schuylerville': 'Pelham Bay', 'Sea Gate':'Coney Island', 'South Slope': 'Greenwood', 
                    'Theater District': 'Midtown', 'Throgs Neck': 'Throggs Neck', 'Two Bridges': 'Lower East Side',
                    'Westchester Square': 'Westchester Heights'}

df_nyc = df_nyc.replace(neighborhood_map) 

In [48]:
gdf_nyc = geopandas.GeoDataFrame(
    df_nyc, geometry=geopandas.points_from_xy(df_nyc.longitude, df_nyc.latitude, crs = 'EPSG:4269'))

In [50]:
m = nyc.explore(
    column = 'avg_price',
    tooltip = ['Name', 'avg_price'],
#     popup = ['NAME'],
    legend = True,
    name = 'Average AirBnB Price'
)
# # airbnbs.explore(m = m, color = 'blue', marker_kwds = dict(radius = 3, fill = True), name = 'Airbnbs')

m

KeyError: 'avg_price'

In [51]:
gdf_nyc.to_crs('EPSG:4326')

df_nyc = df_nyc.merge(nyc, left_on = 'neighbourhood', right_on = 'Name')
df_nyc = df_nyc.drop(columns=['Name', 'index'])
points = geopandas.sjoin(left_df=gdf_nyc, right_df=nyc, how='left')
points

KeyError: "['index'] not found in axis"