In [None]:
# data manipulation
import pandas as pd
import numpy as np

# Chloropleth Map
import plotly.express as px
import json
import math
import plotly.express as px
from area import area
from pandas_geojson import to_geojson
from shapely.geometry import shape, Point
from geopandas import points_from_xy
import geopandas as gpd

In [None]:
# Load the NYC borough boundaries GeoJSON into a plain dict.
# A context manager closes the file handle as soon as parsing finishes
# (the bare `json.load(open(...))` form left it open until garbage collection).
with open("new-york-city-boroughs.geojson") as borough_geojson_file:
    nyc_boroughs = json.load(borough_geojson_file)
# Airbnb listings dataset: https://www.kaggle.com/datasets/dgomonov/new-york-city-airbnb-open-data

In [None]:
# Load the Airbnb listings from AB_NYC_2019.csv
nyc_air_bnb_data = pd.read_csv('AB_NYC_2019.csv')

# Build the per-borough summary in one chain:
#   1. mean nightly price and number of listings per `neighbourhood_group`
#   2. presentation-friendly column names
#   3. numeric columns rounded to whole numbers
avg_listing_price = (
    nyc_air_bnb_data
    .groupby('neighbourhood_group')
    .agg({'price': 'mean', 'id': 'count'})
    .reset_index()
    .rename(
        columns={
            "price": "Average Listing Price Per Night",
            "id": "Total Listings",
            "neighbourhood_group": "NYC Borough",
        }
    )
    .round(0)
)

In [None]:
# Re-read the borough boundaries GeoJSON; this rebinds `nyc_boroughs` to a
# freshly parsed dict. The context manager closes the file handle right after
# parsing instead of leaving it open.
with open("new-york-city-boroughs.geojson") as borough_geojson_file:
    nyc_boroughs = json.load(borough_geojson_file)

In [None]:
# Lookup table: borough name -> cartodb_id, filled from the GeoJSON features
nycmap_id_map = {}

for feature in nyc_boroughs["features"]:
    props = feature["properties"]

    # Copy the cartodb_id up to a top-level "id" key on the feature; the
    # choropleth cell refers to boroughs through an "id" column (locations="id").
    feature["id"] = props["cartodb_id"]

    # Remember which id belongs to this borough name
    nycmap_id_map[props["name"]] = feature["id"]

# Attach the matching GeoJSON id to every row of the borough summary.
# A borough name missing from the GeoJSON raises KeyError here.
avg_listing_price["id"] = avg_listing_price["NYC Borough"].apply(
    lambda borough_name: nycmap_id_map[borough_name]
)

In [None]:
# Draw the borough-level choropleth: one polygon per borough, colored by the
# average nightly listing price.
fig = px.choropleth_mapbox(
    avg_listing_price,  # Dataframe containing the data to be plotted
    geojson=nyc_boroughs,  # Parsed GeoJSON with the borough boundaries
    locations="id",  # Dataframe column holding the borough IDs
    color="Average Listing Price Per Night",  # Column that drives the fill color
    color_continuous_scale="viridis",  # Color scale to use
    mapbox_style="carto-positron",  # Base map style
    zoom=7.7,  # Initial zoom level
    center={"lat": 40.7, "lon": -73.9},  # Initial map center coordinates
    opacity=0.7,  # Opacity of the choropleth layer
    hover_name="NYC Borough",  # Column shown as the hover title
    hover_data=["Average Listing Price Per Night", "Total Listings"],  # Extra hover columns
    title="Average Listing Price Per Night by NYC Borough",  # Title of the plot
    labels={"Average Listing Price Per Night": "Average Listing Price Per Night ($)"},  # Add $ sign to legend
)

# The initial view is set by `zoom`/`center` above. The earlier
# `fig.update_geos(fitbounds="locations", ...)` call was dropped because it
# configures `layout.geo`, which a mapbox-based figure does not render.

# Leave room around the map
fig.update_layout(margin={"r": 100, "t": 25, "l": 100, "b": 100})

# Show the plot
fig.show()


In [None]:
# Load the NYC neighborhood boundaries GeoJSON into a plain dict.
# The context manager closes the file handle as soon as parsing finishes.
with open("NYC Neighborhoods.geojson") as nhood_geojson_file:
    nyc_nhoods = json.load(nhood_geojson_file)

# Later cells read this object under the name `nycmap_nhoods`; bind that name
# as well so they do not raise NameError on a fresh kernel.
nycmap_nhoods = nyc_nhoods

# Per-neighborhood summary: mean nightly price and number of listings,
# rounded to whole numbers and given presentation-friendly column names.
avg_listing_price_nhood = (
    nyc_air_bnb_data
    .groupby('neighbourhood')
    .agg({'price': 'mean', 'id': 'count'})
    .reset_index()
    .round(0)
    .rename(columns={"price": "Average Listing Price Per Night",
                     "id": "Total Listings"})
)

# Merge the summary back onto the individual listings so that every listing row
# carries its neighborhood's aggregates (and keeps its latitude/longitude).
avg_listing_price_nhood = avg_listing_price_nhood.merge(nyc_air_bnb_data, on="neighbourhood", how="inner")

# Rename the grouping columns for display
avg_listing_price_nhood = avg_listing_price_nhood.rename(
    columns={"neighbourhood": "NYC Neighborhoood", "neighbourhood_group": "NYC Borough"}
)



In [None]:
# Read the NYC neighborhood polygons into a GeoDataFrame
data_poly = gpd.read_file("NYC Neighborhoods.geojson")

# Turn every listing row into a point built from its longitude/latitude.
# The points are declared to be in the same CRS as the polygons: the join below
# already compares the raw coordinates directly, and stating the CRS avoids the
# CRS-mismatch warning that sjoin emits when one side has none.
gdf = gpd.GeoDataFrame(
    avg_listing_price_nhood,
    geometry=gpd.points_from_xy(avg_listing_price_nhood.longitude, avg_listing_price_nhood.latitude),
    crs=data_poly.crs,
)

# Assign each listing point to the polygon that contains it.
# `predicate=` replaces the `op=` keyword, which was deprecated in GeoPandas 0.10
# and removed in 1.0.
joined_gdf = gpd.sjoin(gdf, data_poly, predicate='within')

# Build a neighborhood-name -> ntacode lookup and, at the same time, copy each
# ntacode up to a top-level "id" key on its GeoJSON feature.
# NOTE(review): `nycmap_nhoods` must already hold the parsed neighborhood GeoJSON dict.
nycmap_id_nhood_map = {}
for feature in nycmap_nhoods["features"]:
    feature["id"] = feature["properties"]["ntacode"]
    nycmap_id_nhood_map[feature["properties"]["ntaname"]] = feature["id"]

# Replace the "id" column with the ntacode of the neighborhood each listing fell into
joined_gdf["id"] = joined_gdf["ntaname"].map(nycmap_id_nhood_map)

In [None]:
# Draw the neighborhood-level choropleth, colored by average nightly listing price.
# NOTE(review): `joined_gdf` holds one row per listing, so each neighborhood id
# appears many times; consider de-duplicating by "id" before plotting.
fig = px.choropleth_mapbox(
    joined_gdf,  # Listings joined to the neighborhood polygon that contains them
    geojson=nycmap_nhoods,  # Parsed GeoJSON with the neighborhood boundaries
    locations="id",  # Column holding the neighborhood IDs
    color="Average Listing Price Per Night",  # Column that drives the fill color
    color_continuous_scale="viridis",
    mapbox_style="carto-positron",
    zoom=7.7,  # Initial zoom level
    center={"lat": 40.7, "lon": -73.9},  # Initial map center coordinates
    opacity=0.7,
    hover_name="NYC Neighborhoood",  # Column shown as the hover title
    hover_data=["Average Listing Price Per Night", "Total Listings", "NYC Borough"],
    title="Average Listing Price Per Night by NYC Neighborhood",
    labels={"Average Listing Price Per Night": "Average Listing Price Per Night ($)"},  # Add $ sign to legend
)

# The initial view is set by `zoom`/`center` above. The earlier
# `fig.update_geos(fitbounds="locations", ...)` call was dropped because it
# configures `layout.geo`, which a mapbox-based figure does not render.
fig.update_layout(margin={"r": 100, "t": 25, "l": 100, "b": 100})
fig.show()
### References ###

# https://gis.stackexchange.com/questions/358373/extract-polygon-name-dataframe-if-the-geo-point-is-inside-polygon
# https://github.com/nikhilkumarsingh/choropleth-python-tutorial/blob/master/ChoroplethTutorial.ipynb
