
Demos - Geo - Constructing Hexagon Maps with H3 and Plotly: A comprehensive tutorial
====================================================================================


# Overview
This notebook requires a Databricks cluster with machine learning (ML) capability, _i.e._, a cluster whose name contains `-ds-` (DS standing for Data Science).



# References
* [Notebook - Geo - Demos - GeoPandas - NYC taxi trips](https://github.com/data-engineering-helpers/databricks-examples/blob/main/ipython-notebooks/demos-geo-geopandas-nyc-taxi-trips.ipynb)
* [Notebook - Geo - Constructing Hexagon Maps with H3 and Plotly: A Comprehensive Tutorial (this notebook)](https://github.com/data-engineering-helpers/databricks-examples/blob/main/ipython-notebooks/demos-geo-constructing-hexagon-maps-with-h3-and-plotly.ipynb)
* Original article:
  + [Medium - Constructing Hexagon Maps with H3 and Plotly: A Comprehensive Tutorial](https://towardsdatascience.com/constructing-hexagon-maps-with-h3-and-plotly-a-comprehensive-tutorial-8f37a91573bb)
  + Date: 1 November 2023
  + Author: [Amanda Iglesias Moreno](https://www.linkedin.com/in/amanda-iglesias-moreno-55029417a/)
* H3:
  + Home page: https://h3geo.org/
  + H3 bindings for Python (`h3-py`): https://github.com/uber/h3-py
  + [DataBricks doc - Native H3 support by DataBricks](https://docs.databricks.com/en/sql/language-manual/sql-ref-h3-geospatial-functions.html#language-python)
* Data:
  + Home page: https://opendata-ajuntament.barcelona.cat/data/en/dataset/allotjaments-hotels
  + [URL to download the CSV](https://opendata-ajuntament.barcelona.cat/data/dataset/88efe464-2bcd-4794-85b0-8b0bbfd9e4c0/resource/9bccce1b-0b9d-4cc6-94a7-459cb99450de/download)
  + The downloaded CSV is encoded in UTF-16
  + Convert it to UTF-8 with `dos2unix opendatabcn_allotjament_hotels-csv.csv`
  + Upload it to the `/Workspace/Shared/examples/` folder


In [None]:
%pip install h3 geopandas

In [None]:
# Restart the Python process so that the packages installed with %pip are picked up
dbutils.library.restartPython()

In [None]:
%sh

# Create the target folder (and any missing parent folders) under /dbfs
mkdir -p /dbfs/Workspace/Shared/examples
# Download the Barcelona hotels CSV: -L follows redirects, -o writes to the given file.
# NOTE(review): -k disables TLS certificate verification; confirm this is intentional.
curl -kL https://opendata-ajuntament.barcelona.cat/data/dataset/88efe464-2bcd-4794-85b0-8b0bbfd9e4c0/resource/9bccce1b-0b9d-4cc6-94a7-459cb99450de/download -o /dbfs/Workspace/Shared/examples/opendatabcn_allotjament_hotels-csv.csv

In [None]:
%fs ls /Workspace/Shared/examples/opendatabcn_allotjament_hotels-csv.csv

In [None]:
import pandas as pd

# Location of the hotel CSV
# NOTE(review): the download cell writes under /dbfs/Workspace/...; confirm that
# this path points at the same (UTF-8 converted) file.
file_path = "/Workspace/Shared/examples/opendatabcn_allotjament_hotels-csv.csv"

# Raw column name -> friendly column name; only these columns are kept
column_name_mapping = {
    'name': 'Name',
    'addresses_neighborhood_name': 'Neighborhood',
    'addresses_district_name': 'District',
    'geo_epgs_4326_x': 'Latitude',
    'geo_epgs_4326_y': 'Longitude'
}
selected_column_names = list(column_name_mapping)

# Load the CSV, narrow it down to the columns of interest and rename them
df = pd.read_csv(file_path, encoding='utf-8')
df = df[selected_column_names].rename(columns=column_name_mapping)

# Keep only the part of the name that precedes the first ' - ' separator
df['Name'] = df['Name'].str.split(' - ').str[0]

# Preview the first rows
df.head()

In [None]:
import h3, shapely
import geopandas as gpd

def get_hexagon_grid(latitude, longitude, resolution, ring_size):
    """
    Generate a hexagonal grid GeoDataFrame centered around a specified location.

    Works with both the h3-py 3.x API (``geo_to_h3``, ``k_ring``,
    ``h3_to_geo_boundary``) and the h3-py 4.x API (``latlng_to_cell``,
    ``grid_disk``, ``cell_to_boundary``), whichever is installed.

    Parameters:
    - latitude (float): Latitude of the center point.
    - longitude (float): Longitude of the center point.
    - resolution (int): H3 resolution for hexagons.
    - ring_size (int): Number of rings to create around the center hexagon.

    Returns:
    - hexagon_df (geopandas.GeoDataFrame): GeoDataFrame with a 'Hexagon_ID' column (H3 cell IDs)
      and a 'geometry' column (polygons whose vertices are (longitude, latitude) pairs).
    """

    if hasattr(h3, "latlng_to_cell"):
        # h3-py 4.x: the functions were renamed
        center_h3 = h3.latlng_to_cell(latitude, longitude, resolution)
        hexagons = list(h3.grid_disk(center_h3, ring_size))

        def boundary(hexagon):
            # The boundary comes back as (lat, lng) pairs; shapely wants (x, y) = (lng, lat)
            return [(lng, lat) for lat, lng in h3.cell_to_boundary(hexagon)]
    else:
        # h3-py 3.x
        center_h3 = h3.geo_to_h3(latitude, longitude, resolution)
        hexagons = list(h3.k_ring(center_h3, ring_size))  # Convert the set to a list

        def boundary(hexagon):
            # geo_json=True yields (lng, lat) pairs, as shapely expects
            return h3.h3_to_geo_boundary(hexagon, geo_json=True)

    # Create a GeoDataFrame with hexagons and their corresponding geometries
    hexagon_geometries = [shapely.geometry.Polygon(boundary(hexagon)) for hexagon in hexagons]
    hexagon_df = gpd.GeoDataFrame({'Hexagon_ID': hexagons, 'geometry': hexagon_geometries})

    return hexagon_df

In [None]:
# Approximate center of Barcelona, in decimal degrees
barcelona_lat = 41.3851
barcelona_lng = 2.1734

# H3 resolution of the generated cells
resolution = 9

# How many rings of cells to generate around the central one
ring_size = 45

# Build the hexagon grid covering the area around Barcelona
hexagon_df = get_hexagon_grid(
    latitude=barcelona_lat,
    longitude=barcelona_lng,
    resolution=resolution,
    ring_size=ring_size,
)

# Preview the first rows of the GeoDataFrame
hexagon_df.head()

In [None]:
def calculate_hexagon_ids(df, hexagon_df):
    """
    Calculate Hexagon IDs for each hotel in a DataFrame based on their geographic coordinates.

    A hotel is assigned to a hexagon when its point lies strictly within the
    hexagon polygon. Should several polygons contain the point, the one listed
    last in ``hexagon_df`` wins. Hotels that fall in no hexagon get ``None``.

    Args:
        df (pd.DataFrame): DataFrame containing hotel data with "Latitude" and "Longitude" columns.
            It is modified in place (the "Hexagon_ID" column is added or overwritten).
        hexagon_df (gpd.GeoDataFrame): GeoDataFrame with hexagon geometries ("geometry")
            and associated Hexagon IDs ("Hexagon_ID").

    Returns:
        pd.DataFrame: The input DataFrame with an additional "Hexagon_ID" column indicating the Hexagon ID for each hotel.
    """

    # Extract the (ID, polygon) pairs once, rather than walking hexagon_df row by
    # row for every hotel. The list is reversed so that stopping at the first hit
    # yields the last matching row of hexagon_df.
    candidates = list(zip(hexagon_df['Hexagon_ID'], hexagon_df['geometry']))[::-1]

    hexagon_ids = []
    for longitude, latitude in zip(df["Longitude"], df["Latitude"]):
        # Shapely points are (x, y), i.e. (longitude, latitude)
        point = shapely.geometry.Point(longitude, latitude)
        # Stop scanning as soon as a containing hexagon is found
        hexagon_ids.append(
            next((hex_id for hex_id, polygon in candidates if point.within(polygon)), None)
        )

    # Assign by position, one value per hotel row
    df['Hexagon_ID'] = hexagon_ids

    return df

# Tag every hotel with the ID of the hexagon that contains it
df = calculate_hexagon_ids(df=df, hexagon_df=hexagon_df)

# Preview the first rows of the DataFrame
df.head()

In [None]:
# Per hexagon: the number of hotels and their names joined with an HTML line break
grouped_df = df.groupby('Hexagon_ID', as_index=False).agg(
    Count=('Name', 'count'),
    Hotels=('Name', '<br>'.join),
)

# Preview the first rows of the DataFrame
grouped_df.head()

In [None]:
import plotly.express as px

def create_choropleth_map(geojson_df, data_df, alpha=0.4, map_style="carto-positron", color_scale="Viridis",
                          *, title="Hotel Distribution Heatmap in Barcelona City", center=None, zoom=11.5,
                          width=1000):
    """
    Create and display an interactive choropleth map using Plotly Express.

    The two inputs are left-joined on "Hexagon_ID", so every polygon of
    ``geojson_df`` is kept; polygons without a match in ``data_df`` have
    missing "Count" and "Hotels" values.

    Parameters:
    - geojson_df (GeoDataFrame): Polygon geometries with a "Hexagon_ID" column.
    - data_df (DataFrame): Data to be visualized, with "Hexagon_ID", "Count" and "Hotels" columns.
    - alpha (float): Opacity level for the map polygons (0.0 to 1.0).
    - map_style (str): Map style for the Plotly map (e.g., "carto-positron").
    - color_scale (str): Color scale for the choropleth map.
    - title (str, keyword-only): Title of the figure.
    - center (dict or None, keyword-only): Map center as {"lat": ..., "lon": ...};
      None uses {"lat": 41.395, "lon": 2.18}.
    - zoom (float, keyword-only): Initial zoom level of the map.
    - width (int, keyword-only): Width of the figure.

    Returns:
    None (the figure is displayed with fig.show()).
    """
    # Avoid a mutable default argument
    if center is None:
        center = {"lat": 41.395, "lon": 2.18}

    # Attach the per-hexagon data to the polygons (left join keeps every polygon)
    merged_df = geojson_df.merge(data_df, on="Hexagon_ID", how="left")

    # Create a choropleth map using px.choropleth_mapbox
    fig = px.choropleth_mapbox(
        merged_df,
        geojson=merged_df.geometry,
        locations=merged_df.index,  # Rows and geometries are paired through the index
        color="Count",
        color_continuous_scale=color_scale,
        title=title,
        mapbox_style=map_style,
        center=center,
        zoom=zoom,
    )

    # Polygon opacity, plus the hotel names shown on hover
    fig.update_traces(
        marker=dict(opacity=alpha),
        customdata=merged_df["Hotels"],
        hovertemplate="<b>Hotels:</b> %{customdata}<extra></extra>",
    )

    # Margins (35 left/right/bottom, 45 top) and figure width
    fig.update_layout(margin=dict(l=35, r=35, t=45, b=35), width=width)

    fig.show()

# Render the map from the hexagon grid and the per-hexagon aggregates
create_choropleth_map(hexagon_df, grouped_df)