In [32]:
!pip install vaex
!pip install pandas h3 folium geojson matplotlib



In [122]:
import pandas as pd
import h3
import folium
from folium import Map, Marker, GeoJson
import json
from geojson.feature import *

In [123]:
# Notebook-wide settings.

# [latitude, longitude] pair for the city of interest.
city_coords = [41.8781, -87.6298]

# H3 resolution at which trip locations are indexed into hexagons.
resolution = 7

In [124]:
# Read the trip records from the CSV file into a DataFrame.
df_cleaned = pd.read_csv(
    filepath_or_buffer='./data/cleaned_trips_with_location_small.csv',
)

# List the column labels that are available for the rest of the notebook.
print(df_cleaned.columns)

Index(['trip_id', 'taxi_id', 'trip_start_timestamp', 'trip_end_timestamp',
       'trip_seconds', 'trip_miles', 'pickup_census_tract',
       'dropoff_census_tract', 'pickup_community_area',
       'dropoff_community_area', 'fare', 'tips', 'tolls', 'extras',
       'trip_total', 'payment_type', 'company', 'pickup_centroid_latitude',
       'pickup_centroid_longitude', 'dropoff_centroid_latitude',
       'dropoff_centroid_longitude'],
      dtype='object')


In [125]:
# Step 1: index every pickup and drop-off location into its H3 hexagon at the
# configured `resolution`.
#
# The latitude/longitude columns are walked pairwise instead of using
# DataFrame.apply(axis=1): that avoids building a Series object for every row
# and, for an empty frame, simply yields an empty column.
for endpoint in ('pickup', 'dropoff'):
    latitudes = df_cleaned[f'{endpoint}_centroid_latitude']
    longitudes = df_cleaned[f'{endpoint}_centroid_longitude']
    # A plain list is assigned positionally, so row order is preserved.
    df_cleaned[f'{endpoint}_hex'] = [
        h3.geo_to_h3(lat, lng, resolution)
        for lat, lng in zip(latitudes, longitudes)
    ]

In [126]:
# Collect every hexagon id that occurs as a pickup or a drop-off, keeping each
# id once in order of first appearance (row by row, pickup before drop-off).
df_hex = df_cleaned[['pickup_hex', 'dropoff_hex']]
unique_hexes = pd.Series(df_hex.to_numpy().ravel()).unique()

# GeoJSON-style Polygon geometry for each hexagon, in the same order as
# `unique_hexes`; the boundary is requested in GeoJSON form from h3.
hex_geo = [
    {
        "type": "Polygon",
        "coordinates": [h3.h3_to_geo_boundary(h=cell, geo_json=True)],
    }
    for cell in unique_hexes
]

In [127]:
# Base map centred on the configured city coordinates.
m = folium.Map(location=city_coords, zoom_start=11)

# One GeoJSON Feature per hexagon. zip pairs every hexagon id with its
# geometry, so the final hexagon is included as well (an index loop over
# range(len(hex_geo) - 1) would leave it off the map).
list_features = [
    Feature(geometry=geometry,
            id=hex_id,
            properties={"resolution": resolution})
    for hex_id, geometry in zip(unique_hexes, hex_geo)
]

# Serialise the collection to a GeoJSON string for folium.
feat_collection = FeatureCollection(list_features)
geojson_result = json.dumps(feat_collection)

# Draw the hexagons as green outlines with a faint fill.
GeoJson(
        geojson_result,
        style_function = lambda feature: {
            'fillColor': None,
            'color': "green",
            'weight': 2,
            'fillOpacity': 0.05
        },
        name = "Example"
    ).add_to(m)

<folium.features.GeoJson at 0x3de214730>

In [128]:
# Display the folium map with the hexagon layer as this cell's output.
m

In [129]:
# Number of trips per hexagon, counted separately for trip starts and trip ends.
pickup_counts = df_cleaned.groupby('pickup_hex').size()
dropoff_counts = df_cleaned.groupby('dropoff_hex').size()

# Two-row table: hexagon ids are the columns, one row per count type.
# A hexagon missing from one of the two Series comes out as NaN there, which is
# replaced by 0.
result_df = pd.DataFrame(
    [pickup_counts, dropoff_counts],
    index=['pickup_counts', 'dropoff_counts'],
).fillna(0)

In [130]:
# Add one marker per hexagon, placed at the coordinates h3 returns for the
# cell, with a popup showing that hexagon's pickup and drop-off totals.
for hex_id in unique_hexes:
    # Column of result_df holding both counts for this hexagon.
    hex_column = result_df[hex_id]
    count_pickup = hex_column["pickup_counts"]
    count_dropoff = hex_column["dropoff_counts"]

    marker = folium.Marker(
        h3.h3_to_geo(hex_id),
        popup=f"Pickups: {int(count_pickup)} - Dropoffs: {int(count_dropoff)}",
    )
    marker.add_to(m)

In [131]:
# Display the map again; by now it also carries the per-hexagon count markers.
m