In [1]:
import vaex
import pandas as pd
import numpy as np
import geopandas as gp
import folium
from shapely import Polygon
import contextily as cx
import h3

In [2]:
# Open the prepared trips file with vaex. The resulting `df_trips` frame is the
# input passed to getRidesHexagon in every plotting cell further down.
df_trips = vaex.open('data/trips/trips_prepared.hdf5')

In [3]:
# Method to get h3 polygons
def convert_hex_to_polygon(hex):
    """Build a shapely Polygon from an H3 cell id.

    Parameters
    ----------
    hex
        H3 cell id. Any falsy value (``None``, empty string, ...) is treated
        as "no cell".

    Returns
    -------
    Polygon or None
        Polygon constructed from the cell boundary returned by
        ``h3.h3_to_geo_boundary(hex, geo_json=True)``, or ``None`` when
        ``hex`` is falsy.
    """
    # Guard clause: nothing to look up for a missing cell id.
    if not hex:
        return None

    boundary = h3.h3_to_geo_boundary(hex, geo_json=True)
    return Polygon(boundary)

In [4]:
# Get GeoDataFrame with count for pickup or dropoff for every hexagon of the selected resolution
def getRidesHexagon(df, pickup, resolution):
    """Count rides per hexagon and return the result as a GeoDataFrame.

    Parameters
    ----------
    df
        Trips frame supporting ``groupby(...).agg(...).to_pandas_df()`` and
        containing a ``ts_start`` column plus the hexagon column
        ``"<pickup|dropoff>_hex_<resolution>"``.
    pickup
        Truthy -> group by the pickup hexagon column, falsy -> group by the
        dropoff hexagon column.
    resolution
        Value appended to the hexagon column name (e.g. ``7`` selects
        ``pickup_hex_7``).

    Returns
    -------
    geopandas.GeoDataFrame
        Indexed by the hexagon id, with a ``count`` column (renamed from the
        aggregated ``ts_start``) and a ``geometry`` column of hexagon
        polygons, CRS ``EPSG:4326``.
    """
    # Name of the hexagon column to aggregate on, built once and reused.
    side = "pickup" if pickup else "dropoff"
    hex_col = side + "_hex_" + str(resolution)

    # Aggregate: count `ts_start` per hexagon, then hand over to pandas.
    counts = df.groupby(by=hex_col, progress=True).agg({'ts_start': 'count'}).to_pandas_df()

    # Drop groups without a usable hexagon id: real missing values as well as
    # the literal string "None".
    # TODO(@Leo): why is this step necessary - why are there trips without a
    # hexagon assignment?
    has_hexagon = counts[hex_col].notna() & (counts[hex_col] != "None")
    counts = counts[has_hexagon]

    # Attach the polygon belonging to every remaining hexagon id.
    counts["geometry"] = counts[hex_col].apply(convert_hex_to_polygon)

    # Re-index by hexagon id; the old positional index surfaces as an "index"
    # column after reset_index() and is discarded again.
    counts = counts.reset_index()
    counts = counts.set_index(hex_col)
    counts = counts.rename(columns={"ts_start": "count"})
    counts = counts.drop(columns=["index"])

    return gp.GeoDataFrame(counts, geometry='geometry', crs="EPSG:4326")
    


In [5]:
def plotRidesHexagon(gdf, column, text):
    """Render one column of a hexagon GeoDataFrame as a folium choropleth.

    Parameters
    ----------
    gdf
        GeoDataFrame with one row per hexagon: the index holds the hexagon id
        and the geometry column holds its polygon. The frame is not modified.
    column
        Name of the column in ``gdf`` whose values drive the fill colour.
    text
        Title shown on the colour legend.

    Returns
    -------
    folium.Map
        Map with the choropleth layer added.
    """
    # Move the hexagon ids out of the index into a column literally named
    # "index", so the GeoJSON properties and the data table share one join key.
    # rename_axis()/reset_index() return new objects, so the caller's frame
    # keeps its index name (assigning `gdf.index.name = None` would change the
    # caller's frame in place).
    hexagons = gdf.rename_axis(None).reset_index()
    gjson = hexagons.to_json()

    # Fixed map centre (lat, lon), zoom level and canvas size used for every
    # plot (coordinates are presumably downtown Chicago - confirm).
    hex_map = folium.Map([41.88065, -87.62642], zoom_start=10, width=700, height=700)
    folium.Choropleth(
        geo_data=gjson,
        name="choropleth",
        data=hexagons,
        # Join key first, then the value column that is colour-coded.
        columns=["index", column],
        # Path inside each GeoJSON feature that holds the same join key.
        key_on='feature.properties.index',
        fill_opacity=0.7,
        line_opacity=0.2,
        fill_color="YlGn",
        legend_name=text,  # title of the legend
        line_color='black').add_to(hex_map)
    return hex_map


In [22]:
# Raw ride counts per pickup hexagon, using the resolution-7 hexagon column.
gdf = getRidesHexagon(df_trips, pickup=True, resolution=7)
plotRidesHexagon(gdf, "count", "Rides with pickup location in hexagon")

groupby [########################################] 100.00% elapsed time  :     1.55s =  0.0m =  0.0h
 

In [23]:
# Raw ride counts per dropoff hexagon, using the resolution-7 hexagon column.
gdf = getRidesHexagon(df_trips, pickup=False, resolution=7)
plotRidesHexagon(gdf, "count", "Rides with dropoff location in hexagon")

groupby [########################################] 100.00% elapsed time  :     1.43s =  0.0m =  0.0h
 

In [24]:
# Pickup counts per resolution-7 hexagon, plotted as their natural logarithm
# instead of the raw count (presumably to compress the value range - confirm).
gdf = getRidesHexagon(df_trips, pickup=True, resolution=7)
gdf["logCount"] = np.log(gdf['count'])
plotRidesHexagon(gdf, "logCount", "Natural Logarithm of rides with pickup location in hexagon")

groupby [########################################] 100.00% elapsed time  :     1.50s =  0.0m =  0.0h
 

In [25]:
# Dropoff counts per resolution-7 hexagon, plotted as their natural logarithm
# instead of the raw count (presumably to compress the value range - confirm).
gdf = getRidesHexagon(df_trips, pickup=False, resolution=7)
gdf["logCount"] = np.log(gdf['count'])
plotRidesHexagon(gdf, "logCount", "Natural Logarithm of rides with dropoff location in hexagon")

groupby [########################################] 100.00% elapsed time  :     1.49s =  0.0m =  0.0h 
 

In [26]:
# Pickup counts per resolution-7 hexagon, plotted as base-2 logarithm of the
# count.
gdf = getRidesHexagon(df_trips, pickup=True, resolution=7)
gdf["logCount"] = np.log2(gdf['count'])
plotRidesHexagon(gdf, "logCount", "Log2 of count of rides with pickup location in hexagon")

groupby [########################################] 100.00% elapsed time  :     1.46s =  0.0m =  0.0h
 

In [27]:
# Dropoff counts per resolution-7 hexagon, plotted as base-2 logarithm of the
# count.
gdf = getRidesHexagon(df_trips, pickup=False, resolution=7)
gdf["logCount"] = np.log2(gdf['count'])
plotRidesHexagon(gdf, "logCount", "Log2 of count of rides with dropoff location in hexagon")

groupby [########################################] 100.00% elapsed time  :     1.50s =  0.0m =  0.0h
 

In [31]:
# Raw ride counts per dropoff hexagon, using the resolution-8 hexagon column.
gdf = getRidesHexagon(df_trips, pickup=False, resolution=8)
plotRidesHexagon(gdf, "count", "count of rides with dropoff location in hexagon")

groupby [########################################] 100.00% elapsed time  :     1.57s =  0.0m =  0.0h
 

In [30]:
# Dropoff counts per resolution-8 hexagon, plotted as base-2 logarithm of the
# count.
gdf = getRidesHexagon(df_trips, pickup=False, resolution=8)
gdf["logCount"] = np.log2(gdf['count'])
plotRidesHexagon(gdf, "logCount", "Log2 of count of rides with dropoff location in hexagon")

groupby [########################################] 100.00% elapsed time  :     1.45s =  0.0m =  0.0h 
 

### Offene Fragen

- Warum gibt es für manche Rides None-Werte in den Hexagon-Spalten, d. h. kein Hexagon für die Start- oder Dropoff-Location? Wenn es eine Dropoff-Location gibt, warum kann es dann sein, dass ihr kein Hexagon zugeteilt wird?
- Warum gibt es zwei verschiedene "None"-Werte? (Einmal als echtes None und einmal als String "None")

- 
- 

- Ist es richtig, dass in manchen Hexagons keine Rides gestartet sind?