In [57]:
import pandas as pd
import os
import seaborn as sns
import matplotlib.pyplot as plt
from folium.plugins import HeatMap
import folium
import plotly.express as px
import pydeck as pdk
import streamlit as st

##### Join the trip and bus-stop datasets so every booking carries pickup and drop-off details with lat/lon

In [58]:
# Directory that holds the hackathon spreadsheets. Override it with the
# VGI_DATA_DIR environment variable; the default is the original location,
# so existing setups keep working unchanged.
DATA_DIR = os.environ.get("VGI_DATA_DIR", r"C:\Projects\VGI Challenge\vgi_hackathon_2024")

# table1: bus stops (index, name, district, latitude, longitude)
# table2: booked trips, referencing stops through "Pickup ID" / "Dropoff ID"
table1 = pd.read_excel(os.path.join(DATA_DIR, "FLEXI_bus_stops.xls"))
table2 = pd.read_excel(os.path.join(DATA_DIR, "FLEXI_trip_data.xls"))

# Attach the stop details twice: once for the pickup stop, once for the
# drop-off stop. Both merges are inner joins (the pandas default), so a
# booking whose pickup or drop-off ID has no match in the stops table is
# dropped from merged_data.
merged_data = (
    table2
    .merge(table1, left_on="Pickup ID", right_on="index")
    # The second merge collides with the stop columns added by the first one:
    # the existing (pickup) columns keep their names, the new (drop-off)
    # columns receive the "_dropoff" suffix.
    .merge(table1, left_on="Dropoff ID", right_on="index", suffixes=("", "_dropoff"))
    # Give the unsuffixed stop columns an explicit "pickup_" prefix.
    .rename(columns={
        "index": "pickup_index",
        "name": "pickup_name",
        "district": "pickup_district",
        "latitude": "pickup_latitude",
        "longitude": "pickup_longitude",
    })
    # The raw ID columns are redundant with pickup_index / index_dropoff.
    .drop(columns=["Pickup ID", "Dropoff ID"])
)


In [59]:
# Print the schema summary of the raw trips first, then of the merged frame,
# so the row counts before and after the join can be compared.
for frame in (table2, merged_data):
    frame.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3838 entries, 0 to 3837
Data columns (total 8 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   Booking ID           3838 non-null   int64         
 1   Status               3838 non-null   object        
 2   Passenger status     3838 non-null   object        
 3   Passengers           3838 non-null   int64         
 4   Pickup ID            3838 non-null   int64         
 5   Dropoff ID           3838 non-null   int64         
 6   Actual Pickup Time   3838 non-null   datetime64[ns]
 7   Actual Dropoff Time  3838 non-null   datetime64[ns]
dtypes: datetime64[ns](2), int64(4), object(2)
memory usage: 240.0+ KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3434 entries, 0 to 3433
Data columns (total 16 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   Booking ID           3434 n

In [60]:
# Ensure both timestamp columns hold datetimes.
for time_col in ('Actual Pickup Time', 'Actual Dropoff Time'):
    merged_data[time_col] = pd.to_datetime(merged_data[time_col])

# Derive calendar features from the pickup timestamp.
pickup_time = merged_data['Actual Pickup Time'].dt
merged_data['Pickup Hour'] = pickup_time.hour
merged_data['Pickup Day'] = pickup_time.dayofweek  # Monday = 0 ... Sunday = 6

In [61]:
# Preview the first five rows of the merged frame.
merged_data.head(n=5)

Unnamed: 0,Booking ID,Status,Passenger status,Passengers,Actual Pickup Time,Actual Dropoff Time,pickup_index,pickup_name,pickup_district,pickup_latitude,pickup_longitude,index_dropoff,name_dropoff,district_dropoff,latitude_dropoff,longitude_dropoff,Pickup Hour,Pickup Day
0,253212,Cancelled by client,Cancelled,1,2024-09-01 08:18:00,2024-09-01 08:34:00,40,"Irfersdorf, Am Kirchplatz",Irfersdorf,48.994215,11.461103,0,"Kinding, Bahnhof",Kinding,48.992168,11.377365,8,6
1,253369,Cancelled by client,Cancelled,3,2024-09-01 08:46:00,2024-09-01 09:09:00,66,"Wiesenhofen, Kirche",Litterzhofen,49.043035,11.412738,0,"Kinding, Bahnhof",Kinding,48.992168,11.377365,8,6
2,253808,Cancelled by driver,Cancelled,2,2024-09-01 08:56:00,2024-09-01 09:09:00,30,"Beilngries, Frauenkirche",Beilngries,49.036378,11.470632,0,"Kinding, Bahnhof",Kinding,48.992168,11.377365,8,6
3,253782,Validated,Trip completed,2,2024-09-01 09:14:00,2024-09-01 09:25:00,40,"Irfersdorf, Am Kirchplatz",Irfersdorf,48.994215,11.461103,21,"Beilngries, Deutscher Hof",Beilngries,49.033525,11.475793,9,6
4,252382,Cancelled by driver,Cancelled,1,2024-09-01 09:41:00,2024-09-01 09:49:00,46,"Aschbuch, Waldsiedlung",Aschbuch,48.976207,11.491877,22,"Beilngries, Kelheimer Straße",Beilngries,49.032928,11.479163,9,6


In [62]:
# Count how many bookings start at each pickup stop.
# Each row of merged_data is one booking, so the group size is the number of
# bookings per stop (not the number of passengers).
bookings_per_pickup = merged_data.groupby(['pickup_index']).size()
pickup_demand_counts = bookings_per_pickup.reset_index(name="count")

pickup_demand_counts

Unnamed: 0,pickup_index,count
0,0,680
1,1,72
2,2,20
3,3,29
4,4,70
...,...,...
59,64,73
60,65,6
61,66,22
62,67,29


In [63]:
# Total number of passengers booked under each booking status.
status_groups = merged_data.groupby('Status')
passenger_count_by_status = status_groups['Passengers'].sum().reset_index()

passenger_count_by_status

Unnamed: 0,Status,Passengers
0,Cancelled by call center,5
1,Cancelled by client,1320
2,Cancelled by driver,541
3,Changed by admin,8
4,Changed by client,5
5,Validated,2473


In [64]:
# Passengers per (weekday, hour of day, pickup stop) combination.
group_keys = ['Pickup Day', 'Pickup Hour', 'pickup_index']
passengers_per_group = merged_data.groupby(group_keys)['Passengers'].sum()
passenger_count_by_day_hour_pickup = passengers_per_group.reset_index()

# Show the busiest combinations first.
passenger_count_by_day_hour_pickup.sort_values(by='Passengers', ascending=False)

Unnamed: 0,Pickup Day,Pickup Hour,pickup_index,Passengers
1362,5,20,0,35
183,0,16,0,29
1370,5,21,0,27
146,0,14,0,26
1159,4,20,0,25
...,...,...,...,...
710,3,6,53,1
712,3,6,59,1
714,3,7,3,1
716,3,7,8,1


In [65]:
# Total passengers per pickup stop. The coordinates are part of the grouping
# key so that they are carried through the aggregation.
stop_keys = ['pickup_name', 'pickup_latitude', 'pickup_longitude']
passengers_per_stop = merged_data.groupby(stop_keys)['Passengers'].sum()
pickup_demand = passengers_per_stop.reset_index(name='demand')

# Centre the interactive map on the mean coordinate of the pickup stops.
map_center = [pickup_demand['pickup_latitude'].mean(), pickup_demand['pickup_longitude'].mean()]
map_ = folium.Map(location=map_center, zoom_start=10)

# One heat point per stop: latitude, longitude and the passenger total as weight.
heat_points = pickup_demand[['pickup_latitude', 'pickup_longitude', 'demand']].values
HeatMap(data=heat_points, radius=15).add_to(map_)

# Write the map to a standalone HTML file.
map_.save("pickup_demand_heatmap.html")

In [66]:
# Number of bookings (rows, not passengers) that start in each hour of the day.
bookings_by_hour = merged_data.groupby('Pickup Hour').size()
hourly_demand = bookings_by_hour.reset_index(name='demand')

hourly_demand

Unnamed: 0,Pickup Hour,demand
0,5,117
1,6,166
2,7,179
3,8,207
4,9,145
5,10,143
6,11,186
7,12,195
8,13,219
9,14,323


In [67]:
# Display the merged frame; the rendered output elides the middle rows.
merged_data

Unnamed: 0,Booking ID,Status,Passenger status,Passengers,Actual Pickup Time,Actual Dropoff Time,pickup_index,pickup_name,pickup_district,pickup_latitude,pickup_longitude,index_dropoff,name_dropoff,district_dropoff,latitude_dropoff,longitude_dropoff,Pickup Hour,Pickup Day
0,253212,Cancelled by client,Cancelled,1,2024-09-01 08:18:00,2024-09-01 08:34:00,40,"Irfersdorf, Am Kirchplatz",Irfersdorf,48.994215,11.461103,0,"Kinding, Bahnhof",Kinding,48.992168,11.377365,8,6
1,253369,Cancelled by client,Cancelled,3,2024-09-01 08:46:00,2024-09-01 09:09:00,66,"Wiesenhofen, Kirche",Litterzhofen,49.043035,11.412738,0,"Kinding, Bahnhof",Kinding,48.992168,11.377365,8,6
2,253808,Cancelled by driver,Cancelled,2,2024-09-01 08:56:00,2024-09-01 09:09:00,30,"Beilngries, Frauenkirche",Beilngries,49.036378,11.470632,0,"Kinding, Bahnhof",Kinding,48.992168,11.377365,8,6
3,253782,Validated,Trip completed,2,2024-09-01 09:14:00,2024-09-01 09:25:00,40,"Irfersdorf, Am Kirchplatz",Irfersdorf,48.994215,11.461103,21,"Beilngries, Deutscher Hof",Beilngries,49.033525,11.475793,9,6
4,252382,Cancelled by driver,Cancelled,1,2024-09-01 09:41:00,2024-09-01 09:49:00,46,"Aschbuch, Waldsiedlung",Aschbuch,48.976207,11.491877,22,"Beilngries, Kelheimer Straße",Beilngries,49.032928,11.479163,9,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3429,270319,Cancelled by driver,Cancelled,3,2024-09-30 20:55:00,2024-09-30 21:42:00,0,"Kinding, Bahnhof",Kinding,48.992168,11.377365,64,"Plankstetten, Biberbacher Straße",Biberbach,49.068532,11.455142,20,0
3430,268219,Validated,Trip completed,1,2024-09-30 21:20:00,2024-09-30 21:23:00,4,"Kinding, Marktplatz",Kinding,49.000562,11.382790,1,"Enkering, Maibaum",Enkering,48.992498,11.363953,21,0
3431,269630,Cancelled by client,Cancelled,1,2024-09-30 21:25:00,2024-09-30 21:33:00,2,"Enkering, Feuerwehrhaus",Enkering,48.993652,11.361228,10,"Pfraundorf, Dorfplatz",Pfraundorf,49.005142,11.445253,21,0
3432,269524,Cancelled by client,Cancelled,1,2024-09-30 21:37:00,2024-09-30 21:47:00,19,"Beilngries, Ringstrasse",Beilngries,49.033832,11.471982,42,"Grampersdorf, Landstrasse",Grampersdorf,48.970778,11.476563,21,0


In [68]:
# Centre the camera on the mean pickup coordinate, stored as [lat, lon].
map_center = [merged_data[col].mean() for col in ('pickup_latitude', 'pickup_longitude')]

# Hexagon-bin settings for the pickup locations; the position accessor lists
# the longitude column first, then the latitude column.
hex_settings = dict(
    get_position=["pickup_longitude", "pickup_latitude"],
    radius=150,
    elevation_scale=30,
    elevation_range=[0, 100],
    extruded=True,
    pickable=True,
    coverage=0.5,
)
hex_layer = pdk.Layer("HexagonLayer", data=merged_data, **hex_settings)

# Tilted initial camera looking at the centre computed above.
center_lat, center_lon = map_center
view_state = pdk.ViewState(latitude=center_lat, longitude=center_lon, zoom=10, pitch=45)

# No explicit map_style is passed, so pydeck's default base map is used.
deck_map = pdk.Deck(layers=[hex_layer], initial_view_state=view_state)

# Export the 3D pickup map as a standalone HTML file.
deck_map.to_html("3d_pickup_demand_heatmap.html")


In [69]:
# Centre the camera on the mean drop-off coordinate, stored as [lat, lon].
map_center = [merged_data[col].mean() for col in ('latitude_dropoff', 'longitude_dropoff')]

# Hexagon-bin settings for the drop-off locations; the position accessor lists
# the longitude column first, then the latitude column.
hex_settings = dict(
    get_position=["longitude_dropoff", "latitude_dropoff"],
    radius=150,
    elevation_scale=30,
    elevation_range=[0, 100],
    extruded=True,
    pickable=True,
    coverage=0.5,
)
hex_layer = pdk.Layer("HexagonLayer", data=merged_data, **hex_settings)

# Tilted initial camera looking at the centre computed above.
center_lat, center_lon = map_center
view_state = pdk.ViewState(latitude=center_lat, longitude=center_lon, zoom=10, pitch=45)

# No explicit map_style is passed, so pydeck's default base map is used.
deck_map = pdk.Deck(layers=[hex_layer], initial_view_state=view_state)

# Export the 3D drop-off map as a standalone HTML file.
deck_map.to_html("3d_dropoff_demand_heatmap.html")

In [74]:
# SECURITY: the Mapbox access token is no longer hardcoded here. A token
# committed in a notebook is readable by everyone with access to the file, so
# the previously embedded token should be rotated. Provide the token through
# the MAPBOX_API_KEY environment variable before starting the kernel / app.
mapbox_token_present = bool(os.getenv("MAPBOX_API_KEY"))

# Sidebar filters
st.sidebar.header("Filter Options")
min_count = int(pickup_demand_counts['count'].min())
max_count = int(pickup_demand_counts['count'].max())
# The default value must lie inside [min_count, max_count]; a fixed default of
# 0 falls outside the range whenever every stop has at least one booking.
min_demand = st.sidebar.slider("Minimum Demand", min_count, max_count, min_count)
filtered_data = pickup_demand_counts[pickup_demand_counts['count'] >= min_demand]

# Apply the sidebar filter to the plotted data: keep only bookings whose
# pickup stop reaches the selected minimum number of bookings.
selected_stops = filtered_data['pickup_index']
filtered_trips = merged_data[merged_data['pickup_index'].isin(selected_stops)]

# Define the 3D HexagonLayer
hex_layer = pdk.Layer(
    "HexagonLayer",
    data=filtered_trips,
    get_position=["pickup_longitude", "pickup_latitude"],
    radius=200,
    elevation_scale=50,
    elevation_range=[0, 100],
    extruded=True,
    coverage=1,
)

# Define the view settings for the map. The camera is centred on all pickups
# (not only the filtered ones) so the view stays put while the slider moves.
view_state = pdk.ViewState(
    latitude=merged_data['pickup_latitude'].mean(),
    longitude=merged_data['pickup_longitude'].mean(),
    zoom=10,
    pitch=45,
)

# Create the PyDeck map
deck_map = pdk.Deck(
    layers=[hex_layer],
    initial_view_state=view_state,
    map_style="mapbox://styles/mapbox/light-v9",
)

# Display the map in Streamlit
st.title("VGI-Flexi Demand Dashboard")
st.write("### 3D Heatmap of Demand")
if not mapbox_token_present:
    st.warning("MAPBOX_API_KEY is not set; the Mapbox base map may not render.")
st.pydeck_chart(deck_map)



DeltaGenerator()