In [1]:
import pandas as pd
# Load the raw bike sensor export.
atrai_bike_data = pd.read_csv('atrai_bike_data_ohne_Büro_21_01_25.csv')

# Number of rows recorded by each device, keyed by 'device_id'.
device_counts = atrai_bike_data.groupby('device_id').size()

# Devices that contributed at least 10 rows; everything else is discarded.
valid_device_ids = device_counts.loc[lambda n_rows: n_rows >= 10].index

# Keep only the rows that belong to one of those devices.
has_enough_rows = atrai_bike_data['device_id'].isin(valid_device_ids)
atrai_bike_data = atrai_bike_data.loc[has_enough_rows]

In [2]:
import osmnx as ox

# Download the cyclable road network for Münster, Germany
road_network_muenster = ox.graph_from_place("Münster, Germany", network_type='bike')

# Split the graph into GeoDataFrames of nodes and edges (road segments)
nodes, edges = ox.graph_to_gdfs(road_network_muenster)

# Drop every edge whose 'highway' tag is one of the major road classes below;
# all other edges are kept.
# NOTE(review): this step used to be described as removing 'service' and
# 'residential' roads, which is not what the filter does — confirm which of
# the two is actually intended.
# NOTE(review): presumably 'highway' can hold a list for merged edges; such
# rows never match isin() and are therefore kept — verify.
EXCLUDED_HIGHWAY_CLASSES = ['primary', 'secondary', 'tertiary']
edges_filtered = edges[~edges['highway'].isin(EXCLUDED_HIGHWAY_CLASSES)]

# Optional geometry clean-up, both disabled by default (None).
SIMPLIFY_TOLERANCE_M = None  # e.g. 0.5 to simplify each line geometry
MIN_EDGE_LENGTH_M = None     # e.g. 10 to drop edges not longer than this

# Only reproject when a clean-up step is enabled: tolerance and length are
# expressed in metres, so they need a metric CRS (UTM 32N), and the result is
# converted back to EPSG:4326 afterwards. With both steps off the round trip
# would change nothing, so it is skipped.
if SIMPLIFY_TOLERANCE_M is not None or MIN_EDGE_LENGTH_M is not None:
    edges_metric = edges_filtered.to_crs(epsg=32632)
    if SIMPLIFY_TOLERANCE_M is not None:
        edges_metric['geometry'] = edges_metric.geometry.simplify(tolerance=SIMPLIFY_TOLERANCE_M)
    if MIN_EDGE_LENGTH_M is not None:
        edges_metric = edges_metric[edges_metric.geometry.length > MIN_EDGE_LENGTH_M]
    edges_filtered = edges_metric.to_crs(epsg=4326)

In [3]:
import folium
import geopandas as gpd
import matplotlib
import matplotlib.cm as cm
import matplotlib.colors as mcolors
import numpy as np
from shapely.geometry import Point, LineString
from sklearn.neighbors import BallTree

def find_nearest_road_segment(point, road_network):
    """Return the index label of the road geometry closest to ``point``.

    ``point.distance`` is evaluated against every entry of
    ``road_network.geometry`` and the label of the smallest result is
    returned. The return value is an index label, not a road name.
    """
    def _gap_to(candidate):
        return point.distance(candidate)

    gaps = road_network.geometry.apply(_gap_to)
    return gaps.idxmin()

# Bounding box (degrees) used to keep only measurements in the Münster area
min_lat, max_lat = 51.840, 52.061
min_lon, max_lon = 7.473, 7.775

# Overtaking distances are divided by this value and capped at 1 before being
# mapped to a colour; the tooltip labels the un-normalised value as cm.
MAX_OVERTAKING_DISTANCE_CM = 200
# Rows whose 'Overtaking Manoeuvre' value is not above this are discarded.
# NOTE(review): presumably a detection confidence — confirm.
MIN_OVERTAKING_MANOEUVRE = 0.05

# Keep only the points inside the bounding box and the columns used below
in_muenster = (
    atrai_bike_data['lat'].between(min_lat, max_lat)
    & atrai_bike_data['lng'].between(min_lon, max_lon)
)
columns_of_interest = ['createdAt', 'lat', 'lng', 'device_id', 'Overtaking Distance', 'Overtaking Manoeuvre']
filtered_data_MS = atrai_bike_data.loc[in_muenster, columns_of_interest].copy()
filtered_data_MS['createdAt'] = pd.to_datetime(filtered_data_MS['createdAt'])
filtered_data_MS = filtered_data_MS.dropna(subset=["Overtaking Distance"])

# .copy() so the column assignments below write to an independent frame
filtered_data_MS = filtered_data_MS[
    filtered_data_MS["Overtaking Manoeuvre"] > MIN_OVERTAKING_MANOEUVRE
].copy()
# Normalise from the filtered frame itself instead of relying on index
# alignment with the unfiltered source frame.
filtered_data_MS['Normalized Overtaking Distance'] = (
    filtered_data_MS['Overtaking Distance'] / MAX_OVERTAKING_DISTANCE_CM
).clip(upper=1)

# Centroids are computed in a projected CRS (EPSG:3857) and converted back to
# EPSG:4326 so they can be compared with the lat/lng of the measurements.
edges_projected = edges_filtered.to_crs("EPSG:3857")
centroids = edges_projected.geometry.centroid.to_crs("EPSG:4326")

# Edges are kept in (or converted to) EPSG:4326 for drawing, without a round
# trip through the projected CRS. The index is reset so that the positional
# results of the BallTree line up with the rows of edges_filtered.
edges_filtered = edges_filtered.to_crs("EPSG:4326").reset_index(drop=True)

# BallTree for nearest-neighbour search. The haversine metric expects each
# point as [latitude, longitude] in radians, so y (lat) comes first.
road_coords = np.deg2rad(np.column_stack([centroids.y.values, centroids.x.values]))
tree = BallTree(road_coords, metric='haversine')

# Query points in the same [lat, lng] order, converted to radians
bike_coords = np.deg2rad(filtered_data_MS[['lat', 'lng']].to_numpy())
if len(bike_coords):
    _nearest_dist, indices = tree.query(bike_coords, k=1)
else:
    # Nothing to query: keep an empty result instead of calling the tree
    indices = np.empty((0, 1), dtype=int)
filtered_data_MS['road_segment'] = indices.flatten()

# Per road segment: mean normalised distance and number of measurements
segment_data = filtered_data_MS.groupby('road_segment').agg(
    avg_dist=('Normalized Overtaking Distance', 'mean'),
    points_in_segment=('road_segment', 'size')
).reset_index()

# Filter sparse segments
#segment_data = segment_data[segment_data['points_in_segment'] > 5]

# Mean distance scaled back for the tooltip. Because the normalised values are
# capped at 1, this is the mean of distances capped at the maximum above.
segment_data['avg_distance_unnorm'] = segment_data['avg_dist'] * MAX_OVERTAKING_DISTANCE_CM

# Colour map looked up through the registry (cm.get_cmap is deprecated)
cmap = matplotlib.colormaps["RdYlGn"]

# Create the folium map
m_distance = folium.Map(location=[51.95, 7.63], zoom_start=14)

# Draw every segment that received at least one measurement.
# itertuples keeps integer columns as integers, so the tooltip shows "5"
# rather than "5.0" and the segment id can be used as a position directly.
for segment in segment_data.itertuples(index=False):
    line = edges_filtered.geometry.iloc[segment.road_segment]
    if line.is_empty:
        continue

    tooltip_text = (
        f"Data Points: {segment.points_in_segment}<br>"
        f"Avg Distance: {segment.avg_distance_unnorm:.2f} cm"
    )
    folium.PolyLine(
        # Geometry coordinates are (lng, lat); folium expects (lat, lng)
        locations=[(lat, lng) for lng, lat in line.coords],
        color=mcolors.to_hex(cmap(segment.avg_dist)),
        weight=4,
        tooltip=folium.Tooltip(tooltip_text),
    ).add_to(m_distance)

m_distance.save("Distance_Flow_Map_Münster.html")

  cmap = cm.get_cmap("RdYlGn")
