In [1]:
import pandas as pd
# Load the combined data CSV; the relative path is resolved against the current working directory.
initial_data = pd.read_csv('combined_data_21_01_2025.csv')

In [2]:
# Number of rows available for every device_id.
device_counts = initial_data.groupby('device_id').size()

# Devices that contributed at least 10 rows.
valid_device_ids = device_counts.loc[device_counts.ge(10)].index

# Keep only the rows whose device_id is one of those devices.
initial_data = initial_data.loc[initial_data['device_id'].isin(valid_device_ids)]

In [3]:
from geopy.geocoders import Nominatim
from shapely.geometry import Point
from shapely.ops import transform
import pyproj
from shapely import wkt

def get_coordinates(address):
    """Geocode ``address`` with Nominatim and return ``(latitude, longitude)``.

    Raises
    ------
    ValueError
        If the geocoder returns no match for the address.
    """
    match = Nominatim(user_agent="Buffer_Creation").geocode(address)
    if not match:
        raise ValueError(f"Address '{address}' not found.")
    return match.latitude, match.longitude

def filter_within_buffer(initial_data_w, address, radius_m, utm_crs='EPSG:32632'):
    """Return the rows of ``initial_data_w`` that lie OUTSIDE a buffer around an address.

    The address is geocoded with ``get_coordinates``, projected from WGS84 to a
    metric CRS and buffered by ``radius_m``. Every row whose geometry is contained
    in that buffer is removed; all other rows are returned.

    Parameters
    ----------
    initial_data_w : pandas.DataFrame
        Frame with a 'geometry' column holding WKT strings. Coordinates are read
        as (x=lon, y=lat) in EPSG:4326. Already-parsed Shapely geometries are
        accepted unchanged, so the function can be re-applied to its own output.
    address : str
        Address handed to ``get_coordinates``.
    radius_m : float
        Buffer radius in the units of ``utm_crs`` (metres for a UTM zone).
    utm_crs : str, optional
        Metric CRS used for the distance computation. Defaults to
        'EPSG:32632' (UTM zone 32N); pick the zone matching your location.

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is not modified) in which 'geometry' holds Shapely
        geometries, or None where the value was not a parsable WKT string.
        Rows without a usable geometry are kept, because they cannot be shown to
        lie inside the buffer.

    Raises
    ------
    ValueError
        Propagated from ``get_coordinates`` when the address cannot be geocoded.
    """
    # Local import: only needed to recognise geometries that are already parsed.
    from shapely.geometry.base import BaseGeometry

    building_lat, building_lon = get_coordinates(address)

    # always_xy=True fixes the axis order to (lon, lat) -> (easting, northing).
    transformer = pyproj.Transformer.from_crs(
        pyproj.CRS('EPSG:4326'), pyproj.CRS(utm_crs), always_xy=True
    )

    # Buffer around the building, built in the metric CRS so radius_m is a distance.
    building_point_utm = transform(transformer.transform, Point(building_lon, building_lat))
    building_buffer = building_point_utm.buffer(radius_m)

    def safe_wkt_load(x):
        """Parse a WKT string; pass geometries through; anything else becomes None."""
        if isinstance(x, BaseGeometry):
            return x
        if isinstance(x, str):
            try:
                return wkt.loads(x)
            except Exception:
                # Invalid WKT: treat as "no geometry" but let KeyboardInterrupt
                # and SystemExit propagate (a bare except would swallow them).
                return None
        return None

    def is_inside_buffer(geom):
        """True when the reprojected geometry is contained in the building buffer."""
        if geom is None:
            return False
        return bool(building_buffer.contains(transform(transformer.transform, geom)))

    geometries = initial_data_w['geometry'].apply(safe_wkt_load)

    # astype(bool) keeps the mask invertible even when the frame is empty.
    inside = geometries.apply(is_inside_buffer).astype(bool)

    # assign() builds a new frame, so the caller's DataFrame is left untouched and
    # no temporary projected-geometry column has to be added and dropped again.
    return initial_data_w.assign(geometry=geometries).loc[~inside]

# Example usage: remove every record located within 15 m of the address below.
address = "Von-Steuben-Straße 21, 48143 Münster"

# Hand the filter an independent deep copy so `initial_data` itself is never altered.
initial_data_w = initial_data.copy()

# The filter returns the rows that lie OUTSIDE the buffer around the address.
atrai_bike_data = filter_within_buffer(initial_data_w, address, radius_m=15)

In [4]:
# Columns needed for the overtaking analysis (including the 'Standing' value).
atrai_bike_data_dngr = atrai_bike_data[[
    'createdAt',
    'Overtaking Manoeuvre',
    'Overtaking Distance',
    'Standing',
    'geometry',
    'device_id',
    'lng',
    'lat',
]]

# Columns needed for the combined overtaking / fine-dust analysis.
atrai_bike_data_dngr_PM = atrai_bike_data[[
    'createdAt',
    'Overtaking Manoeuvre',
    'Overtaking Distance',
    'Rel. Humidity',
    'Finedust PM1',
    'Finedust PM2.5',
    'Finedust PM4',
    'Finedust PM10',
    'geometry',
    'device_id',
    'lng',
    'lat',
]]

In [5]:
# Keep the complete rows (no missing value in any column) whose 'Standing' value
# is at least 0.75. The mask is built from the already-cleaned frame itself, so
# the selection does not rely on pandas aligning a mask taken from a different
# (longer) frame, which breaks as soon as the index is not unique.
atrai_bike_data_dngr_filtered = atrai_bike_data_dngr.dropna().loc[
    lambda complete_rows: complete_rows['Standing'] >= 0.75
]

In [6]:
# Latitude / longitude bounding box used as the Münster extent.
min_lat, max_lat = 51.5, 52.5
min_lon, max_lon = 7.4, 7.9

# Keep the rows whose coordinates fall inside the box (both bounds inclusive)
# and detach the result from the source frame.
atrai_bike_dngr_MS = atrai_bike_data_dngr.loc[
    atrai_bike_data_dngr['lat'].between(min_lat, max_lat)
    & atrai_bike_data_dngr['lng'].between(min_lon, max_lon)
].copy()

In [7]:
import folium
from folium.plugins import HeatMap

# Map centre as [latitude, longitude] (Münster, Germany).
map_center = [51.9625, 7.6256]

# Base map for the overtaking-manoeuvre heatmap.
m_om = folium.Map(location=map_center, zoom_start=13)

# One heat point per row: lat, lng and the 'Overtaking Manoeuvre' value as weight.
# Rows with a missing value in any of the three columns are dropped.
heatmap_data_om = atrai_bike_data_dngr[['lat', 'lng', 'Overtaking Manoeuvre']].dropna()

# Attach the heat layer; radius, blur and max_zoom are the plugin's display settings.
m_om.add_child(
    HeatMap(
        heatmap_data_om.to_numpy(),
        radius=15,
        blur=15,
        max_zoom=1,
    )
)

# Write the interactive map to disk.
m_om.save("manoeuvre_overtaking.html")

In [8]:
# Base map for the overtaking-distance heatmap (reuses the map centre defined earlier).
m_od = folium.Map(location=map_center, zoom_start=13)

# Coordinates plus the overtaking distance; incomplete rows are dropped.
heatmap_data_od = atrai_bike_data_dngr[['lat', 'lng', 'Overtaking Distance']].dropna()

# Invert the distance so that a smaller distance yields a larger heat weight:
# weight = 1 - distance / largest distance in this data.
heatmap_data_od['Reversed Distance'] = 1 - heatmap_data_od['Overtaking Distance'].div(
    heatmap_data_od['Overtaking Distance'].max()
)

# Plain nested list of [lat, lng, weight] entries for the plugin.
heatmap_data_list_od = (
    heatmap_data_od[['lat', 'lng', 'Reversed Distance']].to_numpy().tolist()
)

# Attach the heat layer with the same display settings as the other heatmaps.
m_od.add_child(
    HeatMap(
        heatmap_data_list_od,
        radius=15,
        blur=15,
        max_zoom=1,
    )
)

# Write the interactive map to disk.
m_od.save("distance_overtaking.html")

In [9]:
# Combine 'Overtaking Manoeuvre' and 'Overtaking Distance' from `atrai_bike_data_dngr`
# into a single risk index. The largest distance in the data is the scaling reference.
max_distance = atrai_bike_data_dngr['Overtaking Distance'].max()
atrai_bike_data_dngr_zones = atrai_bike_data_dngr.copy()

# Scale the distance by that maximum and invert it, so a smaller distance gives a
# value closer to 1.
atrai_bike_data_dngr_zones['Normalized Distance'] = (
    atrai_bike_data_dngr_zones['Overtaking Distance'].div(max_distance).rsub(1)
)

# Relative importance of the two factors (tune as needed).
alpha = 0.3  # weight of 'Overtaking Manoeuvre'
beta = 0.7   # weight of the normalized distance

# Risk index = alpha * manoeuvre value + beta * normalized distance.
atrai_bike_data_dngr_zones['Risk Index Overtaking'] = (
    atrai_bike_data_dngr_zones['Overtaking Manoeuvre'].mul(alpha)
    .add(atrai_bike_data_dngr_zones['Normalized Distance'].mul(beta))
)

# Map centre as [latitude, longitude] (Münster, Germany).
map_center = [51.9625, 7.6256]

# Base map for the danger-zone heatmap.
m_dz = folium.Map(location=map_center, zoom_start=13)

# Heat points weighted by the risk index; rows with a missing value are dropped.
heatmap_data_dz = atrai_bike_data_dngr_zones[['lat', 'lng', 'Risk Index Overtaking']].dropna()

# Attach the heat layer with the same display settings as the other heatmaps.
m_dz.add_child(
    HeatMap(
        heatmap_data_dz.to_numpy(),
        radius=15,
        blur=15,
        max_zoom=1,
    )
)

# Write the interactive map to disk.
m_dz.save("dangerzones.html")

In [10]:
import geopandas as gpd
import folium
from shapely.geometry import Point

# Step 1: select the rows of `atrai_bike_data_dngr` with an 'Overtaking Manoeuvre'
# value of at least 0.5 and an 'Overtaking Distance' of at most 150.
dngr_points = atrai_bike_data_dngr.loc[
    atrai_bike_data_dngr['Overtaking Manoeuvre'].ge(0.5)
    & atrai_bike_data_dngr['Overtaking Distance'].le(150)
].copy()

# Step 2: a marker needs both coordinates, so drop rows missing either one.
dngr_points = dngr_points.dropna(subset=['lat', 'lng'])

# Step 3: create a map centred on Münster (51.9607° N, 7.6261° E).
map_dngr_points = folium.Map(location=[51.9607, 7.6261], zoom_start=13)

# Step 4: draw one small, filled red circle per selected point.
for point_lat, point_lng in zip(dngr_points['lat'], dngr_points['lng']):
    folium.CircleMarker(
        location=[point_lat, point_lng],
        radius=3,
        color="red",
        fill=True,
        fill_opacity=1,
    ).add_to(map_dngr_points)

# Step 5: write the interactive map to disk.
map_dngr_points.save("Dangerous_Points.html")

In [11]:
import numpy as np

# Work on an independent copy of the PM subset and keep only complete rows.
atrai_bike_data_dngr_zones_PM = atrai_bike_data_dngr_PM.copy().dropna()

# Keep the rows with a relative humidity of at most 75. Missing humidity values
# were already removed by dropna() above and would fail the comparison anyway.
atrai_bike_data_dngr_zones_PM = atrai_bike_data_dngr_zones_PM[
    atrai_bike_data_dngr_zones_PM['Rel. Humidity'].le(75)
]

def replace_outliers_with_nan_by_device(atrai_bike_data_dngr_zones_PM, column):
    """Replace per-device IQR outliers in ``column`` with NaN.

    For every ``device_id`` the quartiles Q1 and Q3 of ``column`` are computed and
    each value outside ``[Q1 - 1.5 * IQR, Q3 + 1.5 * IQR]`` (IQR = Q3 - Q1) is
    replaced by NaN. Values on the bounds are kept.

    Parameters
    ----------
    atrai_bike_data_dngr_zones_PM : pandas.DataFrame
        Frame with a 'device_id' column and the numeric column to clean.
    column : str
        Name of the column in which outliers are replaced.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same columns and row order; the input is not
        modified. Rows whose 'device_id' is missing are left out, because
        groupby does not form a group for a missing key.
    """
    # Rows without a device cannot be assigned to any group.
    has_device = atrai_bike_data_dngr_zones_PM['device_id'].notna()
    cleaned = atrai_bike_data_dngr_zones_PM.loc[has_device].copy()

    # transform() broadcasts each device's quartile back onto its rows. This avoids
    # mutating group frames inside groupby.apply and never touches the grouping
    # column, so 'device_id' is always retained in the result.
    per_device = cleaned.groupby('device_id')[column]
    q1 = per_device.transform(lambda values: values.quantile(0.25))
    q3 = per_device.transform(lambda values: values.quantile(0.75))
    iqr = q3 - q1
    lower_bound = q1 - 1.5 * iqr
    upper_bound = q3 + 1.5 * iqr

    # where() keeps values inside the bounds and writes NaN everywhere else
    # (including rows whose bounds are NaN because the group has no valid data).
    within_bounds = cleaned[column].between(lower_bound, upper_bound)
    cleaned[column] = cleaned[column].where(within_bounds)
    return cleaned

# Fine-dust columns that enter the risk index.
pm_columns = ['Finedust PM1', 'Finedust PM2.5', 'Finedust PM4', 'Finedust PM10']

# Per-device IQR outlier removal is currently disabled; uncomment the loop to apply it.
#for column in pm_columns:
#    atrai_bike_data_dngr_zones_PM = replace_outliers_with_nan_by_device(atrai_bike_data_dngr_zones_PM, column)

# Scale the overtaking distance by its maximum and invert it, so a smaller distance
# gives a value closer to 1.
max_distance = atrai_bike_data_dngr_zones_PM['Overtaking Distance'].max()
atrai_bike_data_dngr_zones_PM['Normalized Distance'] = (
    atrai_bike_data_dngr_zones_PM['Overtaking Distance'].div(max_distance).rsub(1)
)

# Column maxima used to scale every PM series (no outlier removal has been applied,
# see the disabled loop above).
max_pm1, max_pm2_5, max_pm4, max_pm10 = (
    atrai_bike_data_dngr_zones_PM[name].max() for name in pm_columns
)

atrai_bike_data_dngr_zones_PM['Normalized PM1'] = atrai_bike_data_dngr_zones_PM['Finedust PM1'].div(max_pm1)
atrai_bike_data_dngr_zones_PM['Normalized PM2.5'] = atrai_bike_data_dngr_zones_PM['Finedust PM2.5'].div(max_pm2_5)
atrai_bike_data_dngr_zones_PM['Normalized PM4'] = atrai_bike_data_dngr_zones_PM['Finedust PM4'].div(max_pm4)
atrai_bike_data_dngr_zones_PM['Normalized PM10'] = atrai_bike_data_dngr_zones_PM['Finedust PM10'].div(max_pm10)

# Relative importance of each factor (tune as needed).
alpha = 0.15   # weight of 'Overtaking Manoeuvre'
beta = 0.35    # weight of the normalized overtaking distance
gamma1 = 0.2   # weight of PM1
gamma2 = 0.15  # weight of PM2.5
gamma3 = 0.1   # weight of PM4
gamma4 = 0.05  # weight of PM10

# Risk index = weighted sum of the manoeuvre value, the normalized distance and the
# four normalized PM columns (terms are added in the order listed).
atrai_bike_data_dngr_zones_PM['Risk Index'] = (
    atrai_bike_data_dngr_zones_PM['Overtaking Manoeuvre'].mul(alpha)
    .add(atrai_bike_data_dngr_zones_PM['Normalized Distance'].mul(beta))
    .add(atrai_bike_data_dngr_zones_PM['Normalized PM1'].mul(gamma1))
    .add(atrai_bike_data_dngr_zones_PM['Normalized PM2.5'].mul(gamma2))
    .add(atrai_bike_data_dngr_zones_PM['Normalized PM4'].mul(gamma3))
    .add(atrai_bike_data_dngr_zones_PM['Normalized PM10'].mul(gamma4))
)

# Map centre as [latitude, longitude] (Münster, Germany).
map_center = [51.9625, 7.6256]

# Base map for the PM-aware danger-zone heatmap.
m_dz_PM = folium.Map(location=map_center, zoom_start=13)

# Heat points weighted by the risk index; rows with a missing value are dropped.
heatmap_data_dz_PM = atrai_bike_data_dngr_zones_PM[['lat', 'lng', 'Risk Index']].dropna()

# Attach the heat layer with the same display settings as the other heatmaps.
m_dz_PM.add_child(
    HeatMap(
        heatmap_data_dz_PM.to_numpy(),
        radius=15,
        blur=15,
        max_zoom=1,
    )
)

# Write the interactive map to disk.
m_dz_PM.save("PM_dangerzones.html")