In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import DBSCAN
# Raw probe samples used by the exploratory cells that follow.
PROBE_CSV_PATH = "23608577_probe_data.csv"
df = pd.read_csv(PROBE_CSV_PATH)

In [None]:
# Preview the first five probe samples to check the column layout.
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,heading,latitude,longitude,traceid,sampledate,speed,filename
0,0,0.0,53.465914,9.955577,QZCVJ3euSqmIImQxM493jg,2024-07-08T09:59:04Z,1.0,1720418400000-probes608.csv
1,1,98.0,53.46591,9.955598,QZCVJ3euSqmIImQxM493jg,2024-07-08T09:59:05Z,2.0,1720418400000-probes608.csv
2,2,92.0,53.465907,9.955601,QZCVJ3euSqmIImQxM493jg,2024-07-08T09:59:06Z,2.0,1720418400000-probes608.csv
3,3,91.0,53.4659,9.95562,QZCVJ3euSqmIImQxM493jg,2024-07-08T09:59:07Z,3.0,1720418400000-probes608.csv
4,4,101.0,53.465892,9.955652,QZCVJ3euSqmIImQxM493jg,2024-07-08T09:59:08Z,6.0,1720418400000-probes608.csv


In [None]:
def _never_reaches_speed_3(trace):
    """True when every sample of the trace has a speed strictly below 3."""
    return trace['speed'].lt(3).all()


# Keep only the traces whose samples are all slower than 3, then list their ids
# in order of first appearance.
always_slow_samples = df.groupby('traceid').filter(_never_reaches_speed_3)
filtered_ids = always_slow_samples['traceid'].unique()
# filtered_ids

array(['9qdiOHxqRjmWqTnNY6wLfg', 'wrF1nmICTiOa88QQgW8eQ',
       'GhflD8lXTa6LlvLbqCyZKQ', 'Dr67oMKR8eMTfR30tVaZQ',
       'Itg7YAMZQdGFnWVfXSHYVQ', 'UlkZwyn2Rjeq1yutqnS4gA',
       'vrdTgbmvT0GRH6ieNgvy3A', '614FxYwxQiegdZk8gJ06UQ',
       'VxnHZ8SUOOpfKdUxFDrg', 'mcBJ3P7tTaWZvQue3lzkA',
       'veNkO1KmRXiOAs65Rydw', '65wMcw0qSM2ClaXdxjN5w',
       '18fN7O0uQZG6iAfQTCu9w', 'U4BiyFvOTuqSdY6ee1y9w',
       'LJuGFMfPSGlf9thnBAymQ', '5COvz4HISd2QPLkEK37yLA',
       'IjHhy6h4TfC29bPixMJ2Sw', 'Ea8nownvSdiCnE22YmClmQ',
       'R6VzXlxQGqiAItw7udW8g', 'NV94cuJ4REaucVaU8BkZww',
       'QAHoraL9SYiNUBY6Bbw80Q', 'TSt5hFQ0RUHJePL08cCjA',
       'eCxfUaAQD2k6Vb896iMJA', 'F7sloS4WQAe2d34RwK2Oiw',
       'L0Eyw8KKR26tc3wfHQuCuQ', 'S9tPre65SemWwxojMgdBsA'], dtype=object)

In [None]:
def find_slowdown(df, threshold=10, decel=-2):
    """
    Return the samples where a vehicle is both slow and decelerating sharply.

    The speed change is computed between consecutive rows of the same
    ``traceid``, so rows are expected to be in chronological order within
    each trace.

    :param df: DataFrame with ``traceid``, ``sampledate`` and ``speed``
        columns. ``sampledate`` may be datetime-like or parseable strings.
        The frame is not modified.
    :param threshold: Only samples with ``speed`` strictly below this value
        are kept.
    :param decel: Only samples whose acceleration (speed units per second)
        is strictly below this value are kept.
    :return: A new DataFrame holding the matching rows plus the helper
        columns ``prev_speed``, ``time_diff`` and ``acceleration``.
    """
    # Work on a copy so the caller's frame does not silently gain columns.
    samples = df.copy()
    if not pd.api.types.is_datetime64_any_dtype(samples['sampledate']):
        # Timestamps read straight from CSV are strings; subtraction needs datetimes.
        samples['sampledate'] = pd.to_datetime(samples['sampledate'])

    by_trace = samples.groupby('traceid')
    prev_speed = by_trace['speed'].shift(1)
    elapsed = (samples['sampledate'] - by_trace['sampledate'].shift(1)).dt.total_seconds()

    samples['prev_speed'] = prev_speed
    samples['time_diff'] = elapsed
    # A non-positive gap (duplicate or out-of-order timestamps) would yield
    # +/-inf or a sign-flipped rate; mask it so the row is never reported.
    samples['acceleration'] = (samples['speed'] - prev_speed) / elapsed.where(elapsed > 0)

    is_slowdown = (samples['speed'] < threshold) & (samples['acceleration'] < decel)
    return samples[is_slowdown].copy()

In [None]:
def load_probe_data(file_path):
    """
    Read a probe-data CSV and return it ordered by trace and time.

    The file is expected to provide at least the columns
    traceid, sampledate, latitude, longitude, speed, heading.

    :param file_path: Path (or file-like object) accepted by ``pd.read_csv``.
    :return: DataFrame with ``sampledate`` as datetimes, sorted by
        ``traceid`` and then ``sampledate``; the original row labels are kept.
    """
    probes = pd.read_csv(file_path)

    # CSV timestamps arrive as text; parse them unless pandas already did.
    timestamps = probes['sampledate']
    if not pd.api.types.is_datetime64_any_dtype(timestamps):
        probes['sampledate'] = pd.to_datetime(timestamps)

    # Chronological order within each trace is what the later shift() logic relies on.
    return probes.sort_values(by=['traceid', 'sampledate'])

# Step 2: Identify speed changes and slowdowns
def identify_slowdowns(df, speed_threshold=10, deceleration_threshold=-2):
    """
    Flag samples where a vehicle is slow and has just decelerated sharply.

    Acceleration is the speed difference to the previous sample of the same
    ``traceid`` divided by the elapsed seconds, so rows are expected to be in
    chronological order within each trace (``load_probe_data`` sorts them).

    :param df: DataFrame with ``traceid``, ``sampledate`` and ``speed``
        columns. The frame is not modified.
    :param speed_threshold: Keep only samples with ``speed`` strictly below
        this value.
    :param deceleration_threshold: Keep only samples whose acceleration
        (speed units per second) is strictly below this value.
    :return: A new DataFrame with the matching rows and the helper columns
        ``prev_speed``, ``time_diff`` and ``acceleration``.
    """
    # Copy first: adding helper columns to the caller's frame is a hidden side effect.
    samples = df.copy()
    if not pd.api.types.is_datetime64_any_dtype(samples['sampledate']):
        samples['sampledate'] = pd.to_datetime(samples['sampledate'])

    # Previous speed / timestamp of the same vehicle.
    by_trace = samples.groupby('traceid')
    prev_speed = by_trace['speed'].shift(1)
    elapsed = (samples['sampledate'] - by_trace['sampledate'].shift(1)).dt.total_seconds()

    samples['prev_speed'] = prev_speed
    samples['time_diff'] = elapsed
    # Duplicate or out-of-order timestamps give a gap <= 0, which would turn
    # into +/-inf or a sign-flipped rate and be misreported as a slowdown.
    samples['acceleration'] = (samples['speed'] - prev_speed) / elapsed.where(elapsed > 0)

    is_slowdown = (
        (samples['speed'] < speed_threshold)
        & (samples['acceleration'] < deceleration_threshold)
    )
    return samples[is_slowdown].copy()

In [None]:
def cluster_slowdowns(slowdowns, eps=0.0001, min_samples=5):
    """
    Group slowdown events by location with DBSCAN and drop the noise points.

    :param slowdowns: DataFrame with ``latitude`` and ``longitude`` columns.
        The frame is not modified.
    :param eps: DBSCAN neighbourhood radius, expressed in the same units as
        the coordinate columns (raw degrees here).
    :param min_samples: Minimum number of points DBSCAN needs to form a cluster.
    :return: A new DataFrame with an added integer ``cluster`` column,
        restricted to rows that belong to a cluster (label >= 0).
    """
    clustered = slowdowns.copy()

    if clustered.empty:
        # DBSCAN raises on an empty array; an empty result is the honest answer.
        clustered['cluster'] = pd.Series(dtype='int64')
        return clustered

    coords = clustered[['latitude', 'longitude']].values
    clustering = DBSCAN(eps=eps, min_samples=min_samples).fit(coords)
    clustered['cluster'] = clustering.labels_

    # DBSCAN labels noise as -1; return an independent frame, not a view.
    return clustered[clustered['cluster'] >= 0].copy()

In [None]:
def analyze_clusters(clustered_slowdowns, min_time_span_days=1.0):
    """
    Summarise each slowdown cluster and rank it as a pedestrian-crossing candidate.

    :param clustered_slowdowns: DataFrame with ``cluster``, ``traceid``,
        ``sampledate`` (datetime), ``latitude``, ``longitude`` and ``speed``.
    :param min_time_span_days: Lower bound applied to a cluster's observed
        time span before computing ``events_per_day``. Without it, a cluster
        whose events fall within seconds is extrapolated to an enormous daily
        rate, and a single-timestamp cluster divides by zero.
    :return: DataFrame indexed by cluster label, sorted by descending score,
        with columns ``cluster``, ``latitude``, ``longitude``,
        ``num_vehicles``, ``num_events``, ``events_per_day``, ``avg_speed``
        and ``score``.
    :raises ValueError: If ``min_time_span_days`` is not positive.
    """
    if min_time_span_days <= 0:
        raise ValueError("min_time_span_days must be positive")

    result_columns = [
        'cluster', 'latitude', 'longitude', 'num_vehicles',
        'num_events', 'events_per_day', 'avg_speed', 'score',
    ]
    if clustered_slowdowns.empty:
        return pd.DataFrame(columns=result_columns)

    # One flat row of statistics per cluster.
    per_cluster = clustered_slowdowns.groupby('cluster').agg(
        num_vehicles=('traceid', 'nunique'),   # distinct vehicles slowing down
        first_seen=('sampledate', 'min'),
        last_seen=('sampledate', 'max'),
        latitude=('latitude', 'mean'),         # cluster centre
        longitude=('longitude', 'mean'),
        num_events=('speed', 'count'),
        avg_speed=('speed', 'mean'),
    )

    # Observed span in days, floored so short-lived clusters are not extrapolated.
    seconds_per_day = 60 * 60 * 24
    observed_days = (
        per_cluster['last_seen'] - per_cluster['first_seen']
    ).dt.total_seconds() / seconds_per_day
    effective_days = observed_days.clip(lower=min_time_span_days)
    per_cluster['events_per_day'] = per_cluster['num_events'] / effective_days

    # Heuristic score (weights can be customised).
    per_cluster['score'] = (
        per_cluster['num_vehicles'] * 0.5          # more unique vehicles = higher score
        + per_cluster['events_per_day'] * 0.3      # more events per day = higher score
        + (20 - per_cluster['avg_speed']) * 0.2    # lower mean speed = higher score
    )

    ranked = per_cluster.sort_values('score', ascending=False)

    potential_crossings = ranked[result_columns[1:]].copy()
    # Expose the label as a regular column too, alongside the cluster index.
    potential_crossings.insert(0, 'cluster', potential_crossings.index)
    return potential_crossings

In [None]:
def visualize_potential_crossings(potential_crossings, original_slowdowns,
                                  output_path='potential_pedestrian_crossings.html'):
    """
    Draw the candidate crossings and their slowdown points on a folium map.

    :param potential_crossings: DataFrame with ``cluster``, ``latitude``,
        ``longitude``, ``score``, ``num_vehicles``, ``num_events``,
        ``events_per_day`` and ``avg_speed`` columns.
    :param original_slowdowns: DataFrame of slowdown samples with
        ``cluster``, ``latitude`` and ``longitude`` columns.
    :param output_path: Where the HTML map is written.
    :return: The ``folium.Map`` that was saved.
    :raises ValueError: If ``potential_crossings`` is empty (there is no
        location to centre the map on).
    """
    # Imported here because the notebook's import cell does not import folium.
    import math
    import folium

    if potential_crossings.empty:
        raise ValueError("potential_crossings is empty; nothing to plot")

    # Marker radius is in pixels; keep it within a readable range whatever the score.
    min_radius, max_radius = 3, 30

    # Create a map centered on the mean location
    map_center = [
        potential_crossings['latitude'].mean(),
        potential_crossings['longitude'].mean()
    ]

    m = folium.Map(location=map_center, zoom_start=14)

    # Add markers for each potential crossing
    for _, row in potential_crossings.iterrows():
        popup_text = f"""
        <b>Potential Pedestrian Crossing</b><br>
        Score: {row['score']:.2f}<br>
        Vehicles: {row['num_vehicles']}<br>
        Events: {row['num_events']}<br>
        Events/Day: {row['events_per_day']:.2f}<br>
        Avg Speed: {row['avg_speed']:.2f} km/h
        """

        # Size based on score, clamped; a NaN score falls back to the minimum.
        raw_radius = row['score'] / 2
        if math.isnan(raw_radius):
            radius = min_radius
        else:
            radius = min(max(raw_radius, min_radius), max_radius)

        folium.CircleMarker(
            location=[row['latitude'], row['longitude']],
            radius=radius,
            popup=folium.Popup(popup_text, max_width=300),
            color='red',
            fill=True,
            fill_color='red',
            fill_opacity=0.7
        ).add_to(m)

        # Add the original slowdown points for this cluster
        cluster_points = original_slowdowns[original_slowdowns['cluster'] == row['cluster']]
        for _, pt in cluster_points.iterrows():
            folium.CircleMarker(
                location=[pt['latitude'], pt['longitude']],
                radius=2,
                color='blue',
                fill=True,
                fill_opacity=0.5,
                opacity=0.5
            ).add_to(m)

    # Save the map
    m.save(output_path)

    return m

In [None]:
def analyze_probe_data_for_pedestrian_crossings(probe_data_path,
                                              speed_threshold=10,
                                              deceleration_threshold=-2,
                                              eps=0.0001,
                                              min_samples=5):
    """
    Run the full pipeline: load probes, find slowdowns, cluster them, rank clusters.

    :param probe_data_path: CSV file to analyse (passed to ``load_probe_data``).
    :param speed_threshold: Forwarded to ``identify_slowdowns``.
    :param deceleration_threshold: Forwarded to ``identify_slowdowns``.
    :param eps: Forwarded to ``cluster_slowdowns``.
    :param min_samples: Forwarded to ``cluster_slowdowns``.
    :return: Tuple ``(potential_crossings, clustered_slowdowns)``.
    """
    print("Loading probe data...")
    # Honour the caller's path; it used to be ignored in favour of a fixed file name.
    df = load_probe_data(probe_data_path)

    print("Identifying slowdown events...")
    slowdowns = identify_slowdowns(df, speed_threshold, deceleration_threshold)

    print("Clustering slowdown locations...")
    clustered_slowdowns = cluster_slowdowns(slowdowns, eps, min_samples)

    print("Analyzing clusters for potential pedestrian crossings...")
    potential_crossings = analyze_clusters(clustered_slowdowns)

    # print("Visualizing results...")
    # map_vis = visualize_potential_crossings(potential_crossings, clustered_slowdowns)

    print(f"Analysis complete. Found {len(potential_crossings)} potential pedestrian crossing locations.")
    print("Top 10 locations by score:")
    print(potential_crossings.head(10)[['latitude', 'longitude', 'score', 'num_vehicles', 'events_per_day']])

    return potential_crossings, clustered_slowdowns

if __name__ == "__main__":
    # Same file the first cell loads; the path is now actually used by the pipeline.
    probe_data_path = "23608577_probe_data.csv"
    results, clusters = analyze_probe_data_for_pedestrian_crossings(probe_data_path)

Loading probe data...
Identifying slowdown events...
Clustering slowdown locations...
Analyzing clusters for potential pedestrian crossings...
Analysis complete. Found 1937 potential pedestrian crossing locations.
Top 10 locations by score:
          latitude  longitude         score  num_vehicles  events_per_day
cluster                                                                  
782      53.438867   9.936947  77764.233333             3   259200.000000
1065     53.446088   9.994326  38883.366667             1   129600.000000
1766     53.510123   9.984984  16203.780000             1    54000.000000
1500     53.438597   9.957137  16203.740000             1    54000.000000
1931     53.485878   9.981121  11112.638095             1    37028.571429
1755     53.466052   9.993797  10803.980000             1    36000.000000
1916     53.485907   9.984205   9973.050769             1    33230.769231
1862     53.440721   9.936610   8103.900000             1    27000.000000
14       53.457736 

In [None]:
import geopandas as gpd
import pandas as pd

# Load the GeoJSON file
gdf = gpd.read_file('23608577_combined.geojson')


def _coords_as_text(geom):
    """Return a geometry's vertex list as a string, or None when it has none."""
    if geom is None or geom.is_empty:
        return None
    if geom.geom_type == 'Polygon':
        # Polygons expose their vertices through the exterior ring, not `.coords`.
        return str(list(geom.exterior.coords))
    return str(list(geom.coords))


# Extract coordinates from geometry into separate columns.
# Decide by geometry type: str(geometry) is WKT ("POINT (...)"), so matching
# the substrings 'Point' / 'LineString' against it never succeeds.
geometry_kinds = set(gdf.geometry.geom_type.dropna())
if geometry_kinds == {'Point'}:
    # For Point geometries
    gdf['longitude'] = gdf.geometry.x
    gdf['latitude'] = gdf.geometry.y
elif geometry_kinds and geometry_kinds <= {'LineString', 'Polygon'}:
    # For LineStrings or Polygons: full vertex list as text
    gdf['coordinates'] = gdf.geometry.apply(_coords_as_text)
elif geometry_kinds:
    # Mixed or multi-part geometries: keep a WKT string so the shape is not lost
    gdf['geometry_wkt'] = gdf.geometry.apply(lambda geom: None if geom is None else geom.wkt)

# Drop the geometry column now that its content lives in plain columns
gdf = gdf.drop(columns='geometry')

# Save to CSV
gdf.to_csv('output.csv', index=False)

In [None]:
import requests
import math

def lat_lon_to_tile(lat, lon, zoom):
    """
    Convert latitude and longitude to tile indices (x, y) at a given zoom level.

    Inputs outside the Web Mercator range are clamped so the result is always
    a valid index in ``[0, 2**zoom - 1]``: latitudes beyond about +/-85.0511
    degrees snap to the top/bottom row and ``lon == 180`` snaps to the last
    column instead of producing ``2**zoom``.

    :param lat: Latitude in degrees
    :param lon: Longitude in degrees
    :param zoom: Zoom level (0-19)
    :return: Tuple (x, y) representing the tile indices
    """
    # The Mercator projection diverges at the poles; this is its usable limit.
    max_mercator_lat = 85.0511287798066
    lat = max(-max_mercator_lat, min(max_mercator_lat, lat))

    # Convert latitude and longitude to radians
    lat_rad = math.radians(lat)
    lon_rad = math.radians(lon)

    # Number of tiles along each axis at the given zoom level
    n = 2.0 ** zoom

    # Fractional tile position, truncated to the containing tile
    x = int((lon_rad + math.pi) / (2 * math.pi) * n)
    y = int((1 - math.log(math.tan(lat_rad) + 1 / math.cos(lat_rad)) / math.pi) / 2 * n)

    # Keep boundary inputs (and float round-off) inside the grid
    last_index = int(n) - 1
    x = min(max(x, 0), last_index)
    y = min(max(y, 0), last_index)

    return (x, y)

def tile_coords_to_lat_lon(x, y, zoom):
    """
    Convert tile indices back to geographic coordinates.

    :param x: Tile column
    :param y: Tile row
    :param zoom: Zoom level
    :return: Tuple (lat, lon) in degrees for the tile's (x, y) corner
    """
    tiles_per_axis = 2.0 ** zoom
    longitude = x / tiles_per_axis * 360.0 - 180.0
    # Inverse Mercator: rows map to latitude through atan(sinh(...)).
    mercator_angle = math.pi * (1-2 * y/tiles_per_axis)
    latitude = math.degrees(math.atan(math.sinh(mercator_angle)))
    return (latitude, longitude)

def get_tile_bounds(x, y, zoom):
    """
    Return the four corners of tile (x, y) as (lat, lon) pairs.

    The corners are listed in the order (x, y), (x+1, y), (x+1, y+1), (x, y+1).
    """
    corner_offsets = ((0, 0), (1, 0), (1, 1), (0, 1))
    return tuple(
        tile_coords_to_lat_lon(x + dx, y + dy, zoom)
        for dx, dy in corner_offsets
    )

def create_wkt_polygon(bounds):
    """
    Build a WKT POLYGON string from four (lat, lon) corner pairs.

    WKT lists longitude before latitude, and the ring is closed by repeating
    the first corner at the end.
    """
    first, second, third, fourth = bounds
    ring = (first, second, third, fourth, first)
    vertices = ", ".join(f"{lon} {lat}" for lat, lon in ring)
    return f"POLYGON(({vertices}))"


import os


def get_satellite_tile(lat, lon, zoom, tile_format, api_key, tile_size=512):
    """
    Download the HERE satellite tile covering a point and return its footprint.

    The image is written to ``satellite_tile.<tile_format>`` in the working
    directory when the request succeeds; a failure is reported on stdout and
    the footprint is still returned.

    :param lat: Latitude in degrees
    :param lon: Longitude in degrees
    :param zoom: Zoom level
    :param tile_format: Image format used in the URL and file extension (e.g. 'png')
    :param api_key: HERE API key
    :param tile_size: Tile size in pixels (previously read from a global variable)
    :return: WKT POLYGON string describing the tile's bounds
    """
    x, y = lat_lon_to_tile(lat, lon, zoom)

    # The tile address is the path; style, size and key belong in the query
    # string (the old URL put '&' parameters before the '?').
    url = f'https://maps.hereapi.com/v3/base/mc/{zoom}/{x}/{y}/{tile_format}'
    query = {'style': 'satellite.day', 'size': tile_size, 'apiKey': api_key}

    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(url, params=query, timeout=30)

    # Check if the request was successful
    if response.status_code == 200:
        # Save the tile to a file
        with open(f'satellite_tile.{tile_format}', 'wb') as file:
            file.write(response.content)
        print('Tile saved successfully.')
    else:
        print(f'Failed to retrieve tile. Status code: {response.status_code}')

    bounds = get_tile_bounds(x, y, zoom)
    wkt_polygon = create_wkt_polygon(bounds)
    return wkt_polygon

# Never commit credentials in a notebook: read the key from the environment.
api_key = os.environ.get('HERE_API_KEY')
if not api_key:
    raise RuntimeError("Set the HERE_API_KEY environment variable before running this cell.")
latitude = 49.3407917
longitude = 6.74129034
zoom_level = 19 # Zoom level
tile_size = 512  # Tile size in pixels
tile_format = 'png'  # Tile format

wkt_bounds = get_satellite_tile(latitude, longitude, zoom_level, tile_format, api_key, tile_size=tile_size)
print(wkt_bounds)

Tile saved successfully.
POLYGON((6.7407989501953125 49.34123051256456, 6.741485595703125 49.34123051256456, 6.741485595703125 49.340783124813264, 6.7407989501953125 49.340783124813264, 6.7407989501953125 49.34123051256456))
