In [33]:
import sqlite3
import json
import pandas as pd
import polyline
import networkx as nx
import numpy as np
from sklearn.neighbors import BallTree
import matplotlib.cm as cm
import matplotlib.colors as colors
import folium


In [8]:
# --- 1. Load activities from DB ---
conn = sqlite3.connect("strava_cache.db")
cursor = conn.cursor()
cursor.execute("SELECT id, data FROM activities")
rows = cursor.fetchall()

In [85]:
# --- 2. Decode each activity's polyline ---
activity_coords = {}  # id -> list of (lat, lon)
for activity_id, data_json in rows[1000:2000]:
    try:
        data = json.loads(data_json)
        poly = data.get("map", {}).get("summary_polyline")
        if not poly:
            continue
        coords = polyline.decode(poly)
        if coords:
            activity_coords[activity_id] = coords
    except Exception as e:
        print(f"Error with activity {activity_id}: {e}")

In [86]:
# --- 3. Compute centroids and extreme points for each activity ---
def compute_extreme_points(coords):
    """Compute the extreme points: north, south, east, and west."""
    latitudes, longitudes = zip(*coords)  # Unzip the coordinates

    # Calculate extreme points
    N_ind = np.argmax(latitudes)
    S_ind = np.argmin(latitudes)
    E_ind = np.argmax(longitudes)
    W_ind = np.argmin(longitudes)

    north = coords[N_ind]
    south = coords[S_ind]
    east = coords[E_ind]
    west = coords[W_ind]

    start = coords[0]
    finish = coords[-1]

    return [north, south, east, west,start,finish]

activity_key_points = {}
activity_centroids = {}


for aid, coords in activity_coords.items():
    # Compute centroid (mean of latitudes and longitudes in radians)
    centroid = np.mean(np.radians(coords), axis=0)
    activity_centroids[aid] = centroid
    
    extreme_points = compute_extreme_points(coords)
    activity_key_points[aid] = extreme_points

activity_ids = list(activity_key_points.keys())
centroid_array = np.array([activity_centroids[aid] for aid in activity_ids])  # lat, lon in radians

In [87]:
# Define BallTree using centroids in radians
tree = BallTree(centroid_array, metric='haversine')

# --- 5. Build graph ---
G = nx.Graph()

# Add nodes
for aid in activity_ids:
    G.add_node(aid)



In [88]:
# Define the search radius (400 meters = 0.4 km) in radians
radius_km = 0.4
radius_radians = radius_km / 6371.0088  # Earth's radius in km

# Add edges based on proximity (both centroid and extreme points)
for i, aid in enumerate(activity_ids):
    # Check proximity using the centroid
    indices = tree.query_radius([centroid_array[i]], r=radius_radians)[0]

    # Add the activity itself to its own list of indices (for connecting to itself)
    connected_nodes = set(indices)

    # Check proximity using extreme points
    extreme_points = activity_key_points[aid]

    for extreme_point in extreme_points:
        # Convert the extreme point into radians
        extreme_point_radians = np.radians([extreme_point[0], extreme_point[1]])

        # Query BallTree using the extreme point
        indices_extreme = tree.query_radius([extreme_point_radians], r=radius_radians)[0]
        connected_nodes.update(indices_extreme)

    # Add edges between activities that are within the radius threshold
    for j in connected_nodes:
        if i != j:
            other_aid = activity_ids[j]
            G.add_edge(aid, other_aid)

print(f"Graph has {G.number_of_nodes()} nodes and {G.number_of_edges()} edges.")


Graph has 982 nodes and 13559 edges.


In [89]:
import folium
import matplotlib.cm as cm
import matplotlib.colors as colors

# --- 1. Create map centered on mean lat/lon ---
all_coords = [pt for coords in activity_coords.values() for pt in coords]
mean_lat = sum(lat for lat, _ in all_coords) / len(all_coords)
mean_lon = sum(lon for _, lon in all_coords) / len(all_coords)

m = folium.Map(location=all_coords[-1], zoom_start=12,)

# --- 2. Assign a color to each connected component ---
components = list(nx.connected_components(G))
colormap = cm.get_cmap('tab20', len(components))
component_colors = {}

for idx, comp in enumerate(components):
    for aid in comp:
        component_colors[aid] = colors.to_hex(colormap(idx))

# --- 3. Draw each activity polyline ---
for aid, coords in activity_coords.items():
    folium.PolyLine(
        coords,
        color=component_colors.get(aid, "#888"),
        weight=3,
        opacity=0.7,
        popup=f"Activity {aid}"
    ).add_to(m)



# --- 5. Save map ---
m.save("activity_graph_map.html")
print("Map saved to activity_graph_map.html")


  colormap = cm.get_cmap('tab20', len(components))


Map saved to activity_graph_map.html


In [None]:
good_ids = [
    11969746958,
    12487635877,
    11681065104
]

Graph has 2269 nodes and 48454 edges.


In [None]:
# This is close - I think I just need to do a separate check points check for all the activites - hopefully this just fixes the centroid error
# It would be cool to see if I could do it as a tree

import sqlite3
import json
import polyline
import numpy as np
import networkx as nx
from sklearn.neighbors import BallTree
from math import radians

# --- 1. Load activities from DB ---
conn = sqlite3.connect("strava_cache.db")
cursor = conn.cursor()
cursor.execute("SELECT id, data FROM activities")
rows = cursor.fetchall()

# --- 2. Decode each activity's polyline ---
activity_coords = {}  # id -> list of (lat, lon)
for activity_id, data_json in rows:
    try:
        data = json.loads(data_json)
        poly = data.get("map", {}).get("summary_polyline")
        if not poly:
            continue
        coords = polyline.decode(poly)
        if coords:
            activity_coords[activity_id] = coords
    except Exception as e:
        print(f"Error with activity {activity_id}: {e}")

# --- 3. Compute centroids and extreme points for each activity ---
def compute_extreme_points(coords):
    """Compute the extreme points: north, south, east, and west."""
    latitudes, longitudes = zip(*coords)  # Unzip the coordinates

    # Calculate extreme points
    N_ind = np.argmax(latitudes)
    S_ind = np.argmin(latitudes)
    E_ind = np.argmax(longitudes)
    W_ind = np.argmin(longitudes)

    north = coords[N_ind]
    south = coords[S_ind]
    east = coords[E_ind]
    west = coords[W_ind]

    return north, south, east, west

activity_centroids = {}
activity_extremes = {}

for aid, coords in activity_coords.items():
    # Compute centroid (mean of latitudes and longitudes in radians)
    centroid = np.mean(np.radians(coords), axis=0)
    activity_centroids[aid] = centroid

    # Compute extreme points (north, south, east, west)
    extreme_points = compute_extreme_points(coords)
    activity_extremes[aid] = extreme_points

# --- 4. Prepare BallTree (using both centroids and extreme points) ---
# Convert centroids to radians for BallTree calculation
activity_ids = list(activity_centroids.keys())
centroid_array = np.array([activity_centroids[aid] for aid in activity_ids])  # lat, lon in radians

# Define BallTree using centroids in radians
tree = BallTree(centroid_array, metric='haversine')

# --- 5. Build graph ---
G = nx.Graph()

# Add nodes
for aid in activity_ids:
    G.add_node(aid)

# Define the search radius (400 meters = 0.4 km) in radians
radius_km = 0.4
radius_radians = radius_km / 6371.0088  # Earth's radius in km

# Add edges based on proximity (both centroid and extreme points)
for i, aid in enumerate(activity_ids):
    # Check proximity using the centroid
    indices = tree.query_radius([centroid_array[i]], r=radius_radians)[0]

    # Add the activity itself to its own list of indices (for connecting to itself)
    connected_nodes = set(indices)

    # Check proximity using extreme points
    extreme_points = activity_extremes[aid]

    for extreme_point in extreme_points:
        # Convert the extreme point into radians
        extreme_point_radians = np.radians([extreme_point[0], extreme_point[1]])

        # Query BallTree using the extreme point
        indices_extreme = tree.query_radius([extreme_point_radians], r=radius_radians)[0]
        connected_nodes.update(indices_extreme)

    # Add edges between activities that are within the radius threshold
    for j in connected_nodes:
        if i != j:
            other_aid = activity_ids[j]
            G.add_edge(aid, other_aid)

print(f"Graph has {G.number_of_nodes()} nodes and {G.number_of_edges()} edges.")


In [7]:
import folium
import matplotlib.cm as cm
import matplotlib.colors as colors

# --- 1. Create map centered on mean lat/lon ---
all_coords = [pt for coords in activity_coords.values() for pt in coords]
mean_lat = sum(lat for lat, _ in all_coords) / len(all_coords)
mean_lon = sum(lon for _, lon in all_coords) / len(all_coords)

m = folium.Map(location=all_coords[-1], zoom_start=12, tiles="cartodbpositron")

# --- 2. Assign a color to each connected component ---
components = list(nx.connected_components(G))
colormap = cm.get_cmap('tab20', len(components))
component_colors = {}

for idx, comp in enumerate(components):
    for aid in comp:
        component_colors[aid] = colors.to_hex(colormap(idx))

# --- 3. Draw each activity polyline ---
for aid, coords in activity_coords.items():
    folium.PolyLine(
        coords,
        color=component_colors.get(aid, "#888"),
        weight=3,
        opacity=0.7,
        popup=f"Activity {aid}"
    ).add_to(m)



# --- 5. Save map ---
m.save("activity_graph_map.html")
print("Map saved to activity_graph_map.html")


  colormap = cm.get_cmap('tab20', len(components))


Map saved to activity_graph_map.html


In [153]:
small_ids = [
    12292106042,
    8287258968,
    4380620390,
    14226775715,
    9715432255,
    7496473660,
    8844755256,
    8282865940,
    7496474686,
    8621567454,
    8271793221,
    10086163306,
    8275084646,
    13560538745,
    6647906082,
    3517268015,
    14021455130,
    13871347337
]

# --- Connect to DB ---
conn = sqlite3.connect("strava_cache.db")
cursor = conn.cursor()

# Use parameter substitution for safety and formatting
placeholders = ','.join(['?'] * len(small_ids))
query = f"SELECT id, data FROM activities WHERE id IN ({placeholders})"
cursor.execute(query, small_ids)
rows = cursor.fetchall()

activity_coords = {}  # id -> list of (lat, lon)
# --- 4. Loop through activities and draw each PolyLine ---
for activity_id, data_json in rows:
    data = json.loads(data_json)
    poly = data.get("map", {}).get("summary_polyline")
    if not poly:
        continue

    coords = polyline.decode(poly)
    if coords:
        activity_coords[activity_id] = coords

activity_centroids = {}
all_pts = {}
for aid, coords in activity_coords.items():
    # Compute centroid (mean of latitudes and longitudes in radians)
    centroid = np.mean(np.radians(coords), axis=0)
    activity_centroids[aid] = centroid
    for c in range(len(coords)):
        pt_rad = np.radians(coords[c])
        pt_ll = coords[c]
        pt_id = f'{aid}_{c}'
        all_pts[pt_id] = [pt_rad,pt_ll]
all_pt_ids = list(all_pts.keys())
pt_array = np.array([all_pts[pt_id][0] for pt_id in all_pt_ids])  # lat, lon in radians



In [154]:
# Define BallTree using centroids in radians
tree = BallTree(pt_array, metric='haversine')

# --- 5. Build graph ---
G = nx.Graph()

# Add nodes
for id in all_pt_ids:
    G.add_node(id)

In [155]:
# Define the search radius (400 meters = 0.4 km) in radians
radius_km = 0.4
radius_radians = radius_km / 6371.0088  # Earth's radius in km

# Add edges based on proximity (both centroid and extreme points)
for i, aid in enumerate(all_pt_ids):
    # Check proximity using the centroid
    indices = tree.query_radius([pt_array[i]], r=radius_radians)[0]

    # Add the activity itself to its own list of indices (for connecting to itself)
    connected_nodes = set(indices)


    # Add edges between activities that are within the radius threshold
    for j in connected_nodes:
        if i != j:
            other_aid = all_pt_ids[j]
            G.add_edge(aid, other_aid)

print(f"Graph has {G.number_of_nodes()} nodes and {G.number_of_edges()} edges.")
components = list(nx.connected_components(G))
print(f'{len(components)} connected components')

Graph has 5881 nodes and 95541 edges.
13 connected components


In [156]:
def get_act_ids(connected_component):
    return set(int(pt_id.split('_')[0]) for pt_id in connected_component)

# Step 1: Extract act_id sets
components_act_ids = [get_act_ids(cc) for cc in components]

# Step 2: Merge overlapping act_id sets
merged = []
used = [False] * len(components_act_ids)

for i, ids_i in enumerate(components_act_ids):
    if used[i]:
        continue

    merged_set = set(ids_i)
    used[i] = True

    changed = True
    while changed:
        changed = False
        for j, ids_j in enumerate(components_act_ids):
            if used[j]:
                continue
            if not merged_set.isdisjoint(ids_j):
                merged_set.update(ids_j)
                used[j] = True
                changed = True

    merged.append(sorted(merged_set))

# Output: merged list of act_id groups
checked_ids = {}
for comp_num in range(len(merged)):
    
    for act_id in merged[comp_num]:
        checked_ids[act_id] = comp_num


In [157]:

colormap = cm.get_cmap('tab20', len(merged))
component_colors = {}

for c_ind in range(len(merged)):
    
    component_colors[c_ind] = colors.to_hex(colormap(c_ind))


  colormap = cm.get_cmap('tab20', len(merged))


In [158]:
m = folium.Map(location=coords[-1], zoom_start=12,tiles="Cartodb Positron")

# --- 3. Draw each activity polyline ---
for aid, cc_color_code in checked_ids.items():
    folium.PolyLine(
        activity_coords[aid],
        color=component_colors.get(cc_color_code, "#888"),
        weight=3,
        opacity=0.7,
        popup=f"color {component_colors.get(cc_color_code, "#888")}"
    ).add_to(m)

# --- 5. Save map ---
m.save("work_map.html")
print("Map saved to work_map.html")

Map saved to work_map.html
