In [0]:
%pip install geopandas networkx shapely pyogrio

[43mNote: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.[0m


In [0]:
# 05_generate_safeloop (Databricks)
# Generates multiple SafeLoop route options from scored street edges.
#
# Notes:
# - Expects usc_edges_scored.geojson to already exist (from 01_build_safety_scores).
# - Uses time-aware safety_score from 01, and smooths risk over neighboring streets
#   to approximate "looking ahead a couple streets."

import os
import random
import numpy as np
import pandas as pd
import geopandas as gpd
import networkx as nx
from shapely.geometry import LineString


# User settings (distance from Databricks widget)
if "dbutils" in globals():
    target_str = dbutils.widgets.get("target_distance")
else:
    target_str = "3.0"

try:
    TARGET_DISTANCE = float(target_str)
except Exception:
    raise ValueError(f"Invalid widget value: {target_str}")

USE_MILES = True
NUM_ROUTES = 3
ALPHAS = np.linspace(0.3, 0.9, NUM_ROUTES)
ROAD_PENALTY_WEIGHT = 1.0
random.seed(42)

USC_LAT, USC_LON = 34.0224, -118.2851


# Paths
NOTEBOOK_DIR = os.getcwd()
PROJECT_ROOT = NOTEBOOK_DIR.split("/notebooks")[0]
DATA_DIR = os.path.join(PROJECT_ROOT, "data")
OUTPUTS_DIR = os.path.join(PROJECT_ROOT, "outputs")
os.makedirs(OUTPUTS_DIR, exist_ok=True)

print("Project root:", PROJECT_ROOT)
print("Data dir:    ", DATA_DIR)
print("Outputs dir: ", OUTPUTS_DIR)


# Load scored edges (from outputs/)
edges_path = os.path.join(OUTPUTS_DIR, "usc_edges_scored.geojson")
if not os.path.exists(edges_path):
    raise FileNotFoundError(
        f"Missing scored edges: {edges_path}\n"
        "Run 01_build_safety_scores first."
    )

edges = gpd.read_file(edges_path).to_crs(epsg=4326)
print("Scored edges loaded:", len(edges))

required_cols = {"edge_id", "u", "v", "length_m", "safety_score"}
missing = required_cols - set(edges.columns)
if missing:
    raise ValueError(f"Scored edges file is missing required columns: {sorted(missing)}")


# DPS zone filter (optional)
dps_path = os.path.join(DATA_DIR, "usc_dps_upc_zone.geojson")
if os.path.exists(dps_path):
    dps_gdf = gpd.read_file(dps_path).to_crs(edges.crs)
    dps_union = dps_gdf.unary_union
    before = len(edges)
    edges = edges[edges.geometry.intersects(dps_union)]
    after = len(edges)
    print(f"Filtered edges by DPS zone: {before} -> {after}")
else:
    print("DPS zone not found. Using full graph.")


# Road penalty (quieter streets preferred)
def get_road_penalty(row):
    quiet = [
        "residential", "living_street", "cycleway", "footway",
        "path", "pedestrian", "service", "track"
    ]
    big = ["primary", "secondary", "tertiary", "trunk", "motorway"]

    for col in ["highway", "road_type", "road_class"]:
        if col in row and pd.notna(row[col]):
            val = str(row[col]).lower()
            if any(t in val for t in quiet):
                return 0.1
            if any(t in val for t in big):
                return 1.0
            return 0.5
    return 0.5

edges["road_penalty"] = edges.apply(get_road_penalty, axis=1)


# Risk smoothing ("looking ahead a couple streets")
edges["risk_raw"] = 101 - edges["safety_score"]

node_edge_u = edges[["edge_id", "u", "risk_raw"]].rename(columns={"u": "node"})
node_edge_v = edges[["edge_id", "v", "risk_raw"]].rename(columns={"v": "node"})
node_edge = pd.concat([node_edge_u, node_edge_v], ignore_index=True)

node_risk = (
    node_edge.groupby("node")["risk_raw"]
    .mean()
    .rename("node_risk_mean")
    .reset_index()
)

edges = edges.merge(
    node_risk.rename(columns={"node": "u", "node_risk_mean": "u_risk_mean"}),
    on="u", how="left"
)
edges = edges.merge(
    node_risk.rename(columns={"node": "v", "node_risk_mean": "v_risk_mean"}),
    on="v", how="left"
)

edges["u_risk_mean"] = edges["u_risk_mean"].fillna(edges["risk_raw"])
edges["v_risk_mean"] = edges["v_risk_mean"].fillna(edges["risk_raw"])

edges["risk_smooth"] = (edges["risk_raw"] + edges["u_risk_mean"] + edges["v_risk_mean"]) / 3.0

print("Risk smoothing complete. Example rows:")
display(edges[["edge_id", "risk_raw", "risk_smooth"]].head(5))


# Build graph
G = nx.Graph()

for _, row in edges.iterrows():
    u, v = int(row["u"]), int(row["v"])
    dist = float(row["length_m"])
    safe = float(row["safety_score"])
    risk = float(row["risk_smooth"])
    road = float(row["road_penalty"])

    G.add_edge(
        u, v,
        length_m=dist,
        safety=safe,
        risk=risk,
        road_penalty=road,
        geometry=row["geometry"]
    )

print("Graph built:", G.number_of_nodes(), "nodes,", G.number_of_edges(), "edges")


# Pick start node near USC (use projected CRS to avoid centroid warning)
USC_LAT, USC_LON = 34.0224, -118.2851

edges_m = edges.to_crs(epsg=3857)  # meters
edges_m["centroid"] = edges_m.geometry.centroid

usc_point = gpd.GeoSeries(
    [gpd.points_from_xy([USC_LON], [USC_LAT])[0]],
    crs="EPSG:4326"
).to_crs(epsg=3857).iloc[0]

edges_m["dist_to_usc"] = edges_m["centroid"].distance(usc_point)

start_node = int(edges.loc[edges_m["dist_to_usc"].idxmin(), "u"])
print("Start node:", start_node)


# Loop generator
def generate_safe_loop(G, start_node, *, target_miles=None, alpha=0.5):
    """
    alpha controls safety vs. efficiency:
      - lower alpha -> more direct (distance matters more)
      - higher alpha -> safer (smoothed risk matters more)
    """
    target_km = target_miles * 1.60934
    target_m = target_km * 1000
    half_m = target_m / 2

    for u, v, d in G.edges(data=True):
        dist = d["length_m"] / 10.0
        risk = d["risk"]
        road = d["road_penalty"] * ROAD_PENALTY_WEIGHT
        d["weight"] = alpha * risk + (1 - alpha) * dist + road

    dist_from_start = nx.single_source_dijkstra_path_length(G, start_node, weight="length_m")
    cand = [n for n, d in dist_from_start.items() if 0.7 * half_m <= d <= 1.3 * half_m]
    if not cand:
        raise ValueError("No endpoints found. Try another distance.")

    random.shuffle(cand)
    best = None

    for n in cand:
        try:
            path = nx.shortest_path(G, start_node, n, weight="weight")
        except Exception:
            continue

        nodes = path + list(reversed(path[1:-1]))
        dist = sum(G[u][v]["length_m"] for u, v in zip(nodes[:-1], nodes[1:]))
        safety = np.mean([G[u][v]["safety"] for u, v in zip(nodes[:-1], nodes[1:])])

        score = abs(dist - target_m) / target_m - safety / 100.0

        if best is None or score < best["score"]:
            best = {"nodes": nodes, "dist_m": dist, "safety": safety, "score": score}

    best["dist_km"] = best["dist_m"] / 1000
    best["dist_miles"] = best["dist_m"] / 1609.34
    return best


# Generate routes
print(f"\nTarget distance: {TARGET_DISTANCE} miles")

rows = []

for i, alpha in enumerate(ALPHAS, start=1):
    print(f"\nRoute {i} (alpha={alpha:.2f})")
    loop = generate_safe_loop(G, start_node, target_miles=TARGET_DISTANCE, alpha=alpha)

    name = f"SafeLoop_{TARGET_DISTANCE}mi_opt{i}"

    coords = []
    for u, v in zip(loop["nodes"][:-1], loop["nodes"][1:]):
        geom = G[u][v]["geometry"]
        coords += list(geom.coords) if not coords else list(geom.coords)[1:]
    line = LineString(coords)

    rows.append({
        "name": name,
        "distance_miles": loop["dist_miles"],
        "distance_km": loop["dist_km"],
        "avg_safety": loop["safety"],
        "alpha": float(alpha),
        "geometry": line
    })


# Save routes
loops_gdf = gpd.GeoDataFrame(rows, crs="EPSG:4326")

outfile = f"safeloops_{TARGET_DISTANCE}mi.geojson"
savepath = os.path.join(OUTPUTS_DIR, outfile)

loops_gdf.to_file(savepath, driver="GeoJSON")
print("\nSaved routes to:", savepath)

display(loops_gdf[["name", "distance_miles", "distance_km", "avg_safety", "alpha"]])

loops_gdf[["name", "distance_miles", "distance_km", "avg_safety", "alpha"]]

Project root: /Workspace/Repos/mikenamo@usc.edu/fall2025/personal_folder/mikenamo/Final/SafeLoop
Data dir:     /Workspace/Repos/mikenamo@usc.edu/fall2025/personal_folder/mikenamo/Final/SafeLoop/data
Outputs dir:  /Workspace/Repos/mikenamo@usc.edu/fall2025/personal_folder/mikenamo/Final/SafeLoop/outputs
Scored edges loaded: 27778
Filtered edges by DPS zone: 27778 -> 14692
Risk smoothing complete. Example rows:


edge_id,risk_raw,risk_smooth
0,83.77766287487074,84.34126163391934
3,22.297828335056863,21.415977249224397
4,19.04033092037227,20.41778697001033
5,17.59772492244052,20.00482592209581
6,32.96690796277146,33.21949327817994


Graph built: 4803 nodes, 7288 edges
Start node: 4536989618

Target distance: 3.0 miles

Route 1 (alpha=0.30)

Route 2 (alpha=0.60)

Route 3 (alpha=0.90)

Saved routes to: /Workspace/Repos/mikenamo@usc.edu/fall2025/personal_folder/mikenamo/Final/SafeLoop/outputs/safeloops_3.0mi.geojson


name,distance_miles,distance_km,avg_safety,alpha
SafeLoop_3.0mi_opt1,2.9883730052502515,4.80930821226944,98.66685468960546,0.3
SafeLoop_3.0mi_opt2,3.0037878928330595,4.834116007451955,98.62273375832864,0.6000000000000001
SafeLoop_3.0mi_opt3,2.978732577310113,4.793793485968258,96.48860276446683,0.9


Unnamed: 0,name,distance_miles,distance_km,avg_safety,alpha
0,SafeLoop_3.0mi_opt1,2.988373,4.809308,98.666855,0.3
1,SafeLoop_3.0mi_opt2,3.003788,4.834116,98.622734,0.6
2,SafeLoop_3.0mi_opt3,2.978733,4.793793,96.488603,0.9
