In [0]:
%pip install geopandas shapely pyogrio

In [0]:
# SAFELOOP — 01_build_safety_scores
# Build time-aware safety scores for each street segment

import os
import pandas as pd
import geopandas as gpd


def build_safety_scores():

    # User settings — time of day / season
    RUN_MONTH = 11   # 1–12 (e.g., 11 = November)
    RUN_HOUR  = 18   # 0–23 (e.g., 18 = 6pm)
    BUFFER_M  = 50   # meters

    print(f"Scoring edges for month={RUN_MONTH}, hour={RUN_HOUR} (buffer={BUFFER_M}m)")

    def is_dark(month: int, hour: int) -> bool:
        winter_months = {11, 12, 1, 2}
        if month in winter_months:
            day_start, day_end = 7, 17
        else:
            day_start, day_end = 6, 20
        return hour < day_start or hour >= day_end

    def get_time_weights(month: int, hour: int):
        if is_dark(month, hour):
            return 0.7, 0.3
        else:
            return 0.5, 0.5

    CRIME_W, COLL_W = get_time_weights(RUN_MONTH, RUN_HOUR)
    print(f"Using weights → crime={CRIME_W:.2f}, collision={COLL_W:.2f}")

    # Paths
    NOTEBOOK_DIR = os.getcwd()
    PROJECT_ROOT = NOTEBOOK_DIR.split("/notebooks")[0]
    DATA_DIR     = os.path.join(PROJECT_ROOT, "data")
    OUTPUTS_DIR  = os.path.join(PROJECT_ROOT, "outputs")
    os.makedirs(OUTPUTS_DIR, exist_ok=True)

    print("Project root:", PROJECT_ROOT)
    print("Data dir:    ", DATA_DIR)
    print("Outputs dir: ", OUTPUTS_DIR)

    edges_path = os.path.join(DATA_DIR, "usc_street_edges.geojson")
    crime_path = os.path.join(DATA_DIR, "usc_crime_points.csv")
    coll_path  = os.path.join(DATA_DIR, "usc_collision_points.csv")

    # Load data
    edges = gpd.read_file(edges_path).to_crs("EPSG:4326")
    crime_df = pd.read_csv(crime_path)
    coll_df  = pd.read_csv(coll_path)

    print("Inputs loaded:")
    print("  Edges:     ", len(edges))
    print("  Crimes:    ", len(crime_df))
    print("  Collisions:", len(coll_df))

    crime_gdf = gpd.GeoDataFrame(
        crime_df,
        geometry=gpd.points_from_xy(crime_df["lon"], crime_df["lat"]),
        crs="EPSG:4326"
    )

    coll_gdf = gpd.GeoDataFrame(
        coll_df,
        geometry=gpd.points_from_xy(coll_df["lon"], coll_df["lat"]),
        crs="EPSG:4326"
    )

    # Project to meters
    edges_3857 = edges.to_crs(3857)
    crime_3857 = crime_gdf.to_crs(3857)
    coll_3857  = coll_gdf.to_crs(3857)

    edges_3857 = (
        edges_3857
        .reset_index(drop=True)
        .reset_index()
        .rename(columns={"index": "edge_id"})
    )

    edges_3857["length_m"] = edges_3857.geometry.length

    # Count crimes and collisions near each edge
    edges_buf = edges_3857[["edge_id", "geometry"]].copy()
    edges_buf["geometry"] = edges_buf.geometry.buffer(BUFFER_M)

    crime_join = gpd.sjoin(crime_3857, edges_buf, how="inner", predicate="within")
    coll_join  = gpd.sjoin(coll_3857, edges_buf, how="inner", predicate="within")

    crime_counts = crime_join.groupby("edge_id").size().rename("crime_count")
    coll_counts  = coll_join.groupby("edge_id").size().rename("collision_count")

    edges_scored = (
        edges_3857
        .merge(crime_counts, on="edge_id", how="left")
        .merge(coll_counts,  on="edge_id", how="left")
    )

    edges_scored["crime_count"]     = edges_scored["crime_count"].fillna(0)
    edges_scored["collision_count"] = edges_scored["collision_count"].fillna(0)

    # Normalize per km
    edges_scored["km"] = edges_scored["length_m"].clip(lower=1) / 1000.0

    edges_scored["crime_per_km"]     = edges_scored["crime_count"] / edges_scored["km"]
    edges_scored["collision_per_km"] = edges_scored["collision_count"] / edges_scored["km"]

    edges_scored["crime_rate_raw"]     = edges_scored["crime_per_km"]
    edges_scored["collision_rate_raw"] = edges_scored["collision_per_km"]

    # Rank-based safety components (1 = safest)
    edges_scored["crime_rank"]     = 1 - edges_scored["crime_per_km"].rank(pct=True)
    edges_scored["collision_rank"] = 1 - edges_scored["collision_per_km"].rank(pct=True)

    # Time-aware composite safety score (0–100)
    total_w = CRIME_W + COLL_W

    edges_scored["safety_score"] = 100 * (
        (CRIME_W * edges_scored["crime_rank"] +
         COLL_W  * edges_scored["collision_rank"]) / total_w
    )

    edges_scored["safety_score"] = edges_scored["safety_score"].clip(0, 100)

    # Rescale to full 0–100 range for interpretability
    smin = edges_scored["safety_score"].min()
    smax = edges_scored["safety_score"].max()
    if smax > smin:
        edges_scored["safety_score"] = 100 * (
            (edges_scored["safety_score"] - smin) / (smax - smin)
        )

    # Save scoring context
    edges_scored["run_month"] = RUN_MONTH
    edges_scored["run_hour"]  = RUN_HOUR
    edges_scored["crime_w"]   = CRIME_W
    edges_scored["coll_w"]    = COLL_W
    edges_scored["buffer_m"]  = BUFFER_M
%pip install geopandas shapely pyogrio

# SAFELOOP — 01_build_safety_scores
# Build time-aware safety scores for each street segment

import os
import pandas as pd
import geopandas as gpd


def build_safety_scores():

    # User settings — time of day / season
    RUN_MONTH = 11   # 1–12 (e.g., 11 = November)
    RUN_HOUR  = 18   # 0–23 (e.g., 18 = 6pm)
    BUFFER_M  = 50   # meters

    print(f"Scoring edges for month={RUN_MONTH}, hour={RUN_HOUR} (buffer={BUFFER_M}m)")

    def is_dark(month: int, hour: int) -> bool:
        winter_months = {11, 12, 1, 2}
        if month in winter_months:
            day_start, day_end = 7, 17
        else:
            day_start, day_end = 6, 20
        return hour < day_start or hour >= day_end

    def get_time_weights(month: int, hour: int):
        if is_dark(month, hour):
            return 0.7, 0.3
        else:
            return 0.5, 0.5

    CRIME_W, COLL_W = get_time_weights(RUN_MONTH, RUN_HOUR)
    print(f"Using weights → crime={CRIME_W:.2f}, collision={COLL_W:.2f}")

    # Paths
    NOTEBOOK_DIR = os.getcwd()
    PROJECT_ROOT = NOTEBOOK_DIR.split("/notebooks")[0]
    DATA_DIR     = os.path.join(PROJECT_ROOT, "data")
    OUTPUTS_DIR  = os.path.join(PROJECT_ROOT, "outputs")
    os.makedirs(OUTPUTS_DIR, exist_ok=True)

    print("Project root:", PROJECT_ROOT)
    print("Data dir:    ", DATA_DIR)
    print("Outputs dir: ", OUTPUTS_DIR)

    edges_path = os.path.join(DATA_DIR, "usc_street_edges.geojson")
    crime_path = os.path.join(DATA_DIR, "usc_crime_points.csv")
    coll_path  = os.path.join(DATA_DIR, "usc_collision_points.csv")

    # Load data
    edges = gpd.read_file(edges_path).to_crs("EPSG:4326")
    crime_df = pd.read_csv(crime_path)
    coll_df  = pd.read_csv(coll_path)

    print("Inputs loaded:")
    print("  Edges:     ", len(edges))
    print("  Crimes:    ", len(crime_df))
    print("  Collisions:", len(coll_df))

    crime_gdf = gpd.GeoDataFrame(
        crime_df,
        geometry=gpd.points_from_xy(crime_df["lon"], crime_df["lat"]),
        crs="EPSG:4326"
    )

    coll_gdf = gpd.GeoDataFrame(
        coll_df,
        geometry=gpd.points_from_xy(coll_df["lon"], coll_df["lat"]),
        crs="EPSG:4326"
    )

    # Project to meters
    edges_3857 = edges.to_crs(3857)
    crime_3857 = crime_gdf.to_crs(3857)
    coll_3857  = coll_gdf.to_crs(3857)

    edges_3857 = (
        edges_3857
        .reset_index(drop=True)
        .reset_index()
        .rename(columns={"index": "edge_id"})
    )

    edges_3857["length_m"] = edges_3857.geometry.length

    # Count crimes and collisions near each edge
    edges_buf = edges_3857[["edge_id", "geometry"]].copy()
    edges_buf["geometry"] = edges_buf.geometry.buffer(BUFFER_M)

    crime_join = gpd.sjoin(crime_3857, edges_buf, how="inner", predicate="within")
    coll_join  = gpd.sjoin(coll_3857, edges_buf, how="inner", predicate="within")

    crime_counts = crime_join.groupby("edge_id").size().rename("crime_count")
    coll_counts  = coll_join.groupby("edge_id").size().rename("collision_count")

    edges_scored = (
        edges_3857
        .merge(crime_counts, on="edge_id", how="left")
        .merge(coll_counts,  on="edge_id", how="left")
    )

    edges_scored["crime_count"]     = edges_scored["crime_count"].fillna(0)
    edges_scored["collision_count"] = edges_scored["collision_count"].fillna(0)

    # Normalize per km
    edges_scored["km"] = edges_scored["length_m"].clip(lower=1) / 1000.0

    edges_scored["crime_per_km"]     = edges_scored["crime_count"] / edges_scored["km"]
    edges_scored["collision_per_km"] = edges_scored["collision_count"] / edges_scored["km"]

    edges_scored["crime_rate_raw"]     = edges_scored["crime_per_km"]
    edges_scored["collision_rate_raw"] = edges_scored["collision_per_km"]

    # Rank-based safety components (1 = safest)
    edges_scored["crime_rank"]     = 1 - edges_scored["crime_per_km"].rank(pct=True)
    edges_scored["collision_rank"] = 1 - edges_scored["collision_per_km"].rank(pct=True)

    # Time-aware composite safety score (0–100)
    total_w = CRIME_W + COLL_W

    edges_scored["safety_score"] = 100 * (
        (CRIME_W * edges_scored["crime_rank"] +
         COLL_W  * edges_scored["collision_rank"]) / total_w
    )

    edges_scored["safety_score"] = edges_scored["safety_score"].clip(0, 100)

    # Rescale to full 0–100 range for interpretability
    smin = edges_scored["safety_score"].min()
    smax = edges_scored["safety_score"].max()
    if smax > smin:
        edges_scored["safety_score"] = 100 * (
            (edges_scored["safety_score"] - smin) / (smax - smin)
        )

    # Save scoring context
    edges_scored["run_month"] = RUN_MONTH
    edges_scored["run_hour"]  = RUN_HOUR
    edges_scored["crime_w"]   = CRIME_W
    edges_scored["coll_w"]    = COLL_W
    edges_scored["buffer_m"]  = BUFFER_M

    # Quick QA
    print("Sample scored edges:")
    display(
        edges_scored[[
            "edge_id", "length_m",
            "crime_count", "collision_count",
            "crime_per_km", "collision_per_km",
            "crime_rank", "collision_rank",
            "safety_score"
        ]].head(10)
    )

    print("Safety score summary:")
    display(edges_scored["safety_score"].describe())

    # Save outputs
    out_geo = os.path.join(OUTPUTS_DIR, "usc_edges_scored.geojson")
    out_csv = os.path.join(OUTPUTS_DIR, "usc_edges_scored.csv")

    edges_scored.to_file(out_geo, driver="GeoJSON")
    edges_scored.drop(columns="geometry").to_csv(out_csv, index=False)

    print("\n\nSaved scored edges:")
    print("  ", out_geo)
    print("  ", out_csv)

    return out_geo, out_csv


# Run
out_geo, out_csv = build_safety_scores()

[43mNote: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.[0m
Scoring edges for month=11, hour=18 (buffer=50m)
Using weights → crime=0.70, collision=0.30
Project root: /Workspace/Repos/mikenamo@usc.edu/fall2025/personal_folder/mikenamo/Final/SafeLoop
Data dir:     /Workspace/Repos/mikenamo@usc.edu/fall2025/personal_folder/mikenamo/Final/SafeLoop/data
Outputs dir:  /Workspace/Repos/mikenamo@usc.edu/fall2025/personal_folder/mikenamo/Final/SafeLoop/outputs
Inputs loaded:
  Edges:      27778
  Crimes:     1260
  Collisions: 1966
Sample scored edges:


edge_id,length_m,crime_count,collision_count,crime_per_km,collision_per_km,crime_rank,collision_rank,safety_score
0,16.908080494017266,3.0,5.0,177.4299572953604,295.71659549226734,0.1011771905824753,0.1641046871625027,17.22233712512926
1,12.090550319074447,3.0,5.0,248.12766340892705,413.546105681545,0.0787853697170423,0.1205450356397148,13.094105480868668
2,21.930617981333832,3.0,5.0,136.79505076206422,227.99175127010705,0.1150730794153647,0.2032723738210093,20.307135470527403
3,17.13904976425,0.0,2.0,0.0,116.69258374940716,0.6541327669378645,0.300687594499244,78.70217166494314
4,27.40903223740848,0.0,2.0,0.0,72.96864707504535,0.6541327669378645,0.3762869897040823,81.95966907962773
5,35.36688482342995,0.0,2.0,0.0,56.550075303070926,0.6541327669378645,0.4097667218662251,83.40227507755948
6,16.711668683508318,0.0,14.0,0.0,837.7380060086823,0.6541327669378645,0.0530815753473972,68.03309203722854
7,14.2925482232694,0.0,14.0,0.0,979.5314160428642,0.6541327669378645,0.0427856577147383,67.58945191313339
8,14.735853007140475,0.0,14.0,0.0,950.0637657837718,0.6541327669378645,0.044657642738858,67.67011375387797
9,15.885291334241629,0.0,14.0,0.0,881.318428817369,0.6541327669378645,0.0502735978112175,67.91209927611169


Safety score summary:


count    27778.000000
mean        71.791107
std         29.474762
min          0.000000
25%         48.575879
50%         79.465357
75%        100.000000
max        100.000000
Name: safety_score, dtype: float64



Saved scored edges:
   /Workspace/Repos/mikenamo@usc.edu/fall2025/personal_folder/mikenamo/Final/SafeLoop/outputs/usc_edges_scored.geojson
   /Workspace/Repos/mikenamo@usc.edu/fall2025/personal_folder/mikenamo/Final/SafeLoop/outputs/usc_edges_scored.csv
