score = 100
        - w1*road_class_risk
        - w2*speed_risk
        + w3*lighting_bonus
        + w4*sidewalk_bonus
        + w5*signals_crossings_bonus
        

In [15]:
import json
import pandas as pd

# Load the raw OSM export: a JSON document with a top-level "elements" list
# (presumably an Overpass API export -- confirm against the download step).
with open("data/raw/osm_bengaluru.json", "r", encoding="utf-8") as f:
    osm_data = json.load(f)

elements = osm_data["elements"]

# Index node coordinates by id so each way can resolve its node references.
nodes = {el["id"]: (el["lat"], el["lon"]) for el in elements if el["type"] == "node"}

# Build one record per way that carries a "highway" tag.
roads = []
for el in elements:
    if el["type"] != "way":
        continue

    # OSM JSON exports can omit the "tags" key entirely for untagged elements,
    # so a missing key is treated as "no tags" instead of raising KeyError.
    tags = el.get("tags", {})
    if "highway" not in tags:
        continue

    # A way without node references resolves to no coordinates and is
    # skipped by the check below.
    node_ids = el.get("nodes", [])

    # Representative point: the unweighted mean of the way's known node
    # coordinates. This is NOT the midpoint along the road geometry.
    coords = [nodes[nid] for nid in node_ids if nid in nodes]
    if not coords:
        continue
    avg_lat = sum(lat for lat, _ in coords) / len(coords)
    avg_lon = sum(lon for _, lon in coords) / len(coords)

    roads.append({
        "id": el["id"],
        "highway": tags.get("highway", "unknown"),
        "maxspeed": tags.get("maxspeed"),
        "lit": tags.get("lit"),
        "sidewalk": tags.get("sidewalk"),
        # Counts every referenced node, including ones missing from `nodes`.
        "n_nodes": len(node_ids),
        "safety_score": 50,  # placeholder until we compute properly
        "lat": avg_lat,
        "lon": avg_lon,
    })

df = pd.DataFrame(roads)

# Save updated processed CSV
df.to_csv("data/processed/bengaluru_segments.csv", index=False)
print("✅ Saved with coordinates:", df.shape)


✅ Saved with coordinates: (15406, 9)


In [None]:
def compute_safety(row):
    """Return a heuristic safety score in the range 0-100 for one road segment.

    The score starts at 50 and is adjusted by four independent factors:
    speed limit, street lighting, sidewalk presence and node count (used
    as a rough proxy for segment length/complexity).

    Args:
        row: Mapping-like object (e.g. a DataFrame row) providing the keys
            "maxspeed", "lit", "sidewalk" and "n_nodes".

    Returns:
        The adjusted score, clipped to the inclusive range [0, 100].
    """
    score = 50  # base score

    # Speed limit factor. Thresholds are in km/h; a value with an explicit
    # "mph" unit is converted first so "50 mph" is not scored as 50 km/h.
    maxspeed = row["maxspeed"]
    if maxspeed:
        tokens = str(maxspeed).split()
        try:
            speed = int(tokens[0])
        except (ValueError, IndexError):
            # Non-numeric or blank values (e.g. "signals", NaN, "   ")
            # carry no usable limit, so the speed factor is skipped.
            speed = None
        else:
            if len(tokens) > 1 and tokens[1].lower() == "mph":
                speed *= 1.609344  # miles per hour -> kilometres per hour

        if speed is not None:
            if speed <= 30:
                score += 20
            elif speed <= 50:
                score += 10
            else:
                score -= 10

    # Lighting factor
    lit = str(row["lit"]).lower()
    if lit == "yes":
        score += 15
    elif lit == "no":
        score -= 10

    # Sidewalk factor
    sidewalk = str(row["sidewalk"]).lower()
    if sidewalk in ("both", "left", "right"):
        score += 10
    elif sidewalk == "no":
        score -= 5

    # Road length complexity (proxy using n_nodes)
    n_nodes = row["n_nodes"]
    if n_nodes <= 5:
        score += 5
    elif n_nodes >= 50:
        score -= 10

    # Clip score between 0 and 100
    return max(0, min(100, score))

# Score every segment row by row and store the result in the
# "safety_score" column (overwriting the placeholder value).
segment_scores = df.apply(compute_safety, axis=1)
df["safety_score"] = segment_scores

# Rewrite the processed CSV so it carries the computed scores.
df.to_csv(path_or_buf="data/processed/bengaluru_segments.csv", index=False)
print("✅ Saved with computed safety scores:", df.shape)


Unnamed: 0,id,highway,maxspeed,lit,sidewalk,safety_score
0,4354938,service,,,,100.0
1,4354952,footway,,no,,100.0
2,8294886,tertiary,,,,85.0
3,8680571,secondary,,,separate,85.0
4,8681388,primary,,yes,,80.0


In [17]:
# Write the current frame to the processed-data location (no index column)
# and report where it went.
output_path = "data/processed/bengaluru_segments.csv"
df.to_csv(path_or_buf=output_path, index=False)
print("✅ Saved processed CSV:", output_path)


✅ Saved processed CSV: data/processed/bengaluru_segments.csv
