In [2]:
import pandas as pd
import geopandas as gpd
import requests
import json
import os

In [68]:
def fetch_json(url, file_name):
    """Download a JSON/GeoJSON document and save it under ``../data/raw/``.

    Parameters
    ----------
    url : str
        Endpoint to request; the response body must be JSON.
    file_name : str
        Name of the file to create inside ``../data/raw/``.

    Returns
    -------
    None
        Progress and failures are reported with ``print``. No exception is
        propagated, so one failed download does not stop the calls after it.
    """
    raw_dir = '../data/raw'
    try:
        res = requests.get(url, timeout=5)
        # An HTTP error (404, 429, 500, ...) frequently still carries a JSON body;
        # without this check that error payload would be saved and reported as a success.
        res.raise_for_status()
        data = res.json()

        # Create the target folder on first use instead of failing inside open().
        os.makedirs(raw_dir, exist_ok=True)
        # Explicit UTF-8 so the written file does not depend on the platform's locale encoding.
        with open(f'{raw_dir}/{file_name}', 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=4)

        print(f'Successfully saved JSON data: {file_name}')
    except Exception as e:
        # Deliberately best-effort: report the problem and let the caller continue.
        print(f"An unexpected error occurred: {e}")


In [69]:
# Every (url, file name) pair below is handed to fetch_json, in this order.
# NOTE(review): the tract and population downloads are saved with a `_new` suffix, while the
# density cell further down reads `sf_tracts.geojson` / `sf_population.json` — confirm which
# set of files is the intended input.
raw_downloads = [
    ('https://api.censusreporter.org/1.0/geo/show/latest?geo_ids=140|05000US06075&format=geojson', 'sf_tracts_new.geojson'),
    ('https://api.censusreporter.org/1.0/data/show/latest?table_ids=B01003&geo_ids=140|05000US06075', 'sf_population_new.json'),
    ('https://data.sfgov.org/resource/xgse-mjer.geojson', 'sf_water.geojson'),
    ('https://data.sfgov.org/resource/wamw-vt4s.geojson', 'bay_area_county_polygons.geojson'),
    ('https://data.sfgov.org/resource/9exe-acju.geojson', 'sfmta_routes.geojson'),
    ('https://api.censusreporter.org/1.0/data/show/latest?table_ids=B08201&geo_ids=140|05000US06075', 'vehicle_data.json'),
]

for source_url, target_name in raw_downloads:
    fetch_json(source_url, target_name)

Successfully saved JSON data: sf_tracts_new.geojson
Successfully saved JSON data: sf_population_new.json
Successfully saved JSON data: sf_water.geojson
Successfully saved JSON data: bay_area_county_polygons.geojson
Successfully saved JSON data: sfmta_routes.geojson
Successfully saved JSON data: vehicle_data.json


In [None]:
# Build a per-tract GeoJSON with population, area (sq miles) and population density.
#
# Inputs : a population JSON with a top-level 'data' mapping keyed by geoid, and a tract GeoJSON
#          that carries 'geoid' and 'name' columns.
# Output : a GeoJSON in EPSG:4326 holding geometry, geoid, name, population, area_sq_miles and
#          population_density_sq_mi.

# --- 1. Define File Names ---
# NOTE(review): the download cell saves `sf_population_new.json` / `sf_tracts_new.geojson`;
# the paths here have no `_new` suffix — confirm these are the intended inputs.
population_json_file = "../data/raw/sf_population.json"
tract_geojson_file = "../data/raw/sf_tracts.geojson"
output_geojson_file = "../data/sf_tracts_with_density.geojson"

print("Starting processing...")

try:
    # --- 2. Load Population Data (JSON) ---
    # Explicit UTF-8: JSON text is UTF-8, but open() would otherwise use the locale encoding.
    with open(population_json_file, 'r', encoding='utf-8') as f:
        pop_data = json.load(f)
    print("Loaded population JSON.")

    # --- 3. Parse the Population JSON ---
    POPULATION_KEY = 'B01003001'
    population_list = []

    for geoid, tract_data in pop_data['data'].items():
        try:
            population = tract_data['B01003']['estimate'][POPULATION_KEY]
            population_list.append({
                'geoid': geoid,
                'population': population
            })
        except KeyError:
            # A tract without the expected table/key is reported and left out.
            print(f"Warning: Could not find population data for geoid {geoid}")

    pop_df = pd.DataFrame(population_list)
    print("Parsed population data into a DataFrame.")

    # --- 4. Load Geographic Data (GeoJSON) ---
    gdf_tracts = gpd.read_file(tract_geojson_file)
    print("Loaded census tract GeoJSON.")

    # --- 5. Merge Population and Geographic Data ---
    # The default inner join discards tracts that have no population row; say so
    # instead of letting them disappear from the map unnoticed.
    merged_gdf = gdf_tracts.merge(pop_df, on='geoid')
    dropped_tracts = len(gdf_tracts) - len(merged_gdf)
    if dropped_tracts > 0:
        print(f"Warning: {dropped_tracts} tract(s) had no population record and were dropped by the merge.")
    print("Successfully merged population data with GeoJSON.")

    # --- 6. Project, Calculate Area, and Calculate Density (IN SQ MILES) ---
    projected_gdf = merged_gdf.to_crs(epsg=3310) # Project to meters
    projected_gdf['area_sq_meters'] = projected_gdf.geometry.area

    # 1 square kilometer = 0.386102 square miles
    projected_gdf['area_sq_km'] = projected_gdf['area_sq_meters'] / 1_000_000
    projected_gdf['area_sq_miles'] = projected_gdf['area_sq_km'] * 0.386102

    # Vectorized density: people per square mile, forced to 0 wherever the area is
    # not strictly positive (same rule as a per-row check, without the Python loop).
    area_sq_miles = projected_gdf['area_sq_miles']
    projected_gdf['population_density_sq_mi'] = (
        projected_gdf['population'] / area_sq_miles
    ).where(area_sq_miles > 0, 0)
    print("Calculated area (sq miles) and population density (per sq mile).")

    # --- 7. Project back to WGS84 (EPSG:4326) for D3 ---
    final_gdf = projected_gdf.to_crs(epsg=4326)
    print("Projected final data back to WGS84.")

    # --- 8. Save the Final File ---
    # Only these columns are written; the intermediate area columns are dropped.
    columns_to_keep = [
        'geometry',
        'geoid',
        'name',
        'population',
        'area_sq_miles',
        'population_density_sq_mi'
    ]

    final_gdf_clean = final_gdf[columns_to_keep]
    final_gdf_clean.to_file(output_geojson_file, driver='GeoJSON')

    print("\n--- SUCCESS ---")
    print(f"Successfully created final file: {output_geojson_file}")

    print("\nPreview of final data:")
    print(final_gdf_clean.drop(columns='geometry').head())

except FileNotFoundError as e:
    print(f"ERROR: File not found. Make sure this file is in the same folder: {e.filename}")
except Exception as e:
    print(f"An error occurred: {e}")

Starting processing...
Loaded population JSON.
Parsed population data into a DataFrame.
Loaded census tract GeoJSON.
Successfully merged population data with GeoJSON.
Calculated area (sq miles) and population density (per sq mile).
Projected final data back to WGS84.

--- SUCCESS ---
Successfully created final file: ../data/sf_tracts_with_density.geojson

Preview of final data:
                geoid                                    name  population  \
0  14000US06075010101  Census Tract 101.01, San Francisco, CA      2004.0   
1  14000US06075010102  Census Tract 101.02, San Francisco, CA      1795.0   
2  14000US06075010201  Census Tract 102.01, San Francisco, CA      2608.0   
3  14000US06075010202  Census Tract 102.02, San Francisco, CA      1761.0   
4  14000US06075010300     Census Tract 103, San Francisco, CA      3791.0   

   area_sq_miles  population_density_sq_mi  
0       0.518960               3861.572540  
1       0.030726              58420.042081  
2       0.072147     

In [None]:
# Pull the single 'San Francisco' county feature out of the Bay Area county file and
# write it to its own GeoJSON FeatureCollection.

# --- Configuration ---
bay_area_file_name = "../data/raw/bay_area_county_polygons.geojson"
output_file_name = "../data/sf_county_boundary.geojson"
# --- End Configuration ---

print(f"Loading Bay Area file: {bay_area_file_name}...")

try:
    # Read the full multi-county file.
    with open(bay_area_file_name, 'r') as f:
        bay_area_data = json.load(f)

    # First feature whose 'county' property is exactly 'San Francisco', or None.
    sf_feature = next(
        (
            candidate
            for candidate in bay_area_data['features']
            if candidate.get('properties', {}).get('county') == 'San Francisco'
        ),
        None,
    )

    if not sf_feature:
        print("ERROR: Could not find a feature with 'county': 'San Francisco'")
    else:
        print("Found 'San Francisco' feature!")

        # One-feature collection; the source file's 'crs' member is carried over as-is.
        output_geojson = {
            "type": "FeatureCollection",
            "crs": bay_area_data.get('crs'),
            "features": [sf_feature],
        }

        with open(output_file_name, 'w') as f:
            json.dump(output_geojson, f)

        print(f"\n--- SUCCESS ---")
        print(f"Successfully created '{output_file_name}'")
        print("This file is now ready to be used as your map's clipping mask.")

except FileNotFoundError:
    print(f"ERROR: Could not find the file '{bay_area_file_name}'.")
    print("Please make sure it's in the same directory as this script.")
except Exception as e:
    print(f"An error occurred: {e}")

Loading Bay Area file: ../data/bay_area_county_polygons.geojson...
Found 'San Francisco' feature!

--- SUCCESS ---
Successfully created '../data/sf_county_boundary.geojson'
This file is now ready to be used as your map's clipping mask.


In [66]:
# Copy the features whose 'route_name' is 38 or 38R out of the SFMTA routes file
# into a smaller GeoJSON FeatureCollection.

# --- Configuration ---
routes_file = "../data/raw/sfmta_routes.geojson"
output_file = "geary_route.geojson"
output_folder = "../data" # Assumes you want to save it in your data folder
# Local and rapid variants of the route
routes_to_extract = ['38', '38R'] 
# --- End Configuration ---

print(f"Loading {routes_file}...")

try:
    with open(routes_file, 'r') as f:
        routes_data = json.load(f)

    print("File loaded. Searching for Geary routes...")

    # Stays empty unless the file has a list-valued 'features' member.
    geary_features = []
    if 'features' in routes_data and isinstance(routes_data['features'], list):
        geary_features = [
            candidate
            for candidate in routes_data['features']
            if 'properties' in candidate
            and candidate['properties'].get('route_name') in routes_to_extract
        ]

    if not geary_features:
        print(f"ERROR: Could not find any features for routes: {routes_to_extract}")
    else:
        print(f"Found {len(geary_features)} features for routes: {routes_to_extract}")

        # New collection holding only the matches; 'crs' is copied from the source file.
        output_geojson = {
            "type": "FeatureCollection",
            "crs": routes_data.get('crs'),
            "features": geary_features,
        }

        # Create the output folder when it is missing.
        if not os.path.exists(output_folder):
            os.makedirs(output_folder)
            print(f"Created '{output_folder}' directory.")

        output_path = os.path.join(output_folder, output_file)
        with open(output_path, 'w') as f:
            json.dump(output_geojson, f)

        print(f"\n--- SUCCESS ---")
        print(f"Successfully created '{output_path}'")
        print("This file contains the geometry for the 38 and 38R routes.")

except FileNotFoundError:
    print(f"ERROR: Could not find the file '{routes_file}'.")
    print("Please make sure it's in the same directory as this script.")
except Exception as e:
    print(f"An error occurred: {e}")

Loading ../data/raw/sfmta_routes.geojson...
File loaded. Searching for Geary routes...
Found 4 features for routes: ['38', '38R']

--- SUCCESS ---
Successfully created '../data\geary_route.geojson'
This file contains the geometry for the 38 and 38R routes.


In [None]:
# Copy the features whose 'route_name' is one of the listed letters out of the SFMTA
# routes file into their own GeoJSON FeatureCollection.

# --- Configuration ---
routes_file = "../data/raw/sfmta_routes.geojson"
output_file = "sfmta_rail_lines.geojson"
output_folder = "../data"
# Route names treated as light rail lines
routes_to_extract = ['J', 'K', 'L', 'M', 'N', 'T']
# --- End Configuration ---

print(f"Loading {routes_file}...")

try:
    with open(routes_file, 'r') as f:
        routes_data = json.load(f)

    print(f"File loaded. Searching for Light Rail routes: {routes_to_extract}")

    # Stays empty unless the file has a list-valued 'features' member.
    rail_features = []
    if 'features' in routes_data and isinstance(routes_data['features'], list):
        rail_features = [
            candidate
            for candidate in routes_data['features']
            if 'properties' in candidate
            and candidate['properties'].get('route_name') in routes_to_extract
        ]

    if not rail_features:
        print(f"ERROR: Could not find any rail line features.")
    else:
        print(f"Found {len(rail_features)} features for rail routes.")

        # Matches only; the source file's 'crs' member is carried over unchanged.
        output_geojson = {
            "type": "FeatureCollection",
            "crs": routes_data.get('crs'),
            "features": rail_features,
        }

        # Create the output folder when it is missing.
        if not os.path.exists(output_folder):
            os.makedirs(output_folder)
            print(f"Created '{output_folder}' directory.")

        output_path = os.path.join(output_folder, output_file)
        with open(output_path, 'w') as f:
            json.dump(output_geojson, f)

        print(f"\n--- SUCCESS ---")
        print(f"Successfully created '{output_path}'")

except FileNotFoundError:
    print(f"ERROR: Could not find the file '{routes_file}'.")
except Exception as e:
    print(f"An error occurred: {e}")

Loading ../data/sfmta_routes.geojson...
File loaded. Searching for Light Rail routes: ['J', 'K', 'L', 'M', 'N', 'T']
Found 12 features for rail routes.

--- SUCCESS ---
Successfully created '../data\sfmta_rail_lines.geojson'


In [8]:
# Keep the BART stations whose 'City' is San Francisco, plus Daly City and West Oakland
# (matched by 'Name'), and write them to a smaller GeoJSON FeatureCollection.

# --- Configuration ---
bart_stations_file = "../data/raw/bart_stations.geojson"
output_file_name = "sf_bart_stations.geojson"
output_folder = "../data"

# Stations to find
# We use 'Name' for the non-SF ones just to be safe
stations_to_keep = {
    'City': ['San Francisco'],
    'Name': ['Daly City', 'West Oakland']
}
# --- End Configuration ---

print(f"Loading BART stations file: {bart_stations_file}...")

try:
    # This file is not written by this notebook, so it may contain raw non-ASCII UTF-8.
    # Read it as UTF-8 explicitly rather than with the platform's locale encoding.
    with open(bart_stations_file, 'r', encoding='utf-8') as f:
        bart_data = json.load(f)

    sf_station_features = []

    if 'features' in bart_data and isinstance(bart_data['features'], list):
        # A feature is kept if it is an SF station OR one of the named stations.
        sf_station_features = [
            feature
            for feature in bart_data['features']
            if feature.get('properties', {}).get('City') in stations_to_keep['City']
            or feature.get('properties', {}).get('Name') in stations_to_keep['Name']
        ]

    if sf_station_features:
        print(f"Found {len(sf_station_features)} stations (SF + Daly City + West Oakland).")

        output_geojson = {"type": "FeatureCollection"}
        # Copy 'crs' only when the source actually has one: writing `"crs": null`
        # tells some readers that no coordinate system may be assumed at all.
        source_crs = bart_data.get('crs')
        if source_crs is not None:
            output_geojson["crs"] = source_crs
        output_geojson["features"] = sf_station_features

        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(output_folder, exist_ok=True)

        output_path = os.path.join(output_folder, output_file_name)
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(output_geojson, f)

        print(f"\n--- SUCCESS ---")
        print(f"Successfully created '{output_path}'")

    else:
        print("ERROR: Could not find any of the target stations.")

except FileNotFoundError:
    print(f"ERROR: Could not find the file '{bart_stations_file}'.")
except Exception as e:
    print(f"An error occurred: {e}")

Loading BART stations file: ../data/raw/bart_stations.geojson...
Found 10 stations (SF + Daly City + West Oakland).

--- SUCCESS ---
Successfully created '../data\sf_bart_stations.geojson'


In [28]:
# Build the "top 10 corridors" summary for one target year: average weekday bus boardings
# per route, rolled up into corridors, plus a manually supplied BART figure for the
# Mission-related corridors. The result is written as a JSON list of records.

# --- 1. Define File Names ---
ridership_file = "../data/raw/sfmta_ridership.csv"
output_file = "corridor_ridership_summary.json"
output_folder = "../data"

print(f"Loading {ridership_file}...")

try:
    df = pd.read_csv(ridership_file)

    # --- 2. Clean Data ---
    # Convert 'Average Daily Boardings' to a clean number.
    # Cast to str first: if pandas already parsed the column as numeric, the .str
    # accessor would raise and the whole cell would abort.
    df['Average Daily Boardings'] = df['Average Daily Boardings'].astype(str).str.replace(',', '')
    df['Average Daily Boardings'] = pd.to_numeric(df['Average Daily Boardings'], errors='coerce')
    df.dropna(subset=['Average Daily Boardings'], inplace=True)

    # Convert 'Month' (e.g. "January 2024") to datetime to find the year
    df['Month'] = pd.to_datetime(df['Month'], format='%B %Y')
    df['Year'] = df['Month'].dt.year

    # Use 2024 as the most recent full year
    target_year = 2024
    print(f"Using data from the most recent full year: {target_year}")

    # Filter for target year, weekdays, and only bus routes
    non_bus_categories = ['Muni Metro', 'Cable Car', 'Historic Streetcar']
    bus_df = df[
        (df['Year'] == target_year) &
        (df['Service Day of the Week'] == 'Weekday') &
        (~df['Service Category'].isin(non_bus_categories))
    ].copy()

    # --- 3. Aggregate into Corridors ---
    # Mean of the monthly figures for each route within the target year
    avg_annual_ridership = bus_df.groupby('Route')['Average Daily Boardings'].mean().reset_index()

    # Define corridor groups
    corridor_map = {
        '38 Geary': 'Geary Corridor',
        '38R Geary Rapid': 'Geary Corridor',
        '14 Mission': 'Mission Corridor',
        '14R Mission Rapid': 'Mission Corridor',
    }

    # Map routes to corridors, fill unmapped routes with their own name
    avg_annual_ridership['Corridor'] = avg_annual_ridership['Route'].map(corridor_map).fillna(avg_annual_ridership['Route'])

    # Group by the new 'Corridor' and sum the per-route averages
    corridor_totals = avg_annual_ridership.groupby('Corridor')['Average Daily Boardings'].sum().reset_index()
    corridor_totals.rename(columns={'Average Daily Boardings': 'Bus_Ridership'}, inplace=True)

    # --- 4. Manually Add BART Ridership Data ---
    # Based on our analysis of BART data (16th St + 24th St)
    BART_MISSION_RIDERSHIP = 12690

    corridor_totals['BART_Ridership'] = 0

    # The same BART figure is credited to both corridors below, so their totals overlap.
    mission_index = corridor_totals.index[corridor_totals['Corridor'] == 'Mission Corridor']
    if not mission_index.empty:
        corridor_totals.at[mission_index[0], 'BART_Ridership'] = BART_MISSION_RIDERSHIP

    van_ness_index = corridor_totals.index[corridor_totals['Corridor'] == '49 Van Ness/Mission']
    if not van_ness_index.empty:
        corridor_totals.at[van_ness_index[0], 'BART_Ridership'] = BART_MISSION_RIDERSHIP

    # --- 5. Final Calculations and Sorting ---
    corridor_totals['Total_Ridership'] = corridor_totals['Bus_Ridership'] + corridor_totals['BART_Ridership']

    # Get the Top 10 Corridors
    top_10_corridors = corridor_totals.sort_values(by='Total_Ridership', ascending=False).head(10).copy()

    # Add a highlight column for D3: any corridor whose name contains 'Geary' is singled out
    top_10_corridors['Highlight'] = top_10_corridors['Corridor'].apply(
        lambda x: 'Geary Corridor' if 'Geary' in x else 'Other Corridors'
    )

    # The header follows target_year so it cannot drift from the filter above.
    print(f"\n--- Top 10 Corridors by Total Weekday Ridership ({target_year}) ---")
    print(top_10_corridors.to_string(index=False))

    # --- 6. Save to JSON for D3 ---
    # Convert DataFrame to a list of dictionaries (records)
    output_data = top_10_corridors.to_dict(orient='records')

    # Make sure 'data' folder exists
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
        print(f"Created '{output_folder}' directory.")

    output_path = os.path.join(output_folder, output_file)
    with open(output_path, 'w') as f:
        json.dump(output_data, f, indent=2)

    print(f"\nSuccessfully created summary JSON file: {output_path}")

except FileNotFoundError:
    print(f"ERROR: Could not find the file '{ridership_file}'.")
    print("Please make sure it's in the same directory as this script.")
except Exception as e:
    print(f"An error occurred: {e}")

Loading ../data/raw/sfmta_ridership.csv...
Using data from the most recent full year: 2024

--- Top 10 Corridors by Total Weekday Ridership (2024) ---
           Corridor  Bus_Ridership  BART_Ridership  Total_Ridership       Highlight
   Mission Corridor   46316.666667           12690     59006.666667 Other Corridors
49 Van Ness/Mission   33925.000000           12690     46615.000000 Other Corridors
     Geary Corridor   41808.333333               0     41808.333333  Geary Corridor
        22 Fillmore   21783.333333               0     21783.333333 Other Corridors
         8 Bayshore   17758.333333               0     17758.333333 Other Corridors
       1 California   17408.333333               0     17408.333333 Other Corridors
        30 Stockton   16391.666667               0     16391.666667 Other Corridors
          29 Sunset   15400.000000               0     15400.000000 Other Corridors
   44 O'Shaughnessy   12241.666667               0     12241.666667 Other Corridors
   7 Haig

In [55]:
# Build a per-corridor, per-year series of average weekday bus boardings and write it
# as nested JSON: [ {corridor: "Name", values: [ {year: Y, ridership: R}, ... ]}, ... ]

# --- 1. Define File Names ---
ridership_file = "../data/raw/sfmta_ridership.csv"
output_file = "corridor_growth_data.json"
output_folder = "../data"

print(f"Loading {ridership_file}...")

try:
    df = pd.read_csv(ridership_file)

    # --- 2. Clean Data ---
    # Strip thousands separators, coerce to numbers, drop rows that fail to parse,
    # then store the boardings as integers.
    boardings_col = 'Average Daily Boardings'
    parsed_boardings = pd.to_numeric(
        df[boardings_col].astype(str).str.replace(',', ''),
        errors='coerce',
    )
    df = (
        df.assign(**{boardings_col: parsed_boardings})
        .dropna(subset=[boardings_col])
        .astype({boardings_col: int})
    )

    # 'Month' is parsed with the '%B %Y' format; 'Year' is derived from it.
    df = df.assign(Month=lambda frame: pd.to_datetime(frame['Month'], format='%B %Y'))
    df = df.assign(Year=lambda frame: frame['Month'].dt.year)

    # Weekday rows only, excluding the non-bus service categories.
    non_bus_categories = ['Muni Metro', 'Cable Car', 'Historic Streetcar']
    is_weekday = df['Service Day of the Week'] == 'Weekday'
    is_bus = ~df['Service Category'].isin(non_bus_categories)
    bus_df = df.loc[is_weekday & is_bus].copy()

    # --- 3. Aggregate into Corridors ---
    # 38/38R form the Geary Corridor and 14/14R the Mission Corridor; the 49 and the
    # remaining listed routes each map to a corridor of their own name.
    corridor_map = {
        '38 Geary': 'Geary Corridor',
        '38R Geary Rapid': 'Geary Corridor',
        '14 Mission': 'Mission Corridor',
        '14R Mission Rapid': 'Mission Corridor',
        '49 Van Ness/Mission': '49 Van Ness/Mission',
        '22 Fillmore': '22 Fillmore',
        '8 Bayshore': '8 Bayshore',
        '1 California': '1 California',
    }
    bus_df['Corridor'] = bus_df['Route'].map(corridor_map)

    # Routes outside these corridors are discarded.
    main_corridors = [
        'Geary Corridor',
        'Mission Corridor',
        '49 Van Ness/Mission',
        '22 Fillmore',
        '8 Bayshore',
        '1 California',
    ]
    keep_row = bus_df['Corridor'].isin(main_corridors)
    bus_df = bus_df[keep_row]

    # --- 4. Group by Year and Corridor ---
    # Average each route within a year first, then add the route averages per corridor.
    route_year_groups = bus_df.groupby(['Year', 'Route'])[boardings_col]
    annual_avg_route = route_year_groups.mean().reset_index()
    annual_avg_route['Corridor'] = annual_avg_route['Route'].map(corridor_map)

    corridor_year_groups = annual_avg_route.groupby(['Year', 'Corridor'])[boardings_col]
    corridor_growth = corridor_year_groups.sum().reset_index()

    # --- 5. Format Data for D3 (Nesting) ---
    # Corridors appear in order of first occurrence; values are truncated to int.
    d3_data = [
        {
            "corridor": corridor_label,
            "values": [
                {"year": int(year_value), "ridership": int(boardings_value)}
                for year_value, boardings_value in zip(
                    corridor_rows['Year'], corridor_rows[boardings_col]
                )
            ],
        }
        for corridor_label, corridor_rows in corridor_growth.groupby('Corridor', sort=False)
    ]

    print("\n--- Processed Corridor Growth Data (Sample) ---")
    print(json.dumps(d3_data, indent=2))

    # --- 6. Save to JSON for D3 ---
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
        print(f"Created '{output_folder}' directory.")

    output_path = os.path.join(output_folder, output_file)
    with open(output_path, 'w') as f:
        json.dump(d3_data, f, indent=2)

    print(f"\nSuccessfully created summary JSON file: {output_path}")

except FileNotFoundError:
    print(f"ERROR: Could not find the file '{ridership_file}'.")
    print("Please make sure it's in the same directory as this script.")
except Exception as e:
    print(f"An error occurred: {e}")

Loading ../data/raw/sfmta_ridership.csv...

--- Processed Corridor Growth Data (Sample) ---
[
  {
    "corridor": "1 California",
    "values": [
      {
        "year": 2019,
        "ridership": 22716
      },
      {
        "year": 2020,
        "ridership": 9375
      },
      {
        "year": 2021,
        "ridership": 10100
      },
      {
        "year": 2022,
        "ridership": 13783
      },
      {
        "year": 2023,
        "ridership": 15816
      },
      {
        "year": 2024,
        "ridership": 17408
      },
      {
        "year": 2025,
        "ridership": 18620
      }
    ]
  },
  {
    "corridor": "22 Fillmore",
    "values": [
      {
        "year": 2019,
        "ridership": 17758
      },
      {
        "year": 2020,
        "ridership": 10158
      },
      {
        "year": 2021,
        "ridership": 11775
      },
      {
        "year": 2022,
        "ridership": 16741
      },
      {
        "year": 2023,
        "ridership": 19625
      },
  

In [70]:
# Join per-tract vehicle-availability figures (table B08201) onto the density GeoJSON
# and write the combined layer to a new file.

# --- Configuration ---
# Your existing map file (the one with density)
base_map_file = "../data/sf_tracts_with_density.geojson" 
# The new file you just downloaded
vehicle_data_file = "../data/raw/vehicle_data.json"
# The new file we will create
output_file = "../data/sf_map_data.json" 

# Census table keys
TOTAL_HOUSEHOLDS_KEY = 'B08201001'
NO_VEHICLES_KEY = 'B08201002'
# --- End Configuration ---

print("Starting to merge equity data...")

try:
    # 1. Load the new vehicle data JSON (explicit UTF-8 rather than the locale encoding)
    with open(vehicle_data_file, 'r', encoding='utf-8') as f:
        vehicle_data = json.load(f)
    print("Loaded vehicle_data.json")

    # 2. Parse the vehicle data into a list
    equity_list = []
    for geoid, tract_data in vehicle_data['data'].items():
        try:
            estimates = tract_data['B08201']['estimate']
            total_households = estimates[TOTAL_HOUSEHOLDS_KEY]
            no_vehicles = estimates[NO_VEHICLES_KEY]
        except KeyError:
            print(f"Warning: Could not find vehicle data for geoid {geoid}")
            continue

        # A null estimate would make `None > 0` raise TypeError and abort the whole
        # cell; skip just that tract instead.
        if total_households is None or no_vehicles is None:
            print(f"Warning: Could not find vehicle data for geoid {geoid}")
            continue

        # Share of households without a vehicle, as a percentage (0 when there are no households)
        percent_no_vehicle = (no_vehicles / total_households) * 100 if total_households > 0 else 0

        equity_list.append({
            'geoid': geoid,
            'total_households': total_households,
            'no_vehicles': no_vehicles,
            'percent_no_vehicle': percent_no_vehicle
        })

    # 3. Convert list to a DataFrame for merging (pandas is imported in the first cell)
    equity_df = pd.DataFrame(equity_list)

    # 4. Load your base GeoJSON file
    gdf = gpd.read_file(base_map_file)
    print("Loaded base map file (sf_tracts_with_density.geojson)")

    # 5. Merge the new data on the shared 'geoid' column.
    # This is an inner join: tracts without vehicle data are left out of the output.
    merged_gdf = gdf.merge(equity_df, on='geoid')
    print("Successfully merged density data and equity data.")

    # 6. Save the new, final GeoJSON file
    merged_gdf.to_file(output_file, driver='GeoJSON')

    print(f"\n--- SUCCESS ---")
    print(f"Successfully created final file: {output_file}")
    print("This file contains all data (density, equity, etc.)")
    print("\nPreview of final data properties:")
    print(merged_gdf.drop(columns='geometry').head())

except FileNotFoundError as e:
    print(f"ERROR: File not found. Make sure this file is in your data folder: {e.filename}")
except ImportError:
    print("\n--- ERROR ---")
    print("You need 'geopandas' and 'pandas' for this script.")
    print("Please run: pip install pandas geopandas")
except Exception as e:
    print(f"An error occurred: {e}")

Starting to merge equity data...
Loaded vehicle_data.json
Loaded base map file (sf_tracts_with_density.geojson)
Successfully merged density data and equity data.

--- SUCCESS ---
Successfully created final file: ../data/sf_map_data.json
This file contains all data (density, equity, etc.)

Preview of final data properties:
                geoid                                    name  population  \
0  14000US06075010101  Census Tract 101.01, San Francisco, CA      2004.0   
1  14000US06075010102  Census Tract 101.02, San Francisco, CA      1795.0   
2  14000US06075010201  Census Tract 102.01, San Francisco, CA      2608.0   
3  14000US06075010202  Census Tract 102.02, San Francisco, CA      1761.0   
4  14000US06075010300     Census Tract 103, San Francisco, CA      3791.0   

   area_sq_miles  population_density_sq_mi  total_households  no_vehicles  \
0       0.518960               3861.572540            1212.0        459.0   
1       0.030726              58420.042081             907.