## III. Phase 3: Census Tract Transit Characteristics Mapping

**1. Goal:**

To analyze the distribution of potential public transit access across Georgia census tracts based on data collected from the Google Places API (Phase 1) and align these metrics with the concepts in the Georgia QAP Community Transportation Options section. The objective is to create GeoJSON files suitable for mapping census tract-level indicators of transit availability, specifically:

- The density of potential transit stops.
- The density of potential transit hubs.
- The approximate area coverage within 0.5 miles of potential stops.
- The approximate area coverage within 0.5 miles of potential hubs.

These maps are intended to help visualize areas with higher transit potential relevant to QAP scoring components.

**2. Inputs:**

*   **`georgia_transit_locations_with_hub.csv`:** A CSV file (output of the Phase 1 script) containing a list of potential transit locations across Georgia, identified via the Google Places API. Must include columns for `latitude`, `longitude`, `place_id`, and `is_potential_hub`. The code below reads a timestamped copy of this file, named in `PHASE1_CSV_FILE`.
*   **Georgia Census Tract Boundaries File:** A geospatial file (e.g., Shapefile `.shp` or GeoJSON `.geojson`) defining the polygon boundaries for all census tracts in Georgia. Must contain a unique Census Tract identifier column (e.g., 'GEOID') and geometry information. (Example Source: US Census Bureau TIGER/Line).

**3. Outputs:**

GeoJSON files saved in the `communi_trans_geo` directory, where each feature represents a Georgia census tract:

*   **`georgia_tract_stop_density.geojson`:** Contains tract geometry and a `stop_density` property (integer count of all potential transit stops within the tract; despite the name, this is a raw count and is not normalized by tract area).
*   **`georgia_tract_hub_density.geojson`:** Contains tract geometry and a `hub_density` property (integer count of potential transit hubs within the tract; likewise a raw count, not normalized by tract area).
*   **`georgia_tract_stop_coverage_0_5mi.geojson`:** Contains tract geometry and a `stop_coverage_pct` property (float percentage, 0-100, of the tract's area within a 0.5-mile straight-line buffer of any potential stop).
*   **`georgia_tract_hub_coverage_0_5mi.geojson`:** Contains tract geometry and a `hub_coverage_pct` property (float percentage, 0-100, of the tract's area within a 0.5-mile straight-line buffer of any potential hub).
*   **`georgia_tract_community_transportation_place.geojson`:** A combined file containing tract geometry and all four calculated properties (`stop_density`, `hub_density`, `stop_coverage_pct`, `hub_coverage_pct`).




---
# Code

In [3]:
# pip install geopandas shapely fiona pyproj pandas

import os
import pandas as pd
import geopandas as gpd
import warnings
from shapely.validation import make_valid
from shapely.errors import GEOSException


In [4]:

# --- CONFIGURATION ---

# Inputs: the Phase 1 transit-location CSV and the Georgia census tract shapefile.
PHASE1_CSV_FILE = "georgia_transit_locations_with_hub_20250421_0949.csv"
TRACT_FILE_PATH = "../LIHTC-Project/data/raw/shapefiles/tl_2024_13_tract/tl_2024_13_tract.shp"

# Every GeoJSON output is written below this directory (created if missing).
OUTPUT_DIR = "communi_trans_geo"
os.makedirs(OUTPUT_DIR, exist_ok=True)

STOP_DENSITY_GEOJSON   = os.path.join(OUTPUT_DIR, "georgia_tract_stop_density.geojson")
HUB_DENSITY_GEOJSON    = os.path.join(OUTPUT_DIR, "georgia_tract_hub_density.geojson")
STOP_COVERAGE_GEOJSON  = os.path.join(OUTPUT_DIR, "georgia_tract_stop_coverage_0_5mi.geojson")
HUB_COVERAGE_GEOJSON   = os.path.join(OUTPUT_DIR, "georgia_tract_hub_coverage_0_5mi.geojson")
COMBINED_GEOJSON       = os.path.join(OUTPUT_DIR, "georgia_tract_community_transportation_place.geojson")

# Buffer radius used by the coverage metrics. The international mile is
# exactly 1609.344 metres, so use the exact factor rather than a rounded one.
METERS_PER_MILE = 1609.344
BUFFER_DISTANCE_MILES = 0.5
BUFFER_DISTANCE_METERS = BUFFER_DISTANCE_MILES * METERS_PER_MILE

# CRS used for buffering and area calculations; buffer distances are passed
# in metres, so this CRS must have metre units.
# NOTE(review): EPSG:26917 is NAD83 / UTM zone 17N per the EPSG registry;
# western Georgia lies outside that zone, so confirm the resulting
# area/distance distortion is acceptable for this analysis.
PROJECTED_CRS = "EPSG:26917"

# Suppress the noisy "invalid value encountered in intersection" warnings
# that can be raised during the tract/buffer overlay.
warnings.filterwarnings("ignore", message=".*invalid value encountered in intersection.*")

# --- LOAD TRANSIT STOPS ---

# Read the Phase 1 CSV, coerce the coordinates to numbers (unparseable values
# become NaN), and turn the hub flag into a real boolean column.
print(f"\nLoading transit stops from {PHASE1_CSV_FILE}...")
transit_df = pd.read_csv(PHASE1_CSV_FILE)
for coord_col in ("latitude", "longitude"):
    transit_df[coord_col] = pd.to_numeric(transit_df[coord_col], errors="coerce")

# Any of these case-insensitive spellings counts as "is a hub"; everything
# else (including missing values) is treated as False.
truthy_tokens = ["true", "1", "t", "y", "yes"]
hub_text = transit_df["is_potential_hub"].astype(str).str.lower()
transit_df["is_potential_hub"] = hub_text.isin(truthy_tokens)

# Rows without a usable coordinate pair cannot be mapped, so drop them and
# report how many were lost.
original_rows = len(transit_df)
transit_df = transit_df.dropna(subset=["latitude", "longitude"])
print(f"  Dropped {original_rows - len(transit_df)} invalid rows. Remaining: {len(transit_df)}")

# Build point geometries (x = longitude, y = latitude) in WGS 84.
stop_points = gpd.points_from_xy(transit_df["longitude"], transit_df["latitude"])
gdf_stops = gpd.GeoDataFrame(transit_df, geometry=stop_points, crs="EPSG:4326")

# --- LOAD TRACTS ---

print(f"\nLoading Census tracts from {TRACT_FILE_PATH}...")
gdf_tracts = gpd.read_file(TRACT_FILE_PATH)
# Only the tract identifier and its polygon are needed downstream.
gdf_tracts = gdf_tracts[['GEOID', 'geometry']]

# --- REPROJECT TO METRIC CRS ---

# Buffers and areas are computed in PROJECTED_CRS units, so both layers must
# share that CRS before any spatial operation.
print(f"\nReprojecting to {PROJECTED_CRS}...")
gdf_stops = gdf_stops.to_crs(PROJECTED_CRS)
gdf_tracts = gdf_tracts.to_crs(PROJECTED_CRS)

# --- CALCULATE AREA ---

gdf_tracts['total_tract_area_sqm'] = gdf_tracts.geometry.area
# Zero-area tracts would cause a division by zero in the coverage percentages;
# take an explicit copy so later column assignments act on an independent frame.
gdf_tracts = gdf_tracts[gdf_tracts['total_tract_area_sqm'] > 0].copy()

# --- DENSITY CALCULATION ---

print("\nCalculating stop and hub densities...")
# Tag every stop with the GEOID of the tract it lies within; the inner join
# discards stops that fall inside no tract.
tract_lookup = gdf_tracts[["GEOID", "geometry"]]
joined = gpd.sjoin(gdf_stops, tract_lookup, how="inner", predicate="within")

# Per-tract counts: first all stops, then only those flagged as potential hubs.
stop_density = joined.groupby("GEOID").size().rename("stop_density")
hubs_only = joined[joined["is_potential_hub"]]
hub_density = hubs_only.groupby("GEOID").size().rename("hub_density")

# Left merges keep tracts that contain no stops; their missing counts become 0.
gdf = gdf_tracts.merge(stop_density, on="GEOID", how="left")
gdf = gdf.merge(hub_density, on="GEOID", how="left")
for count_col in ("stop_density", "hub_density"):
    gdf[count_col] = gdf[count_col].fillna(0).astype(int)

# --- BUFFER COVERAGE ---

def dissolve_valid_buffers(gdf_points, buffer_dist):
    """Buffer every point and merge the buffers into a single geometry.

    Args:
        gdf_points: Point layer exposing ``.empty`` and ``.buffer()`` (the
            script passes a GeoDataFrame of stops in a projected CRS).
        buffer_dist: Buffer radius, in the units of the layer's CRS.

    Returns:
        The union of all buffers as one geometry, or ``None`` when
        ``gdf_points`` has no rows.
    """
    if gdf_points.empty:
        return None
    buffers = gdf_points.buffer(buffer_dist)
    # Repair only the buffers that are actually invalid so the union below
    # does not fail on a bad geometry.
    buffers = buffers.apply(lambda geom: geom if geom.is_valid else make_valid(geom))
    # `unary_union` is deprecated in recent GeoPandas in favour of
    # `union_all()`; prefer the new method and fall back for older installs.
    if hasattr(buffers, "union_all"):
        return buffers.union_all()
    return buffers.unary_union

def _coverage_pct(tracts, dissolved):
    """Return the percentage (0-100) of each tract's area inside ``dissolved``.

    ``tracts`` must carry a ``total_tract_area_sqm`` column in the same CRS
    units as its geometry. ``dissolved`` is the merged buffer geometry, or
    ``None`` when there were no points to buffer.
    """
    # No points at all, or an empty union, means nothing is covered.
    if dissolved is None or dissolved.is_empty:
        return 0.0
    covered_area = tracts.geometry.intersection(dissolved).area
    # Clip guards against tiny floating-point overshoots past 100 %.
    return ((covered_area / tracts["total_tract_area_sqm"]) * 100).clip(0, 100)


print("\nCalculating stop coverage...")
dissolved_stop = dissolve_valid_buffers(gdf_stops, BUFFER_DISTANCE_METERS)
gdf["stop_coverage_pct"] = _coverage_pct(gdf, dissolved_stop)

print("Calculating hub coverage...")
# Same computation, restricted to the stops flagged as potential hubs.
gdf_hubs = gdf_stops[gdf_stops["is_potential_hub"]]
dissolved_hub = dissolve_valid_buffers(gdf_hubs, BUFFER_DISTANCE_METERS)
gdf["hub_coverage_pct"] = _coverage_pct(gdf, dissolved_hub)

# --- SAVE OUTPUTS ---

print("\nSaving GeoJSON outputs...")
# GeoJSON (RFC 7946) coordinates are defined as WGS 84 longitude/latitude and
# web-mapping clients generally assume that, so convert the projected working
# frame back to EPSG:4326 for export. All metrics were already computed in the
# projected CRS, so their values are unaffected.
gdf_out = gdf.to_crs("EPSG:4326")

# (destination path, metric columns written alongside GEOID and geometry)
metric_outputs = [
    (STOP_DENSITY_GEOJSON, ["stop_density"]),
    (HUB_DENSITY_GEOJSON, ["hub_density"]),
    (STOP_COVERAGE_GEOJSON, ["stop_coverage_pct"]),
    (HUB_COVERAGE_GEOJSON, ["hub_coverage_pct"]),
    (
        COMBINED_GEOJSON,
        ["stop_density", "hub_density", "stop_coverage_pct", "hub_coverage_pct"],
    ),
]
for out_path, metric_cols in metric_outputs:
    gdf_out[["GEOID", "geometry", *metric_cols]].to_file(out_path, driver="GeoJSON")

print("\n Transit coverage and density analysis complete.")
print(f"GeoJSON files saved in: {OUTPUT_DIR}")


Loading transit stops from georgia_transit_locations_with_hub_20250421_0949.csv...
  Dropped 0 invalid rows. Remaining: 45554

Loading Census tracts from ../LIHTC-Project/data/raw/shapefiles/tl_2024_13_tract/tl_2024_13_tract.shp...

Reprojecting to EPSG:26917...

Calculating stop and hub densities...

Calculating stop coverage...


  return buffers.unary_union


Calculating hub coverage...


  return buffers.unary_union



Saving GeoJSON outputs...

 Transit coverage and density analysis complete.
GeoJSON files saved in: communi_trans_geo
