In [1]:
from pathlib import Path
import geopandas as gpd
from geopandas.tools import sjoin_nearest

# Importing Data

### File Paths

In [2]:
# Root folders. Every path below is relative to the notebook's working directory.
DATA_DIR = Path("../../data")
MAPS_DIR = DATA_DIR / "maps"
DESIRABLE_DIR = MAPS_DIR / "desirable_undesirable_activities"
STABLE_DIR = MAPS_DIR / "stable_communities"

# Main Score Mapping Data Path
main_path = MAPS_DIR / "total_location_score" / "LIHTC-Project_data_map_final_scoring_by_category_multiproc.geojson"

# Desirable/Undesirable Activities Paths
desirable_undesirable_scores_path = DESIRABLE_DIR / "desirable_undesirable_scores.geojson"
desirable_places_path = DESIRABLE_DIR / "desirable_activities_google_places_v3.geojson"
undesirable_places_path = DESIRABLE_DIR / "undesirable_hsi_tri_cdr_rcra_frs_google_places.geojson"
food_desert_path = DESIRABLE_DIR / "food_access_research_atlas.geojson"

# Housing Needs Characteristics Paths
housing_need_characteristics_path = MAPS_DIR / "housing_need_characteristics" / "merged_housing_data.geojson"

# Stable Communities Paths
stable_path = STABLE_DIR / "stable_communities_scores_geo.geojson"
environmental_path = STABLE_DIR / "environmental_health_index_2024.geojson"
poverty_path = STABLE_DIR / "above_poverty_level_2024.geojson"
job_path = STABLE_DIR / "jobs_proximity_index_2024.geojson"
income_path = STABLE_DIR / "median_income_2024.geojson"
transit_path = STABLE_DIR / "transit_access_index_2024.geojson"

# Census Tracts Shapefile Path (a Path like the others, not a bare string)
ga_tracts_path = DATA_DIR / "raw" / "shapefiles" / "tl_2024_13_tract" / "tl_2024_13_tract.shp"

# Applicants Path
applicants_path = MAPS_DIR / "application_list_2022_2023_2024_v2.geojson"

### Load Data

In [None]:
# Every layer is normalised to the same CRS on load so that the spatial
# predicates used later in the notebook compare like with like.
TARGET_CRS = "EPSG:4326"


def load_layer(path, crs=TARGET_CRS):
    """Read a vector file with geopandas and return it reprojected to ``crs``.

    Parameters
    ----------
    path : str or pathlib.Path
        File to read with ``gpd.read_file``.
    crs : str, default TARGET_CRS
        CRS handed to ``GeoDataFrame.to_crs``.

    Returns
    -------
    geopandas.GeoDataFrame

    Raises
    ------
    ValueError
        Raised by ``to_crs`` when the file carries no CRS information.
    """
    return gpd.read_file(path).to_crs(crs)


# Main Score Mapping Data
gdf_main_scores = load_layer(main_path)

# Desirable/Undesirable Activities
gdf_desirable_undesirable_scores = load_layer(desirable_undesirable_scores_path)
gdf_desirable_places = load_layer(desirable_places_path)
gdf_undesirable_places = load_layer(undesirable_places_path)
gdf_food_desert = load_layer(food_desert_path)

# Housing Needs Characteristics
gdf_housing_need = load_layer(housing_need_characteristics_path)

# Stable Communities
gdf_stable_score = load_layer(stable_path)
gdf_environmental = load_layer(environmental_path)
gdf_poverty = load_layer(poverty_path)
gdf_job = load_layer(job_path)
gdf_income = load_layer(income_path)
gdf_transit = load_layer(transit_path)

# Census Tracts
ga_tracts = load_layer(ga_tracts_path)

# Applicants
gdf_applicants = load_layer(applicants_path)

# Preparing Data

### Add GEOID (census tract ID) to the main file and filter to tracts in metro Atlanta

In [None]:
def attach_geoid(points_gdf, tracts=ga_tracts):
    """Attach the ``GEOID`` of the containing tract to each row of ``points_gdf``.

    Parameters
    ----------
    points_gdf : geopandas.GeoDataFrame
        Layer to annotate. It is reprojected to EPSG:4326 before the join.
    tracts : geopandas.GeoDataFrame, default ``ga_tracts``
        Layer providing ``GEOID`` and ``geometry`` columns. The default is
        bound when this cell runs, so re-run the cell after reloading
        ``ga_tracts``.

    Returns
    -------
    geopandas.GeoDataFrame
        ``points_gdf`` in EPSG:4326 with an added ``GEOID`` column. The join is
        a left join on the ``within`` predicate, so every input row is kept and
        rows that fall within no tract get a missing ``GEOID``.
    """
    points = points_gdf.to_crs("EPSG:4326")
    tract_ids = tracts[["GEOID", "geometry"]]

    # Join both layers in the same CRS; a mismatch would compare unrelated
    # coordinates. A tract layer with no CRS is left as-is, as before.
    if tract_ids.crs is not None and tract_ids.crs != points.crs:
        tract_ids = tract_ids.to_crs(points.crs)

    joined = gpd.sjoin(points, tract_ids, how="left", predicate="within")
    # sjoin adds the matched tract's index as "index_right"; only GEOID is wanted.
    return joined.drop(columns="index_right")


gdf_main_geo = attach_geoid(gdf_main_scores)

In [None]:
# Metro Atlanta counties, keyed by name; values are the 3-digit county FIPS
# codes matched against the tract layer's COUNTYFP column.
# NOTE(review): Henry County (FIPS 151) is often counted as part of core metro
# Atlanta but is not listed here — confirm the omission is intentional.
metro_atlanta_counties = {
    "Cherokee": "057", "Clayton": "063", "Cobb": "067", "DeKalb": "089", "Douglas": "097",
    "Fayette": "113", "Forsyth": "117", "Fulton": "121", "Gwinnett": "135", "Rockdale": "247"
}

# Keep only the tracts that belong to one of the metro counties
metro_tracts = ga_tracts[ga_tracts["COUNTYFP"].isin(list(metro_atlanta_counties.values()))]

# One dissolved polygon per county
metro_county_polygons = metro_tracts.dissolve(by="COUNTYFP").reset_index()

# Single geometry covering every metro tract, used for "inside any metro county" tests.
# `unary_union` is deprecated in recent geopandas in favour of `union_all()`;
# fall back to the old attribute on versions that predate `union_all`.
if hasattr(metro_tracts, "union_all"):
    metro_union = metro_tracts.union_all()
else:
    metro_union = metro_tracts.unary_union

# Make sure the scored layer has an active geometry column tagged as EPSG:4326
gdf_main_geo = gdf_main_geo.set_geometry("geometry").set_crs("EPSG:4326")

# Keep only the rows whose geometry lies within the metro union
gdf_main_filtered = gdf_main_geo[gdf_main_geo.within(metro_union)]

  metro_union = metro_tracts.unary_union


In [None]:
def _inside_metro(layer):
    """Return the rows of ``layer`` whose geometry is within ``metro_union``."""
    inside = layer.within(metro_union)
    return layer[inside]


# Stable Communities indicators
gdf_environmental_filtered = _inside_metro(gdf_environmental)
gdf_poverty_filtered = _inside_metro(gdf_poverty)
gdf_job_filtered = _inside_metro(gdf_job)
gdf_income_filtered = _inside_metro(gdf_income)
gdf_transit_filtered = _inside_metro(gdf_transit)

# Housing Needs Characteristics indicators
gdf_housing_filtered = _inside_metro(gdf_housing_need)

# Desirable/Undesirable Activities indicators
gdf_desirable_places_filtered = _inside_metro(gdf_desirable_places)
gdf_undesirable_places_filtered = _inside_metro(gdf_undesirable_places)
gdf_food_desert_filtered = _inside_metro(gdf_food_desert)

# Application data
gdf_applicants_filtered = _inside_metro(gdf_applicants)

In [9]:
def _scores_for(category):
    """Return an independent copy of the metro rows whose ``scoring_category`` equals ``category``."""
    matches = gdf_main_filtered["scoring_category"] == category
    return gdf_main_filtered[matches].copy()


# One GeoDataFrame per scoring category
gdf_transport_filtered = _scores_for("Community Transportation Options")
gdf_desirable_filtered = _scores_for("Desirable/Undesirable Activities")
gdf_stable_filtered = _scores_for("Stable Communities")
gdf_need_filtered = _scores_for("Housing Need Characteristics")
gdf_education_filtered = _scores_for("Quality Education")
gdf_total_filtered = _scores_for("Total Score")

### Rolling up lat/lon data to census tract level for relevant scoring categories 

In [10]:
def _mean_score_by_tract(category_gdf):
    """Average ``score`` per ``GEOID``; returns a frame with columns ``GEOID`` and ``score``."""
    pairs = category_gdf[["GEOID", "score"]]
    return pairs.groupby("GEOID")["score"].mean().reset_index()


# Collapse the per-location scores to a single mean score per census tract
gdf_stable_filtered_mean = _mean_score_by_tract(gdf_stable_filtered)

gdf_education_filtered_mean = _mean_score_by_tract(gdf_education_filtered)


In [11]:
# Attach each tract's polygon to its mean score.
# The left side is restricted to ["GEOID", "score"] so this cell can be re-run
# safely: without that, a second run would merge another "geometry" column and
# pandas would rename the pair to geometry_x / geometry_y.
tract_shapes = ga_tracts[["GEOID", "geometry"]]

gdf_stable_filtered_mean = (
    gdf_stable_filtered_mean[["GEOID", "score"]]
      .merge(tract_shapes, on="GEOID", how="left")
)

gdf_education_filtered_mean = (
    gdf_education_filtered_mean[["GEOID", "score"]]
      .merge(tract_shapes, on="GEOID", how="left")
)

In [12]:
def _as_geoframe(table):
    """Wrap ``table`` in a GeoDataFrame that uses its ``geometry`` column, tagged EPSG:4326."""
    return gpd.GeoDataFrame(table, geometry="geometry", crs="EPSG:4326")


# Promote the merged tract-level tables to GeoDataFrames
gdf_stable_final = _as_geoframe(gdf_stable_filtered_mean)

gdf_education_final = _as_geoframe(gdf_education_filtered_mean)

### Adding Missing Tracts in Stable Communities Data

In [13]:
score_column = "score"

# Start from every metro tract so that tracts with no score show up as NaN
gdf_stable_merged = metro_tracts.merge(
    gdf_stable_final[[score_column, "GEOID"]],
    how="left",
    on="GEOID",
)

has_no_score = gdf_stable_merged[score_column].isna()
missing_gdf = gdf_stable_merged[has_no_score]
scored_gdf = gdf_stable_merged[~has_no_score]

# Record where each score came from; imputed rows are relabelled below
gdf_stable_merged["score_source"] = "Original"

if not missing_gdf.empty and not scored_gdf.empty:
    # Nearest-neighbour distances are not meaningful in a geographic CRS,
    # so run the join in a projected (UTM) CRS estimated from the data.
    projected_crs = gdf_stable_merged.estimate_utm_crs()

    # Spatial join to get the nearest scored tract(s) for every unscored tract
    nearest_matches = sjoin_nearest(
        missing_gdf.to_crs(projected_crs),
        scored_gdf[[score_column, "geometry"]].to_crs(projected_crs),
        how="left",
        distance_col="dist_to_nearest",
    )

    # sjoin_nearest returns one row per equidistant match, and tracts that touch
    # all tie at distance 0. Average the tied neighbours per unscored tract so
    # there is exactly one value per tract, then assign by index label rather
    # than by position.
    imputed_scores = nearest_matches.groupby(level=0)[f"{score_column}_right"].mean()

    gdf_stable_merged.loc[imputed_scores.index, score_column] = imputed_scores
    gdf_stable_merged.loc[imputed_scores.index, "score_source"] = "Imputed"




# Saving Data

In [None]:
# (layer, destination) pairs, written in this order as GeoJSON.
outputs = [
    # Total Score
    ## Score
    (gdf_total_filtered, "../../data/maps/total_location_score/total_score_metro_atl.geojson"),

    # Community Transportation Options
    ## Score
    (gdf_transport_filtered, "../../data/maps/community_transportation_options/transportation_options_score_metro_atl.geojson"),

    # Desirable/Undesirable Activities
    ## Score
    (gdf_desirable_filtered, "../../data/maps/desirable_undesirable_activities/desirable_undesirable_score_metro_atl.geojson"),
    ## Desirable indicators
    (gdf_desirable_places_filtered, "../../data/maps/desirable_undesirable_activities/desirable_places_metro_atl.geojson"),
    ## Undesirable indicators
    (gdf_undesirable_places_filtered, "../../data/maps/desirable_undesirable_activities/undesirable_places_metro_atl.geojson"),
    ## Food desert indicator
    (gdf_food_desert_filtered, "../../data/maps/desirable_undesirable_activities/food_deserts_metro_atl.geojson"),

    # Housing Need Characteristics
    ## Score
    (gdf_need_filtered, "../../data/maps/housing_need_characteristics/housing_need_score_metro_atl.geojson"),
    ## Housing Need Characteristics indicators
    (gdf_housing_filtered, "../../data/maps/housing_need_characteristics/housing_need_indicators_metro_atl.geojson"),

    # Quality Education
    ## Score
    (gdf_education_final, "../../data/maps/quality_education_areas/education_score_metro_atl.geojson"),

    # Stable Communities
    ## Score
    (gdf_stable_merged, "../../data/maps/stable_communities/stable_communities_score_metro_atl.geojson"),
    ## Stable Communities indicators
    (gdf_environmental_filtered, "../../data/maps/stable_communities/environmental_health_index_metro_atl.geojson"),
    (gdf_job_filtered, "../../data/maps/stable_communities/jobs_proximity_index_metro_atl.geojson"),
    (gdf_poverty_filtered, "../../data/maps/stable_communities/above_poverty_level_metro_atl.geojson"),
    (gdf_income_filtered, "../../data/maps/stable_communities/median_income_metro_atl.geojson"),
    (gdf_transit_filtered, "../../data/maps/stable_communities/transit_access_index_metro_atl.geojson"),

    # Applicants
    (gdf_applicants_filtered, "../../data/maps/application_list_2022_2023_2024_metro_atl.geojson"),
]

for layer, out_path in outputs:
    # Some destination folders are not used by any input of this notebook,
    # so create them on demand instead of failing on a fresh checkout.
    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
    layer.to_file(out_path, driver="GeoJSON")