In [108]:
import geopandas as gpd
import pandas as pd

precinct_shapes = gpd.read_file('data/master_precinct_shapes.csv')
gotv_targets = pd.read_csv('data/gotv_targets.csv', dtype={'van_precinct_id': str, 'targets':int})

# filter only columns from precinct_shapes that we want
shapes_filtered = precinct_shapes[['van_precinct_name', 'van_precinct_id','PrcnctName', 'GEOID']].copy()

# rename PrcnctName to precinct_name_doe
shapes_filtered = shapes_filtered.rename(columns={'PrcnctName': 'precinct_name_doe'})

# # filter only data we want from turfs_precincts
# turfs_filtered = turfed_precincts[['van_precinct_id', 'county_name', 'van_precinct_name', 
#                                   'Current Region', 'Current Turf', 'HDs', 'voters', 'supporters']].copy()

# left join the geometries onto the turf data
merged_gdf = gotv_targets.merge(shapes_filtered, on='van_precinct_id', how='left')

# convert to gdf
# merged_gdf = gpd.GeoDataFrame(merged_gdf, crs='EPSG:4326')

In [109]:
water_shapes = gpd.read_file('data/clipped_targets_sans_water.csv')

In [110]:
water_shapes_filtered = water_shapes[['WKT','van_precinct_id']].copy()
merged_gdf_water = merged_gdf.merge(water_shapes_filtered, on='van_precinct_id', how='left')

In [111]:
hd_precincts = pd.read_csv('data/hd_precincts.csv', dtype={'van_precinct_id': str, 'targets':int})
water_shapes_filtered_targets = water_shapes[['WKT','van_precinct_id','targets']]
hd_precincts_shapes = hd_precincts.merge(water_shapes_filtered_targets, on='van_precinct_id', how='left')


In [112]:
merged_gdf_water.to_csv('output/precinct_shapes_with_targets.csv')
hd_precincts_shapes.to_csv('output/house_districts_with_targets.csv')


In [113]:
merged_gdf_water.drop(columns = 'targets', inplace=True)

In [114]:
merged_gdf_water

Unnamed: 0,van_precinct_id,van_precinct_name,precinct_name_doe,GEOID,WKT
0,2257730,118 - Precinct 118,PRECINCT 118,51013000118,MULTIPOLYGON (((-77.0802759997755 38.893249999...
1,1071009,332 - Coates,COATES,51059000332,MULTIPOLYGON (((-77.4046161116707 38.956599160...
2,2257679,119 - Precinct 119,PRECINCT 119,51013000119,MULTIPOLYGON (((-77.0659740002905 38.889389000...
3,295994,132 - Monument,MONUMENT,51059000132,MULTIPOLYGON (((-77.3347390241583 38.853949940...
4,2257747,146 - Precinct 146,PRECINCT 146,51013000146,MULTIPOLYGON (((-77.1076629998263 38.881226000...
...,...,...,...,...,...
2551,296863,204 - Clark's,Clarke's,51169000204,MULTIPOLYGON (((-82.3473127749332 36.689436129...
2552,295537,201 - Davis,Davis,51021000201,MULTIPOLYGON (((-80.9126739999807 37.073348999...
2553,297004,207 - Freestone,FREESTONE,51185000207,MULTIPOLYGON (((-81.5104317540435 37.003425967...
2554,296867,304 - Copper Creek,Copper Creek,51169000304,MULTIPOLYGON (((-82.5395678333016 36.719106127...


In [115]:
trb = pd.read_csv('data/turfs_regions_base.csv',dtype={'van_precinct_id': str, 'targets':int})
trb_filtered = trb[['Region','fo_name','county','van_precinct_id','voters','doors','targets']].copy()

In [116]:
merged_gdf_water_w_turfs = merged_gdf_water.merge(trb_filtered,on='van_precinct_id', how='left')

In [117]:
from shapely.geometry import Point

# Read CSVs as regular DataFrames first
ooc_sls_df = pd.read_csv('data/ooc_sls.csv')
regular_sls_df = pd.read_csv('data/regular_sls.csv')

# Convert to GeoDataFrames using lat/lon columns
ooc_sls_gdf = gpd.GeoDataFrame(
    ooc_sls_df, 
    geometry=gpd.points_from_xy(ooc_sls_df.longitude, ooc_sls_df.latitude),
    crs='EPSG:4326'  # WGS84 coordinate system
)

regular_sls_gdf = gpd.GeoDataFrame(
    regular_sls_df,
    geometry=gpd.points_from_xy(regular_sls_df.longitude, regular_sls_df.latitude),
    crs='EPSG:4326'
)

In [118]:
merged_gdf_water_w_turfs.head(5)

Unnamed: 0,van_precinct_id,van_precinct_name,precinct_name_doe,GEOID,WKT,Region,fo_name,county,voters,doors,targets
0,2257730,118 - Precinct 118,PRECINCT 118,51013000118,MULTIPOLYGON (((-77.0802759997755 38.893249999...,R01,R01B - Arlington,ARLINGTON,6788,6336,2165
1,1071009,332 - Coates,COATES,51059000332,MULTIPOLYGON (((-77.4046161116707 38.956599160...,R02,R02F - Herndon,FAIRFAX,7112,6205,2092
2,2257679,119 - Precinct 119,PRECINCT 119,51013000119,MULTIPOLYGON (((-77.0659740002905 38.889389000...,R01,R01B - Arlington,ARLINGTON,5688,5244,1818
3,295994,132 - Monument,MONUMENT,51059000132,MULTIPOLYGON (((-77.3347390241583 38.853949940...,R02,R02D - Fair Oaks,FAIRFAX,5713,4674,1616
4,2257747,146 - Precinct 146,PRECINCT 146,51013000146,MULTIPOLYGON (((-77.1076629998263 38.881226000...,R01,R01C - Ballston,ARLINGTON,4753,4501,1595


In [119]:
from shapely import wkt

# Convert WKT string to geometry and create GeoDataFrame
merged_gdf_water_w_turfs['geometry'] = merged_gdf_water_w_turfs['WKT'].apply(wkt.loads)
merged_gdf_water_w_turfs = gpd.GeoDataFrame(merged_gdf_water_w_turfs, geometry='geometry', crs='EPSG:4326')

# Verify it worked
print(f"Type: {type(merged_gdf_water_w_turfs)}")
print(f"Geometry column: {merged_gdf_water_w_turfs.geometry.name}")

# Now the spatial join should work
ooc_sls_with_precincts = gpd.sjoin(
    ooc_sls_gdf, 
    merged_gdf_water_w_turfs[['van_precinct_name', 'van_precinct_id', 'Region', 'fo_name', 'county', 'geometry']], 
    how='left', 
    predicate='within'
)

regular_sls_with_precincts = gpd.sjoin(
    regular_sls_gdf,
    merged_gdf_water_w_turfs[['van_precinct_name', 'van_precinct_id', 'Region', 'fo_name', 'county', 'geometry']], 
    how='left', 
    predicate='within'
)

Type: <class 'geopandas.geodataframe.GeoDataFrame'>
Geometry column: geometry


In [120]:
# First, let's see what columns you have
print("Columns in merged_gdf_water_w_turfs:")
print(merged_gdf_water_w_turfs.columns.tolist())

# Check if it's actually a GeoDataFrame
print(f"\nType: {type(merged_gdf_water_w_turfs)}")

Columns in merged_gdf_water_w_turfs:
['van_precinct_id', 'van_precinct_name', 'precinct_name_doe', 'GEOID', 'WKT', 'Region', 'fo_name', 'county', 'voters', 'doors', 'targets', 'geometry']

Type: <class 'geopandas.geodataframe.GeoDataFrame'>


In [121]:
ooc_sls_with_precincts

Unnamed: 0,proposed_regionturf,location,latitude,longitude,geometry,index_right,van_precinct_name,van_precinct_id,Region,fo_name,county
0,R01,Inner NOVA,38.853815,-77.094557,POINT (-77.09456 38.85381),230.0,101 - Precinct 101,2257734.0,R01,R01D - Barcroft,ARLINGTON
1,R02,Fairfax,38.884599,-77.298989,POINT (-77.29899 38.8846),798.0,727 - Oakton,295963.0,R02,R02I - Vienna,FAIRFAX
2,R03,Loudoun,38.942316,-77.534865,POINT (-77.53487 38.94232),21.0,119 - Arcola,1594345.0,R03,R03G - South Riding,LOUDOUN
3,R04,Prince Williams,38.678744,-77.284304,POINT (-77.2843 38.67874),589.0,506 - Old Bridge,296717.0,R04,R04F - Lake Ridge,PRINCE WILLIAM
4,R05,Fredericksburg,38.218773,-77.557366,POINT (-77.55737 38.21877),239.0,103 - John J Wright,296922.0,R05,R05C - HD 66,SPOTSYLVANIA
5,R06/7,Richmond,37.530007,-77.492871,POINT (-77.49287 37.53001),409.0,402 - Four Hundred Two,297504.0,R07,R07G - South Garden,RICHMOND CITY
6,R08,Newport News,37.088709,-76.456978,POINT (-76.45698 37.08871),226.0,319 - Saunders,297366.0,R08,R08F - Port Warwick,NEWPORT NEWS CITY
7,R10,Virginia Beach,36.823541,-76.099772,POINT (-76.09977 36.82354),312.0,036 - Precinct 36,297662.0,R10,R10C - HD 97,VIRGINIA BEACH CITY
8,R10,Norfolk,36.889232,-76.256957,POINT (-76.25696 36.88923),,,,,,
9,R11,Williamsburg,37.296105,-76.748537,POINT (-76.74854 37.2961),664.0,202 - Jamestown B,296339.0,R11,R11C - HD 71,JAMES CITY


In [122]:
pd.set_option('display.max_rows', 100)
regular_sls_with_precincts


Unnamed: 0,proposed_regionturf,Location,latitude,longitude,geometry,index_right,van_precinct_name,van_precinct_id,Region,fo_name,county
0,R01A,East Alexandria,38.809613,-77.056369,POINT (-77.05637 38.80961),246,104 - Durant Center,297095,R01,R01A - Alexandria,ALEXANDRIA CITY
1,R01B,Arlington,38.875044,-77.09819,POINT (-77.09819 38.87504),211,102 - Precinct 102,2257719,R01,R01E - Clarendon,ARLINGTON
2,R01f,Crystal City,38.844909,-77.062406,POINT (-77.06241 38.84491),932,103 - Precinct 103,2257680,R01,R01F - Crystal City,ARLINGTON
3,R01I,West End Alexandria,38.81281,-77.12425,POINT (-77.12425 38.81281),41,304 - Tucker School,297112,R01,R01I - Westend Alexandria,ALEXANDRIA CITY
3,R01I,West End Alexandria,38.81281,-77.12425,POINT (-77.12425 38.81281),134,303 - Charles E Beatley Library,297111,R01,R01I - Westend Alexandria,ALEXANDRIA CITY
4,R02E,Falls Church,38.873425,-77.174965,POINT (-77.17497 38.87343),426,535 - Fort Buffalo,295945,R02,R02E - Falls Church,FAIRFAX
5,R02A,Annandale,38.830808,-77.211169,POINT (-77.21117 38.83081),50,106 - Heritage,295794,R02,R02A - Annandale,FAIRFAX
6,R02H,Reston,38.954164,-77.349125,POINT (-77.34912 38.95416),12,208 - Reston #1,295818,R02,R02H - Reston,FAIRFAX
7,R02I,Vienna,38.906023,-77.26097,POINT (-77.26097 38.90602),428,214 - Vienna #2,295821,R02,R02I - Vienna,FAIRFAX
8,R02B,Centerville,38.840229,-77.443611,POINT (-77.44361 38.84023),427,924 - Spindle,296013,R02,R02B - Centreville,FAIRFAX


In [123]:
# Update the latitude and longitude for row index 8 (the Norfolk row with NaN values)
ooc_sls_with_precincts.loc[8, 'latitude'] = 36.88711985322262
ooc_sls_with_precincts.loc[8, 'longitude'] = -76.26026094766804

# Update the geometry to match the new coordinates
ooc_sls_with_precincts.loc[8, 'geometry'] = Point(-76.26026094766804, 36.88711985322262)

In [124]:
# First, drop the index_right column to avoid conflicts
temp_point = ooc_sls_with_precincts.loc[[8]].drop(columns=['index_right', 'van_precinct_name', 'van_precinct_id', 'Region', 'fo_name', 'county'])

# Make it a proper GeoDataFrame
temp_point = gpd.GeoDataFrame(temp_point, crs='EPSG:4326')

# Do spatial join for just this point
updated_row = gpd.sjoin(
    temp_point, 
    merged_gdf_water_w_turfs[['van_precinct_name', 'van_precinct_id', 'Region', 'fo_name', 'county', 'geometry']], 
    how='left', 
    predicate='within'
)

# Update the original dataframe with the new precinct info
if not updated_row.empty and not pd.isna(updated_row.iloc[0]['van_precinct_id']):
    ooc_sls_with_precincts.loc[8, 'van_precinct_name'] = updated_row.iloc[0]['van_precinct_name']
    ooc_sls_with_precincts.loc[8, 'van_precinct_id'] = updated_row.iloc[0]['van_precinct_id']
    ooc_sls_with_precincts.loc[8, 'Region'] = updated_row.iloc[0]['Region']
    ooc_sls_with_precincts.loc[8, 'fo_name'] = updated_row.iloc[0]['fo_name']
    ooc_sls_with_precincts.loc[8, 'county'] = updated_row.iloc[0]['county']
    ooc_sls_with_precincts.loc[8, 'index_right'] = updated_row.iloc[0]['index_right']
    print("Successfully updated row 8!")
else:
    print("No precinct found for the new coordinates - might still be in water/outside boundaries")

# Check the result
print(ooc_sls_with_precincts.loc[8])

Successfully updated row 8!
proposed_regionturf                                             R10
location                                                    Norfolk
latitude                                                   36.88712
longitude                                                -76.260261
geometry               POINT (-76.26026094766804 36.88711985322262)
index_right                                                   396.0
van_precinct_name                                     218 - Willard
van_precinct_id                                              297393
Region                                                          R10
fo_name                                         R10D - West Norfolk
county                                                 NORFOLK CITY
Name: 8, dtype: object


In [125]:
ooc_sls_with_precincts

Unnamed: 0,proposed_regionturf,location,latitude,longitude,geometry,index_right,van_precinct_name,van_precinct_id,Region,fo_name,county
0,R01,Inner NOVA,38.853815,-77.094557,POINT (-77.09456 38.85381),230.0,101 - Precinct 101,2257734,R01,R01D - Barcroft,ARLINGTON
1,R02,Fairfax,38.884599,-77.298989,POINT (-77.29899 38.8846),798.0,727 - Oakton,295963,R02,R02I - Vienna,FAIRFAX
2,R03,Loudoun,38.942316,-77.534865,POINT (-77.53487 38.94232),21.0,119 - Arcola,1594345,R03,R03G - South Riding,LOUDOUN
3,R04,Prince Williams,38.678744,-77.284304,POINT (-77.2843 38.67874),589.0,506 - Old Bridge,296717,R04,R04F - Lake Ridge,PRINCE WILLIAM
4,R05,Fredericksburg,38.218773,-77.557366,POINT (-77.55737 38.21877),239.0,103 - John J Wright,296922,R05,R05C - HD 66,SPOTSYLVANIA
5,R06/7,Richmond,37.530007,-77.492871,POINT (-77.49287 37.53001),409.0,402 - Four Hundred Two,297504,R07,R07G - South Garden,RICHMOND CITY
6,R08,Newport News,37.088709,-76.456978,POINT (-76.45698 37.08871),226.0,319 - Saunders,297366,R08,R08F - Port Warwick,NEWPORT NEWS CITY
7,R10,Virginia Beach,36.823541,-76.099772,POINT (-76.09977 36.82354),312.0,036 - Precinct 36,297662,R10,R10C - HD 97,VIRGINIA BEACH CITY
8,R10,Norfolk,36.88712,-76.260261,POINT (-76.26026 36.88712),396.0,218 - Willard,297393,R10,R10D - West Norfolk,NORFOLK CITY
9,R11,Williamsburg,37.296105,-76.748537,POINT (-76.74854 37.2961),664.0,202 - Jamestown B,296339,R11,R11C - HD 71,JAMES CITY


In [99]:
# First, let's see what columns you currently have
print("Current columns in merged_gdf_water_w_turfs:")
print(merged_gdf_water_w_turfs.columns.tolist())

Current columns in merged_gdf_water_w_turfs:
['van_precinct_id', 'van_precinct_name', 'precinct_name_doe', 'GEOID', 'WKT', 'Region', 'fo_name', 'county', 'voters', 'doors', 'targets', 'geometry']


In [126]:
# Fix column names and add missing columns (keep all existing columns)
metrics_df = merged_gdf_water_w_turfs.copy()

# Rename columns to match Streamlit app expectations (without dropping originals)
metrics_df['Current Region'] = metrics_df['Region']
metrics_df['county_name'] = metrics_df['county']
metrics_df['Current Turf'] = metrics_df['fo_name']
metrics_df['supporters'] = metrics_df['targets']  # Use targets instead of supporters

# Add bounding box columns
def get_bounds(geom):
    bounds = geom.bounds
    return pd.Series({
        'min_lon': bounds[0],
        'min_lat': bounds[1], 
        'max_lon': bounds[2],
        'max_lat': bounds[3]
    })

metrics_df[['min_lon', 'min_lat', 'max_lon', 'max_lat']] = metrics_df['geometry'].apply(get_bounds)
metrics_df['van_precinct_id'] = metrics_df['van_precinct_id'].astype(str)

# Save CSV (drop only geometry and WKT)
metrics_df.drop(columns=['geometry', 'WKT']).to_csv('output/precincts_metrics.csv', index=False)

# Save GeoJSON with lighter simplification
simplified_gdf = metrics_df.copy()
simplified_gdf['geometry'] = simplified_gdf['geometry'].simplify(0.0001)  # Lighter simplification
simplified_gdf.to_file('output/precincts_simplified.geojson', driver='GeoJSON')

print("Files created successfully!")
print(f"Metrics CSV shape: {metrics_df.shape}")
print("✅ Ready for Streamlit app with targets as supporters!")

Files created successfully!
Metrics CSV shape: (2576, 20)
✅ Ready for Streamlit app with targets as supporters!


In [127]:
# Save OOC points as CSV
ooc_csv_columns = [col for col in ooc_sls_with_precincts.columns if col != 'geometry']
ooc_export = ooc_sls_with_precincts[ooc_csv_columns].copy()

# Add lat/lon columns for easy use
ooc_export['latitude'] = ooc_sls_with_precincts.geometry.y
ooc_export['longitude'] = ooc_sls_with_precincts.geometry.x

# Clean up the spatial join artifacts
if 'index_right' in ooc_export.columns:
    ooc_export = ooc_export.drop(columns=['index_right'])

ooc_export.to_csv('output/ooc_sls_points.csv', index=False)

# Save regular SLS points as CSV
regular_csv_columns = [col for col in regular_sls_with_precincts.columns if col != 'geometry']
regular_export = regular_sls_with_precincts[regular_csv_columns].copy()

# Add lat/lon columns
regular_export['latitude'] = regular_sls_with_precincts.geometry.y  
regular_export['longitude'] = regular_sls_with_precincts.geometry.x

# Clean up the spatial join artifacts
if 'index_right' in regular_export.columns:
    regular_export = regular_export.drop(columns=['index_right'])

regular_export.to_csv('output/regular_sls_points.csv', index=False)

print("Point data saved successfully!")
print(f"OOC points: {len(ooc_export)} rows")
print(f"Regular SLS points: {len(regular_export)} rows")
print("Columns in OOC:", ooc_export.columns.tolist())

Point data saved successfully!
OOC points: 13 rows
Regular SLS points: 70 rows
Columns in OOC: ['proposed_regionturf', 'location', 'latitude', 'longitude', 'van_precinct_name', 'van_precinct_id', 'Region', 'fo_name', 'county']
