# Voter Enrollment

### Files included in 2023 Know Your District:
- Voter enrollments by borough as of February 2023: https://elections.ny.gov/enrollment-county?f%5B0%5D=filter_term%3A36
    - Columns: 'county', 'status', 'dem', 'rep', 'con', 'wor', 'oth', 'blank', 'total', 'pct_dem', 'pct_rep', 'pct_con', 'pct_wor', 'pct_oth', 'pct_blank'
- Total voter enrollments (active + inactive) by election district as of February 2023: https://elections.ny.gov/enrollment-election-district?f%5B0%5D=filter_term%3A126
    - Columns: 'ed', 'cd', 'dem', 'rep', 'con', 'wor', 'oth', 'blank', 'total'
- Total active voter enrollments by election district as of February 2023, in GeoJSON format
    - Columns: 'ed', 'cd', 'dem', 'rep', 'con', 'wor', 'oth', 'blank', 'total', 'geometry'
### Files to include in 2025 Know Your District:
- DONE: Voter enrollments by borough as of February 2025: https://elections.ny.gov/enrollment-county?f%5B0%5D=filter_term%3A596
- DONE: Total voter enrollments (active + inactive) by election district as of February 2025: https://elections.ny.gov/enrollment-election-district?q=/enrollment-election-district%3Fq%3D/enrollment-election-district%3Ff%5B0%5D%3Dfilter_term%3A126&f%5B0%5D=filter_term%3A601
- DONE: Total active voter enrollments by election district as of February 2025, in GeoJSON format


### Voter enrollments by borough as of February 2025:

In [1]:
# import libraries

import pandas as pd
import csv
import json
import geopandas as gpd

In [2]:
## Raise the display cap so pandas prints up to 500 rows before truncating

pd.set_option("display.max_rows", 500)

In [3]:
## Start with borough-level enrollment: load the Excel workbook (the feb25
## extract, per the file name) into a DataFrame.

boro_enrollment = pd.read_excel("../input/voter_enrollment/voters_boro_feb25.xlsx")

In [4]:
## take a peek at the first rows of the data

boro_enrollment.head()

Unnamed: 0,county,status,dem,rep,con,wor,oth,blank,total
0,Richmond,Active,119569,101396,4111,1136,7522,78998,312732
1,Richmond,InActive,7883,6006,228,64,575,4485,19241
2,Richmond,Total,127452,107402,4339,1200,8097,83483,331973
3,Bronx,Active,509008,53768,3352,3375,10232,141854,721589
4,Bronx,InActive,40166,3062,228,255,967,10075,54753


In [5]:
## For every row, express each party's enrollment count as a percentage of
## that row's "total", and store it in the matching pct_* column.
## The mapping is ordered so the new columns are appended in the same
## sequence: dem, rep, con, wor, oth, blank.

pct_sources = {
    "pct_dem": "dem",
    "pct_rep": "rep",
    "pct_con": "con",
    "pct_wor": "wor",
    "pct_oth": "oth",
    "pct_blank": "blank",
}

for pct_col, count_col in pct_sources.items():
    boro_enrollment[pct_col] = boro_enrollment[count_col] / boro_enrollment["total"] * 100

In [6]:
## take a peek at the first ten rows to check the new pct_* columns

boro_enrollment.head(10)

Unnamed: 0,county,status,dem,rep,con,wor,oth,blank,total,pct_dem,pct_rep,pct_con,pct_wor,pct_oth,pct_blank
0,Richmond,Active,119569,101396,4111,1136,7522,78998,312732,38.233695,32.422649,1.314544,0.36325,2.405254,25.260607
1,Richmond,InActive,7883,6006,228,64,575,4485,19241,40.969804,31.214594,1.18497,0.332623,2.98841,23.309599
2,Richmond,Total,127452,107402,4339,1200,8097,83483,331973,38.392279,32.352631,1.307034,0.361475,2.439054,25.147527
3,Bronx,Active,509008,53768,3352,3375,10232,141854,721589,70.539878,7.451333,0.46453,0.467718,1.417982,19.658559
4,Bronx,InActive,40166,3062,228,255,967,10075,54753,73.358537,5.592388,0.416416,0.465728,1.766113,18.400818
5,Bronx,Total,549174,56830,3580,3630,11199,151929,776342,70.738669,7.320227,0.461137,0.467577,1.442534,19.569855
6,Kings,Active,1014572,140687,4722,7687,20234,294585,1482487,68.43716,9.489931,0.318519,0.518521,1.364869,19.871001
7,Kings,InActive,83138,8867,318,543,2049,21342,116257,71.512253,7.627068,0.273532,0.467069,1.762475,18.357604
8,Kings,Total,1097710,149554,5040,8230,22283,315927,1598744,68.660774,9.354468,0.315247,0.514779,1.393782,19.76095
9,New York,Active,691185,74910,2019,3080,14339,200382,985915,70.105942,7.598018,0.204784,0.3124,1.454385,20.32447


In [7]:
## save as a csv file in the output folder for this project
## NOTE(review): no `index` argument is passed, so pandas also writes the
## row index as the first CSV column -- confirm downstream readers expect it.

boro_enrollment.to_csv("../output/voters/boro_enrollment_25.csv")

### Total voter enrollments (active + inactive) by election district as of February 2025:

In [8]:
## Election-district level data: one Excel workbook per borough.
## The paths are listed in the same order as the names they are unpacked
## into, so each workbook is read once, in sequence, into its own DataFrame.

ed_workbooks = [
    "../input/voter_enrollment/richmonded_feb25.xlsx",
    "../input/voter_enrollment/bronxed_feb25.xlsx",
    "../input/voter_enrollment/kingsed_feb25.xlsx",
    "../input/voter_enrollment/new-yorked_feb25.xlsx",
    "../input/voter_enrollment/queensed_feb25.xlsx",
]

richmond, bronx, kings, ny, queens = map(pd.read_excel, ed_workbooks)

In [9]:
## collect the five borough DataFrames in a list named boros so they can
## be concatenated in a single call

boros = [richmond, bronx, kings, ny, queens]

In [10]:
## stack the borough tables into one DataFrame; ignore_index=True discards
## the per-file row labels and renumbers the combined rows from 0.
## Then preview up to the first 500 rows.

combined_boros = pd.concat(boros, ignore_index = True)
combined_boros.head(500)

Unnamed: 0,county,ed,status,dem,rep,con,wor,oth,blank,total
0,Richmond,61001,Active,971,146,20,10,27,420,1594
1,Richmond,61001,Inactive,47,10,2,0,3,27,89
2,Richmond,61001,Total,1018,156,22,10,30,447,1683
3,Richmond,61002,Active,930,116,2,7,29,419,1503
4,Richmond,61002,Inactive,74,2,0,1,4,27,108
5,Richmond,61002,Total,1004,118,2,8,33,446,1611
6,Richmond,61003,Active,1252,78,5,8,31,343,1717
7,Richmond,61003,Inactive,49,0,2,0,0,9,60
8,Richmond,61003,Total,1301,78,7,8,31,352,1777
9,Richmond,61004,Active,1166,97,11,7,28,327,1636


In [11]:
## import the crosswalk that pairs each election district ("ed") with a
## district code ("cd"); "cd" is later matched against the council district
## shapefile's "CounDist" column

ed_cd_crosswalk = pd.read_csv("../input/crosswalks/ed23-to-cd23-crosswalk.csv")

In [12]:
## take a peek at the first rows of the crosswalk

ed_cd_crosswalk.head()

Unnamed: 0,ed,cd
0,23001,32
1,23002,32
2,23003,32
3,23004,32
4,23005,32


In [13]:
## Attach a "cd" value to every enrollment row by joining the crosswalk to
## the combined borough table on their shared "ed" column.
## No `how` is given, so pandas performs its default inner join: only
## election districts present in both tables are kept.

merged = ed_cd_crosswalk.merge(combined_boros, on="ed")

In [14]:
## keep only the rows whose status is "Total" (all voters), then display them

is_total = merged["status"] == "Total"
voters_feb25 = merged[is_total]
voters_feb25

Unnamed: 0,ed,cd,county,status,dem,rep,con,wor,oth,blank,total
2,23001,32,Queens,Total,539,736,68,2,61,302,1708
5,23002,32,Queens,Total,592,689,68,3,50,315,1717
8,23003,32,Queens,Total,212,204,9,1,21,139,586
11,23004,32,Queens,Total,677,631,16,2,50,376,1752
14,23005,32,Queens,Total,700,624,23,1,41,313,1702
...,...,...,...,...,...,...,...,...,...,...,...
12111,84030,8,Bronx,Total,1220,93,8,5,23,287,1636
12114,62058,50,Richmond,Total,216,283,14,0,23,181,717
12117,62059,50,Richmond,Total,366,798,26,4,36,319,1549
12120,61019,49,Richmond,Total,698,134,8,8,31,202,1081


In [15]:
## save the "Total" rows as a csv in the output folder

voters_feb25.to_csv("../output/voters/voters_feb25.csv")

### Total active voter enrollments by election district as of February 2025, in GeoJSON format:

In [16]:
## switch to active voters only: keep the rows whose status is "Active",
## then display them

is_active = merged["status"] == "Active"
active_voters = merged[is_active]
active_voters

Unnamed: 0,ed,cd,county,status,dem,rep,con,wor,oth,blank,total
0,23001,32,Queens,Active,518,714,68,1,58,296,1655
3,23002,32,Queens,Active,575,665,65,3,49,302,1659
6,23003,32,Queens,Active,204,201,8,1,20,133,567
9,23004,32,Queens,Active,647,616,15,2,47,361,1688
12,23005,32,Queens,Active,655,597,22,1,39,303,1617
...,...,...,...,...,...,...,...,...,...,...,...
12109,84030,8,Bronx,Active,1117,86,8,5,21,272,1509
12112,62058,50,Richmond,Active,206,270,14,0,23,175,688
12115,62059,50,Richmond,Active,347,772,24,4,31,306,1484
12118,61019,49,Richmond,Active,666,127,6,8,27,185,1019


In [17]:
## now bring in the 2025 city council districts shapefile with geopandas
## and preview the first rows

districts = gpd.read_file("../input/GIS/council_districts_25")
districts.head()

Unnamed: 0,CounDist,Shape_Leng,Shape_Area,geometry
0,3,81955.340126,76316710.0,"POLYGON ((985271.189 219735.272, 985562.304 21..."
1,38,177740.219692,151736800.0,"POLYGON ((986291.305 173075.384, 986092.063 17..."
2,43,58169.109318,75477510.0,"POLYGON ((984119.708 171475.183, 984303.448 17..."
3,46,254186.106833,277477700.0,"MULTIPOLYGON (((1006493.46 157737.21, 1006476...."
4,25,47289.029395,63861370.0,"POLYGON ((1015112.91 217226.088, 1015205.898 2..."


In [18]:
## change the coordinate system: reproject the district geometries to
## EPSG:4326, then display the CRS to confirm the change

districts = districts.to_crs(epsg = 4326)
districts.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [19]:
## Join each active-voter row to the council district geometries, matching
## the voter table's "cd" to the shapefile's "CounDist".

active_voters_feb25 = active_voters.merge(districts, left_on="cd", right_on="CounDist")

In [20]:
## inspect the merged table: row count, column dtypes and non-null counts
active_voters_feb25.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4049 entries, 0 to 4048
Data columns (total 15 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   ed          4049 non-null   int64   
 1   cd          4049 non-null   int64   
 2   county      4049 non-null   object  
 3   status      4049 non-null   object  
 4   dem         4049 non-null   int64   
 5   rep         4049 non-null   int64   
 6   con         4049 non-null   int64   
 7   wor         4049 non-null   int64   
 8   oth         4049 non-null   int64   
 9   blank       4049 non-null   int64   
 10  total       4049 non-null   int64   
 11  CounDist    4049 non-null   int32   
 12  Shape_Leng  4049 non-null   float64 
 13  Shape_Area  4049 non-null   float64 
 14  geometry    4049 non-null   geometry
dtypes: float64(2), geometry(1), int32(1), int64(9), object(2)
memory usage: 458.8+ KB


In [21]:
## Wrap the merged table in a GeoDataFrame, using its existing "geometry"
## column as the active geometry and declaring the CRS as EPSG:4326.

active_voters_feb25 = gpd.GeoDataFrame(
    active_voters_feb25,
    geometry=active_voters_feb25.geometry,
    crs="EPSG:4326",
)

In [22]:
## take a peek at the first rows of the GeoDataFrame
active_voters_feb25.head()

Unnamed: 0,ed,cd,county,status,dem,rep,con,wor,oth,blank,total,CounDist,Shape_Leng,Shape_Area,geometry
0,23001,32,Queens,Active,518,714,68,1,58,296,1655,32,342444.805887,358669000.0,"MULTIPOLYGON (((-73.82645 40.59053, -73.82642 ..."
1,23002,32,Queens,Active,575,665,65,3,49,302,1659,32,342444.805887,358669000.0,"MULTIPOLYGON (((-73.82645 40.59053, -73.82642 ..."
2,23003,32,Queens,Active,204,201,8,1,20,133,567,32,342444.805887,358669000.0,"MULTIPOLYGON (((-73.82645 40.59053, -73.82642 ..."
3,23004,32,Queens,Active,647,616,15,2,47,361,1688,32,342444.805887,358669000.0,"MULTIPOLYGON (((-73.82645 40.59053, -73.82642 ..."
4,23005,32,Queens,Active,655,597,22,1,39,303,1617,32,342444.805887,358669000.0,"MULTIPOLYGON (((-73.82645 40.59053, -73.82642 ..."


In [23]:
## save the active-voter table (including its geometry column) as a csv
active_voters_feb25.to_csv("../output/voters/active_voters_feb25.csv")

In [25]:
## save as GeoJSON
## The driver is spelled "GeoJSON", its registered name. The lowercase
## "geojson" only works on I/O engines that match driver names
## case-insensitively, so the canonical spelling keeps this cell portable.
active_voters_feb25.to_file("../output/voters/active_voters_feb25.geojson", driver="GeoJSON")