# Elections

### Election data included in the old version of KYD:
- 2022 Gubernatorial Election Results: 
    - Columns: 'ed', 'cd','hochul', 'zeldin', 'gov_votes', 'voters_nov22'
    - Note: City Council District 2023 merged with election districts 2022 using largest intersection logic
- 2022 Gubernatorial Election Results in GeoJSON format:
    - Columns: 'ed', 'cd', 'dem', 'rep', 'con', 'wor', 'oth', 'blank', 'total'
    - Note: City Council District 2023 merged with election districts 2022 using largest intersection logic
- 2021 Mayoral Election Results:
    - Columns: 'ed', 'cd','adams', 'sliwa', 'mayor_votes'
    - Note: City Council District 2023 merged with election districts 2021 using largest intersection logic
- 2021 Primary (Round 1) and Mayoral Election Results in GeoJSON format:
    - Columns: 'ed', 'cd', 'adams', 'sliwa', 'mayor_votes', 'geometry', 'prim_adams', 'prim_wiley', 'prim_garcia', 'prim_yang', 'prim_stringer','prim_morales', 'prim_mcguire', 'prim_donovan', 'prim_votes'
    - Note: City Council District 2023 merged with election districts 2021 using largest intersection logic.
### What we need for the new version:
- DONE: 2024 Presidential General Election Results in GeoJSON format
- DONE: 2021 Mayoral Election Results
- DONE: 2021 primary round 1 and mayoral election results in GeoJSON format
- DONE: 2024 Presidential General Election Results
- DONE: 2024 voter turnout for general election 

In [2]:
## import libraries

import pandas as pd
import geopandas as gpd
import csv

In [3]:
## load the 2024 presidential election results from the Excel workbook,
## reading the "ed" column as object dtype

prez_election_results = pd.read_excel(
    "../input/elections/presidential_2024.xlsx",
    sheet_name="presidential_2024",
    dtype={"ed": object},
)
prez_election_results.info()

FileNotFoundError: [Errno 2] No such file or directory: '../input/elections/presidential_2024.xlsx'

In [3]:
## read in the voter enrollment files, one workbook per county
## every workbook is loaded the same way ("ed" read as object dtype), so the
## call is written once and the results unpack into the five county frames

richmond, bronx, kings, ny, queens = (
    pd.read_excel(f"../input/voter_enrollment/{workbook}", dtype={"ed": object})
    for workbook in (
        "richmonded_nov24.xlsx",
        "bronxed_nov24.xlsx",
        "kingsed_nov24.xlsx",
        "new-yorked_nov24.xlsx",
        "queensed_nov24.xlsx",
    )
)

In [4]:
## gather the five county enrollment frames so they can be stacked together
all_enrolled_voters = [
    richmond,
    bronx,
    kings,
    ny,
    queens,
]

In [5]:
## stack the county frames into one enrollment df with a fresh 0..n-1 index
enrollment = pd.concat(objs=all_enrolled_voters, ignore_index=True)
enrollment.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12373 entries, 0 to 12372
Data columns (total 10 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   county  12373 non-null  object
 1   ed      12373 non-null  object
 2   status  12373 non-null  object
 3   dem     12373 non-null  int64 
 4   rep     12373 non-null  int64 
 5   con     12373 non-null  int64 
 6   wor     12373 non-null  int64 
 7   oth     12373 non-null  int64 
 8   blank   12373 non-null  int64 
 9   total   12373 non-null  int64 
dtypes: int64(7), object(3)
memory usage: 966.8+ KB


In [6]:
## keep only the enrollment rows whose status is "Active"

active_voters = enrollment.loc[enrollment["status"].eq("Active")]
active_voters.head()

Unnamed: 0,county,ed,status,dem,rep,con,wor,oth,blank,total
0,Richmond,61001,Active,908,141,17,7,26,389,1488
3,Richmond,61002,Active,851,94,2,7,27,379,1360
6,Richmond,61003,Active,1248,71,6,8,31,338,1702
9,Richmond,61004,Active,1303,109,12,10,35,397,1866
12,Richmond,61005,Active,582,248,11,2,32,332,1207


In [7]:
## attach the active enrollment "total" to the election results
## (left join on "ed", so every results row is kept)

prez_results_w_turnout = prez_election_results.merge(
    active_voters[["ed", "total"]],
    how="left",
    on="ed",
)

In [8]:
## take a peek at the merged results (bare expression, so the notebook renders the frame)
prez_results_w_turnout

Unnamed: 0,ed,county,ab_military,affidavit,trump_con,trump_rep,fed,harris_dem,harris_wor,manually_counted_emergency,public_counter,scattered,Grand Total,harris_tot,trump_tot,prez_votes,total
0,23001,Queens,158,18,149.0,862.0,2,281.0,14.0,0,1149,13.0,2646,295,1011,1325,1652.0
1,23002,Queens,128,27,121.0,836.0,6,268.0,21.0,0,1105,12.0,2524,289,957,1260,1664.0
2,23003,Queens,37,8,33.0,297.0,0,93.0,6.0,0,388,3.0,865,99,330,433,568.0
3,23004,Queens,161,25,59.0,812.0,5,375.0,16.0,0,1090,12.0,2555,391,871,1276,1702.0
4,23005,Queens,116,36,60.0,876.0,5,315.0,21.0,0,1137,11.0,2577,336,936,1289,1627.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4414,87055,Bronx,55,18,13.0,173.0,0,467.0,18.0,0,614,8.0,1366,485,186,687,1370.0
4415,87056,Bronx,0,0,0.0,3.0,0,4.0,1.0,0,8,,16,5,3,8,19.0
4416,87057,Bronx,0,0,0.0,0.0,0,0.0,0.0,0,0,,0,0,0,0,
4417,87058,Bronx,0,0,,,0,,,0,0,,0,0,0,0,


In [9]:
## load the election-district-to-council-district crosswalk,
## reading both id columns ("ed" and "cd") as object dtype
ed_cd_crosswalk = pd.read_csv(
    "../input/crosswalks/ed23-to-cd23-crosswalk.csv",
    dtype={"ed": object, "cd": object},
)

In [10]:
## add the "cd" column from the crosswalk to the results (left join on "ed")

prez_election_24 = prez_results_w_turnout.merge(
    ed_cd_crosswalk[["ed", "cd"]],
    how="left",
    on="ed",
)

In [11]:
## take a peek at the first rows of the results with "cd" attached
prez_election_24.head()

Unnamed: 0,ed,county,ab_military,affidavit,trump_con,trump_rep,fed,harris_dem,harris_wor,manually_counted_emergency,public_counter,scattered,Grand Total,harris_tot,trump_tot,prez_votes,total,cd
0,23001,Queens,158,18,149.0,862.0,2,281.0,14.0,0,1149,13.0,2646,295,1011,1325,1652.0,32
1,23002,Queens,128,27,121.0,836.0,6,268.0,21.0,0,1105,12.0,2524,289,957,1260,1664.0,32
2,23003,Queens,37,8,33.0,297.0,0,93.0,6.0,0,388,3.0,865,99,330,433,568.0,32
3,23004,Queens,161,25,59.0,812.0,5,375.0,16.0,0,1090,12.0,2555,391,871,1276,1702.0,32
4,23005,Queens,116,36,60.0,876.0,5,315.0,21.0,0,1137,11.0,2577,336,936,1289,1627.0,32


In [12]:
## create new turnout column: presidential votes as a percentage of "total"
## (the active enrollment count merged in above)
## a zero denominator is masked to NaN first, so turnout comes out missing
## instead of inf, which could otherwise carry through to the saved outputs
prez_election_24["turnout"] = (
    prez_election_24["prez_votes"]
    / prez_election_24["total"].where(prez_election_24["total"] > 0)
) * 100

In [13]:
## clean up: keep only the id, vote-count, enrollment and turnout columns
results_cleaned = prez_election_24.loc[
    :,
    ["ed", "cd", "harris_tot", "trump_tot", "prez_votes", "total", "turnout"],
]

In [14]:
## take a look at the df's info, specifically the dtype and non-null count of each column
results_cleaned.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4419 entries, 0 to 4418
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   ed          4419 non-null   object 
 1   cd          4264 non-null   object 
 2   harris_tot  4419 non-null   int64  
 3   trump_tot   4419 non-null   int64  
 4   prez_votes  4419 non-null   int64  
 5   total       4131 non-null   float64
 6   turnout     4131 non-null   float64
dtypes: float64(2), int64(3), object(2)
memory usage: 241.8+ KB


In [1]:
## count the missing values in each column
## (a per-column tally is readable at a glance, unlike printing the full True/False frame)
results_cleaned.isnull().sum()

NameError: name 'results_cleaned' is not defined

In [15]:
## read in the council district shapefile
## NOTE(review): this comment previously said "2025", but the path reads council_districts_24 — confirm which vintage is intended
districts = gpd.read_file("../input/GIS/council_districts_24")

In [16]:
## change dtypes: cast both join keys to str so "cd" and "CounDist" compare as text in the merge below
## NOTE(review): "cd" has missing values (4264 non-null of 4419 in the info() output above);
## astype(str) presumably turns them into the text "nan" — confirm that is acceptable
results_cleaned["cd"] = results_cleaned["cd"].astype(str)
districts["CounDist"] = districts["CounDist"].astype(str)

In [17]:
## reproject the district geometries to EPSG:4326 and display the resulting CRS
districts = districts.to_crs("EPSG:4326")
districts.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [18]:
## join the district shapefile columns onto the results,
## matching results "cd" to shapefile "CounDist"
## (how="inner" is the merge default: rows without a matching district are dropped)
president_results = results_cleaned.merge(
    districts,
    how="inner",
    left_on="cd",
    right_on="CounDist",
)

In [26]:
## rebuild the merged table as a GeoDataFrame, using its "geometry" column
## as the geometry and 4326 as the CRS
president_results = gpd.GeoDataFrame(
    president_results,
    geometry="geometry",
    crs=4326,
)

In [27]:
## save as csv
## index=False: the row index is only the positional counter produced by the merge,
## so writing it would just add an unnamed first column to the file
## NOTE(review): the geometry column is still written to the csv — confirm it is wanted
## there as well as in the geojson
president_results.to_csv("../output/elections/president_results.csv", index=False)

In [28]:
## save as geojson
## the driver is spelled "GeoJSON", the name it is registered under, so the write
## does not rely on the I/O engine matching driver names case-insensitively
president_results.to_file("../output/elections/president_results.geojson", driver="GeoJSON")