## 2024 Precinct Map and Data Cleaning
Baltimore City, Baltimore County, Anne Arundel, and Howard County

In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np

## Join on Name

In [69]:
# 2024 results table; its `name` column is the key used for the join below.
coverage = pd.read_csv("data/coverage_area_results_2024.csv")
# Simplified precinct polygons; its `NAME` column is the matching join key.
shapefile = gpd.read_file("assets/precincts/precinct_simplified.geojson")

In [72]:
# Inspect the precinct shapefile's columns, dtypes, and non-null counts.
shapefile.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 2141 entries, 0 to 2140
Data columns (total 14 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   OBJECTID    2141 non-null   int32   
 1   COUNTY      2141 non-null   object  
 2   GIS_SRC     2141 non-null   object  
 3   JURSCODE    2141 non-null   object  
 4   COUNTYNAME  2141 non-null   object  
 5   NAME        2141 non-null   object  
 6   VTD         2122 non-null   object  
 7   LABEL       2140 non-null   object  
 8   DATE_AGGRE  2141 non-null   object  
 9   AGG_SRC     2141 non-null   object  
 10  SRC_DATE    2141 non-null   object  
 11  Shape_Leng  2141 non-null   float64 
 12  Shape_Area  2141 non-null   float64 
 13  geometry    2141 non-null   geometry
dtypes: float64(2), geometry(1), int32(1), object(10)
memory usage: 225.9+ KB


In [73]:
# Inspect the 2024 results table's columns, dtypes, and non-null counts.
coverage.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1942 entries, 0 to 1941
Data columns (total 16 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   name        1942 non-null   object 
 1   prec_name   1942 non-null   int64  
 2   sum_trump   1942 non-null   int64  
 3   pct_trump   1940 non-null   float64
 4   sum_harris  1942 non-null   int64  
 5   pct_harris  1940 non-null   float64
 6   sum_other   1942 non-null   int64  
 7   pct_other   1940 non-null   float64
 8   prec_sum    1942 non-null   int64  
 9   leader      1942 non-null   object 
 10  number      1942 non-null   int64  
 11  vtd_join    1942 non-null   int64  
 12  county      1942 non-null   int64  
 13  countyname  1942 non-null   object 
 14  label       1942 non-null   object 
 15  new_geo     1942 non-null   object 
dtypes: float64(3), int64(8), object(5)
memory usage: 242.9+ KB


In [26]:
# Drop the identifier/label columns listed below, leaving only `name`, the vote
# totals, the percentages, and `leader`.
# DataFrame.drop returns a new frame, so `coverage` itself is left untouched and
# this cell defines `test` on a fresh kernel (previously the definition was
# commented out, so `test` only existed as leftover kernel state).
test = coverage.drop(
    columns=['prec_name', 'number', 'vtd_join', 'county', 'countyname', 'label', 'new_geo']
)
test.head()

Unnamed: 0,name,sum_trump,pct_trump,sum_harris,pct_harris,sum_other,pct_other,prec_sum,leader
0,Allegany Precinct 01-000,420,84.34,72,14.46,6,1.2,498,Trump
1,Allegany Precinct 02-000,463,83.27,82,14.75,11,1.98,556,Trump
2,Allegany Precinct 03-000,478,84.01,83,14.59,8,1.41,569,Trump
3,Allegany Precinct 06-005,980,50.13,923,47.21,52,2.66,1955,Trump
4,Allegany Precinct 06-006,664,68.74,282,29.19,20,2.07,966,Trump


In [28]:
# Left-join the results onto the precinct shapes by precinct name, so every
# results row is kept even when no shape matches.
# NOTE(review): the recorded merged.info() output shows 1949 rows against 1942
# in coverage, which suggests some NAME values repeat in shapefile — TODO confirm.
merged = test.merge(shapefile, how='left', left_on='name', right_on='NAME')


In [32]:
# Check the row count and non-null counts after the join to see how many rows matched a shape.
merged.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1949 entries, 0 to 1948
Data columns (total 23 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   name        1949 non-null   object  
 1   sum_trump   1949 non-null   int64   
 2   pct_trump   1947 non-null   float64 
 3   sum_harris  1949 non-null   int64   
 4   pct_harris  1947 non-null   float64 
 5   sum_other   1949 non-null   int64   
 6   pct_other   1947 non-null   float64 
 7   prec_sum    1949 non-null   int64   
 8   leader      1949 non-null   object  
 9   OBJECTID    1948 non-null   float64 
 10  COUNTY      1948 non-null   object  
 11  GIS_SRC     1948 non-null   object  
 12  JURSCODE    1948 non-null   object  
 13  COUNTYNAME  1948 non-null   object  
 14  NAME        1948 non-null   object  
 15  VTD         1935 non-null   object  
 16  LABEL       1948 non-null   object  
 17  DATE_AGGRE  1948 non-null   object  
 18  AGG_SRC     1948 non-null   object  
 19  SRC_DA

In [33]:
# NAME was only needed as the right-hand join key, so remove it.
# Reassigning (rather than inplace=True) together with errors='ignore' keeps the
# cell idempotent: re-running it no longer raises KeyError once NAME is gone.
merged = merged.drop(columns=['NAME'], errors='ignore')

In [36]:
# Wrap the merged table as a GeoDataFrame (using its 'geometry' column) and
# write it out as GeoJSON.
# NOTE(review): the left join leaves at least one row without shapefile
# attributes, so it presumably has no geometry either — TODO confirm how it is written.
gdf = gpd.GeoDataFrame(data=merged, geometry='geometry')
gdf.to_file('data/coverage_area_results_2024.geojson', driver='GeoJSON')

In [70]:
# Schema check of the 2024 results table (same call as the earlier coverage.info() cell).
coverage.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1942 entries, 0 to 1941
Data columns (total 16 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   name        1942 non-null   object 
 1   prec_name   1942 non-null   int64  
 2   sum_trump   1942 non-null   int64  
 3   pct_trump   1940 non-null   float64
 4   sum_harris  1942 non-null   int64  
 5   pct_harris  1940 non-null   float64
 6   sum_other   1942 non-null   int64  
 7   pct_other   1940 non-null   float64
 8   prec_sum    1942 non-null   int64  
 9   leader      1942 non-null   object 
 10  number      1942 non-null   int64  
 11  vtd_join    1942 non-null   int64  
 12  county      1942 non-null   int64  
 13  countyname  1942 non-null   object 
 14  label       1942 non-null   object 
 15  new_geo     1942 non-null   object 
dtypes: float64(3), int64(8), object(5)
memory usage: 242.9+ KB


In [71]:
# Count how many precinct rows each candidate leads in the 2024 results.
coverage['leader'].value_counts()

leader
Harris    1396
Trump      546
Name: count, dtype: int64

In [52]:
# Confirm the sum_trump column's dtype and non-null count.
coverage['sum_trump'].info()

<class 'pandas.core.series.Series'>
RangeIndex: 1942 entries, 0 to 1941
Series name: sum_trump
Non-Null Count  Dtype
--------------  -----
1942 non-null   int64
dtypes: int64(1)
memory usage: 15.3 KB


In [61]:
# Load the 2020 results table for comparison with the 2024 results.
df_2020 = pd.read_csv('data/coverage_area_results_2020.csv')

In [63]:
# Preview the first rows of the 2020 table.
df_2020.head()

Unnamed: 0,VTD,total_votes,biden_votes,pct_biden,trump_votes,pct_trump
0,2400301-001,1672,737,0.440789,893,0.534091
1,2400301-002,1418,655,0.461918,717,0.505642
2,2400301-003,1453,590,0.406056,822,0.565726
3,2400301-004,1539,911,0.591943,595,0.386615
4,2400301-005,1301,864,0.664105,401,0.308224


In [64]:
# Label each 2020 row with the candidate holding the larger vote share.
# A single np.where(biden > trump, 'biden', 'trump') credits Trump with every
# exact tie and with every row whose percentages are missing (comparisons with
# NaN are False). Both directions are therefore tested explicitly, and any row
# where neither candidate is strictly ahead is labelled 'tie'.
biden_ahead = df_2020['pct_biden'] > df_2020['pct_trump']
trump_ahead = df_2020['pct_trump'] > df_2020['pct_biden']
df_2020['leader'] = np.select([biden_ahead, trump_ahead], ['biden', 'trump'], default='tie')

In [66]:
# Count how many 2020 rows fall under each leader label.
df_2020['leader'].value_counts()

leader
biden    590
trump    140
Name: count, dtype: int64

In [67]:
# Former President Donald Trump led in 546 Maryland precincts in the 2024 results, compared with the 140 precincts he led in the 2020 data — nearly four times as many. Maryland remains a blue state, with Vice President Kamala Harris leading in nearly 1,400 precincts (1,396).
# Caveat: the 2024 table has 1,942 precincts while the 2020 table has only 730, so confirm that both cover the same area before reporting the change.
