In [1]:

#| Title           | Description                                                    |
#|-----------------|----------------------------------------------------------------|
#| Author          | Kymberly Ayodeji                                               |
#| Start Date      | 2024-05-25                                                       |
#| End Date        | 2024-05-25                                                          |
#| Datasets		   |https://gisdata.mn.gov/dataset/bdry-electionresults-2012-2020                                                              |
#
# 0: Install Libraries and Packages
# Import Packages and Libraries needed for the project
import pandas as pd
import numpy as np

# for data processing
import requests
import json

# for datetime
from datetime import datetime

# for Geospatial data
import geopandas as gpd
import contextily 

# for Visualization
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

In [1]:
def a_load_prep_shapefile(shape_file):
    df = gpd.read_file(shape_file)
    return df

In [4]:
election_results = "data/shp_bdry_electionresults_2012_2020.zip"
raw_data = a_load_prep_shapefile(election_results)
raw_data.head()

Unnamed: 0,VTDID,PCTNAME,PCTCODE,SHORTLABEL,MCDNAME,MCDCODE,MCDFIPS,MCDGNIS,CTU_TYPE,COUNTYNAME,...,MNLEGDFL,MNLEGWI,MNLEGTOTAL,MNCA1YES,MNCA1NO,MNCA1EST,MNCA1TOTAL,Shape_Leng,Shape_Area,geometry
0,270010005,Aitkin,5,,Aitkin,4,460,2393894,city,Aitkin,...,411,0,972,731,224,45,1000,31638.83326,6629747.0,"POLYGON ((447378.031 5154733.389, 448085.138 5..."
1,270010010,Aitkin Twp,10,,Aitkin Twp,5,478,663389,township,Aitkin,...,173,0,543,438,95,17,550,67719.452182,87809130.0,"MULTIPOLYGON (((445904.069 5151658.489, 445903..."
2,270010015,Ball Bluff Twp,15,,Ball Bluff Twp,15,3358,663498,township,Aitkin,...,79,0,158,131,32,1,164,38301.2488,91480570.0,"POLYGON ((485475.521 5207980.031, 485475.766 5..."
3,270010020,Balsam Twp,20,,Balsam Twp,20,3412,663499,township,Aitkin,...,11,0,19,15,2,2,19,38988.094778,95179410.0,"POLYGON ((495441.473 5188825.678, 495423.020 5..."
4,270010025,Beaver Twp,25,,Beaver Twp,25,4384,663536,township,Aitkin,...,23,0,34,31,6,0,37,38288.61419,91359490.0,"POLYGON ((495765.091 5140614.891, 494153.670 5..."


Datast metadata: https://resources.gisdata.mn.gov/pub/gdrs/data/pub/us_mn_state_sos/bdry_electionresults_2012_2020/metadata/metadata.html

In [None]:
def b_clean_organize_data(data):
    #Extract only city of Minneapolis data
    mpls_df = raw_data[raw_data['MCDNAME'] == 'Minneapolis']
    # Only keep necessary columns
    necessary_columns = ['VTDID','PCTNAME', 'WARD', 'PCTCODE', 'REG7AM', 'SIGNATURES', 'EDR',
                         'AB_MB', 'FEDONLYAB', 'PRESONLYAB', 'TOTVOTING', 'USPRSR', 'USPRSDFL', 
                         'USPRSCP', 'USPRSLMN', 'USPRSSWP', 'USPRSGP', 'USPRSADP', 'USPRSIP', 
                         'USPRSLIB', 'USPRSWI', 'USPRSTOTAL', 'USREPR', 'USREPDFL', 'USREPWI', 
                         'USREPTOTAL', 'MNSENR', 'MNSENDFL', 'Shape_Leng','Shape_Area', 'geometry']
    mpls_df= mpls_df[necessary_columns]
    # Update ward by string W- and removing the results from wards outside of 1-13
    mpls_df['WARD'] = mpls_df['WARD'].str.replace('W-', '')
    # Drop rows with missing values
    mpls_df.dropna(inplace=True)
    # Drop rows with 0 votes
    mpls_df = mpls_df[(mpls_df['USPRSTOTAL'] != 0)]
    # group by Ward and Precint
    ward_data = data.groupby(['WARD', 'PCTNAME']).sum().reset_index()
        
    return ward_data

In [None]:
ward_df = b_clean_organize_data(raw_data)

In [29]:
pres_counts_df=ward_df.groupby('WARD')[['USPRSR', 'USPRSDFL', 'USPRSTOTAL']].sum()
type(pres_counts_df)

pandas.core.frame.DataFrame

In [33]:
pres_counts_df['NON_BINARY'] = pres_counts_df['USPRSTOTAL'] - pres_counts_df['USPRSR'] - pres_counts_df['USPRSDFL']
pres_counts_df['NON_BINARY_PCT'] = np.round(100*pres_counts_df['NON_BINARY'] / pres_counts_df['USPRSTOTAL'],2)
pres_counts_df['REP_PCT'] = pres_counts_df['USPRSR'] / pres_counts_df['USPRSTOTAL']
pres_counts_df['DFL_PCT'] = np.round(100*pres_counts_df['USPRSDFL'] / pres_counts_df['USPRSTOTAL'],2)
pres_counts_df['COLOR'] = np.where(pres_counts_df['REP_PCT'] > pres_counts_df['DFL_PCT'], 'red', 'blue')
pres_counts_df

Unnamed: 0_level_0,USPRSR,USPRSDFL,USPRSTOTAL,NON_BINARY,REP_PCT,DFL_PCT,COLOR,NON_BINARY_PCT
WARD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
W-01,2598,13189,17551,1764,0.148026,75.15,blue,10.05
W-02,1740,12701,16068,1627,0.10829,79.05,blue,10.13
W-03,3381,16901,22389,2107,0.151012,75.49,blue,9.41
W-04,1970,10396,13547,1181,0.14542,76.74,blue,8.72
W-05,833,9608,11217,776,0.074262,85.66,blue,6.92
W-06,1075,10050,12030,905,0.08936,83.54,blue,7.52
W-07,2240,15078,18714,1396,0.119696,80.57,blue,7.46
W-08,1199,13696,16127,1232,0.074347,84.93,blue,7.64
W-09,734,8847,10694,1113,0.068637,82.73,blue,10.41
W-10,1749,15712,19093,1632,0.091604,82.29,blue,8.55
