In [1]:
import pandas as pd
import geopandas
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import geopy.distance
from shapely.ops import nearest_points

### Load Polling Place Data (2020)

In [2]:
polling = geopandas.read_file('../00_source_data/2020 Polling Data/polling_pk_master_post.csv')
early = geopandas.read_file('../00_source_data/2020 Polling Data/earlyVote_pk_master.csv')
dropoff = geopandas.read_file('../00_source_data/2020 Polling Data/dropoff_pk_master.csv')

In [3]:
# Data cleaning before creating GeoDataFrame
polling = polling.replace(r'^\s*$', np.nan, regex=True)
polling = polling.astype({'latitude': 'float'})
polling = polling.astype({'longitude': 'float'})
polling_gdf = geopandas.GeoDataFrame(
    polling, geometry=geopandas.points_from_xy(polling.longitude, polling.latitude))

In [4]:
polling_gdf.head()

Unnamed: 0,query_id,placekey,error,pollingHours,latitude,longitude,sources,address.locationName,address.line1,address.city,address.state,address.zip,notes,startDate,endDate,address.line2,address.line3,id,geometry
0,0,223@8m4-tbj-bp9,,"Tue, Nov 3: 7 am - 7 pm",21.305224,-157.857002,"[{'name': 'Voting Information Project', 'offic...",Honolulu Hale,530 South King Street,Honolulu,HI,96813,Voter service centers provide accessible in-pe...,11/3/2020,11/3/2020,,,,POINT (-157.85700 21.30522)
1,1,@8m4-t4v-q4v,Invalid address,"Tue, Nov 3: 7 am - 7 pm",21.329854,-158.081865,"[{'name': 'Voting Information Project', 'offic...",Kapolei Hale,1000 Uluʻōhiʻa Street,Kapolei,HI,96707,Voter service centers provide accessible in-pe...,11/3/2020,11/3/2020,,,,POINT (-158.08186 21.32985)
2,2,@8m4-793-jvz,Invalid address,"Tue, Nov 3: 7 am - 7 pm",21.090406,-157.017609,"[{'name': 'Voting Information Project', 'offic...",Mitchell Pauole Center Conference Room,90 Ainoa Street,Kaunakakai,HI,96748,Voter service centers provide accessible in-pe...,11/3/2020,11/3/2020,,,,POINT (-157.01761 21.09041)
3,3,@bp5-ytd-j35,Invalid address,"Tue, Nov 3: 7 am - 7 pm",20.826513,-156.921143,"[{'name': 'Voting Information Project', 'offic...",Lanai Council District Office Lanai Community ...,Lanai Council District Office,Lanai City,HI,96763,Voter service centers provide accessible in-pe...,11/3/2020,11/3/2020,8th Street,,,POINT (-156.92114 20.82651)
4,4,zzw-222@bp5-chr-8y9,,"Tue, Nov 3: 7 am - 7 pm",20.900159,-156.491939,"[{'name': 'Voting Information Project', 'offic...",Velma McWayne Santos Community Center,395 Waena Street,Wailuku,HI,96793,Voter service centers provide accessible in-pe...,11/3/2020,11/3/2020,,,,POINT (-156.49194 20.90016)


### Load Final List of Colleges and Subset of Colleges

In [5]:
college = geopandas.read_file('../20_intermediate_files/final_college_polygons.csv', GEOM_POSSIBLE_NAMES="geometry", KEEP_GEOM_COLUMNS="NO")
print(len(college))

5801


In [6]:
subset_college = geopandas.read_file('../20_intermediate_files/subset_final_college_polygon.csv', GEOM_POSSIBLE_NAMES="geometry", KEEP_GEOM_COLUMNS="NO")
print(len(subset_college))

2230


### Find Nearest Traditional Polling Place for Subset of Colleges

In [7]:
college.crs = 4326
subset_college.crs = 4326
polling_gdf.crs = 4326

In [8]:
def nearest_poll_idx(row, polling_df):
    polling_index = polling_df['geometry'].distance(row.geometry).sort_values().index[0]
    return polling_index
def nearest_poll_dist(row, polling_df):
    polling_distance = polling_df['geometry'].distance(row.geometry).sort_values().values[0]
    return polling_distance
def nearest_poll_name(row, polling_df):
    polling_place_name = polling_df.loc[row.nearest_polling_index]['address.locationName']
    return polling_place_name
def nearest_poll_geom(row, polling_df):
    polling_place_geom = polling_df.loc[row.nearest_polling_index]['geometry']
    return polling_place_geom


In [9]:
college_trad_poll = subset_college.copy()
college_trad_poll.head()

Unnamed: 0,field_1,State_x,State_y,School Name,Institution Type,x_centroid,y_centroid,centroid_geom,geometry
0,0,AK,Alaska,alaska pacific university,"Private, 4 Year",-149.80181074831123,61.191111629181066,"-149.80181074831125, 61.191111629181066","POLYGON ((-149.80159 61.18746, -149.80055 61.1..."
1,1,AK,Alaska,alaska pacific university,"Private, 4 Year",-149.8098437,61.1888471,"-149.8098437, 61.1888471",POINT (-149.80984 61.18885)
2,2,AK,Alaska,ilisagvik college,"Public, 4 Year",-156.6798569974413,71.32481059160978,"-156.6798569974413, 71.32481059160978","POLYGON ((-156.67949 71.32556, -156.68056 71.3..."
3,3,AK,Alaska,prince william sound community college,,-146.3553932205255,61.13427196693991,"-146.3553932205255, 61.134271966939906","POLYGON ((-146.35624 61.13467, -146.35624 61.1..."
4,4,AK,Alaska,university of alaska anchorage,"Public, 4 Year",-149.81570562147257,61.19213041702953,"-149.81570562147255, 61.19213041702953","POLYGON ((-149.81587 61.19325, -149.81584 61.1..."


In [10]:
%%time
college_trad_poll['nearest_polling_index'] = pd.DataFrame(college_trad_poll.apply(lambda row: nearest_poll_idx(row,polling_gdf), axis=1).tolist(), index= college_trad_poll.index)
college_trad_poll['nearest_polling_distance'] = pd.DataFrame(college_trad_poll.apply(lambda row: nearest_poll_dist(row,polling_gdf), axis=1).tolist(), index= college_trad_poll.index)
college_trad_poll['nearest_polling_name'] = pd.DataFrame(college_trad_poll.apply(lambda row: nearest_poll_name(row,polling_gdf), axis=1).tolist(), index= college_trad_poll.index)
college_trad_poll['nearest_polling_geometry'] = pd.DataFrame(college_trad_poll.apply(lambda row: nearest_poll_geom(row,polling_gdf), axis=1).tolist(), index= college_trad_poll.index)
college_trad_poll.head()


  polling_index = polling_df['geometry'].distance(row.geometry).sort_values().index[0]

  polling_distance = polling_df['geometry'].distance(row.geometry).sort_values().values[0]


CPU times: user 42min 8s, sys: 16.1 s, total: 42min 24s
Wall time: 42min 34s


Unnamed: 0,field_1,State_x,State_y,School Name,Institution Type,x_centroid,y_centroid,centroid_geom,geometry,nearest_polling_index,nearest_polling_distance,nearest_polling_name,nearest_polling_geometry
0,0,AK,Alaska,alaska pacific university,"Private, 4 Year",-149.80181074831123,61.191111629181066,"-149.80181074831125, 61.191111629181066","POLYGON ((-149.80159 61.18746, -149.80055 61.1...",11464,0.015356,ALASKA HOUSING FINANCE CORPORATION BUILDING,POINT (-149.7796543 61.1814132)
1,1,AK,Alaska,alaska pacific university,"Private, 4 Year",-149.8098437,61.1888471,"-149.8098437, 61.1888471",POINT (-149.80984 61.18885),11461,0.022481,TRINITY CHRISTIAN REFORMED CHURCH,POINT (-149.8244492 61.2059374)
2,2,AK,Alaska,ilisagvik college,"Public, 4 Year",-156.6798569974413,71.32481059160978,"-156.6798569974413, 71.32481059160978","POLYGON ((-156.67949 71.32556, -156.68056 71.3...",11570,7.392052,NORTHWEST ARCTIC BOROUGH BLDG,POINT (-162.5999965 66.89516139999999)
3,3,AK,Alaska,prince william sound community college,,-146.3553932205255,61.13427196693991,"-146.3553932205255, 61.134271966939906","POLYGON ((-146.35624 61.13467, -146.35624 61.1...",11541,0.003247,VALDEZ CITY COUNCIL CHAMBERS,POINT (-146.3551253 61.1305889)
4,4,AK,Alaska,university of alaska anchorage,"Public, 4 Year",-149.81570562147257,61.19213041702953,"-149.81570562147255, 61.19213041702953","POLYGON ((-149.81587 61.19325, -149.81584 61.1...",11461,0.011157,TRINITY CHRISTIAN REFORMED CHURCH,POINT (-149.8244492 61.2059374)


In [12]:
college_trad_poll.to_csv('../20_intermediate_files/subset_college_nearest_poll_2020.csv', index=False)

### Find Nearest Early Voting Place for Subset of Colleges

In [13]:
#Data cleaning before creating GeoDataFrame
early = early.replace(r'^\s*$', np.nan, regex=True)
early = early.astype({'latitude': 'float'})
early = early.astype({'longitude': 'float'})
early_gdf = geopandas.GeoDataFrame(
    early, geometry=geopandas.points_from_xy(early.longitude, early.latitude))

In [14]:
#Subset our college to only states with early voting data
state_list = list(early_gdf['address.state'].unique())
college_early = subset_college[subset_college['State_x'].isin(state_list)]

In [15]:
college_early.head()

Unnamed: 0,field_1,State_x,State_y,School Name,Institution Type,x_centroid,y_centroid,centroid_geom,geometry
72,72,AZ,Arizona,arizona christian university,"Private, 4 Year",-112.18263707811263,33.62266752697055,"-112.18263707811265, 33.62266752697055","POLYGON ((-112.18065 33.62508, -112.18067 33.6..."
73,73,AZ,District of Columbia,arizona state university,"Public, 4 Year",-77.0419523,38.9011733,"-77.0419523, 38.9011733",POINT (-77.04195 38.90117)
74,74,AZ,Arizona,arizona state university,"Public, 4 Year",-111.92788959467356,33.422833111668304,"-111.92788959467357, 33.422833111668304","MULTIPOLYGON (((-111.93666 33.42191, -111.9367..."
75,75,AZ,Arizona,arizona western college,"Public, 2 Year",-114.49573170772813,32.68833203107732,"-114.49573170772813, 32.68833203107732","POLYGON ((-114.49571 32.69226, -114.49693 32.6..."
76,76,AZ,Arizona,coconino community college,"Public, 2 Year",-111.6455877973481,35.17118487417045,"-111.6455877973481, 35.17118487417045","POLYGON ((-111.64671 35.17175, -111.64670 35.1..."


In [16]:
%%time
college_early['nearest_polling_index'] = pd.DataFrame(college_early.apply(lambda row: nearest_poll_idx(row,early_gdf), axis=1).tolist(), index= college_early.index)
college_early['nearest_polling_distance'] = pd.DataFrame(college_early.apply(lambda row: nearest_poll_dist(row,early_gdf), axis=1).tolist(), index= college_early.index)
college_early['nearest_polling_name'] = pd.DataFrame(college_early.apply(lambda row: nearest_poll_name(row,early_gdf), axis=1).tolist(), index= college_early.index)
college_early['nearest_polling_geometry'] = pd.DataFrame(college_early.apply(lambda row: nearest_poll_geom(row,early_gdf), axis=1).tolist(), index= college_early.index)
college_early.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super(GeoDataFrame, self).__setitem__(key, value)


CPU times: user 8.91 s, sys: 39.8 ms, total: 8.95 s
Wall time: 8.98 s


Unnamed: 0,field_1,State_x,State_y,School Name,Institution Type,x_centroid,y_centroid,centroid_geom,geometry,nearest_polling_index,nearest_polling_distance,nearest_polling_name,nearest_polling_geometry
72,72,AZ,Arizona,arizona christian university,"Private, 4 Year",-112.18263707811263,33.62266752697055,"-112.18263707811265, 33.62266752697055","POLYGON ((-112.18065 33.62508, -112.18067 33.6...",709,0.011057,CACTUS HIGH SCHOOL,POINT (-112.1964578 33.626737)
73,73,AZ,District of Columbia,arizona state university,"Public, 4 Year",-77.0419523,38.9011733,"-77.0419523, 38.9011733",POINT (-77.04195 38.90117),343,6.755636,1ST UNITED METHODIST,POINT (-82.4304751 42.9757831)
74,74,AZ,Arizona,arizona state university,"Public, 4 Year",-111.92788959467356,33.422833111668304,"-111.92788959467357, 33.422833111668304","MULTIPOLYGON (((-111.93666 33.42191, -111.9367...",658,0.0,ASU SUN DEVIL FITNESS CENTER (FREE PARKING),POINT (-111.9318841 33.4155075)
75,75,AZ,Arizona,arizona western college,"Public, 2 Year",-114.49573170772813,32.68833203107732,"-114.49573170772813, 32.68833203107732","POLYGON ((-114.49571 32.69226, -114.49693 32.6...",787,1.790482,GILA BEND SCHOOL DISTRICT OFFICE,POINT (-112.7189368 32.9532931)
76,76,AZ,Arizona,coconino community college,"Public, 2 Year",-111.6455877973481,35.17118487417045,"-111.6455877973481, 35.17118487417045","POLYGON ((-111.64671 35.17175, -111.64670 35.1...",778,1.375954,SPIRIT IN THE DESERT RETREAT CENTER,POINT (-111.9211106 33.822175)


In [17]:
college_early.to_csv('../20_intermediate_files/subset_college_nearest_early_poll_2020.csv', index=False)