In [3]:
# Run the upstream merge script; the table lengths it prints appear in the output below.
%run 10_merging_HIFLD_w_demographics.py

Original Master: 3114
Updated Master: 2838
Length of Master Table: 2838
Length of Merged Table: 2444
Length of Merged Table without duplicates: 2421


In [39]:
# Run the distance-to-nearest-polling-place script.
# NOTE(review): its printed lines match the two print() calls in the long cell
# below, so that cell looks like an inline copy of this script -- confirm which
# one is canonical.
%run 20_calc_dist_to_nearest_pp_elecday.py

Pre # of campuses: 2421
Within State Set # of campuses: 2258


In [23]:
import pandas as pd
import geopandas as gpd
import numpy as np


# Projected CRS (as WKT) that every layer in this cell is reprojected to before
# the nearest-neighbour joins. Its linear unit is the metre (UNIT["Meter",1]),
# so the distances_* columns produced below are in metres.
# NOTE(review): code 102005 is normally published under the ESRI authority, not
# EPSG -- confirm the AUTHORITY tag below is handled as intended by your PROJ version.
wkt = """PROJCS["USA_Contiguous_Equidistant_Conic",
        GEOGCS["GCS_North_American_1983",
            DATUM["North_American_Datum_1983",
                SPHEROID["GRS_1980",6378137,298.257222101]],
            PRIMEM["Greenwich",0],
            UNIT["Degree",0.017453292519943295]],
        PROJECTION["Equidistant_Conic"],
        PARAMETER["False_Easting",0],
        PARAMETER["False_Northing",0],
        PARAMETER["Longitude_Of_Center",-96],
        PARAMETER["Standard_Parallel_1",33],
        PARAMETER["Standard_Parallel_2",45],
        PARAMETER["Latitude_Of_Center",39],
        UNIT["Meter",1],
        AUTHORITY["EPSG","102005"]]"""

# Load the campus polygons (with demographics) and trim them for the distance step.

# Columns treated as excess for this notebook; they are dropped right after loading.
excess_columns = [
    'SLSV Coalition',
    'ALL IN',
    'AGF',
    'Ask Every Student',
    'Campus Vote Project',
    'Voter Friendly Campus',
    'StudentPIRGs',
    'Aliento Education Fund',
    'Alliance for Youth Organizing',
    'American Democracy Project',
    " Arizona Students' Association",
    'Baltimore Collegetown Network\t\t',
    'Black Girls Vote',
    'Boston Votes Coalition',
    'California Campus Compact ',
    'CALPIRG Students',
    'CEEP',
    'Civic Nebraska',
    'Common Cause',
    'Count US IN',
    'Creative Campus Voting Project',
    'Day on Democracy',
    'Democracy Works',
    'Engage Miami',
    'Every Vote Counts',
    'Forward Montana',
    'Georgia Shift',
    'Hillel International',
    'IGNITE National',
    'IA & MN Campus Compact',
    'LeadMN',
    'Loud Light',
    'Maine Students Vote',
    'MARYPIRG',
    'MI Familia Vota',
    'Minnesota Youth Collective',
    'Mississippi Votes',
    'NASPAA',
    'New Era Colorado',
    'NYPIRG',
    'NCAAT',
    'NCPIRG',
    'NC Campus Compact',
    'Ohio Campus Compact',
    'PIRGIM Campus Action',
    'Project Pericles',
    'TurnUp Activism',
    'Up to Us/ Net Impact',
    'VoteRiders',
    'Washington Student Association',
    'Xceleader (Vote HBCU)',
    'NVEW 2020',
    'NVRD\n2017',
    'NVRD 2018',
    'NVRD 2020',
]

# Read -> drop excess columns -> reproject to the shared CRS -> detach a copy.
campuses = (
    gpd.read_file(
        "../../20_intermediate_files/20_campus_polygons_w_demographics.geojson"
    )
    .drop(columns=excess_columns)
    .to_crs(wkt)
    .copy()
)
print(f'Pre # of campuses: {len(campuses)}')

# Collect the states covered by EITHER polling-place file. Note this is a union,
# not an intersection: a campus is kept if its state has early-voting data or
# Election Day data, so one of the two distance columns can still be missing.
p = gpd.read_file(
    "../../20_intermediate_files/10_polling_places/2020_BallotReady_Early.geojson"
)
ev_state_set = set(p['state'].unique())

q = gpd.read_file(
    "../../20_intermediate_files/10_polling_places/2020_elecday_cpi.geojson"
)
ed_state_set = set(q['state'].unique())

max_state_set = ev_state_set.union(ed_state_set)

# Limit campuses to those whose state appears in at least one polling-place file
campuses = campuses[campuses['STATE'].isin(max_state_set)]
print('Within State Set # of campuses: {}'.format(len(campuses)))
# NOTE(review): temp_list is not referenced anywhere in the visible notebook;
# kept only so the cell leaves the same names behind -- candidate for removal.
temp_list = []

# Put both point layers in the same projected CRS as the campuses so that
# sjoin_nearest measures planar distances in that CRS's units.
p = p.to_crs(wkt)
q = q.to_crs(wkt)

# Check both layers, not just p: q is the one joined first further down.
assert campuses.crs == p.crs, "early-voting layer CRS differs from campuses"
assert campuses.crs == q.crs, "Election Day layer CRS differs from campuses"

# Normalise the coordinate column names if the file uses the lowercase spelling.
if 'latitude' in q.columns:
    q = q.rename(columns={"latitude": "Latitude", "longitude": "Longitude"})

# Keep only what the join needs and suffix the attribute columns with the
# dataset label so they stay distinguishable after the later merges.
year = '2020'
q = q[["state", "Latitude", "Longitude", "geometry"]].rename(
    columns={
        "state": f"state_{year}",
        "Latitude": f"Latitude_{year}",
        "Longitude": f"Longitude_{year}",
    }
)
    
# Get distance to nearest place of each kind, state by state, then combine.
# The four joins (Election Day, early voting, Starbucks, Target) used to be four
# copy-pasted blocks; they now share the helpers below.


def label_place_columns(places, state_col, label):
    """Return `places` reduced to state, Latitude, Longitude and geometry.

    The three attribute columns are renamed to ``state_<label>``,
    ``Latitude_<label>`` and ``Longitude_<label>`` so that several place layers
    can be merged into one table without name clashes.
    """
    return places[[state_col, "Latitude", "Longitude", "geometry"]].rename(
        columns={
            state_col: f"state_{label}",
            "Latitude": f"Latitude_{label}",
            "Longitude": f"Longitude_{label}",
        }
    )


def nearest_by_state(campuses, places, label, states):
    """Join each campus to the nearest row of `places` within the same state.

    Parameters
    ----------
    campuses : GeoDataFrame with ``STATE`` and ``UNIQUEID`` columns.
    places : GeoDataFrame prepared by `label_place_columns` (has ``state_<label>``).
    label : suffix identifying the place layer; the distance column is
        ``distances_<label>``.
    states : iterable of state codes to process.

    Returns
    -------
    One row per ``UNIQUEID``. The join is a left join, so campuses in a state
    with no places are kept with missing values in the place columns.
    """
    state_col = f"state_{label}"
    per_state = []
    # A set has no stable iteration order; sort so the row order is the same on
    # every run (key=str keeps this safe if a missing state value slipped in).
    for state in sorted(states, key=str):
        joined = campuses[campuses["STATE"] == state].sjoin_nearest(
            places[places[state_col] == state],
            distance_col=f"distances_{label}",
            how="left",
        )
        # Equidistant places yield several rows for one campus; keep a single one.
        per_state.append(joined.drop_duplicates(subset=["UNIQUEID"], keep="last"))
    # Concatenate once instead of growing a frame inside the loop.
    return pd.concat(per_state)


def merge_nearest(final_df, nearest, label):
    """Attach the Latitude/Longitude/distance columns of one layer to `final_df`.

    Columns are selected by name rather than by position, so the merge does not
    depend on the column order produced by ``sjoin_nearest``.
    """
    wanted = [
        "UNIQUEID",
        f"Latitude_{label}",
        f"Longitude_{label}",
        f"distances_{label}",
    ]
    return final_df.merge(nearest[wanted], on="UNIQUEID", how="inner")


def load_store_points(csv_path, label, crs):
    """Read a store-location CSV and return labelled points reprojected to `crs`.

    The CSV is expected to provide ``State``, ``Latitude`` and ``Longitude``
    columns; coordinates are declared as EPSG:4326 before reprojection.
    """
    stores = gpd.read_file(csv_path)
    stores = gpd.GeoDataFrame(
        stores,
        geometry=gpd.points_from_xy(stores["Longitude"], stores["Latitude"]),
    )
    stores = stores.set_crs(epsg=4326).to_crs(crs)
    return label_place_columns(stores, "State", label)


#Set final df (starts as the Election Day join, which keeps every campus column)
final_df = nearest_by_state(campuses, q, "2020", max_state_set)

##############################
#Add Ballot Ready Polling Data
##############################
# Read the file again: the coordinates are taken from the geometry before
# reprojection, i.e. in the file's own CRS (presumably lon/lat degrees -- confirm).
p = gpd.read_file(
    "../../20_intermediate_files/10_polling_places/2020_BallotReady_Early.geojson"
)
p['Longitude'] = p['geometry'].x
p['Latitude'] = p['geometry'].y
p = label_place_columns(p.to_crs(wkt), "state", "2020_early")
final_df = merge_nearest(
    final_df,
    nearest_by_state(campuses, p, "2020_early", max_state_set),
    "2020_early",
)

################################
#Add Nearest Starbucks and Target
################################
store_sources = (
    ("starbucks", "../../00_source_data/store_locations/Starbucks_USA.csv"),
    ("target", "../../00_source_data/store_locations/Target_USA.csv"),
)
for label, csv_path in store_sources:
    s = load_store_points(csv_path, label, wkt)
    final_df = merge_nearest(
        final_df,
        nearest_by_state(campuses, s, label, max_state_set),
        label,
    )


#Save to file
#final_df.to_file(
#    "../../20_intermediate_files/30_campuses_w_dist_to_nearest_pp.geojson",
#    driver="GeoJSON")
#final_df.to_csv(
#    "../../20_intermediate_files/30_campuses_w_dist_to_nearest_pp.csv")

Pre # of campuses: 2421
Within State Set # of campuses: 2258
    UNIQUEID                                 NAME                    ADDRESS  \
90    212878    HARRISBURG AREA COMMUNITY COLLEGE                 1 HACC DR.   
91    214111  MONTGOMERY COUNTY COMMUNITY COLLEGE            340 DEKALB PIKE   
93    213376                   LACKAWANNA COLLEGE                501 VINE ST   
95    215655             ROBERT MORRIS UNIVERSITY  6001 UNIVERSITY BOULEVARD   
188   212869                       HARCUM COLLEGE         750 MONTGOMERY AVE   

              CITY STATE    ZIP  ZIP4       TELEPHONE TYPE STATUS  ...  \
90      Harrisburg    PA  17110  2999  (717) 780-2300    1      A  ...   
91       Blue Bell    PA  19422  0796  (215) 641-6300    1      A  ...   
93        Scranton    PA  18509  3206  (570) 961-7810    2      A  ...   
95   Moon Township    PA  15108  1189  (800) 762-0097    2      A  ...   
188      Bryn Mawr    PA  19010  3470  (610) 525-4100    2      A  ...   

     Black or

In [24]:
# Row count after all merges; compare with the "Within State Set" campus count printed above.
len(final_df)

2258

In [25]:
# Number of campuses with no Election Day polling-place distance.
int(final_df['distances_2020'].isna().sum())

443

In [26]:
# Number of campuses with no early-voting polling-place distance.
int(final_df['distances_2020_early'].isna().sum())

88

In [37]:
# Distinct STATE values in the final table (a missing value would count as one).
final_df['STATE'].nunique(dropna=False)

48

In [27]:
# Number of distinct states present in the early-voting polling-place file.
len(ev_state_set)

45

In [28]:
# Number of distinct states present in the Election Day polling-place file.
len(ed_state_set)

35

In [29]:
# Campuses that received an Election Day polling-place match.
df_ed = final_df.dropna(subset=['distances_2020'])

In [30]:
# Campuses that received an early-voting polling-place match.
df_ev = final_df.dropna(subset=['distances_2020_early'])

In [31]:
# Campuses with an Election Day distance (total rows minus the missing count above).
len(df_ed)

1815

In [34]:
# Distinct states among campuses with an Election Day match.
df_ed['STATE'].nunique(dropna=False)

35

In [32]:
# Campuses with an early-voting distance (total rows minus the missing count above).
len(df_ev)

2170

In [35]:
# Distinct states among campuses with an early-voting match.
df_ev['STATE'].nunique(dropna=False)

45

In [19]:
import geopandas as gpd
# Load the saved result of the distance step for inspection.
# NOTE(review): the only visible code that writes this file is commented out at
# the end of the long cell above, and this cell's execution count is lower than
# that cell's -- the file may come from an earlier run; confirm it is current.
fin = gpd.read_file("../../20_intermediate_files/30_campuses_w_dist_to_nearest_pp.geojson")

In [20]:
# Preview the first rows of the saved table, including the nearest-place columns.
fin.head()

Unnamed: 0,UNIQUEID,NAME,ADDRESS,CITY,STATE,ZIP,ZIP4,TELEPHONE,TYPE,STATUS,...,Latitude_2020_early,Longitude_2020_early,distances_2020_early,Latitude_starbucks,Longitude_starbucks,distances_starbucks,Latitude_target,Longitude_target,distances_target,geometry
0,230737,UTAH VALLEY UNIVERSITY,800 W UNIVERSITY PARKWAY,Orem,UT,84058,5999,(801) 863-8000,1,A,...,40.307563,-111.735687,272.164025,40.27883,-111.714617,0.0,40.2959,-111.699239,1711.027074,"MULTIPOLYGON (((-1323470.53542 257003.34571, -..."
1,230490,OGDEN-WEBER TECHNICAL COLLEGE,200 N WASHINGTON BLVD,Ogden,UT,84404,4089,(801) 627-8300,1,A,...,41.22326,-111.970034,4252.466673,41.2452,-111.970485,1832.088454,41.173612,-112.006235,10294.86196,"POLYGON ((-1324324.58080 367418.25623, -132434..."
2,438151,STEVENS-HENAGER COLLEGE,383 W VINE ST,Salt Lake City,UT,84123,5671,(801) 281-7620,2,A,...,40.665099,-111.888214,1236.786553,40.659511,-111.887825,1172.075537,40.646102,-111.940181,3413.714194,"POLYGON ((-1331123.34073 300517.71684, -133123..."
3,230728,UTAH STATE UNIVERSITY,OLD MAIN HILL,Logan,UT,84322,1400,(435) 797-1000,1,A,...,41.722622,-111.848255,3150.762959,41.739979,-111.830619,1045.334557,41.173612,-112.006235,64713.336523,"MULTIPOLYGON (((-1302624.24752 419672.17026, -..."
4,230746,SALT LAKE COMMUNITY COLLEGE,4600 S REDWOOD RD,Salt Lake City,UT,84123,NOT AVAILABLE,(801) 957-4111,1,A,...,40.654562,-111.955438,1850.323586,40.683266,-111.939437,864.873904,40.646102,-111.940181,2571.48342,"POLYGON ((-1334174.70639 302840.33035, -133402..."


In [None]:
#Add Nearest Starbucks
# NOTE(review): this cell has never been executed (no execution count) and the
# long cell above already adds the Starbucks columns; it looks like a leftover
# draft. It is repaired here so it runs and is safe to re-run; consider deleting it.
year = 'starbucks'
new_cols = [f"Latitude_{year}", f"Longitude_{year}", f"distances_{year}"]

s = gpd.read_file(
    "../../00_source_data/store_locations/Starbucks_USA.csv"
)
# The source is a CSV, so points are built from its Longitude/Latitude columns
# and declared as EPSG:4326 before reprojecting (same steps as the long cell above).
s = gpd.GeoDataFrame(s, geometry=gpd.points_from_xy(s['Longitude'], s['Latitude']))
s = s.set_crs(epsg=4326).to_crs(wkt)
# Keep only the columns needed for the join and suffix them with the layer label.
s = s[["State", "Latitude", "Longitude", "geometry"]].rename(
    columns={
        "State": f"state_{year}",
        "Latitude": f"Latitude_{year}",
        "Longitude": f"Longitude_{year}",
    }
)

# Join state by state; the state list is max_state_set from the long cell
# (the name `state_set` used previously is not defined in the visible notebook).
per_state = []
for state in sorted(max_state_set, key=str):
    joined = campuses[campuses['STATE'] == state].sjoin_nearest(
        s[s[f"state_{year}"] == state],
        distance_col=f"distances_{year}",
        how="left",
    )
    #Remove multiple equidistant Starbucks
    per_state.append(joined.drop_duplicates(subset=['UNIQUEID'], keep='last'))
temp = pd.concat(per_state)

# Drop any Starbucks columns already present so re-running this cell does not
# create *_x / *_y duplicates, then merge the new ones in by name.
final_df = final_df.drop(columns=new_cols, errors="ignore").merge(
    temp[['UNIQUEID', *new_cols]], on='UNIQUEID', how='inner'
)
