In [None]:
import pandas as pd # standard python data library
import geopandas as gp # the geo-version of pandas
import numpy as np 
import os
import fiona
from statistics import mean, median
from pandas import read_csv
gp.io.file.fiona.drvsupport.supported_drivers['KML'] = 'rw' #To load KML files

# VEST PA Validation

In [None]:
# Load the VEST 2018 Pennsylvania shapefile; the rest of the notebook compares
# rebuilt election results against the vote columns stored in this file.
vest_pa_18 = gp.read_file("./raw-from-source/VEST/pa_2018/pa_2018.shp")

Election results from the Pennsylvania Secretary of State's office via OpenElections (https://github.com/openelections/openelections-data-pa/). Precinct data was corrected with canvass reports for the following counties: Berks, Blair, Bradford, Cambria, Carbon, Crawford, Elk, Forest, Franklin, Lawrence, Lycoming, Mifflin, Montgomery, Montour, Northumberland, Susquehanna. The candidate totals for Berks, Blair, Crawford, and Mifflin differ from the county totals reported by the state and therefore the statewide totals differ from the official results accordingly.

Precinct shapefiles primarily from the U.S. Census Bureau's 2020 Redistricting Data Program Phase 2 release. The shapefiles from Delaware County and the City of Pittsburgh are from the respective jurisdictions instead. Precinct numbers were corrected to match the voter file in the following locales: Allegheny (Elizabeth, Pittsburgh W12), Blair (Greenfield), Bradford (Athens), Greene (Monongahela), Monroe (Smithfield), Montgomery (Hatfield), Northampton (Bethlehem Twp), Perry (Toboyne), Washington (New Eagle, Somerset), York (Fairview).

Precinct boundaries throughout the state were edited to match voter assignments in the PA Secretary of State voter file from the 2018 election cycle. While some edits reflect official updates to wards or divisions the great majority involve voters incorrectly assigned to voting districts by the counties. As such the VEST shapefile endeavors to reflect the de facto precinct boundaries and these often differ from the official voting district boundaries, in some cases quite drastically. Wherever possible edits were made using census boundaries or alternatively using the parcel shapefiles from the respective counties. 

In certain areas voter assignments appear so erratic that it is impractical to place all voters within their assigned precinct. These areas were edited so as to place as many voters as possible within their assigned precinct without displacing a greater number from their assigned precinct. In general, municipal boundaries were retained except where significant numbers of voters were misassigned to the wrong municipality. In cases where the odd/even split was incorrectly reversed for precinct boundary streets the official boundary was retained. All such cases involved a nearly equal number of voters swapped between voting districts.

The following revisions were made to the base shapefiles to match the de facto 2018 precinct boundaries consistent with the voter file. Individual precincts are noted in cases of splits or merges. Due to the sheer number of edits boundary adjustments are noted at the borough/township level. There may be as many as two dozen individual precincts that were revised within a given municipality.

Adams: Adjust Cumberland, Franklin  
Allegheny: Merge CD splits for S Fayette 3/5; Split Pittsburgh W5 11/17; Merge Pittsburgh W16 9/11/12, Align   McCandless with municipal boundary; Adjust Avalon, Baldwin, Bethel Park, Braddock, Brentwood, Castle Shannon, Clairton, Collier, Coraopolis, Crescent, Dormont, Dravosburg, Duquesne, E Deer, E McKeesport, E Pittsburgh, Elizabeth, Emsworth, Forward, Glassport, Hampton, Harmar, Ingram, Jefferson Hills, Kennedy, Leet, Liberty, Marshall, McCandless, McKees Rocks, McKeesport, Monroeville, Moon, Mount Lebanon, Munhall, N Fayette, N Versailles, O'Hara, Oakdale, Penn Hills, Pine, Pittsburgh (nearly all wards), Pleasant Hills, Reserve, Richland, Ross, Scott, Sewickley, Shaler, S Fayette, S Park, Stowe, Swissvale, Upper St. Clair, W Deer, W Homestead, W Mifflin, W View, Whitaker, Whitehall, Wilkins, Wilkinsburg
Armstrong: Align Dayton, Elderton, Ford City, Kittanning, N Apollo with municipal boundaries; Adjust Ford City, Gilpin, Kiskiminetas, Kittanning, Manor, N Buffalo, Parks, Parker City, S Buffalo  
Beaver: Adjust Aliquippa, Ambridge, Baden, Beaver, Brighton, Center, Chippewa, Conway, Economy, Franklin, Hanover, Harmony, Hopewell, Midland, Monaca, N Sewickley  
Bedford: Adjust Bedford Boro, Bedford Twp  
Berks: Adjust Cumru, Douglass, Oley, Maxatawny, Robeson, Sinking Spring, Spring, Union  
Blair: Merge Tunnelhill/Allegheny Twp 4; Align Altoona, Bellwood, Duncansville, Hollidaysburg, Newry, Roaring Spring, Tyrone, Williamsburg with municipal boundaries; Adjust Allegheny, Altoona, Antis, Frankstown, Freedom, Greenfield, Huston, Juniata, N Woodbury, Logan, Snyder, Tyrone Boro, Tyrone Twp  
Bucks: Align Sellersville, Tullytown with municipal boundaries; Adjust Bensalem, Bristol Boro, Bristol Twp, Buckingham, Doylestown Twp, Falls, Hilltown, Lower Makefield N, Lower Southampton E, Middletown, Milford, Morrissville, Newtown Twp, Northampton, Solebury Lower, Solebury, Springfield, Tinicum, Upper Makefield, Upper Southampton E, Warminster, Warrington, W Rockhill  
Butler: Merge CD splits for Cranberry E 2, 3, Cranberry W 1, 2, Jefferson 1, 2; Align Butler Twp, Valencia with municipal boundaries; Adjust Adams, Buffalo, Butler Boro, Butler Twp, Center, Cranberry E, Cranberry W, Jackson, Jefferson, Zelienople
Cambria: Align Daisytown, Sankertown, W Taylor, Wilmore with municipal boundaries; Adjust Cambria, Conemaugh, Croyle, E Taylor, Ebensburg, E Carroll, Geistown, Jackson, Johnstown W8, W17, W20, Lower Yoder, Northern Cambria, Portage Boro, Portage Twp, Richland, Southmont, Stonycreek, Summerhill, Susquehanna, Upper Yoder, W Carroll, Westmont
Cameron: Adjust Emporium, Shippen
Carbon: Adjust Jim Thorpe, Kidder, Mahoning, New Mahoning, Summit Hill
Centre: Merge CD splits for Halfmoon E Central/Proper; Merge Ferguson Northeast 1 A/B; Adjust Benner, College, Ferguson, Patton
Chester: Merge CD/LD splits for Birmingham 2, Phoenixville M 1; Adjust Birmingham, E Bradford S, E Fallowfield, E Goshen, E Marlborough, Easttown, N Coventry, Spring City, Tredyffrin M, Uwchlan, W Bradford, W Caln, W Goshen N, W Goshen S, Westtown
Clarion: Merge Emlenton/Richland; Adjust Clarion, Highland, Farmington, Knox
Clearfield: Adjust Bradford, Cooper, Decatur, Golden Rod, Lawrence Glen Richie, Morris, Plympton, Woodward
Columbia: Merge Ashland/Conyngham; Adjust Orange, Scott West
Crawford: Align Mead, Woodcock with municipal boundaries
Cumberland: Merge CD splits for N Middleton 1, 3; Split Lower Allen 1/Annex; Align Carlisle, E Pennsboro, Hampton, Lemoyne, Lower Allen, Mechanisburg, Middlesex, Mount Holly Springs, N Middleton, Shiremanstown, Silver Spring, W Pennsboro, Wormsleysburg with municipal boundaries
Dauphin: Align Middletown with municipal boundary; Adjust Derry, Harrisburg W1, W7, W8, W9, Hummelstown, Lower Paxton, Lykens, Middletown
Delaware: Adjust Chester, Concord, Darby Boro, Darby Twp, Haverford, Marple, Nether Providence, Newtown, Radnor, Ridley, Sharon Hill, Thornbury, Tinicum, Trainer, Upper Chichester, Upper Darby, Upper Providence
Elk: Split N/S Horton; Adjust Johnsonburg, Ridgeway Boro, Ridgeway Twp, St. Marys
Erie: Adjust Erie W1, W4, W5, W6, Greene, Lawrence Park, McKean, Millcreek, North East
Fayette: Align Dunbar with municipal boundary; Adjust Brownsville, Bullskin, Dunbar, Georges, German, Luzerne, N Union, Redstone
Franklin: Align Mercersburg with municipal boundary; Adjust Antrim, Fannett, Greene, Guilford, Hamilton, Metal, Peters, Quincy, St. Thomas, Southampton, Washington
Fulton: Align McConnellsburg with municipal boundary
Greene: Align Carmichaels with municipal boundary; Adjust Cumberland, Dunkard, Franklin, Jefferson, Lipencott, Mather, Morgan Chart, Monongahela, Nemacolin
Huntingdon: Merge CD splits for Penn; Adjust Huntingdon, Mount Union
Jefferson:  Align Reynoldsville with municipal boundary; Adjust Punxsutawney
Lackawanna: Adjust Archbald, Blakely, Carbondale, Clarks Summit, Dickson City, Dunmore, Fell, Jermyn, Jessup, Mayfield, Moosic, Old Forge, Olyphant, Scranton W1, W2, W3, W6, W7, W10, W12, W13, W14, W15, W16, W19, W20, W23, S Abington, Taylor, Throop
Lancaster: Split Lancaster 7-8 CV/LS; Adjust Brecknock, Columbia, E Hempfield, E Lampeter, E Petersburg, Elizabethtown, Ephrata, Lancaster W4, W8, Lititz, Manheim, Manor, Millersville, Mt Joy Boro, Mt Joy Twp, New Holland, Penn, Providence, Rapho, Warwick, W Cocalico, W Donegal, W Hempfield
Lawrence: Adjust Neshannock
Lebanon: Adjust Jackson, Lickdale, S Lebanon, Union Green Pt
Lehigh: Adjust Lower Macungie, Salisbury
Luzerne: Merge CD splits for Hazle 1; Align Avoca, Pittston with municipal boundaries; Adjust Butler, Dallas, Exeter, Foster, Freeland, Hanover, Hazle, Jenkins, Kingston Boro, Kingston Twp, Larksville, Lehman, Nanticoke, Newport, Plains, Salem, Smoyersville, W Wyoming, Wilkes-Barre
Lycoming: Align Williamsport with municipal boundary; Adjust Jersey Shore
McKean: Adjust Bradford City, Bradford Twp, Foster, Keating, Otto
Mercer: Adjust Delaware, Fredonia, Greenville, Hempfield, Hermitage, Sharon, Sharpsville, S Pymatuning, W Salem
Mifflin: Split Brown Reedsville/Church Hill
Monroe: Align E Stroudsburg with municipal boundary; Adjust E Stroudsburg, Smithfield
Montgomery: Add CD special election splits for Horsham 2-2, Perkiomen 1, Plymouth 2-3; Adjust Abington, Lower Merion, Pottstown, Springfield, Upper Moreland, Upper Merion, Upper Providence
Northampton: Align Glendon, Walnutport with municipal boundaries; Adjust Bangor, Bethlehem W2, W3, W4, W7, W9, W14, W15, Bethlehem Twp, Bushkill, Easton, Forks, Hanover, Hellertown, Lehigh, Lower Mt Bethel, Lower Saucon, Moore, Nazareth, Palmer, Plainfield, Upper Mt Bethel, Washington, Williams
Northumberland: Align Northumberland with municipal boundary; Adjust Coal, Milton, Mount Carmel W, Natalie-Strong, Northumberland, Point, Ralpho, Shamokin, Sunbury, Upper Augusta
Philadelphia: Adjust 1-19/21, 5-3/19, 7-2/3/17, 7-6/7, 9-5/6, 15-7/10, 17-20/26, 20-5/10, 21-1/15, 21-40/41, 22-21/26, 23-11/12, 25-9/17, 25-4/7/12, 25-10/12, 26-1/2, 27-7/8, 27-18/20/21, 28-1/8, 29-9/11, 29-10/17, 30-14/15, 31-5/6, 38-11/17, 38-13/20, 38-15/19, 40-12/18/19, 40-17/19, 42-3/4/7, 44-8/14, 50-3/12, 50-11/27, 52-2/6/9, 52-3/8, 57-6/7, 57-10/27, 57-17/28, 58-6/12, 62-5/19, 65-4/7, 65-11/16, 66-22/34  
Pike: Adjust Matamoras  
Potter: Adjust Galeton, Sharon  
Schuylkill: Adjust Coaldale, N Manheim, Norwegian, Porter, Pottsville
Somerset: Align New Centerville with municipal boundary; Adjust Conemaugh, Jefferson, Middlecreek, Paint, Somerset Boro  
Susquehanna: Adjust Montrose; Lanesboro, Susquehanna Depot  
Tioga: Adjust Delmar, Wellsboro  
Union: Adjust Buffalo, White Deer  
Venango: Adjust Franklin, Sugarcreek, Cornplanter, Oil City  
Warren: Adjust Conewango  
Washington: Align Allenport, Beallsville, Burgettstown, Canonsburg, Carroll, Charleroi, Claysville, Elco, Finleyville, Houston, Long Branch, McDonald, Monongahela, Speers, Twilight with municipal boundaries; Adjust Amwell, Bentleyville, California, Canonsburg, Canton, Cecil, Centerville, Chartiers, Donegal, Donora, Fallowfield, Hanover, Independence, Mount Pleasant, N Franklin, N Strabane, Peters, Robinson, Smith, Somerset, S Franklin, S Strabane, Union Washington, W Brownsville  
Wayne: Adjust Honesdale  
Westmoreland: Merge CD splits for Unity Pleasant Unity; Align Greensburg with municipal boundary; Adjust Allegheny, Arnold, Bell, Derry, E Huntingdon, Fairfield, Greensburg W1-W8, Hempfield, Jeannette, Latrobe, Ligonier, Lower Burrell, Monessen, Mount Pleasant, Murraysville, New Kensington, N Belle Vernon, N Huntingdon, Penn, Rostraver, St. Clair, Scottdale, Sewickley, S Greensburg, S Huntingdon, Trafford, Upper Burrell, Unity, Vandergrift, Washington, Youngwood  
Wyoming: Adjust Falls  
York: Merge CD splits for York Twp 5-3; Align E Prospect, Goldsboro, Jefferson, Manchester, Monaghan, Wellsville, York with municipal boundaries; Adjust Chanceford, Codorus, Conewago, Dover, Fairview, Hanover, Jackson, Lower Windsor, New Freedom, Newberry, N Codorus, Penn, Red Lion, Shrewsbury, Spring Garden, Springbettsbury, W Manchester, Windsor Boro, Windsor Twp, Wrightsville, York Twp, York W5, W6, W15  

## Load Election Results

In [None]:
# Build the Pennsylvania county FIPS lookups used throughout the notebook.
fips_file = pd.read_csv("./raw-from-source/FIPS/US_FIPS_Codes.csv")
# Take an explicit copy: the columns added below would otherwise be written to
# a filtered slice of the national table (pandas SettingWithCopyWarning).
fips_file = fips_file[fips_file["State"]=="Pennsylvania"].copy()
# County codes are used as zero-padded three-character strings.
fips_file["FIPS County"] = fips_file["FIPS County"].astype(str).str.zfill(3)
# Five-character code: "42" prefix (presumably the Pennsylvania state FIPS) + county code.
fips_file["unique_ID"] = "42" + fips_file["FIPS County"]
fips_codes = fips_file["unique_ID"].tolist()
print(fips_file["County Name"].unique())
# County name -> three-character county code.
pa_fips_dict = dict(zip(fips_file["County Name"],fips_file["FIPS County"]))

In [None]:
# Statewide precinct-level results, plus three separate county-level files
# (Butler, Clearfield, Westmoreland) from the same OpenElections 2018 directory.
pa_election = pd.read_csv("./raw-from-source/Election_Results/openelections-data-pa-master/2018/20181106__pa__general__precinct.csv")
butler_election = pd.read_csv("./raw-from-source/Election_Results/openelections-data-pa-master/2018/counties/20181106__pa__general__butler__precinct.csv")
clearfield_election = pd.read_csv("./raw-from-source/Election_Results/openelections-data-pa-master/2018/counties/20181106__pa__general__clearfield__precinct.csv")
westmoreland_election = pd.read_csv("./raw-from-source/Election_Results/openelections-data-pa-master/2018/counties/20181106__pa__general__westmoreland__precinct.csv")

In [None]:
# Show the column names of each results file so they can be compared by eye.
print(butler_election.columns)
print(clearfield_election.columns)
print(westmoreland_election.columns)
print(pa_election.columns)


In [None]:
#print(butler_election.head())
#print(clearfield_election.head())
#print(westmoreland_election.head())
# Reduce every results frame to the same six columns so they can be stacked.
pa_election = pa_election[["county","precinct","office","party","candidate","votes"]]
butler_election = butler_election[["county","precinct","office","party","candidate","votes"]]
clearfield_election = clearfield_election[["county","precinct","office","party","candidate","votes"]]
westmoreland_election = westmoreland_election[["county","precinct","office","party","candidate","votes"]]

#print(butler_election.head())
#print(clearfield_election.head())
#print(westmoreland_election.head())

# Stack the three county-level files into one frame (original row indexes are kept).
other_list = [butler_election,clearfield_election,westmoreland_election]
other_df = pd.concat(other_list)
print(other_df.head())
print(other_df["county"].unique())

In [None]:
print(pa_election["county"].unique())

# Counties whose rows come from the separate county-level files instead of the statewide file.
lone_counties = ["Butler","Clearfield","Westmoreland"]

# Drop those counties from the statewide frame so they are not counted twice...
pa_election = pa_election[~pa_election["county"].isin(lone_counties)]
print(pa_election["county"].unique())

two_dfs_list = [other_df,pa_election]

# ...then append the county-file rows in their place.
pa_election = pd.concat(two_dfs_list)

In [None]:
# List the county values present after combining the statewide and county-level files.
print(pa_election["county"].unique())

Do not include the "Straight Party" votes

In [None]:
#The full file does not include the Governors results from Butler
#Clearfield does not include Senate results
#Westmoreland does not include governor results

In [None]:
#pa_election[pa_election["county"]=="Westmoreland"].to_csv("./westmoreland.csv")
#pa_election[pa_election["county"]=="Butler"].to_csv("./butler.csv")
#pa_election[pa_election["county"]=="Clearfield"].to_csv("./clearfield.csv")

In [None]:
# Normalise office labels that differ between files; values not in the mapping are left as-is
# (map() yields NaN for unmapped values, which fillna() replaces with the original label).
office_changes = {'GOVERNOR':'Governor','United States Senator':"U.S. Senate"}
pa_election["office"]=pa_election["office"].map(office_changes).fillna(pa_election["office"])

In [None]:
# Keep only the two contests being validated.
office_list = ["U.S. Senate", 'Governor']
# Explicit copy: the columns assigned below (and in later cells) would otherwise
# be written to a filtered slice of pa_election (pandas SettingWithCopyWarning).
filtered_pa_election = pa_election[pa_election["office"].isin(office_list)].copy()
# Strip the trailing space from the one county label that carries it.
county_changes_dict = {"Washington ":"Washington"}
filtered_pa_election["county"] = filtered_pa_election["county"].map(county_changes_dict).fillna(filtered_pa_election["county"])
# Translate county name -> three-character county code; names missing from the
# lookup are kept unchanged so they remain visible downstream.
filtered_pa_election["County_FIPS"] = filtered_pa_election["county"].map(pa_fips_dict).fillna(filtered_pa_election["county"])

In [None]:
# Row key for the later pivot: county code immediately followed by the precinct name (no separator).
filtered_pa_election["pivot_col"]=filtered_pa_election["County_FIPS"]+filtered_pa_election["precinct"]
# Upper-case and trim candidate names, and upper-case party labels, before mapping them.
filtered_pa_election["candidate"]=filtered_pa_election["candidate"].str.upper()
filtered_pa_election["candidate"] = filtered_pa_election["candidate"].str.strip()
filtered_pa_election["party"] = filtered_pa_election["party"].str.upper()


print(filtered_pa_election["party"].unique())

# Collapse party spellings to three-letter codes; labels not listed here are kept unchanged.
party_changes_dict = {"DEMOCRATIC":"DEM","REPUBLICAN":"REP","LIBERTARIAN":"LIB","GREEN":"GRN",
                     "GR":"GRN","GRE":"GRN","DEMOCRAT":"DEM"}

filtered_pa_election["party"] = filtered_pa_election["party"].map(party_changes_dict).fillna(filtered_pa_election["party"])


In [None]:
# Drop rows whose candidate name ends with the three characters "(W)".
filtered_pa_election = filtered_pa_election[~(filtered_pa_election["candidate"].str[-3:]=="(W)")]

In [None]:
# Map every observed spelling of a candidate (or ticket) to a single surname key.
# Keys are matched against the upper-cased, stripped "candidate" column, so they
# must match exactly, including doubled spaces, punctuation and source misspellings
# such as 'KEARNS' or 'NEALE'.
# The first five entries convert party names that appear in the candidate column
# into party codes.
candidate_name_changes = {
   'DEMOCRATIC':'DEM', 
 'REPUBLICAN':"REP",
 'GREEN':"GRN", 
 'INDEPENDENT':"IND", 
 'LIBERTARIAN':"LIB",
    
    
    'LOU BARLETTA':'BARLETTA',
 'LOU  BARLETTA':'BARLETTA',
 'LOU BARLETTA JR':'BARLETTA',
 'BARLETTA, LOU':'BARLETTA',

    'KEN V KRAWCHUK, GOVERNOR':'KRAWCHUK',
    'KEN V. KRAWCHUK/K.S. SMITH':'KRAWCHUK',
    'KRAWCHUK /SMITH':'KRAWCHUK',
    'KEN V. KRAWCHUK KATHLEEN S. SMITH':'KRAWCHUK',
    'KRAWCHUK\\SMITH':'KRAWCHUK',
     'KEN V KRAWCHUK':'KRAWCHUK', 
 'KRAWCHUK / SMITH':'KRAWCHUK',
 'KRAWCHUK/SMITH':'KRAWCHUK',
 'KEN V. KRAWCHUK/K. S. SMITH':'KRAWCHUK',
 'KEN KRAWCHUK':'KRAWCHUK',
 'KRAWCHUK/ SMITH':'KRAWCHUK',
 'KEN V. KRAWCHUK':'KRAWCHUK',
 'KRAWCHUK, KEN V.':'KRAWCHUK',
 'KEN V. KRAWCHUK / K. S. SMITH':'KRAWCHUK',
    
    'GLOVER / BOSTICK':'GLOVER',
    'PAUL GLOVER, GOVERNOR':'GLOVER',
    'GLOVER / BOWSER BOSTICK':'GLOVER',
    'PAUL GLOVER/J. BOWSER-BOSTICK':'GLOVER',
    'GLOVER/BOSTICK':'GLOVER',
    'GLOVER/BOWSER-BOSTIC':'GLOVER',
    'PAUL GLOVER JOCOLYN BOWSER-BOSTICK':'GLOVER',
    'GLOVER/BOWSERBOS':'GLOVER',
    'GLOVER\\BOWSERBOSTICK':'GLOVER',
     'GLOVER / BOWSER-BOSTICK':'GLOVER', 
 'GLOVER/BOWSER-BOSTICK':'GLOVER',
 'GLOVER/BOWSER-BOS':'GLOVER', 
 'PAUL GLOVER/JOCOLYN BOWER-BOSTICK':'GLOVER', 
 'PAUL  GLOVER':'GLOVER',
 'GLOVER / BOWSER-BOS':'GLOVER',
 'PAUL GLOVER':'GLOVER',
 'GLOVER, PAUL':'GLOVER',
 'PAUL GLOVER / J. BOWSER BOSTICK':'GLOVER',
    
    'SCOTT R WAGNER, GOVERNOR':'WAGNER',
    'SCOTT R. WAGNER JEFF BARTOS':'WAGNER',
    'WAGNER\\BARTOS':'WAGNER',
     'SCOTT R WAGNER':'WAGNER', 
    'WAGNER/BARTOS':'WAGNER',
 'WAGNER / BARTOS':'WAGNER',
  'SCOTT R. WAGNER/JEFF BARTOS':'WAGNER',
 'WAGNER/ BARTOS':'WAGNER',
 'SCOTT R WAGNER AND JEFF BARTOS':'WAGNER',
 'SCOTT WAGNER':'WAGNER',
 'SCOTT R. WAGNER':'WAGNER',
 'WAGNER, SCOTT R.':'WAGNER',
 'SCOTT R. WAGNER / JEFF BARTOS':'WAGNER',
    
    'TOM WOLF, GOVERNOR':'WOLF',
    'TOM WOLF JOHN FETTERMAN':'WOLF',
    'WOLF\\FETTERMAN':'WOLF',
     'WOLF / FETTERMAN':'WOLF',
 'WOLF/FETTERMAN':'WOLF',
 'TOM WOLF/JOHN FETTERMAN':'WOLF', 
 'TOM  WOLF':'WOLF',
 'TOM WOLF AND JOHN FETTERMAN':'WOLF',
 'TOM WOLF':'WOLF',
 'WOLF, TOM':'WOLF',
 'TOM WOLF / JOHN FETTERMAN':'WOLF',
    
    'DALE KERNS':"KERNS",
    'DALE R KEARNS, JR':"KERNS",
 'DALE R KERNS, JR':"KERNS",
 'DALE R. KERNS JR.':"KERNS",
  'DALE KERNS JR':"KERNS", 
 'DALE R. KERNS, JR':"KERNS",
 'DALE R. KERNS, JR.':"KERNS",
 'DALE R. KERNS JR':"KERNS", 
 'DALE R KERNS JR':"KERNS",
    'KERNS, JR., DALE R.':"KERNS",
    
    'ROBERT CASEY JR.':"CASEY",
     'BOB CASEY, JR':"CASEY",
 'BOB CASEY JR.':"CASEY",
 'BOB  CASEY, JR.':"CASEY",
 'BOB CASEY':"CASEY",
 'CASEY, JR., BOB':"CASEY",
 'BOB CASEY, JR.':"CASEY", 
 'BOB CASEY JR':"CASEY", 
    
    'NEAL GALE':"GALE",
 'NEAL  GALE':"GALE",
 'GALE, NEAL':"GALE",
 'NEALE GALE':"GALE"}

# Apply the mapping; candidate values with no entry above are kept unchanged.
filtered_pa_election["candidate"] = filtered_pa_election["candidate"].map(candidate_name_changes).fillna(filtered_pa_election["candidate"])

In [None]:
# "candidate" values that are tallies or placeholders rather than the candidates
# being validated; rows carrying them are dropped in the next cell.
candidates_to_remove = ["NO AFFILIATION",'WRITE - IN','BLANK VOTES',
                      'WRITE-INS','WRITE IN','CAST VOTES','OVER VOTES',
                     'UNDER VOTES','WRITE IN VOTES','WRITE-IN VOTES']

# Party codes whose rows are dropped in the next cell.
parties_to_remove = ["NAF","IND"]

In [None]:
# Missing vote counts are treated as zero.
filtered_pa_election["votes"]=filtered_pa_election["votes"].fillna(0)
# Drop tally/placeholder rows and the excluded parties; copy so the column
# assignments below act on an independent frame rather than a filtered slice.
filtered_pa_election = filtered_pa_election[~(filtered_pa_election["candidate"].isin(candidates_to_remove))]
filtered_pa_election = filtered_pa_election[~(filtered_pa_election["party"].isin(parties_to_remove))].copy()
# Where one of party / candidate is missing, fill it from the other.
filtered_pa_election["party"] = filtered_pa_election["party"].fillna(filtered_pa_election["candidate"])
filtered_pa_election["candidate"] = filtered_pa_election["candidate"].fillna(filtered_pa_election["party"])
# Vote counts can be strings with thousands separators ("1,234"). Strip the commas
# and convert once; pd.to_numeric also accepts float-formatted text such as "12.0",
# which a direct astype(int) on the string would reject.
filtered_pa_election["votes"] = pd.to_numeric(
    filtered_pa_election["votes"].astype(str).str.replace(',', '', regex=False)
).astype(int)
#Delete the duplicates b/c of Delaware
filtered_pa_election = filtered_pa_election.drop_duplicates()
# Column key for the pivot: office label immediately followed by the candidate key.
filtered_pa_election["cand_col"]=filtered_pa_election["office"]+filtered_pa_election["candidate"]

In [None]:
# List the distinct office+candidate keys; each becomes one column in the pivot below.
print(filtered_pa_election["cand_col"].unique())

#### Pivot election results

In [None]:
# One row per county+precinct key, one column per office+candidate key, summing votes.
# The aggregation is named by string: passing the builtin `sum` is deprecated in
# recent pandas, while "sum" keeps the same groupby-sum behaviour.
pivoted_2018 = pd.pivot_table(filtered_pa_election, values=["votes"], index=["pivot_col"],columns=["cand_col"],aggfunc="sum")

#### Clean pivoted file

In [None]:
# Move the pivot index (pivot_col) back into a regular column.
pivoted_2018.reset_index(drop=False,inplace=True)
# Drop the first level of the column index.
pivoted_2018.columns = pivoted_2018.columns.droplevel(0)
# NOTE(review): names are assigned purely by position, so this assumes the frame has
# exactly these nine columns in this order (key column first) — confirm against the
# cand_col values printed earlier; an extra or missing candidate key raises here.
pivoted_2018.columns=["cty_pct","G18GOVGGLO","G18GOVLKRA","G18GOVRWAG","G18GOVDWOL","G18USSRBAR","G18USSDCAS","G18USSGGAL","G18USSLKER"]
# Precinct/candidate combinations with no rows count as zero votes.
pivoted_2018 = pivoted_2018.fillna(0)
# The first three characters of the key are the county code.
pivoted_2018["county"]=pivoted_2018["cty_pct"].str[0:3]
# Drop rows whose precinct part is exactly "Total".
pivoted_2018 = pivoted_2018[~(pivoted_2018["cty_pct"].str[3:]=="Total")]

#### Check Election Result Totals

In [None]:
# Vote columns shared by the VEST file and the rebuilt results.
col_list=["G18GOVGGLO","G18GOVLKRA","G18GOVRWAG","G18GOVDWOL","G18USSRBAR","G18USSDCAS","G18USSGGAL","G18USSLKER"]
# For each column print its name, the VEST statewide total, then the rebuilt total.
for i in col_list:
    print(i)
    print(sum(vest_pa_18[i]))
    print(sum(pivoted_2018[i]))
    print("")
    


In [None]:
# County-level totals for both datasets, computed once and restricted to the vote
# columns. Selecting the columns before aggregating avoids re-running the groupby
# for every column and avoids summing non-numeric columns (names, geometry).
source_county_totals = pivoted_2018.groupby(["county"])[col_list].sum()
vest_county_totals = vest_pa_18.groupby(["COUNTYFP"])[col_list].sum()

# County codes where at least one vote column disagrees, in first-seen order.
diff_counties = []

for i in col_list:
    # Aligned on county code; a county present on only one side yields NaN,
    # which also counts as "different" below.
    diff = source_county_totals[i] - vest_county_totals[i]
    print(i)
    mismatched = diff[diff != 0]
    for val in mismatched.index.values.tolist():
        if val not in diff_counties:
            diff_counties.append(val)
    print(mismatched)
    print("")

#print(diff_counties)
# County code -> county name, to print readable names for the mismatching counties.
fips_name_dict=dict(zip(fips_file["FIPS County"],fips_file["County Name"]))
#print(fips_name_dict)
for i in diff_counties:
    print(fips_name_dict.get(i))

### Join Election Results w/ VEST Final to Validate

In [None]:
# Compare row/column counts of the VEST file and the rebuilt results.
print(vest_pa_18.shape)
print(pivoted_2018.shape)

In [None]:
# Preview the rebuilt results.
print(pivoted_2018.head())

In [None]:
print(vest_pa_18.head())


#Based on the print, it seems like the precinct name should be capitalized
pivoted_2018["cty_pct"] = pivoted_2018["cty_pct"].str.upper()

#Two options for the unique ID, use the second one
print((vest_pa_18["COUNTYFP"]+vest_pa_18["NAME"]).nunique())
print((vest_pa_18["COUNTYFP"]+vest_pa_18["VTDST"]+vest_pa_18["NAME"]).nunique())

# Build a "vote fingerprint" per precinct by concatenating six vote counts as text.
# It is used only to discover which source precinct name corresponds to which VEST name.
# NOTE(review): the counts are joined without a separator, so different combinations
# can produce the same string — confirm collisions are acceptable for this use.
vest_pa_18["unique_vote_id"]=vest_pa_18["G18USSDCAS"].astype(str)+vest_pa_18["G18USSRBAR"].astype(str)+vest_pa_18["G18USSLKER"].astype(str)+vest_pa_18["G18USSGGAL"].astype(str)+vest_pa_18["G18GOVDWOL"].astype(str)+vest_pa_18["G18GOVRWAG"].astype(str)

# VEST-side precinct key: county code + VTD code + name.
vest_pa_18["join_col_vest"]=vest_pa_18["COUNTYFP"]+vest_pa_18["VTDST"]+vest_pa_18["NAME"]

# Cast the source vote columns to integers so their text form has no ".0" suffix.
# Assign the column directly: `df.loc[:, col] = ...` writes into the existing
# column in place on recent pandas and keeps its float dtype, which would leave
# the fingerprints below in "12.0" form.
for i in col_list:
    pivoted_2018[i]=pivoted_2018[i].astype(int)

pivoted_2018["unique_vote_id"]=pivoted_2018["G18USSDCAS"].astype(str)+pivoted_2018["G18USSRBAR"].astype(str)+pivoted_2018["G18USSLKER"].astype(str)+pivoted_2018["G18USSGGAL"].astype(str)+pivoted_2018["G18GOVDWOL"].astype(str)+pivoted_2018["G18GOVRWAG"].astype(str)

# Fingerprints excluded from the match on both sides ("000000" is the all-zero precinct).
dup_list = ["000000","2591002611"]
# Remove the rows carrying those fingerprints from the VEST side...
joined_vest_pa_18 = vest_pa_18[~(vest_pa_18["unique_vote_id"].isin(dup_list))]

# ...and from the source side.
joined_pivoted_2018 = pivoted_2018[~(pivoted_2018["unique_vote_id"].isin(dup_list))]

# Outer join on the fingerprint; "_merge" records which side(s) each row came from.
vote_name_hack = pd.merge(joined_vest_pa_18,joined_pivoted_2018,how="outer",on="unique_vote_id",indicator=True)


In [None]:
# Count fingerprint matches ("both") versus rows found on only one side.
print(vote_name_hack["_merge"].value_counts())

In [None]:
# Show the VEST-side keys in the fingerprint join.
print(vote_name_hack["join_col_vest"])

In [None]:
# For rows matched on both sides, keep the source key next to the VEST key.
name_quick = vote_name_hack[vote_name_hack["_merge"]=="both"][["cty_pct","join_col_vest"]]

In [None]:
# Preview the name pairs; the value counts show any key that was matched more than once.
print(name_quick.head())
print(name_quick["join_col_vest"].value_counts())
print(name_quick["cty_pct"].value_counts())

In [None]:
# Source key -> VEST key, taken from the fingerprint matches
# (if a source key appears more than once, the last pairing wins).
rename_dict = dict(zip(name_quick["cty_pct"],name_quick["join_col_vest"]))
# Rename matched precincts to the VEST key; unmatched ones keep their current key.
pivoted_2018["cty_pct"]=pivoted_2018["cty_pct"].map(rename_dict).fillna(pivoted_2018["cty_pct"])

In [None]:
print(pivoted_2018.columns)
data_columns = ['G18GOVGGLO', 'G18GOVLKRA', 'G18GOVRWAG', 'G18GOVDWOL',
       'G18USSRBAR', 'G18USSDCAS', 'G18USSGGAL', 'G18USSLKER']

# Extra result rows ("EMERG" / "IND" variants) and the precinct row each one is folded into.
split_row_parents = {
    "045SPRINGFIELD 2W 3P EMERG": "045SPRINGFIELD 2W 3P",
    "045UPPER DARBY 6D 11P EMERG": "045UPPER DARBY 6D 11P",
    "045UPPER DARBY 6D 8P EMERG": "045UPPER DARBY 6D 8P",
    "095LOWER MT BETHEL TWP LOWER - IND": "095LOWER MT BETHEL TWP LOWER",
    "095PALMER TWP UPPER WESTERN - NAZ IND": "095PALMER TWP UPPER WESTERN",
}

for extra_name, parent_name in split_row_parents.items():
    parent_rows = pivoted_2018["cty_pct"]==parent_name
    # Column-wise totals of the extra row(s). Using .sum() instead of int(<Series>)
    # avoids the deprecated int-on-Series conversion and adds 0 if the extra row is absent.
    extra_totals = pivoted_2018.loc[pivoted_2018["cty_pct"]==extra_name, data_columns].sum()
    for i in data_columns:
        pivoted_2018.loc[parent_rows,i]+=int(extra_totals[i])

# The extra rows have been folded into their parents, so drop them.
remove_list = list(split_row_parents)

pivoted_2018 = pivoted_2018[~(pivoted_2018["cty_pct"].isin(remove_list))]


#Create two empty districts



In [None]:
# Name crosswalk read from a CSV with "elections_name" and "vest_name" columns.
elections_to_vest_changes = pd.read_csv("./elections_to_vest_changes.csv")
# Turn it into a source key -> VEST key dictionary (the name is reused for the dict).
elections_to_vest_changes = dict(zip(elections_to_vest_changes["elections_name"],elections_to_vest_changes["vest_name"]))
# Apply the crosswalk; keys without an entry are left unchanged.
pivoted_2018["cty_pct"] = pivoted_2018["cty_pct"].map(elections_to_vest_changes).fillna(pivoted_2018["cty_pct"])


In [None]:
# Rename the VEST key column in place so both frames share the join column name.
vest_pa_18.rename(columns={"join_col_vest":"cty_pct"},inplace=True)
# Outer join on the precinct key: source results on the left, VEST on the right.
election_confirm = pd.merge(pivoted_2018,vest_pa_18,how="outer",on="cty_pct",indicator=True)

print(election_confirm["_merge"].value_counts())
# Precincts present only in VEST (right) or only in the source results (left),
# written out for manual inspection.
right_only = election_confirm[election_confirm["_merge"]=="right_only"]
left_only = election_confirm[election_confirm["_merge"]=="left_only"]
right_only.to_csv("./vest.csv")
left_only.to_csv("./elections.csv")

In [None]:
# Preview the unmatched precincts on each side of the join.
print(right_only.head())
print(left_only.head())

In [None]:
# Preview the rebuilt results after renaming.
print(pivoted_2018.head())

In [None]:
def validater_row (df, column_List, output_path="./pa_election_validation.csv"):
    """Compare paired ``<col>_x`` / ``<col>_y`` values row by row and report differences.

    For every name in ``column_List`` the frame must contain ``name + "_x"`` and
    ``name + "_y"`` (the suffixes a pandas merge adds to overlapping columns),
    plus a ``cty_pct`` column identifying the row.

    Prints a running record of each new maximum difference, any NaN differences,
    and a summary (row counts, max and average difference, number of differences
    greater than 10, sorted list of differing ``cty_pct`` values).

    Parameters
    ----------
    df : DataFrame
        Rows to validate.
    column_List : list of str
        Base column names (without the ``_x`` / ``_y`` suffix).
    output_path : str or None, optional
        Where to write the differing rows as CSV. Defaults to the path the
        notebook has always used; pass ``None`` to skip writing.

    Returns
    -------
    list
        Sorted ``cty_pct`` values of rows with at least one differing column.
    """
    matching_rows = 0
    different_rows = 0
    diff_list=[]
    diff_values = []
    max_diff = 0

    for _, row in df.iterrows():
        same = True
        for i in column_List:
            left_value = row[i + "_x"]
            right_value = row[i + "_y"]
            diff = abs(left_value - right_value)
            # NaN != 0 is True, so a missing value on either side counts as a difference.
            if diff != 0:
                diff_values.append(diff)
                same = False
                if np.isnan(diff):
                    print("NaN value at diff is: ", row['cty_pct'])
                    print(left_value)
                    print(right_value)
                if diff > max_diff:
                    max_diff = diff
                    print("New max diff is: ", str(max_diff))
                    print(row['cty_pct'])
        if same:
            matching_rows +=1
        else:
            different_rows +=1
            diff_list.append(row['cty_pct'])
    print("There are ", len(df.index)," total rows")
    print(different_rows," of these rows have election result differences")
    print(matching_rows," of these rows are the same")
    print("")
    print("The max difference between any one shared column in a row is: ", max_diff)
    if diff_values:
        print("The average difference is: ", str(sum(diff_values)/len(diff_values)))
    count_big_diff = len([i for i in diff_values if i > 10])
    print("There are ", str(count_big_diff), "precinct results with a difference greater than 10")
    diff_list.sort()
    print(diff_list)
    # Export the differing rows from the frame that was actually validated,
    # rather than from a notebook-level global.
    if output_path is not None:
        df[df['cty_pct'].isin(diff_list)].to_csv(output_path)
    return diff_list

In [None]:
# Row/column count of the VEST frame.
print(vest_pa_18.shape)

In [None]:
# Run the row-by-row comparison on precincts found in both the source results and VEST.
diff_list = validater_row(election_confirm[election_confirm["_merge"]=="both"],data_columns)

# Load in shapefiles

Precinct shapefiles primarily from the U.S. Census Bureau's 2020 Redistricting Data Program Phase 2 release. The shapefiles from Delaware County and the City of Pittsburgh are from the respective jurisdictions instead. Precinct numbers were corrected to match the voter file in the following locales: Allegheny (Elizabeth, Pittsburgh W12), Blair (Greenfield), Bradford (Athens), Greene (Monongahela), Monroe (Smithfield), Montgomery (Hatfield), Northampton (Bethlehem Twp), Perry (Toboyne), Washington (New Eagle, Somerset), York (Fairview).

In [None]:
#Combine all the data from separate files into one
li = []
for i in fips_codes:
    ref = "./raw-from-source/Census/partnership_shapefiles_19v2_"
    file_ref = ref+i+"/PVS_19_v2_vtd_"+i+".shp"
    file_prev = gp.read_file(file_ref)
    #print(file_prev.shape)
    li.append(file_prev)
global shapefiles_census
shapefiles_census = pd.concat(li, axis=0, ignore_index=True)

In [None]:
# Keep only the identifying columns and the geometry.
shapefiles_census = shapefiles_census[["COUNTYFP","VTDST","NAME","geometry"]]

In [None]:
# Full key in the same county+VTD+name form used for the VEST join column.
shapefiles_census["cty_pct"] = shapefiles_census["COUNTYFP"]+shapefiles_census["VTDST"]+shapefiles_census["NAME"]
# Shorter key: county code + VTD code only.
shapefiles_census["shp_join"] = shapefiles_census["COUNTYFP"]+shapefiles_census["VTDST"]
# If the short key is unique, the two printed counts agree.
print(shapefiles_census.shape)
print(shapefiles_census["shp_join"].nunique())

### Look into differences for Allegheny

In [None]:
# County-published voting district boundaries for Allegheny County.
allegheny = gp.read_file("./raw-from-source/Allegheny/Allegheny_County_Voting_District_Boundaries-shp/Allegheny_County_Voting_District_Boundaries.shp")

# Census precincts in county 003 whose name contains "PITTSBURGH".
census_pitt = shapefiles_census[(shapefiles_census["COUNTYFP"]=="003") & (shapefiles_census["NAME"].str.contains("PITTSBURGH"))]
census_pitt.plot()

# County-file precincts labelled "Pittsburgh", reprojected to the census CRS so the layers can be overlaid.
county_pitt = allegheny[allegheny["LABEL"]=="Pittsburgh"]
county_pitt = county_pitt.to_crs(census_pitt.crs)
county_pitt.plot()

# VEST precincts whose name contains "PITTSBURGH".
vest_pitt = vest_pa_18[vest_pa_18["NAME"].str.contains("PITTSBURGH")]
vest_pitt.plot()


# Plot the areas covered by exactly one of the two layers: census vs VEST, then county vs VEST.
gp.overlay(census_pitt,vest_pitt,how="symmetric_difference").plot()
gp.overlay(county_pitt,vest_pitt,how="symmetric_difference").plot()

In [None]:
# First nine characters of the precinct key (3-character county code + 6-character VTD code
# for keys already in the VEST form).
pivoted_2018["shp_join"]=pivoted_2018["cty_pct"].str[0:9]

def shp_combiner(new_name,merge_list):
    """Replace several Census precinct rows with one combined precinct.

    Every row of the global ``shapefiles_census`` whose ``cty_pct`` is listed
    in ``merge_list`` is unioned into a single geometry. Those rows are then
    removed and one new row carrying the unioned geometry is added at the end.

    Parameters
    ----------
    new_name : str
        ``cty_pct`` of the combined precinct. Characters 0-2 are stored as
        ``COUNTYFP``, characters 3-8 as ``VTDST`` and the remainder as ``NAME``.
    merge_list : list of str
        ``cty_pct`` values of the rows to combine.

    Returns
    -------
    None
        The global ``shapefiles_census`` is rebound to the updated frame.

    Raises
    ------
    ValueError
        If ``merge_list`` is empty.
    """
    global shapefiles_census
    if len(merge_list) == 0:
        raise ValueError("merge_list must name at least one precinct to combine")

    countyfp = new_name[0:3]
    vtdst = new_name[3:9]
    name = new_name[9:]

    # Select all pieces at once instead of filtering the frame once per name.
    is_piece = shapefiles_census["cty_pct"].isin(merge_list)
    new_geom = shapefiles_census.loc[is_piece].unary_union

    new_prec = gp.GeoDataFrame(geometry=[new_geom],columns=shapefiles_census.columns,crs = shapefiles_census.crs)
    new_prec["COUNTYFP"] = countyfp
    new_prec["VTDST"] = vtdst
    new_prec["NAME"] = name
    new_prec["cty_pct"] = countyfp+vtdst+name
    new_prec["shp_join"] = countyfp+vtdst

    # DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
    # The pieces are dropped before the new row is added, so the combined
    # precinct can never be filtered out along with them.
    shapefiles_census = pd.concat([shapefiles_census.loc[~is_piece], new_prec])

In [None]:
# Precincts that appear as several pieces in the Census file and are merged
# back into one. Key: cty_pct of the combined precinct; value: cty_pct of the
# pieces. Entries are processed in the order listed.
split_precinct_merges = {
    '00300F397SOUTH FAYETTE DISTRICT 03': ['00300F398SOUTH FAYETTE DISTRICT 03 (CD 17)', '00300F399SOUTH FAYETTE DISTRICT 03 (CD 18)'],
    '00300F408SOUTH FAYETTE DISTRICT 05': ['00300F409SOUTH FAYETTE DISTRICT 05 (CD 17)', '00300F412SOUTH FAYETTE DISTRICT 05 (CD 18)'],
    '019000355CRANBERRY DISTRICT EAST DIVISION 02': ['019000353CRANBERRY DISTRICT EAST DIVISION 02 (CD 16)', '019000354CRANBERRY DISTRICT EAST DIVISION 02 (CD 17)'],
    '019000357CRANBERRY DISTRICT EAST DISTRICT 03': ['019000358CRANBERRY DISTRICT EAST DISTRICT 03 (CD 16)', '019000359CRANBERRY DISTRICT EAST DISTRICT 03 (CD 17)'],
    '019000361CRANBERRY DISTRICT WEST DIVISION 01': ['019000362CRANBERRY DISTRICT WEST DIVISION 01 (CD 16)', '019000363CRANBERRY DISTRICT WEST DIVISION 01 (CD 17)'],
    '019000365CRANBERRY DISTRICT WEST DIVISION 02': ['019000366CRANBERRY DISTRICT WEST DIVISION 02 (CD 16)', '019000367CRANBERRY DISTRICT WEST DIVISION 02 (CD 17)'],
    '019000545JEFFERSON DISTRICT I': ['019000546JEFFERSON DISTRICT I (CD 15)', '019000547JEFFERSON DISTRICT I (CD 16)'],
    '019000555JEFFERSON DISTRICT II': ['019000556JEFFERSON DISTRICT II (CD 15)', '019000557JEFFERSON DISTRICT II (CD 16)'],
    '027000210FERGUSON DISTRICT NORTHEAST DIVISION 01': ['027000212FERGUSON DISTRICT NORTHEAST DIVISION 01 A', '027000214FERGUSON DISTRICT NORTHEAST DIVISION 01 B'],
    '027000262HALFMOON PRECINCT Proper': ['027000261HALFMOON PRECINCT Proper (CD 12)', '027000263HALFMOON PRECINCT Proper (CD 15)'],
    '027000268HALFMOON PRECINCT East Central': ['027000265HALFMOON PRECINCT East Central (CD 15)', '027000264HALFMOON PRECINCT East Central (CD 12)'],
    '029000915PHOENIXVILLE WARD MIDDLE PRECINCT 01': ['029000917PHOENIXVILLE WARD MIDDLE PRECINCT 01 (HD 157)', '029000919PHOENIXVILLE WARD MIDDLE PRECINCT 01 (HD 155)'],
    '041000600NORTH MIDDLETON PRECINCT 01': ['041000601NORTH MIDDLETON PRECINCT 01 (CD 10)', '041000602NORTH MIDDLETON PRECINCT 01 (CD 13)'],
    '041000620NORTH MIDDLETON PRECINCT 03': ['041000621NORTH MIDDLETON PRECINCT 03 (CD 10)', '041000622NORTH MIDDLETON PRECINCT 03 (CD 13)'],
    '061000380PENN': ['061000381PENN PRECINCT B (9th Cong)', '061000379PENN PRECINCT A (5th Cong)'],
    '079000805HAZLE DISTRICT 01': ['079000806HAZLE DISTRICT 01 (CD 08)', '079000807HAZLE DISTRICT 01 (CD 09)'],
    '129002850UNITY DISTRICT PLEASANT UNITY': ['129002851UNITY DISTRICT PLEASANT UNITY (CD 13)', '129002852UNITY DISTRICT PLEASANT UNITY (CD 14)'],
    '133001429YORK WARD 05 PRECINCT 03': ['133001428YORK WARD 05 PRECINCT 03 (CD 11)', '133001427YORK WARD 05 PRECINCT 03 (CD 10)'],
}
for combined_id, piece_ids in split_precinct_merges.items():
    shp_combiner(combined_id, piece_ids)

### Shapefile Adjustments

### Precinct Number Changes

Precinct numbers were corrected to match the voter file in the following locales: Allegheny (Elizabeth, Pittsburgh W12), Blair (Greenfield), Bradford (Athens), Greene (Monongahela), Monroe (Smithfield), Montgomery (Hatfield), Northampton (Bethlehem Twp), Perry (Toboyne), Washington (New Eagle, Somerset), York (Fairview).

#### Allegheny Adjustments

In [None]:
# County 003: map the ELIZABETH precincts from the Census file, then from the
# VEST file, coloured by precinct name for a side-by-side look.
plot_opts = dict(column="NAME", legend=True, figsize=(15, 15))
census_sel = shapefiles_census["NAME"].str.contains("ELIZABETH") & (shapefiles_census["COUNTYFP"] == "003")
shapefiles_census[census_sel].plot(**plot_opts)
vest_sel = vest_pa_18["NAME"].str.contains("ELIZABETH") & (vest_pa_18["COUNTYFP"] == "003")
vest_pa_18[vest_sel].plot(**plot_opts)

In [None]:
# County 003: map the PITTSBURGH WARD 12 precincts from the Census file, then
# from the VEST file, coloured by precinct name.
plot_opts = dict(column="NAME", legend=True, figsize=(15, 15))
census_sel = shapefiles_census["NAME"].str.contains("PITTSBURGH WARD 12") & (shapefiles_census["COUNTYFP"] == "003")
shapefiles_census[census_sel].plot(**plot_opts)
vest_sel = vest_pa_18["NAME"].str.contains("PITTSBURGH WARD 12") & (vest_pa_18["COUNTYFP"] == "003")
vest_pa_18[vest_sel].plot(**plot_opts)

#### Blair Changes

In [None]:
# County 013: map the GREENFIELD precincts from the Census file, then from the
# VEST file, coloured by precinct name.
plot_opts = dict(column="NAME", legend=True, figsize=(15, 15))
census_sel = shapefiles_census["NAME"].str.contains("GREENFIELD") & (shapefiles_census["COUNTYFP"] == "013")
shapefiles_census[census_sel].plot(**plot_opts)
vest_sel = vest_pa_18["NAME"].str.contains("GREENFIELD") & (vest_pa_18["COUNTYFP"] == "013")
vest_pa_18[vest_sel].plot(**plot_opts)

In [None]:
# Override the join key of the two Greenfield districts in county 013.
# The match is restricted to COUNTYFP 013: matching on NAME alone would also
# rewrite any identically named precinct in another county.
in_county_013 = shapefiles_census["COUNTYFP"] == "013"
greenfield_keys = {
    "GREENFIELD DISTRICT 01": "013000590",
    "GREENFIELD DISTRICT 02": "013000580",
}
for district, join_key in greenfield_keys.items():
    shapefiles_census.loc[in_county_013 & (shapefiles_census["NAME"] == district), "shp_join"] = join_key

#### Bradford Changes

In [None]:
# County 015: map the ATHENS DISTRICT precincts from the Census file, then
# from the VEST file, coloured by precinct name.
plot_opts = dict(column="NAME", legend=True, figsize=(15, 15))
census_sel = shapefiles_census["NAME"].str.contains("ATHENS DISTRICT") & (shapefiles_census["COUNTYFP"] == "015")
shapefiles_census[census_sel].plot(**plot_opts)
vest_sel = vest_pa_18["NAME"].str.contains("ATHENS DISTRICT") & (vest_pa_18["COUNTYFP"] == "015")
vest_pa_18[vest_sel].plot(**plot_opts)

In [None]:
# Show, then override, the join key of the two Athens districts in county 015.
# The match is restricted to COUNTYFP 015: matching on NAME alone would also
# rewrite any identically named precinct in another county.
in_county_015 = shapefiles_census["COUNTYFP"] == "015"
athens_keys = {
    "ATHENS DISTRICT 01": "015000060",
    "ATHENS DISTRICT 02": "015000050",
}
# Print the rows as they are before any key is changed.
for district in athens_keys:
    print(shapefiles_census.loc[in_county_015 & (shapefiles_census["NAME"] == district)])
for district, join_key in athens_keys.items():
    shapefiles_census.loc[in_county_015 & (shapefiles_census["NAME"] == district), "shp_join"] = join_key

#### Greene County

In [None]:
# County 059: map the MONONGAHELA precincts from the Census file, then from
# the VEST file, coloured by precinct name.
plot_opts = dict(column="NAME", legend=True, figsize=(15, 15))
census_sel = shapefiles_census["NAME"].str.contains("MONONGAHELA") & (shapefiles_census["COUNTYFP"] == "059")
shapefiles_census[census_sel].plot(**plot_opts)
vest_sel = vest_pa_18["NAME"].str.contains("MONONGAHELA") & (vest_pa_18["COUNTYFP"] == "059")
vest_pa_18[vest_sel].plot(**plot_opts)

In [None]:
# Show, then override, the join key of the two Monongahela districts in
# county 059. The match is restricted to COUNTYFP 059: matching on NAME alone
# would also rewrite any identically named precinct in another county.
in_county_059 = shapefiles_census["COUNTYFP"] == "059"
monongahela_keys = {
    "MONONGAHELA DISTRICT 01": "059000295",
    "MONONGAHELA DISTRICT 02": "059000280",
}
# Print the rows as they are before any key is changed.
for district in monongahela_keys:
    print(shapefiles_census.loc[in_county_059 & (shapefiles_census["NAME"] == district)])
for district, join_key in monongahela_keys.items():
    shapefiles_census.loc[in_county_059 & (shapefiles_census["NAME"] == district), "shp_join"] = join_key

#### Monroe

In [None]:
# County 089: map the SMITHFIELD precincts from the Census file, then from the
# VEST file, coloured by precinct name.
plot_opts = dict(column="NAME", legend=True, figsize=(15, 15))
census_sel = shapefiles_census["NAME"].str.contains("SMITHFIELD") & (shapefiles_census["COUNTYFP"] == "089")
shapefiles_census[census_sel].plot(**plot_opts)
vest_sel = vest_pa_18["NAME"].str.contains("SMITHFIELD") & (vest_pa_18["COUNTYFP"] == "089")
vest_pa_18[vest_sel].plot(**plot_opts)

In [None]:
# Print the VTDST code of each Smithfield precinct in the Census file next to
# the VTDST code of the VEST precinct it is being compared with.
# Pairs are (Census NAME, VEST NAME). Nothing is modified here.
smithfield_pairs = [
    ("SMITHFIELD DISTRICT 01 (CD 07)", "SMITHFIELD DISTRICT 01"),
    ("SMITHFIELD DISTRICT 01 (CD 08)", "SMITHFIELD DISTRICT 04"),
    ("SMITHFIELD DISTRICT 02", "SMITHFIELD DISTRICT 02"),
    ("SMITHFIELD DISTRICT 03", "SMITHFIELD DISTRICT 03"),
]
for census_name, vest_name in smithfield_pairs:
    print(shapefiles_census.loc[shapefiles_census["NAME"] == census_name, "VTDST"])
    print(vest_pa_18.loc[vest_pa_18["NAME"] == vest_name, "VTDST"])

Precinct numbers were corrected to match the voter file in the following locales: Allegheny (Elizabeth, Pittsburgh W12), Blair (Greenfield), Bradford (Athens), Greene (Monongahela), Monroe (Smithfield), Montgomery (Hatfield), Northampton (Bethlehem Twp), Perry (Toboyne), Washington (New Eagle, Somerset), York (Fairview).

#### Montgomery (Hatfield)

In [None]:
# County 091: map the HATFIELD precincts from the Census file, then from the
# VEST file, coloured by VTD code.
plot_opts = dict(column="VTDST", legend=True, figsize=(15, 15))
census_sel = shapefiles_census["NAME"].str.contains("HATFIELD") & (shapefiles_census["COUNTYFP"] == "091")
shapefiles_census[census_sel].plot(**plot_opts)
vest_sel = vest_pa_18["NAME"].str.contains("HATFIELD") & (vest_pa_18["COUNTYFP"] == "091")
vest_pa_18[vest_sel].plot(**plot_opts)

In [None]:
# Show, then override, the join key of the four Hatfield precincts in
# county 091. The match is restricted to COUNTYFP 091: matching on NAME alone
# would also rewrite any identically named precinct in another county.
in_county_091 = shapefiles_census["COUNTYFP"] == "091"
hatfield_keys = {
    "HATFIELD DISTRICT 01 DISTRICT 01": "091001002",
    "HATFIELD DISTRICT 01 DISTRICT 02": "091001006",
    "HATFIELD DISTRICT 02 DISTRICT 01": "091000996",
    "HATFIELD DISTRICT 02 DISTRICT 02": "091000992",
}
# Print the rows as they are before any key is changed.
for district in hatfield_keys:
    print(shapefiles_census.loc[in_county_091 & (shapefiles_census["NAME"] == district)])

for district, join_key in hatfield_keys.items():
    shapefiles_census.loc[in_county_091 & (shapefiles_census["NAME"] == district), "shp_join"] = join_key

#### Northampton (Bethlehem Twp)

In [None]:
# County 095: map the BETHLEHEM precincts from the Census file, then from the
# VEST file, coloured by VTD code.
plot_opts = dict(column="VTDST", legend=True, figsize=(15, 15))
census_sel = shapefiles_census["NAME"].str.contains("BETHLEHEM") & (shapefiles_census["COUNTYFP"] == "095")
shapefiles_census[census_sel].plot(**plot_opts)
vest_sel = vest_pa_18["NAME"].str.contains("BETHLEHEM") & (vest_pa_18["COUNTYFP"] == "095")
vest_pa_18[vest_sel].plot(**plot_opts)

#### Perry (Toboyne)

In [None]:
# County 099: map the TOBOYNE precincts from the Census file, then from the
# VEST file, coloured by VTD code.
plot_opts = dict(column="VTDST", legend=True, figsize=(15, 15))
census_sel = shapefiles_census["NAME"].str.contains("TOBOYNE") & (shapefiles_census["COUNTYFP"] == "099")
shapefiles_census[census_sel].plot(**plot_opts)
vest_sel = vest_pa_18["NAME"].str.contains("TOBOYNE") & (vest_pa_18["COUNTYFP"] == "099")
vest_pa_18[vest_sel].plot(**plot_opts)

In [None]:
# Show, then override, the join key of the two Toboyne districts in county 099.
# The match is restricted to COUNTYFP 099: matching on NAME alone would also
# rewrite any identically named precinct in another county.
in_county_099 = shapefiles_census["COUNTYFP"] == "099"
toboyne_keys = {
    "TOBOYNE DISTRICT 01": "099000290",
    "TOBOYNE DISTRICT 02": "099000280",
}
# Print the rows as they are before any key is changed.
for district in toboyne_keys:
    print(shapefiles_census.loc[in_county_099 & (shapefiles_census["NAME"] == district)])

for district, join_key in toboyne_keys.items():
    shapefiles_census.loc[in_county_099 & (shapefiles_census["NAME"] == district), "shp_join"] = join_key

#### Washington (New Eagle, Somerset)

In [None]:
# County 125: map the SOMERSET DISTRICT precincts from the Census file, then
# from the VEST file, coloured by VTD code.
plot_opts = dict(column="VTDST", legend=True, figsize=(15, 15))
census_sel = shapefiles_census["NAME"].str.contains("SOMERSET DISTRICT") & (shapefiles_census["COUNTYFP"] == "125")
shapefiles_census[census_sel].plot(**plot_opts)
vest_sel = vest_pa_18["NAME"].str.contains("SOMERSET DISTRICT") & (vest_pa_18["COUNTYFP"] == "125")
vest_pa_18[vest_sel].plot(**plot_opts)

In [None]:
# Show, then override, the join key of the two Somerset districts, matching on
# both the precinct name and county 125.
somerset_keys = {
    "SOMERSET DISTRICT 01": "125001630",
    "SOMERSET DISTRICT 02": "125001620",
}
# Print the rows as they are before any key is changed.
for district in somerset_keys:
    print(shapefiles_census.loc[(shapefiles_census["NAME"] == district) & (shapefiles_census["COUNTYFP"] == "125")])

for district, join_key in somerset_keys.items():
    shapefiles_census.loc[(shapefiles_census["NAME"] == district) & (shapefiles_census["COUNTYFP"] == "125"), "shp_join"] = join_key

In [None]:
# County 125: map the NEW EAGLE precincts from the Census file, then from the
# VEST file, coloured by VTD code.
plot_opts = dict(column="VTDST", legend=True, figsize=(15, 15))
census_sel = shapefiles_census["NAME"].str.contains("NEW EAGLE") & (shapefiles_census["COUNTYFP"] == "125")
shapefiles_census[census_sel].plot(**plot_opts)
vest_sel = vest_pa_18["NAME"].str.contains("NEW EAGLE") & (vest_pa_18["COUNTYFP"] == "125")
vest_pa_18[vest_sel].plot(**plot_opts)

In [None]:
# Show, then override, the join key of the two New Eagle districts in
# county 125. The match is restricted to COUNTYFP 125: matching on NAME alone
# would also rewrite any identically named precinct in another county.
in_county_125 = shapefiles_census["COUNTYFP"] == "125"
new_eagle_keys = {
    "NEW EAGLE DISTRICT 01": "125001240",
    "NEW EAGLE DISTRICT 02": "125001230",
}
# Print the rows as they are before any key is changed.
for district in new_eagle_keys:
    print(shapefiles_census.loc[in_county_125 & (shapefiles_census["NAME"] == district)])

for district, join_key in new_eagle_keys.items():
    shapefiles_census.loc[in_county_125 & (shapefiles_census["NAME"] == district), "shp_join"] = join_key

#### York (Fairview)

In [None]:
# County 133: map the FAIRVIEW DISTRICT precincts from the Census file, then
# from the VEST file, coloured by VTD code.
plot_opts = dict(column="VTDST", legend=True, figsize=(15, 15))
census_sel = shapefiles_census["NAME"].str.contains("FAIRVIEW DISTRICT") & (shapefiles_census["COUNTYFP"] == "133")
shapefiles_census[census_sel].plot(**plot_opts)
vest_sel = vest_pa_18["NAME"].str.contains("FAIRVIEW DISTRICT") & (vest_pa_18["COUNTYFP"] == "133")
vest_pa_18[vest_sel].plot(**plot_opts)

In [None]:
# Show, then reassign, the join keys of four county-133 precincts.
# Rows are picked by their original VTDST code (which is never modified), so
# the order of the reassignments does not matter.
# Key: current VTDST code; value: join key to store in shp_join.
fairview_keys = {
    "000220": "133000235",
    "000235": "133000240",
    "000240": "133000245",
    "000245": "133000220",
}
# Print the rows as they are before any key is changed.
for vtd_code in fairview_keys:
    print(shapefiles_census.loc[(shapefiles_census["VTDST"] == vtd_code) & (shapefiles_census["COUNTYFP"] == "133")])

for vtd_code, join_key in fairview_keys.items():
    shapefiles_census.loc[(shapefiles_census["VTDST"] == vtd_code) & (shapefiles_census["COUNTYFP"] == "133"), "shp_join"] = join_key

In [None]:
# Outer-join the election results to the Census shapes on shp_join, recording
# in `_merge` which side(s) each row came from.
merge_two = pd.merge(pivoted_2018, shapefiles_census, on="shp_join", how="outer", indicator=True)

merge_status = merge_two["_merge"]
print(merge_status.value_counts())
# Unmatched rows: shapes without results, and results without shapes.
right_only = merge_two[merge_status == "right_only"]
left_only = merge_two[merge_status == "left_only"]
print(right_only["shp_join"])
print(left_only["shp_join"])

In [None]:
# Spot-check a single join key in the merged table.
print(merge_two.loc[merge_two["shp_join"] == "013000590"])

In [None]:
# Keep the election-side cty_pct (suffix _x) under its plain name and trim the
# table to the identifier, vote-total, geometry and merge-indicator columns.
# The shapefile-side cty_pct_y is not in the list, so it is dropped by the
# selection itself; the former in-place drop was redundant and made the cell
# fail when run a second time.
# The explicit copy makes this an independent frame, so assigning to its
# columns afterwards does not write into a slice (SettingWithCopyWarning).
merge_two = merge_two.rename(columns={"cty_pct_x":"cty_pct"})
merge_two = merge_two[['COUNTYFP','VTDST', 'NAME','G18USSDCAS','G18USSRBAR',
                      'G18USSLKER','G18USSGGAL','G18GOVDWOL','G18GOVRWAG','G18GOVLKRA',
                      'G18GOVGGLO','geometry','cty_pct','_merge']].copy()

In [None]:
# Rebuild NAME from cty_pct: everything after the nine-character
# county + VTD prefix.
merge_two.loc[:, "NAME"] = merge_two.loc[:, "cty_pct"].str.slice(start=9)

### Final Join

In [None]:
# Take only the rows that had both results and a shape, and outer-join them to
# the VEST file on cty_pct; the `final_merge` column records the match status.
matched_source = merge_two[merge_two["_merge"] == "both"]
final_merge = pd.merge(matched_source, vest_pa_18, on="cty_pct", how="outer", indicator="final_merge")

In [None]:
# Display the column names of the joined table.
final_merge.columns

In [None]:
# Count rows by match status: both / left_only (source only) / right_only (VEST only).
final_merge["final_merge"].value_counts()

In [None]:
# List the cty_pct of rows that came from the VEST file only.
vest_only = final_merge["final_merge"] == "right_only"
print(final_merge.loc[vest_only, "cty_pct"])

### Join Merged Sourcefile to VEST

In [None]:
# Compare the source geometry with the VEST geometry for every matched precinct.
both = final_merge[final_merge["final_merge"]=="both"]
# Two views of the same rows: one with the source shape (geometry_x) as the
# active geometry, one with the VEST shape (geometry_y).
# NOTE(review): the VEST CRS is assigned to the source geometry as well; this
# assumes the Census files use the same CRS as the VEST file - confirm.
source_geoms = gp.GeoDataFrame(both,geometry="geometry_x",crs=vest_pa_18.crs)
vest_geoms = gp.GeoDataFrame(both,geometry="geometry_y",crs=vest_pa_18.crs)
# Reproject both to EPSG:3857 (coordinates in metres).
source_geoms = source_geoms.to_crs(3857)
vest_geoms = vest_geoms.to_crs(3857)
# Zero-width buffer; presumably here to repair invalid geometries before
# comparing - confirm.
source_geoms["geometry_x"]=source_geoms.buffer(0)
vest_geoms["geometry_y"]=vest_geoms.buffer(0)
# Row-by-row approximate equality of the two active geometries, with decimal=0.
vals = source_geoms.geom_almost_equals(vest_geoms,decimal=0)
print(vals.value_counts())

In [None]:
# Equal / not-equal counts, then the shape of the not-equal subset in each frame.
print(vals.value_counts())
not_equal = ~vals
print(source_geoms[not_equal].shape)
print(vest_geoms[not_equal].shape)

In [None]:
# Display the type of the not-equal subset.
type(source_geoms[~vals])


In [None]:
# Index labels of the precincts whose geometries did not compare equal.
print(vals[~vals].index)

In [None]:
# Display how many precincts did not compare equal.
len(source_geoms[~vals])

In [None]:
# For every matched precinct, measure how much the source and VEST shapes
# disagree: the area of their symmetric difference, in km^2.
# Both frames are in EPSG:3857, whose unit is the metre, so areas come out in
# m^2. The previous divisor 10e6 is 1e7, which understated every area tenfold
# relative to the km^2 labels used when the results are reported.
SQ_M_PER_SQ_KM = 1e6
# Set to True to print and plot every precinct whose difference exceeds
# PLOT_THRESHOLD_KM2 (replaces the former hard-coded `if (False):`).
PLOT_ALL_DIFFERENCES = False
PLOT_THRESHOLD_KM2 = 1

# One vectorised pass instead of one single-row operation per precinct; the
# two frames share the same index, so rows pair up one-to-one.
sym_diff = source_geoms.symmetric_difference(vest_geoms)
area_list = (sym_diff.area / SQ_M_PER_SQ_KM).tolist()

count = 0
if PLOT_ALL_DIFFERENCES:
    for i, area in enumerate(area_list):
        if not area > PLOT_THRESHOLD_KM2:
            continue
        count += 1
        # The original read positional column 12; with the column order set
        # when merge_two was trimmed, that is cty_pct - verify if columns change.
        name = source_geoms["cty_pct"].iat[i]
        print(f"{count}) For {name} difference in area is {area}")
        source_row = source_geoms.iloc[[i]]
        vest_row = vest_geoms.iloc[[i]]
        intersection = source_row.intersection(vest_row)
        # Red: difference, orange: source, blue: VEST, green: overlap (if any).
        base = sym_diff.iloc[[i]].plot(color="red")
        source_row.plot(color="orange",ax=base)
        vest_row.plot(color="blue",ax=base)
        if not intersection.iloc[0].is_empty:
            intersection.plot(color="green",ax=base)
        base.set_title(name)

In [None]:
# Tabulate the per-precinct difference areas into size classes.
df = pd.DataFrame(area_list)
print(df.shape)

areas = df[0]
n_zero = len(df[areas == 0])
n_under_tenth = len(df[(areas > 0) & (areas < .1)])
n_under_half = len(df[(areas >= .1) & (areas < .5)])
n_under_one = len(df[(areas >= .5) & (areas < 1)])
n_under_two = len(df[(areas >= 1) & (areas < 2)])
n_two_plus = len(df[areas >= 2])

print(f"{n_zero} precincts w/ a difference of 0 km^2")
print(f"{n_under_tenth} precincts w/ a difference between 0 and .1 km^2")
print(f"{n_under_half} precincts w/ a difference between .1 and .5 km^2")
print(f"{n_under_one} precincts w/ a difference between .5 and 1 km^2")
print(f"{n_under_two} precincts w/ a difference between 1 and 2 km^2")
print(f"{n_two_plus} precincts w/ a difference greater than 2 km^2")

In [None]:
# Print and plot every precinct that failed the equality check and whose
# symmetric-difference area exceeds 0.5 km^2.
# Both frames are in EPSG:3857 (metres), so areas are in m^2. The previous
# divisor 10e6 is 1e7, which understated areas tenfold; with the corrected
# divisor the 0.5 threshold really is 0.5 km^2 and more precincts qualify.
SQ_M_PER_SQ_KM = 1e6

# Filter once instead of re-applying the mask several times per iteration.
source_mismatch = source_geoms[~vals]
vest_mismatch = vest_geoms[~vals]

count = 0
for i in range(len(source_mismatch)):
    source_row = source_mismatch.iloc[[i]]
    vest_row = vest_mismatch.iloc[[i]]
    diff = source_row.symmetric_difference(vest_row)
    intersection = source_row.intersection(vest_row)
    # .iloc[0] pulls the scalar out; float() on a one-element Series is deprecated.
    area = diff.area.iloc[0] / SQ_M_PER_SQ_KM
    if area > .5:
        count += 1
        # The original read positional column 12; with the column order set
        # when merge_two was trimmed, that is cty_pct - verify if columns change.
        name = source_mismatch["cty_pct"].iat[i]
        print(f"{count}) For {name} difference in area is {area}")
        # Red: difference, orange: source, blue: VEST, green: overlap (if any).
        base = diff.plot(color="red")
        source_row.plot(color="orange",ax=base)
        vest_row.plot(color="blue",ax=base)
        if not intersection.iloc[0].is_empty:
            intersection.plot(color="green",ax=base)
        base.set_title(name)