In [None]:
import matplotlib.pyplot as plt # for plotting maps
import maup # mggg's library for proration, see documentation here: https://github.com/mggg/maup
import pandas as pd # standard python data library
import geopandas as gp # the geo-version of pandas
import numpy as np 
from statistics import mean, median
from functools import reduce

**Load Election Results**

Importing the election results from the TX website (currently stored locally)
All of these files have 9082 rows

In [31]:
# Directory that holds the locally stored TX election-result CSVs.
# Change this one constant to point the notebook at another copy of the data.
ELECTION_RESULTS_DIR = '/Users/peterhorton/Documents/Redistricting_Data_Hub/Coding/pgp-tx/Original Files/election results'

# One CSV per statewide contest, read with pandas defaults.
tx_governor_2014 = pd.read_csv(ELECTION_RESULTS_DIR + '/governor_2014.csv')
tx_president_2012 = pd.read_csv(ELECTION_RESULTS_DIR + '/president_2012.csv')
tx_president_2016 = pd.read_csv(ELECTION_RESULTS_DIR + '/president_2016.csv')
tx_ussen_2012 = pd.read_csv(ELECTION_RESULTS_DIR + '/ussen_2012.csv')
tx_ussen_2014 = pd.read_csv(ELECTION_RESULTS_DIR + '/ussen_2014.csv')

**Load Voter Data Files**

Importing the voter data files from the TX website (currently stored locally). All of these files have 9082 rows

In [84]:
# Directory that holds the locally stored voter registration / turnout CSVs.
# Change this one constant to point the notebook at another copy of the data.
VOTER_DATA_DIR = '/Users/peterhorton/Documents/Redistricting_Data_Hub/Coding/pgp-tx/Original Files/voter data'

# One CSV per election year, read with pandas defaults.
voter_data_2012 = pd.read_csv(VOTER_DATA_DIR + '/2012_voter_data.csv')
voter_data_2014 = pd.read_csv(VOTER_DATA_DIR + '/2014_voter_data.csv')
voter_data_2016 = pd.read_csv(VOTER_DATA_DIR + '/2016_voter_data.csv')

**Load VTD Files**

Importing the various VTD data from the TX website (currently stored locally)
There is a different VTD file for each of the 3 years of election results
It looks like they are using the demographic data from the 2012 file, while the 2016 file has the same number of rows (8,941) as their output file

In [85]:
# Directory that holds the locally stored VTD shapefiles (one sub-folder per election).
# Change this one constant to point the notebook at another copy of the data.
VTD_DIR = '/Users/peterhorton/Documents/Redistricting_Data_Hub/Coding/pgp-tx/Original Files/vtds'

vtd_2012 = gp.read_file(VTD_DIR + '/vtd12g/VTDs.shp')
vtd_2014 = gp.read_file(VTD_DIR + '/vtd14g/2014G_VTD.shp')
vtd_2016 = gp.read_file(VTD_DIR + '/vtd16g/vtd16g.shp')
vtd_2018 = gp.read_file(VTD_DIR + '/vtd18g/vtd18g.shp')

# 2012 has demographic data and 8,952 rows.
# 2016 has no demographic data but 8,941 rows, the same count as the PGP output file.


**Try other files**

In [105]:
# The VRTO file gets its own name; previously it was read into `try_2` and then
# immediately overwritten by the Returns file, so the first read was wasted.
try_vrto_2012 = gp.read_file('/Users/peterhorton/Downloads/ftp_election_data_16g/2012_General_Election_VRTO.csv')
# `try_2` still ends up bound to the Returns file, exactly as before.
try_2 = gp.read_file('/Users/peterhorton/Downloads/ftp_election_data_16g/2012_General_Election_Returns.csv')
new_vtd = gp.read_file('/Users/peterhorton/Downloads/vtd16g/vtd16g.shp')

# Non-null counts per column, then a first look at both tables.
print(try_2.count())
print(try_2.head())
print(new_vtd.head())

County       359786
FIPS         359786
VTD          359786
cntyvtd      359786
Office       359786
Name         359786
Party        359786
Incumbent    359786
Votes        359786
geometry          0
dtype: int64
     County FIPS   VTD cntyvtd     Office    Name Party Incumbent Votes  \
0  Anderson    1  0001   10001  President  Romney     R         N   754   
1  Anderson    1  0002   10002  President  Romney     R         N  1179   
2  Anderson    1  0003   10003  President  Romney     R         N   650   
3  Anderson    1  0004   10004  President  Romney     R         N   729   
4  Anderson    1  0005   10005  President  Romney     R         N   174   

  geometry  
0     None  
1     None  
2     None  
3     None  
4     None  
   CNTY  COLOR   VTD  CNTYVTD  VTDKEY    Shape_area      Shape_len  \
0    53      2  0008   530008       0  6.717692e+07   55772.006345   
1   469      2  0024  4690024       0  5.426296e+07   50831.107001   
2   421      6  0302  4210302       0  4.890835e

**Load US House District, TX Sen District, and TX House District**


In [9]:
# Directory that holds the locally stored district-plan shapefiles.
# Change this one constant to point the notebook at another copy of the data.
DISTRICT_PLANS_DIR = '/Users/peterhorton/Documents/Redistricting_Data_Hub/Coding/pgp-tx/Original Files/house_sen'

# US House, TX Senate and TX House district boundaries, in that order.
us_house = gp.read_file(DISTRICT_PLANS_DIR + '/tl_2016_us_cd115/tl_2016_us_cd115.shp')
tx_sen = gp.read_file(DISTRICT_PLANS_DIR + '/PLANS172/PLANS172.shp')
tx_house = gp.read_file(DISTRICT_PLANS_DIR + '/PLANH407/PLANH407.shp')

**Load PGP File**

I first want to check whether the PGP output file and the 2016 VTD file are different
Their main output file has 8,941 rows

In [63]:
# Location of the PGP output shapefile that the rest of the notebook compares against.
PGP_OUTPUT_SHP = '/Users/peterhorton/Documents/Redistricting_Data_Hub/Coding/pgp-tx/output/TX_vtds.shp'
tx_pgp_output = gp.read_file(PGP_OUTPUT_SHP)

In [66]:
# Spot-check one precinct in the PGP output.
is_sample_precinct = tx_pgp_output["CNTYVTD"] == "507004A"
print(tx_pgp_output[is_sample_precinct])

# Outer-join the PGP output with the 2016 VTD layer on CNTYVTD; the `_merge`
# indicator records whether each key came from the left, the right, or both.
full_merge = pd.merge(
    left=tx_pgp_output,
    right=vtd_2016,
    on='CNTYVTD',
    how='outer',
    indicator=True,
)
merge_status = full_merge["_merge"]
print(merge_status.describe())

# Keys found only in the 2016 VTD layer.
right_only = full_merge[merge_status == "right_only"]
print(right_only['CNTYVTD'])

# Keys found only in the PGP output.
left_only = full_merge[merge_status == "left_only"]
print(left_only['CNTYVTD'])

      CNTYVTD   VTD  WHITE  OTHER  HISPANIC  TOTPOP    VAP  BLACK  BLKHISP  \
8275  507004A  004A   51.0    2.0     995.0  1048.0  729.0    6.0    995.0   

      WVAP  ...  PRES16D  PRES16R  TOTVR16  TOTTO16 USCD  SEND  HD  AREA  \
8275  37.0  ...      237       67      826      308   23    19  80   554   

      PERIM                                           geometry  
8275    113  POLYGON ((992197.398 743346.287, 992347.472 74...  

[1 rows x 38 columns]
count     8941
unique       1
top       both
freq      8941
Name: _merge, dtype: object
Series([], Name: CNTYVTD, dtype: object)
Series([], Name: CNTYVTD, dtype: object)


**Clean Election and Voter Data Files**

Changing column names and removing candidates from outside the two main parties

In [34]:
# Keep only the first three columns of each results table and give the two
# remaining vote columns short names.
#
# Two fixes over the earlier version of this cell:
#   * `drop(labels, 1, inplace=True)` passed `axis` positionally, which pandas 2.0
#     no longer accepts; `columns=` is used instead.
#   * Dropping `columns[3:]` instead of fixed positions makes the cell safe to
#     re-run: on a second run there is nothing left past position 2, so nothing
#     is dropped, where the fixed positions raised an IndexError.
#     NOTE(review): this assumes each file has no columns beyond the positions
#     that were originally dropped (consistent with the merged column list
#     shown further down) — confirm against the raw CSVs.
tx_governor_2014 = (
    tx_governor_2014
    .drop(columns=tx_governor_2014.columns[3:])
    .rename(columns={'AbbottR_14G_Governor': 'GOV14R', 'DavisD_14G_Governor': 'GOV14D'})
)

tx_president_2012 = (
    tx_president_2012
    .drop(columns=tx_president_2012.columns[3:])
    .rename(columns={'RomneyR_12G_President': 'PRES12R', 'ObamaD_12G_President': 'PRES12D'})
)

tx_ussen_2012 = (
    tx_ussen_2012
    .drop(columns=tx_ussen_2012.columns[3:])
    .rename(columns={'CruzR_12G_U.S. Sen': 'SEN12R', 'SadlerD_12G_U.S. Sen': 'SEN12D'})
)

tx_ussen_2014 = (
    tx_ussen_2014
    .drop(columns=tx_ussen_2014.columns[3:])
    .rename(columns={'CornynR_14G_U.S. Sen': 'SEN14R', 'AlameelD_14G_U.S. Sen': 'SEN14D'})
)

tx_president_2016 = (
    tx_president_2016
    .drop(columns=tx_president_2016.columns[3:])
    .rename(columns={'ClintonD_16G_President': 'PRES16D', 'TrumpR_16G_President': 'PRES16R'})
)

In [35]:
# Keep only the first three columns of each voter-data table and rename the
# registration / turnout columns with a year suffix.
#
# `columns=` replaces the positional `axis` argument (rejected by pandas 2.0),
# and dropping `columns[3:]` instead of fixed positions lets the cell be re-run
# without an IndexError.
# NOTE(review): this assumes the raw files have no columns beyond the positions
# that were originally dropped — confirm against the raw CSVs.
voter_data_2012 = (
    voter_data_2012
    .drop(columns=voter_data_2012.columns[3:])
    .rename(columns={'Voter_Registration': 'TOTVR12', 'Turnout': 'TOTTO12'})
)
#print(voter_data_2012.head(2))

voter_data_2014 = (
    voter_data_2014
    .drop(columns=voter_data_2014.columns[3:])
    .rename(columns={'Voter_Registration': 'TOTVR14', 'Turnout': 'TOTTO14'})
)
#print(voter_data_2014.head(2))

voter_data_2016 = (
    voter_data_2016
    .drop(columns=voter_data_2016.columns[3:])
    .rename(columns={'Voter_Registration': 'TOTVR16', 'Turnout': 'TOTTO16'})
)
#print(voter_data_2016.head(2))

**Merge Election and Voter Data**



In [67]:
# Every cleaned table that should end up in the combined frame, in join order.
all_voter_data = [
    tx_governor_2014,
    tx_president_2012,
    tx_president_2016,
    tx_ussen_2012,
    tx_ussen_2014,
    voter_data_2012,
    voter_data_2014,
    voter_data_2016,
]

# Fold the list left to right, outer-joining each table onto the running
# result by CNTYVTD so that no key from any table is lost.
merged_voter_data = all_voter_data[0]
for next_table in all_voter_data[1:]:
    merged_voter_data = pd.merge(merged_voter_data, next_table, on=['CNTYVTD'], how='outer')

# Force the join key to string so it can be zero-padded and matched later.
merged_voter_data = merged_voter_data.astype({"CNTYVTD": str})
#print(merged_voter_data.dtypes)
#print(merged_voter_data.SEN12R.sum())
#print(merged_voter_data["CNTYVTD"])

**Merge VTD Data**

In [None]:
# Build a shared join key on both VTD layers: county number followed by the VTD
# code, left-padded with zeros to eight characters.
pad_key_to_8 = '{0:0>8}'.format
for vtd_layer in (vtd_2012, vtd_2016):
    county_plus_vtd = vtd_layer["CNTY"].astype(str) + vtd_layer["VTD"]
    vtd_layer["newCol"] = county_plus_vtd.apply(pad_key_to_8)

print(vtd_2012.head())
print(vtd_2016.head())

# Outer-join 2016 (left) with 2012 (right) on the new key and report which
# keys are present on only one side.
merged_vtd = pd.merge(
    left=vtd_2016,
    right=vtd_2012,
    on='newCol',
    how='outer',
    indicator=True,
)
vtd_merge_status = merged_vtd["_merge"]
print(vtd_merge_status.describe())
print(merged_vtd.head())

# Keys that exist only in the 2012 layer.
right_only = merged_vtd[vtd_merge_status == "right_only"]
print(right_only['newCol'])

# Keys that exist only in the 2016 layer.
left_only = merged_vtd[vtd_merge_status == "left_only"]
print(left_only['newCol'])

**Merge Election and Voter Data with VTD Shapefile**

In [60]:
print(merged_voter_data["CNTYVTD"])
#print(vtd_2012["CNTYVTD"])

# Left-pad the join key with zeros to eight characters on both sides so the
# two key columns are compared in the same format.
zero_pad_8 = '{0:0>8}'.format
vtd_2012["CNTYVTD"] = vtd_2012["CNTYVTD"].apply(zero_pad_8)
merged_voter_data["CNTYVTD"] = merged_voter_data["CNTYVTD"].apply(zero_pad_8)
print(vtd_2012.count())
#print(vtd_2012["CNTYVTD"])

# Outer-join the election / voter table (left) with the 2012 VTD layer (right)
# and summarise how many keys matched.
merged_vtd_election = pd.merge(
    left=merged_voter_data,
    right=vtd_2012,
    on='CNTYVTD',
    how='outer',
    indicator=True,
)
election_merge_status = merged_vtd_election["_merge"]
print(election_merge_status.describe())

# Keys present only in the VTD layer.
right_only = merged_vtd_election[election_merge_status == "right_only"]
print(right_only['CNTYVTD'])

# Keys present only in the election / voter table.
left_only = merged_vtd_election[election_merge_status == "left_only"]
print(left_only['CNTYVTD'])

0       00010001
1       00010002
2       00010003
3       00010004
4       00010005
          ...   
9077    05070002
9078    05070003
9079    05070004
9080    0507001A
9081    0507004A
Name: CNTYVTD, Length: 9082, dtype: object
CNTY          8952
COLOR         8952
VTD           8952
CNTYVTD       8952
VTDKEY        8952
CNTYKEY       8952
Shape_Leng    8952
Shape_Area    8952
e_ang         8952
e_oth         8952
e_hsp         8952
e_total       8952
e_vap         8952
e_blak        8952
e_bh          8952
e_angvap      8952
e_hspvap      8952
e_bhvap       8952
e_blakvap     8952
e_othvap      8952
geometry      8952
dtype: int64
count     9413
unique       3
top       both
freq      8621
Name: _merge, dtype: object
9082    00010014
9083    00050008
9084    00270407
9085    00291073
9086    00291080
          ...   
9408    04710305
9409    04710402
9410    04730420
9411    04870007
9412    04910277
Name: CNTYVTD, Length: 331, dtype: object
33      0050008A
34      0050008B
315    

In [51]:
# Outer join of the election / voter table with the 2012 VTD layer on CNTYVTD,
# with a `_merge` column recording which side each row came from.
vtd_election = merged_voter_data.merge(
    vtd_2012,
    on='CNTYVTD',
    how='outer',
    indicator=True,
)

In [19]:
# Non-null tally for every column of the combined table.
vtd_election_counts = vtd_election.count()
print(vtd_election_counts)

CNTYVTD       11161
GOV14R         9082
GOV14D         9082
PRES12R        9082
PRES12D        9082
PRES16D        9082
PRES16R        9082
SEN12R         9082
SEN12D         9082
SEN14R         9082
SEN14D         9082
TOTVR12        9082
TOTTO12        9082
TOTVR14        9082
TOTTO14        9082
TOTVR16        9082
TOTTO16        9082
CNTY           8952
COLOR          8952
VTD            8952
VTDKEY         8952
CNTYKEY        8952
Shape_Leng     8952
Shape_Area     8952
e_ang          8952
e_oth          8952
e_hsp          8952
e_total        8952
e_vap          8952
e_blak         8952
e_bh           8952
e_angvap       8952
e_hspvap       8952
e_bhvap        8952
e_blakvap      8952
e_othvap       8952
geometry       8952
_merge        11161
dtype: int64


In [None]:
# Replace each layer's geometry with its zero-distance buffer.
# NOTE(review): presumably done to repair invalid geometries before the maup
# steps further down — confirm.
vtd_2016["geometry"] = vtd_2016.buffer(0)
# Print the CRS explicitly: a bare `vtd_2016.crs` in the middle of a cell is
# evaluated and thrown away, so only the last expression was ever displayed.
print(vtd_2016.crs)
vtd_2012["geometry"] = vtd_2012.buffer(0)
print(vtd_2012.crs)

In [None]:
#print(vtd_2012.head())
#print(vtd_2012.count())
#print(vtd_2012.dtypes)
#Add leading zeros to CNTYVTD
#vtd_2012['CNTYVTD'] = vtd_2012['CNTYVTD'].apply(lambda x: '{0:0>7}'.format(x))
#print(vtd_2012.CNTYVTD.describe())
#print(vtd_assign.head())

In [None]:
# Inner join (the pandas default) of the election / voter table with the 2012
# VTD layer, so only keys present on both sides survive.
merge_attempt = pd.merge(merged_voter_data, vtd_2012, on=['CNTYVTD'], indicator=True)

# Summary statistics for the joined table and for each of its two inputs.
for summarized in (merge_attempt, merged_voter_data, vtd_2012):
    print(summarized.describe())

# Compare the e_blak total before and after the join to see how much was lost.
print(vtd_2012["e_blak"].sum())
print(merge_attempt["e_blak"].sum())

#print(merge_attempt[merge_attempt["_merge"]=='left_only'].head())
#print(merge_attempt[merge_attempt["_merge"]=='right_only'].head())
#print(merge_attempt[merge_attempt["_merge"]=='both'].head())

In [None]:
# First rows of each district layer: US House, TX Senate, TX House.
for district_layer in (us_house, tx_sen, tx_house):
    print(district_layer.head())

**Merge US House District, TX Sen District, and TX House District Data w/ TX Shapefile**

In [55]:
# Use the 2016 VTD layer's CRS as the common projection.
proj = vtd_2016.crs
# Reproject the US House districts into that CRS so both layers share coordinates.
# Requires `us_house` from the district-loading cell to exist in the kernel.
us_house = us_house.to_crs(proj)
# maup.assign(sources, targets): here the VTDs are the sources and the US House
# districts are the targets.
# NOTE(review): presumably yields, per VTD, the index of the district it is
# assigned to — confirm against the maup documentation.
us_house_assign = maup.assign(vtd_2016,us_house)
print(us_house_assign.head())

NameError: name 'us_house' is not defined