In [2]:
import matplotlib.pyplot as plt # for plotting maps
import maup # mggg's library for proration, see documentation here: https://github.com/mggg/maup
import pandas as pd # standard python data library
import geopandas as gp # the geo-version of pandas
import numpy as np 
from statistics import mean, median
from functools import reduce

**Load Original Files**

Importing the election results from the TX website (currently stored locally)
All of these files have 9082 rows

In [2]:
tx_governor_2014 = pd.read_csv('/Users/peterhorton/Documents/Redistricting_Data_Hub/Coding/pgp-tx/Original Files/election results/governor_2014.csv')
tx_president_2012 = pd.read_csv('/Users/peterhorton/Documents/Redistricting_Data_Hub/Coding/pgp-tx/Original Files/election results/president_2012.csv')
tx_president_2016 = pd.read_csv('/Users/peterhorton/Documents/Redistricting_Data_Hub/Coding/pgp-tx/Original Files/election results/president_2016.csv')
tx_ussen_2012 = pd.read_csv('/Users/peterhorton/Documents/Redistricting_Data_Hub/Coding/pgp-tx/Original Files/election results/ussen_2012.csv')
tx_ussen_2014 = pd.read_csv('/Users/peterhorton/Documents/Redistricting_Data_Hub/Coding/pgp-tx/Original Files/election results/ussen_2014.csv')

In [3]:
tx_governor_2014.drop(tx_governor_2014.columns[[3,4,5]],1,inplace=True)
tx_governor_2014.rename(columns={'AbbottR_14G_Governor':'GOV14R','DavisD_14G_Governor':'GOV14D'},inplace=True)

tx_president_2012.drop(tx_president_2012.columns[[3,4,5]],1,inplace=True)
tx_president_2012.rename(columns={'RomneyR_12G_President':'PRES12R','ObamaD_12G_President':'PRES12D'},inplace=True)

tx_ussen_2012.drop(tx_ussen_2012.columns[[3,4]],1,inplace=True)
tx_ussen_2012.rename(columns={'CruzR_12G_U.S. Sen':'SEN12R','SadlerD_12G_U.S. Sen':'SEN12D'},inplace=True)

tx_ussen_2014.drop(tx_ussen_2014.columns[[3,4,5]],1,inplace=True)
tx_ussen_2014.rename(columns={'CornynR_14G_U.S. Sen':'SEN14R','AlameelD_14G_U.S. Sen':'SEN14D'},inplace=True)

tx_president_2016.drop(tx_president_2016.columns[[3,4,5]],1,inplace=True)
tx_president_2016.rename(columns={'ClintonD_16G_President':'PRES16D','TrumpR_16G_President':'PRES16R'},inplace=True)

**Load Voter Data Files**

Importing the voter data files from the TX webbsite (currently stored locally). All of these files have 9082 rows

In [4]:
voter_data_2012 = pd.read_csv('/Users/peterhorton/Documents/Redistricting_Data_Hub/Coding/pgp-tx/Original Files/voter data/2012_voter_data.csv')
voter_data_2014 = pd.read_csv('/Users/peterhorton/Documents/Redistricting_Data_Hub/Coding/pgp-tx/Original Files/voter data/2014_voter_data.csv')
voter_data_2016 = pd.read_csv('/Users/peterhorton/Documents/Redistricting_Data_Hub/Coding/pgp-tx/Original Files/voter data/2016_voter_data.csv')

In [5]:
voter_data_2012.drop(voter_data_2012.columns[[3,4]],1,inplace=True)
voter_data_2012.rename(columns={'Voter_Registration':'TOTVR12','Turnout':'TOTTO12'},inplace=True)
print(voter_data_2012.head(2))

voter_data_2014.drop(voter_data_2014.columns[[3,4]],1,inplace=True)
voter_data_2014.rename(columns={'Voter_Registration':'TOTVR14','Turnout':'TOTTO14'},inplace=True)
print(voter_data_2014.head(2))

voter_data_2016.drop(voter_data_2016.columns[[3,4]],1,inplace=True)
voter_data_2016.rename(columns={'Voter_Registration':'TOTVR16','Turnout':'TOTTO16'},inplace=True)
print(voter_data_2016.head(2))

   CNTYVTD  TOTVR12  TOTTO12
0  0010001     1629     1026
1  0010002     2224     1418
   CNTYVTD  TOTVR14  TOTTO14
0  0010001     1712      576
1  0010002     2389      786
   CNTYVTD  TOTVR16  TOTTO16
0  0010001     1799     1030
1  0010002     2455     1532


**Merge All Voter Data**



In [6]:
all_voter_data = [tx_governor_2014,tx_president_2012,tx_president_2016,tx_ussen_2012,tx_ussen_2014,voter_data_2012,voter_data_2014,voter_data_2016]
merged_voter_data = reduce(lambda  left,right: pd.merge(left,right,on=['CNTYVTD'],
                                            how='outer'), all_voter_data)
print(merged_voter_data.head(2))
print(merged_voter_data.dtypes)
print(merged_voter_data.SEN12R.sum())
print(merged_voter_data["CNTYVTD"])

   CNTYVTD  GOV14R  GOV14D  PRES12R  PRES12D  PRES16D  PRES16R  SEN12R  \
0  0010001     424     130      754      261      262      742     713   
1  0010002     663     110     1179      228      181     1318    1072   

   SEN12D  SEN14R  SEN14D  TOTVR12  TOTTO12  TOTVR14  TOTTO14  TOTVR16  \
0     285     441     110     1629     1026     1712      576     1799   
1     274     677      80     2224     1418     2389      786     2455   

   TOTTO16  
0     1030  
1     1532  
CNTYVTD    object
GOV14R      int64
GOV14D      int64
PRES12R     int64
PRES12D     int64
PRES16D     int64
PRES16R     int64
SEN12R      int64
SEN12D      int64
SEN14R      int64
SEN14D      int64
TOTVR12     int64
TOTTO12     int64
TOTVR14     int64
TOTTO14     int64
TOTVR16     int64
TOTTO16     int64
dtype: object
4439084
0       0010001
1       0010002
2       0010003
3       0010004
4       0010005
         ...   
9077    5070002
9078    5070003
9079    5070004
9080    507001A
9081    507004A
Name: CNTYV

**Load VTD Files**

Importing the various VTD data from the TX website (currently stored locally)
There is a different VTD file for each of the 3 years of election results
It looks like they are using demographic data from the 2012 file

In [8]:
vtd_2012 = gp.read_file('/Users/peterhorton/Documents/Redistricting_Data_Hub/Coding/pgp-tx/Original Files/vtds/vtd12g/VTDs.shp')
#vtd_2014 = gp.read_file('/Users/peterhorton/Documents/Redistricting_Data_Hub/Coding/pgp-tx/Original Files/vtds/vtd14g/2014G_VTD.shp')
vtd_2016 = gp.read_file('/Users/peterhorton/Documents/Redistricting_Data_Hub/Coding/pgp-tx/Original Files/vtds/vtd16g/vtd16g.shp')

In [14]:
vtd_2016["geometry"] = vtd_2016.buffer(0)

In [9]:
print(vtd_2012.head())
print(vtd_2012.count())
print(vtd_2012.dtypes)
#Add leading zeros to CNTYVTD
vtd_2012['CNTYVTD'] = vtd_2012['CNTYVTD'].apply(lambda x: '{0:0>7}'.format(x))
print(vtd_2012.CNTYVTD.describe())

   CNTY  COLOR   VTD CNTYVTD  VTDKEY  CNTYKEY    Shape_Leng    Shape_Area  \
0     1      3  0001   10001       1        1  15020.544928  5.616316e+06   
1     1      5  0002   10002       2        1  94432.828909  2.562133e+08   
2     1      1  0003   10003       3        1  55277.408937  7.084692e+07   
3     1      3  0004   10004       4        1  91313.174639  2.411696e+08   
4     1      7  0005   10005       5        1  86938.250104  1.689853e+08   

   e_ang  e_oth  ...  e_total  e_vap  e_blak  e_bh  e_angvap  e_hspvap  \
0   2053     89  ...     3131   2341     606   989      1677       217   
1   3171     64  ...     3744   2732     272   509      2365       135   
2   1443     32  ...     1766   1345     140   291      1134        96   
3   1962     40  ...     2147   1684      72   145      1553        47   
4    454      2  ...      476    366       6    20       349        12   

   e_bhvap  e_blakvap  e_othvap  \
0      592        378        72   
1      327        197 

In [10]:
merge_attempt = merged_voter_data.merge(vtd_2012,on=['CNTYVTD'],indicator=True)
print(merge_attempt.describe())
print(merged_voter_data.describe())
print(vtd_2012.describe())

print(vtd_2012.e_blak.sum())
print(merge_attempt.e_blak.sum())

#print(merge_attempt[merge_attempt["_merge"]=='left_only'].head())
#print(merge_attempt[merge_attempt["_merge"]=='right_only'].head())
#print(merge_attempt[merge_attempt["_merge"]=='both'].head())

            GOV14R       GOV14D      PRES12R      PRES12D      PRES16D  \
count  8611.000000  8611.000000  8611.000000  8611.000000  8611.000000   
mean    311.592498   206.482058   509.738242   372.378121   433.585414   
std     331.137177   223.417786   529.537923   375.501581   454.061913   
min       0.000000     0.000000     0.000000     0.000000     0.000000   
25%      61.000000    40.000000   105.000000    79.000000    80.000000   
50%     195.000000   139.000000   325.000000   270.000000   296.000000   
75%     462.000000   303.000000   763.000000   551.000000   663.500000   
max    2348.000000  2211.000000  3571.000000  2823.000000  4479.000000   

           PRES16R       SEN12R       SEN12D       SEN14R       SEN14D  ...  \
count  8611.000000  8611.000000  8611.000000  8611.000000  8611.000000  ...   
mean    519.588666   495.097201   359.723493   318.838695   179.759726  ...   
std     540.882548   512.708903   357.268550   338.617580   200.541684  ...   
min       0.00000

**Load US House District, TX Sen District, and TX House District**


In [3]:
us_house = gp.read_file('/Users/peterhorton/Documents/Redistricting_Data_Hub/Coding/pgp-tx/Original Files/house_sen/tl_2016_us_cd115/tl_2016_us_cd115.shp')
tx_sen = gp.read_file('/Users/peterhorton/Documents/Redistricting_Data_Hub/Coding/pgp-tx/Original Files/house_sen/PLANS172/PLANS172.shp')
tx_house = gp.read_file('/Users/peterhorton/Documents/Redistricting_Data_Hub/Coding/pgp-tx/Original Files/house_sen/PLANH407/PLANH407.shp')

**Load PGP File**

I first want to check if these two files are different
Their main output file has 8,941 rows

In [12]:
tx_pgp_output = gp.read_file('/Users/peterhorton/Documents/Redistricting_Data_Hub/Coding/pgp-tx/output/TX_vtds.shp')
#tx_pgp_vtds = gp.read_file('/Users/peterhorton/Documents/Redistricting_Data_Hub/Coding/pgp-tx/TX_vtds/TX_vtds.shp')

In [15]:
#tx_pgp_output.difference(tx_pgp_vtds)
tx_pgp_output.difference(vtd_2016)


TopologyException: Input geom 0 is invalid: Ring Self-intersection at or near point 1153136.0481999964 456128.66519999877 at 1153136.0481999964 456128.66519999877


TopologicalError: The operation 'GEOSDifference_r' could not be performed. Likely cause is invalidity of the geometry <shapely.geometry.polygon.Polygon object at 0x7fcf0aa75fd0>

In [125]:
print(tx_pgp_output.head(2))
tx_pgp_output.count()
print(tx_pgp_output.BLACK.sum())

  CNTYVTD   VTD   WHITE  OTHER  HISPANIC  TOTPOP     VAP  BLACK  BLKHISP  \
0   10001  0001  2053.0   89.0     401.0  3131.0  2341.0  606.0    989.0   
1   10002  0002  3171.0   64.0     245.0  3744.0  2732.0  272.0    509.0   

     WVAP  ...  PRES16D  PRES16R  TOTVR16  TOTTO16 USCD  SEND  HD  AREA  \
0  1677.0  ...      262      742     1799     1030   05     3   8     6   
1  2365.0  ...      181     1318     2455     1532   05     3   8   259   

   PERIM                                           geometry  
0     15  POLYGON ((1413960.808 1073012.816, 1413971.571...  
1     95  POLYGON ((1420165.429 1066385.798, 1420251.968...  

[2 rows x 38 columns]
3168469.0


In [127]:
tx_pgp_output.SEN12R.sum()

4439084