<a href="https://colab.research.google.com/github/victoriabelotti42/MATH_497_Team3/blob/main/efficiency_gap.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

The redistricting code and related MCMC (GerryChain) code is adapted from the [GerryChain documentation](https://gerrychain.readthedocs.io/en/latest/user/quickstart.html), by the Metric Geometry and Gerrymandering Group.

See Seaborn documentation [here](https://seaborn.pydata.org/index.html) for visualization.

In [None]:
import numpy as np

In [None]:
#@title
# Install GerryChain (and its dependencies) into the notebook runtime.
# The version is not pinned; the run recorded below resolved to gerrychain 0.2.12.
!pip install gerrychain

Collecting gerrychain
[?25l  Downloading https://files.pythonhosted.org/packages/a5/fc/a9c80923910833d858542543b74e2afde8eb076cdf738964b90b7bbb199b/gerrychain-0.2.12-py2.py3-none-any.whl (48kB)
[K     |██████▊                         | 10kB 16.5MB/s eta 0:00:01[K     |█████████████▌                  | 20kB 22.6MB/s eta 0:00:01[K     |████████████████████▎           | 30kB 16.7MB/s eta 0:00:01[K     |███████████████████████████     | 40kB 15.3MB/s eta 0:00:01[K     |████████████████████████████████| 51kB 4.0MB/s 
Collecting geopandas
[?25l  Downloading https://files.pythonhosted.org/packages/f7/a4/e66aafbefcbb717813bf3a355c8c4fc3ed04ea1dd7feb2920f2f4f868921/geopandas-0.8.1-py2.py3-none-any.whl (962kB)
[K     |████████████████████████████████| 972kB 13.7MB/s 
Collecting pyproj>=2.2.0
[?25l  Downloading https://files.pythonhosted.org/packages/e4/ab/280e80a67cfc109d15428c0ec56391fc03a65857b7727cf4e6e6f99a4204/pyproj-3.0.0.post1-cp36-cp36m-manylinux2010_x86_64.whl (6.4MB)
[K 

In [None]:
# Clone the GerryChain repository to get its sample data file
# (docs/user/PA_VTDs.json), which later cells read.
!git clone https://github.com/mggg/GerryChain.git

Cloning into 'GerryChain'...
remote: Enumerating objects: 42, done.[K
remote: Counting objects: 100% (42/42), done.[K
remote: Compressing objects: 100% (32/32), done.[K
remote: Total 6007 (delta 20), reused 23 (delta 10), pack-reused 5965[K
Receiving objects: 100% (6007/6007), 111.77 MiB | 18.30 MiB/s, done.
Resolving deltas: 100% (4094/4094), done.


In [None]:
# make sure this file shows up in this directory
!ls GerryChain/docs/user/PA_VTDs.json

GerryChain/docs/user/PA_VTDs.json


In [None]:
path_to_pa = 'GerryChain/docs/user/PA_VTDs.json'

In [None]:
from gerrychain import Election, Graph, Partition
from gerrychain.updaters import Tally, cut_edges

# Election updater named "SEN12", fed by the USS12D / USS12R node columns.
election = Election("SEN12", {"Dem": "USS12D", "Rep": "USS12R"})

# Graph loaded from the Pennsylvania VTD JSON file.
graph = Graph.from_json(path_to_pa)

# Quantities recomputed for every partition: the cut edges, the per-district
# TOTPOP tally (exposed as "population"), and the election results.
pa_updaters = {
    "cut_edges": cut_edges,
    "population": Tally("TOTPOP", alias="population"),
    "SEN12": election,
}

# Starting plan: districts are given by each node's CD_2011 attribute.
initial_partition = Partition(graph, assignment="CD_2011", updaters=pa_updaters)

In [None]:
# Print the "population" tally of every district in the starting plan.
for district_id, district_pop in initial_partition["population"].items():
    print(f"District {district_id}: {district_pop}")

District 3: 705317
District 5: 706258
District 10: 705327
District 9: 705679
District 12: 705698
District 6: 705782
District 15: 705549
District 7: 706391
District 16: 705629
District 11: 705883
District 4: 705669
District 8: 705689
District 17: 705830
District 18: 705847
District 14: 705526
District 13: 705028
District 2: 705689
District 1: 705588


In [None]:
from gerrychain import MarkovChain
from gerrychain.accept import always_accept
from gerrychain.constraints import single_flip_contiguous
from gerrychain.proposals import propose_random_flip
import matplotlib.pyplot as plt
import pandas as pd

# Markov chain over districting plans, starting from initial_partition:
# random single-node flips, constrained by single_flip_contiguous,
# every valid proposal accepted, 1000 steps in total.
chain = MarkovChain(
    initial_state=initial_partition,
    proposal=propose_random_flip,
    constraints=[single_flip_contiguous],
    accept=always_accept,
    total_steps=1000,
)

# Generic Data in DataFrame
Now, let's take a look at the sociodemographic and political records supporting this analysis. The file `GerryChain/docs/user/PA_VTDs.json` (inside the repository cloned above) contains tabular data for Vote Tabulation Districts (VTDs), which are precinct-level or similar regions.

In [None]:
import json

# Parse the raw Pennsylvania VTD JSON file into plain Python objects.
with open(path_to_pa) as pa_file:
    data = json.loads(pa_file.read())

In [None]:
# Tabulate the JSON's 'nodes' list: one row per node, one column per node attribute.
df = pd.DataFrame(data['nodes'])

In [None]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9255 entries, 0 to 9254
Data columns (total 72 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   boundary_node   9255 non-null   bool   
 1   boundary_perim  305 non-null    float64
 2   area            9255 non-null   float64
 3   STATEFP10       9255 non-null   object 
 4   COUNTYFP10      9255 non-null   object 
 5   VTDST10         9255 non-null   object 
 6   GEOID10         9255 non-null   object 
 7   VTDI10          9255 non-null   object 
 8   NAME10          9255 non-null   object 
 9   NAMELSAD10      9255 non-null   object 
 10  LSAD10          9255 non-null   object 
 11  MTFCC10         9255 non-null   object 
 12  FUNCSTAT10      9255 non-null   object 
 13  ALAND10         9255 non-null   int64  
 14  AWATER10        9255 non-null   int64  
 15  INTPTLAT10      9255 non-null   object 
 16  INTPTLON10      9255 non-null   object 
 17  TOTPOP          9255 non-null   i

In [None]:
df

Unnamed: 0,boundary_node,boundary_perim,area,STATEFP10,COUNTYFP10,VTDST10,GEOID10,VTDI10,NAME10,NAMELSAD10,LSAD10,MTFCC10,FUNCSTAT10,ALAND10,AWATER10,INTPTLAT10,INTPTLON10,TOTPOP,NH_WHITE,NH_BLACK,NH_AMIN,NH_ASIAN,NH_NHPI,NH_OTHER,NH_2MORE,HISP,H_WHITE,H_BLACK,H_AMIN,H_ASIAN,H_NHPI,H_OTHER,H_2MORE,VAP,HVAP,WVAP,BVAP,AMINVAP,ASIANVAP,NHPIVAP,OTHERVAP,2MOREVAP,ATG12D,ATG12R,F2014GOVD,F2014GOVR,GOV10D,GOV10R,PRES12D,PRES12O,PRES12R,SEN10D,SEN10R,T16ATGD,T16ATGR,T16PRESD,T16PRESOTH,T16PRESR,T16SEND,T16SENR,USS12D,USS12R,REMEDIAL,GOV,TS,CD_2011,SEND,HDIST,538DEM,538GOP,538CMPCT,id
0,True,0.063126,0.004278,42,085,960,42085960,A,SHENANGO TWP VTD WEST,SHENANGO TWP VTD WEST,00,G5240,N,39740056,141805,+41.1564874,-080.4865792,1915,1839,35,1,8,0,3,19,10,3,0,1,0,0,4,2,1553,7,1494,30,1,6,0,2,13,514.000104,388.000078,290.000058,242.000049,289.000058,349.000070,492.000099,11.000002,451.000091,315.000063,328.000066,416.000084,558.000112,342.000069,32.000006,631.000127,379.000076,590.000119,505.000102,423.000085,16,3,3,3,50,7,03,03,03,0
1,False,,0.000551,42,039,40,4203940,A,BLOOMING VALLEY Voting District,BLOOMING VALLEY Voting District,00,G5240,N,5034196,57763,+41.6745788,-080.0382865,337,334,1,0,0,0,0,1,1,1,0,0,0,0,0,0,257,1,254,1,0,0,0,0,1,56.999999,123.999997,44.999999,96.999998,27.999999,124.999997,50.999999,10.000000,126.999997,34.999999,115.999998,39.999999,130.999997,32.999999,8.000000,133.999997,35.999999,125.999997,44.999999,135.999997,16,3,5,3,50,6,03,03,03,1
2,False,,0.007934,42,039,10,4203910,A,ATHENS TWP Voting District,ATHENS TWP Voting District,00,G5240,N,73207387,91606,+41.7499682,-079.8467004,734,719,4,4,0,0,0,2,5,5,0,0,0,0,0,0,586,3,578,1,3,0,0,0,1,67.999996,214.999986,47.999997,155.999990,41.999997,200.999987,74.999995,6.000000,224.999985,51.999997,192.999987,52.999997,266.999983,49.999997,8.999999,273.999982,54.999996,260.999983,64.999996,229.999985,16,3,5,3,50,65,03,03,03,2
3,True,0.091923,0.010283,42,039,20,4203920,A,BEAVER TWP Voting District,BEAVER TWP Voting District,00,G5240,N,94916802,2059,+41.8099591,-080.4562835,902,881,7,0,0,0,0,11,3,2,0,0,0,0,1,0,656,0,645,4,0,0,0,0,7,111.000001,153.000001,66.000000,102.000000,66.000000,139.000001,110.000001,6.000000,167.000001,76.000000,134.000001,89.000000,203.000001,74.000000,13.000000,214.000001,75.000000,204.000001,112.000001,160.000001,16,3,3,3,50,17,03,03,03,3
4,False,,0.010732,42,039,30,4203930,A,BLOOMFIELD TWP Voting District,BLOOMFIELD TWP Voting District,00,G5240,N,98195373,847815,+41.8113731,-079.8363351,1919,1877,7,1,2,1,0,16,15,10,0,0,0,0,1,4,1470,7,1450,2,1,2,0,0,8,329.999982,341.999981,225.999988,230.999987,177.999990,363.999980,314.999983,15.999999,393.999979,196.999989,339.999982,263.999986,528.999971,227.999988,29.999998,555.999970,246.999987,511.999972,267.999985,435.999976,16,3,5,3,50,65,03,03,03,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9250,False,,0.002479,42,001,100,42001100,A,CONEWAGO TWP VTD 02,CONEWAGO TWP VTD 02,00,G5240,N,23415287,157872,+39.7934562,-077.0339760,4276,4042,51,4,49,0,5,47,78,41,0,0,4,0,31,2,3096,40,2971,35,0,30,0,1,19,676.791329,945.105052,437.511555,675.790159,308.360546,936.094516,550.643833,27.031606,1108.295860,358.419077,876.024280,467.546673,1074.256060,415.485801,65.076089,1098.284154,435.509213,1064.244354,589.689487,1052.230306,13,4,4,4,33,91,09,04,04,9250
9251,True,0.044005,0.004784,42,001,480,42001480,A,UNION TWP,UNION TWP Voting District,V2,G5240,N,45438462,82892,+39.7558798,-077.0498477,3148,2996,20,4,23,0,2,36,67,57,0,0,0,0,6,4,2404,34,2320,11,4,15,0,1,19,440.999993,855.999987,246.999996,588.999991,185.999997,832.999988,351.999995,24.000000,982.999985,217.999997,796.999988,347.999995,1122.999983,310.999995,62.999999,1154.999983,339.999995,1080.999984,364.999995,964.999986,13,4,4,4,33,91,09,04,04,9251
9252,False,,0.003723,42,001,260,42001260,A,HAMILTON TWP,HAMILTON TWP Voting District,V2,G5240,N,34979161,374933,+39.8997205,-077.0206256,2530,2414,8,1,10,1,2,28,66,42,1,0,0,0,22,1,1930,37,1870,5,1,5,1,1,10,440.000003,706.000004,311.000002,490.000003,202.000001,689.000004,341.000002,17.000000,828.000005,240.000001,650.000004,343.000002,958.000006,278.000002,47.000000,1006.000006,295.000002,969.000006,392.000002,762.000005,13,4,4,4,33,193,09,04,04,9252
9253,False,,0.000151,42,001,10,4200110,A,ABBOTTSTOWN,ABBOTTSTOWN Voting District,V2,G5240,N,1416984,19097,+39.8817704,-076.9927881,1011,894,7,0,10,0,0,5,95,43,7,1,0,0,37,7,752,50,688,5,0,7,0,0,2,136.000003,178.000003,0.000000,124.000002,67.000001,186.000003,129.000002,8.000000,195.000004,85.000002,169.000003,145.000003,267.000005,119.000002,24.000000,274.000005,128.000002,256.000005,131.000002,190.000004,13,4,4,4,33,193,09,04,04,9253


In [None]:
# Sum the T16PRESD / T16PRESR vote columns over all VTDs in each CD_2011 district
# (presumably the 2016 presidential Democratic / Republican votes - confirm against the data dictionary).
# The columns are selected with a list (double brackets): indexing a groupby with a
# bare tuple of names is deprecated and raises an error on pandas >= 2.0.
grouped_cd_2011 = df.groupby('CD_2011')[['T16PRESD', 'T16PRESR']].sum()
grouped_cd_2011.head()

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0_level_0,T16PRESD,T16PRESR
CD_2011,Unnamed: 1_level_1,Unnamed: 2_level_1
1,250442.040614,57251.003598
2,334039.258555,28058.53755
3,112663.414224,195686.697215
4,125918.433935,198711.229557
5,103861.199549,191897.128258


In [None]:
# Two-party vote share of each side within every district.
two_party_votes = grouped_cd_2011["T16PRESD"] + grouped_cd_2011["T16PRESR"]
grouped_cd_2011['D_prop'] = grouped_cd_2011["T16PRESD"] / two_party_votes
grouped_cd_2011['R_prop'] = grouped_cd_2011["T16PRESR"] / two_party_votes

# Wasted share: the side with the smaller share wastes all of it; otherwise
# only the part of the share above one half is wasted.
dem_share, rep_share = grouped_cd_2011['D_prop'], grouped_cd_2011['R_prop']
grouped_cd_2011['D_wasted'] = np.where(dem_share < rep_share, dem_share, dem_share - .5)
grouped_cd_2011['R_wasted'] = np.where(rep_share < dem_share, rep_share, rep_share - .5)
grouped_cd_2011.head()

Unnamed: 0_level_0,T16PRESD,T16PRESR,D_prop,R_prop,D_wasted,R_wasted
CD_2011,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,250442.040614,57251.003598,0.813935,0.186065,0.313935,0.186065
2,334039.258555,28058.53755,0.922511,0.077489,0.422511,0.077489
3,112663.414224,195686.697215,0.365375,0.634625,0.365375,0.134625
4,125918.433935,198711.229557,0.387883,0.612117,0.387883,0.112117
5,103861.199549,191897.128258,0.351169,0.648831,0.351169,0.148831


In [None]:
# Efficiency gap of the CD_2011 plan: (R wasted share - D wasted share), summed over
# districts and divided by the sum of all vote shares.
# NOTE(review): calculate_efficiency_gap, defined later in this notebook, subtracts in the
# opposite order (D - R), so it reports this same plan with the opposite sign -
# confirm which sign convention is intended.
efficiency_gap = (sum(grouped_cd_2011["R_wasted"]) - sum(grouped_cd_2011["D_wasted"])) / (sum(grouped_cd_2011["R_prop"]) + sum(grouped_cd_2011["D_prop"]) )
efficiency_gap

-0.14873416638683068

In [None]:
plans = ['REMEDIAL','GOV','TS','CD_2011','538DEM','538GOP','538CMPCT']

In [None]:
def calculate_efficiency_gap(df):
  """Return the efficiency gap of a plan from per-district two-party vote totals.

  Parameters
  ----------
  df : pandas.DataFrame
      One row per district, with numeric columns ``T16PRESD`` and ``T16PRESR``
      holding the two sides' vote totals. The frame is only read; no columns
      are added to it.

  Returns
  -------
  float
      ``(sum of D wasted shares - sum of R wasted shares) / (sum of all shares)``.
      With this sign convention a positive value means the D side wasted more.

  Notes
  -----
  The calculation uses each side's vote *share* within a district, not raw
  counts, so every district carries the same weight regardless of turnout.
  In each district the side with the smaller share wastes all of it, and the
  other side wastes only what exceeds one half; an exact tie wastes nothing
  on either side. A district with zero two-party votes produces NaN shares
  and therefore a NaN result.
  """
  two_party_votes = df["T16PRESD"] + df["T16PRESR"]
  d_prop = df["T16PRESD"] / two_party_votes
  r_prop = df["T16PRESR"] / two_party_votes

  d_wasted = np.where(d_prop < r_prop, d_prop, d_prop - .5)
  r_wasted = np.where(r_prop < d_prop, r_prop, r_prop - .5)

  # Built-in sum() is used on purpose: unlike Series.sum(), it lets a NaN share
  # propagate into the result instead of silently skipping that district.
  efficiency_gap = (sum(d_wasted) - sum(r_wasted)) / (sum(r_prop) + sum(d_prop))
  return efficiency_gap

In [None]:
# Efficiency gap of every districting plan column listed in `plans`.
for plan in plans:
  # Select the two vote columns with a list (double brackets): indexing a groupby
  # with a bare tuple of names is deprecated and raises an error on pandas >= 2.0.
  eff_gap = calculate_efficiency_gap(df.groupby(plan)[['T16PRESD', 'T16PRESR']].sum())
  print(plan,round(eff_gap,4))

REMEDIAL 0.0332
GOV 0.0909
TS 0.1477
CD_2011 0.1487
538DEM -0.0216
538GOP 0.2036
538CMPCT 0.0902


  
  
  
  
  
  
  
