# Injecting the Vest 2018 data into the map

@authors: vcle, bpuhani

In [16]:
import io
import time
import warnings
from contextlib import redirect_stdout
import geopandas as gpd

import maup

import utilities as util

In [17]:
maup.progress.enabled = True

warnings.filterwarnings('ignore')

start_time = time.time()

## Loading the needed data.
For this notebook to work we assume, that you ran the following notebooks first:
* `0_IL_import_and_explore_data.ipynb`
* `B_2_IL_clean_maup_with_congress.ipynb`
* `B_4_IL_find_map_without_holes_vest20_cong.ipynb`

In [18]:
il_df = util.load_shapefile("il_data/IL_congress_without_holes.shp")
vest18_df = util.load_shapefile("il_data/il_vest_18/il_vest_18.shp")
county_df = util.load_shapefile("il_data/il_pl2020_cnty/il_pl2020_cnty.shp")
vap_df = util.load_shapefile("il_data/il_pl2020_b/il_pl2020_p4_b.shp")
population_df = util.load_shapefile("il_data/il_pl2020_b/il_pl2020_p2_b.shp")

Loading shapefile from il_data/IL_congress_without_holes.shp...
Shapefile data loaded from cache.
Loading shapefile from il_data/il_vest_18/il_vest_18.shp...
Shapefile data loaded from cache.
Loading shapefile from il_data/il_pl2020_cnty/il_pl2020_cnty.shp...
Shapefile data loaded from cache.
Loading shapefile from il_data/il_pl2020_b/il_pl2020_p4_b.shp...
Shapefile data loaded from cache.
Loading shapefile from il_data/il_pl2020_b/il_pl2020_p2_b.shp...
Shapefile data loaded from cache.


## Cleaning the vest18 data
### Reformating the `crs` of the `vest18_df` to the metric system.

In [20]:
vest18_df = vest18_df.to_crs(vest18_df.estimate_utm_crs())
county_df = county_df.to_crs(county_df.estimate_utm_crs())
vap_df = vap_df.to_crs(vap_df.estimate_utm_crs())
population_df = population_df.to_crs(population_df.estimate_utm_crs())

### Examining the vest18 data (MAUP Doctor)

In [21]:
try:
    print(maup.doctor(vest18_df))
except Exception as e:
    print(f"Error in MAUP Doctor for vest18_df: {e}")

100%|██████████| 10116/10116 [00:10<00:00, 933.83it/s]


There are 4 overlaps.
There are 5 holes.
False


### Clean the data
* `with min_rook_length = 30`
* `nest_within_regions = county_df`

In [22]:
repaired_vest18_df = maup.smart_repair(
    vest18_df,
    nest_within_regions=county_df,
    min_rook_length=30
)

100%|██████████| 102/102 [00:00<00:00, 242.24it/s]


Snapping all geometries to a grid with precision 10^( -5 ) to avoid GEOS errors.


100%|██████████| 102/102 [00:01<00:00, 70.46it/s]
100%|██████████| 102/102 [00:00<00:00, 2652.50it/s]


Identifying overlaps...


100%|██████████| 10746/10746 [00:11<00:00, 905.66it/s] 


Resolving overlaps and filling gaps...


100%|██████████| 102/102 [00:01<00:00, 71.41it/s]
100%|██████████| 102/102 [00:00<00:00, 2537.66it/s]
Gaps to simplify: 0it [00:00, ?it/s]
Gaps to fill: 0it [00:00, ?it/s]
Gaps to simplify: 0it [00:00, ?it/s]
Gaps to fill: 0it [00:00, ?it/s]
Gaps to simplify: 0it [00:00, ?it/s]
Gaps to fill: 0it [00:00, ?it/s]
Gaps to simplify: 0it [00:00, ?it/s]
Gaps to fill: 0it [00:00, ?it/s]
Gaps to simplify: 0it [00:00, ?it/s]
Gaps to fill: 0it [00:00, ?it/s]
Gaps to simplify: 0it [00:00, ?it/s]
Gaps to fill: 0it [00:00, ?it/s]
Gaps to simplify: 0it [00:00, ?it/s]
Gaps to fill: 0it [00:00, ?it/s]
Gaps to simplify: 0it [00:00, ?it/s]
Gaps to fill: 0it [00:00, ?it/s]
Gaps to simplify: 0it [00:00, ?it/s]
Gaps to fill: 0it [00:00, ?it/s]
Gaps to simplify: 0it [00:00, ?it/s]
Gaps to fill: 0it [00:00, ?it/s]
Gaps to simplify: 0it [00:00, ?it/s]
Gaps to fill: 0it [00:00, ?it/s]
Gaps to simplify: 0it [00:00, ?it/s]
Gaps to fill: 0it [00:00, ?it/s]
Gaps to simplify: 0it [00:00, ?it/s]
Gaps to fill: 0it [00

Converting small rook adjacencies to queen...


100%|██████████| 10116/10116 [00:11<00:00, 908.18it/s] 
100%|██████████| 4/4 [00:00<00:00, 1333.11it/s]
100%|██████████| 4/4 [00:00<00:00, 790.33it/s]
100%|██████████| 4/4 [00:00<00:00, 1752.56it/s]
100%|██████████| 4/4 [00:00<00:00, 568.45it/s]
100%|██████████| 4/4 [00:00<00:00, 1051.07it/s]
100%|██████████| 4/4 [00:00<00:00, 1325.63it/s]
100%|██████████| 4/4 [00:00<00:00, 1331.63it/s]
100%|██████████| 5/5 [00:00<00:00, 1667.05it/s]
100%|██████████| 4/4 [00:00<00:00, 999.12it/s]
100%|██████████| 4/4 [00:00<00:00, 1323.65it/s]
100%|██████████| 4/4 [00:00<00:00, 1596.01it/s]
100%|██████████| 4/4 [00:00<00:00, 996.33it/s]
100%|██████████| 4/4 [00:00<00:00, 1333.54it/s]
100%|██████████| 4/4 [00:00<00:00, 1997.05it/s]
100%|██████████| 4/4 [00:00<00:00, 1255.50it/s]
100%|██████████| 4/4 [00:00<00:00, 1332.90it/s]
100%|██████████| 4/4 [00:00<00:00, 1325.21it/s]
100%|██████████| 3/3 [00:00<00:00, 999.75it/s]
100%|██████████| 4/4 [00:00<00:00, 1093.55it/s]
100%|██████████| 4/4 [00:00<00:00, 12

### Examining the vest18 data (MAUP Doctor) after the cleaning

In [23]:
try:
    print(maup.doctor(repaired_vest18_df))
except Exception as e:
    print(f"Error in MAUP Doctor for repaired_vest18_df: {e}")

100%|██████████| 10116/10116 [00:10<00:00, 935.81it/s] 


True


Looks good! Now we can clean the `repaired_vest18_df` by removing and renaming the columns.

In [24]:
repaired_vest18_df.columns

Index(['STATEFP20', 'COUNTYFP20', 'VTDST20', 'GEOID20', 'NAME20', 'G18GOVDPRI',
       'G18GOVRRAU', 'G18GOVCMCC', 'G18GOVLJAC', 'G18ATGDRAO', 'G18ATGRHAR',
       'G18ATGLHAR', 'G18SOSDWHI', 'G18SOSRHEL', 'G18SOSLDUT', 'G18COMDMEN',
       'G18COMRSEN', 'G18COMLBAL', 'G18TREDFRE', 'G18TRERDOD', 'G18TRELLEH',
       'geometry'],
      dtype='object')

Let's rename the columns we need and remove the columns we don't need.

In [25]:
# copied from SC_MAUP.ipynb # and modified for the G20USS candidates from Illinois
rename_dict = {
    'G18GOVDPRI': 'G18GOVD',
    'G18GOVRRAU': 'G18GOVR',
    'G18ATGDRAO': 'G18ATGD',
    'G18ATGRHAR': 'G18ATGR',
    'G18SOSDWHI': 'G18SOSD',
    'G18SOSRHEL': 'G18SOSR',
    'G18COMDMEN': 'G18COMD',
    'G18COMRSEN': 'G18COMR',
    'G18TREDFRE': 'G18TRED',
    'G18TRERDOD': 'G18TRER'
}

In [26]:
drop_list = [
    'G18GOVCMCC',
    'G18GOVLJAC',
    'G18ATGLHAR',
    'G18SOSLDUT',
    'G18COMLBAL',
    'G18TRELLEH'
]

In [27]:
repaired_vest18_df.rename(columns=rename_dict, inplace=True)
repaired_vest18_df.drop(columns=drop_list, inplace=True)

In [28]:
repaired_vest18_df.columns

Index(['STATEFP20', 'COUNTYFP20', 'VTDST20', 'GEOID20', 'NAME20', 'G18GOVD',
       'G18GOVR', 'G18ATGD', 'G18ATGR', 'G18SOSD', 'G18SOSR', 'G18COMD',
       'G18COMR', 'G18TRED', 'G18TRER', 'geometry'],
      dtype='object')

In [59]:
elec2018_cols = list(rename_dict.values())
print(elec2018_cols)

['G18GOVD', 'G18GOVR', 'G18ATGD', 'G18ATGR', 'G18SOSD', 'G18SOSR', 'G18COMD', 'G18COMR', 'G18TRED', 'G18TRER']


Now that is's clean, we can add it to the `repaired_vest18_df`

Remember that the 2020 and 2018 election years have different precincts, so we need to disaggregate this data to the block level as previously discussed, and re-aggregate to the 2020 precincts.

In [62]:
repaired_vest18_df.head()

Unnamed: 0,STATEFP20,COUNTYFP20,VTDST20,GEOID20,NAME20,G18GOVD,G18GOVR,G18ATGD,G18ATGR,G18SOSD,G18SOSR,G18COMD,G18COMR,G18TRED,G18TRER,geometry
0,17,19,CN0100,17019CN0100,Cunningham 1,554,40,510,94,588,22,569,28,566,29,"POLYGON ((395000.017 4443248.947, 395062.695 4..."
1,17,19,CC0600,17019CC0600,City of Champaign 06,708,186,674,252,772,146,724,186,734,174,"POLYGON ((392826.416 4443312.151, 392828.282 4..."
2,17,19,CC0100,17019CC0100,City of Champaign 01,497,23,463,69,517,14,514,17,503,17,"POLYGON ((395286.570 4441435.160, 395283.825 4..."
3,17,19,CC0900,17019CC0900,City of Champaign 09,454,81,444,125,505,57,480,83,470,82,"POLYGON ((392819.261 4442698.203, 392818.473 4..."
4,17,19,CC0300,17019CC0300,City of Champaign 03,892,182,896,195,956,137,900,167,867,184,"POLYGON ((394718.724 4440992.295, 394872.839 4..."


In [63]:
print("Sum of all Votes:", sum(repaired_vest18_df[elec2018_cols].sum(axis=1)))

Sum of all Votes: 21874163


In [64]:
blocks_to_2018precincts_assignment = maup.assign(population_df.geometry, repaired_vest18_df.geometry)

100%|██████████| 10116/10116 [00:13<00:00, 741.22it/s]
100%|██████████| 10116/10116 [00:45<00:00, 223.42it/s]


In [65]:
# VAP = P0040001
weights2018 = vap_df["P0040001"]/blocks_to_2018precincts_assignment.map(vap_df["P0040001"].groupby(blocks_to_2018precincts_assignment).sum())
weights2018 = weights2018.fillna(0)

In [66]:
weights2018

0         0.022222
1         0.009119
2         0.015267
3         0.111702
4         0.000000
            ...   
369973    0.016362
369974    0.012388
369975    0.018930
369976    0.009322
369977    0.040449
Length: 369978, dtype: float64

In [67]:
prorated2018 = maup.prorate(blocks_to_2018precincts_assignment, repaired_vest18_df[elec2018_cols], weights2018)

In [69]:
il_df[elec2018_cols] = prorated2018

In [70]:
il_df.head()

Unnamed: 0,boundary_n,area,STATEFP20,COUNTYFP20,VTDST20,GEOID20,NAME20,G20PRED,G20PRER,G20USSD,...,G18GOVD,G18GOVR,G18ATGD,G18ATGR,G18SOSD,G18SOSR,G18COMD,G18COMR,G18TRED,G18TRER
0,False,44420250.0,17,89,00HA01,1708900HA01,HAMPSHIRE 1,533,951,496,...,1.222222,1.644444,1.377778,1.888889,2.266667,1.111111,1.755556,1.555556,1.577778,1.6
1,False,15959620.0,17,89,00HA03,1708900HA03,HAMPSHIRE 3,817,1075,784,...,0.848024,1.158055,0.720365,1.68693,1.449848,1.103343,1.066869,1.413374,1.00304,1.513678
2,False,1689435.0,17,37,00DK09,1703700DK09,DEKALB 9,716,346,691,...,1.022901,1.969466,1.022901,2.152672,1.694656,1.51145,1.29771,1.89313,1.206107,1.923664
3,False,9126377.0,17,37,00DK02,1703700DK02,DEKALB 2,677,133,604,...,4.356383,7.037234,3.68617,9.829787,7.595745,5.808511,5.138298,7.930851,5.25,8.042553
4,False,83994330.0,17,201,00Bu01,1720100Bu01,Burritt 1,187,432,183,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [71]:
print("Sum of all Votes (projected to VEST20):", sum(il_df[elec2018_cols].sum(axis=1)))

Sum of all Votes (projected to VEST20): 594398.4240203657
