In [1]:
import numpy as np
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from PIL import Image, ImageOps
import glob
import os       #mkdir
import datetime
from scipy.sparse import csgraph #for laplacian
from scipy.linalg import null_space
from plotnine import (ggplot, aes, geom_map, geom_text, geom_label,
                     ggtitle, element_blank, element_rect,
                     scale_fill_manual, theme_minimal, theme, scale_fill_cmap)
import math         ##for math.sqrt
import random       #for random selection of district to start with

to do:

    - create dataframe with previous and current district maps with 4 districts
    - create gpd dataframes with shapef_ia_redist equivalent for above maps
    - compute compactness scores for above maps
    - export all scores to a csv in order to compare to simulated maps' compactness scores


In [2]:
votes_df=pd.read_csv('./data/voter_numbers.csv')
votes_df['county_id']=votes_df['county_id'].astype(str).str.pad(2,fillchar='0')

votes_df.head(5)

Unnamed: 0,county_id,NAME10,dem_votes,rep_votes,indep_votes,og_district
0,0,Adair,973,2166,,3
1,1,Adams,510,1126,,3
2,2,Allamakee,1932,3820,,2
3,3,Appanoose,1412,3249,,3
4,4,Audubon,633,1639,51.0,4


In [3]:
#census.csv is data from Secretary of State's office.
census_df=pd.read_csv('census.csv')
census_df['COUNTYFP10']=census_df['COUNTYFP10'].astype(str).str.pad(3,fillchar='0')

#imports county shapefiles from MGGG
shapefile_iowa = gpd.read_file('IA_counties/IA_counties.shp').sort_values('NAME10',ignore_index=True)

## Merging ONLY 2020 population numbers and county_id from census df into shapefile_iowa
shapefile_iowa = shapefile_iowa.merge(census_df[['COUNTYFP10','population','county_id']], on='COUNTYFP10').copy()

county_populations = np.array(census_df['population'])
state_population = sum(county_populations)

#Then project the shapefiles to UTM 15N
shapef_ia_proj = shapefile_iowa.to_crs(epsg=26915)


In [4]:
shapef_ia_proj['xcentr_lon'] = shapef_ia_proj.centroid.x
shapef_ia_proj['ycentr_lat'] = shapef_ia_proj.centroid.y

In [5]:
districting_columns = ['COUNTYFP10', 'NAME10', 'geometry', 
       'population', 'county_id', 'xcentr_lon', 'ycentr_lat']

#make a new shapefile, which will be merged/dissolved on in the process of making districts
shapef_ia_fordistricting = shapef_ia_proj[districting_columns].copy()

In [6]:
shapef_ia_fordistricting['county_id']=shapef_ia_fordistricting['county_id'].astype(str).str.pad(2,fillchar='0')

shapef_ia_fordistricting.head(10)

Unnamed: 0,COUNTYFP10,NAME10,geometry,population,county_id,xcentr_lon,ycentr_lat
0,1,Adair,"POLYGON ((386139.673 4557123.147, 385899.972 4...",7496,0,376909.476792,4576509.0
1,3,Adams,"POLYGON ((347722.464 4557927.937, 347913.212 4...",3704,1,357154.753953,4543364.0
2,5,Allamakee,"POLYGON ((621990.182 4817526.586, 622024.806 4...",14061,2,631594.46907,4793646.0
3,7,Appanoose,"POLYGON ((491767.058 4517888.762, 491767.214 4...",12317,3,511092.713138,4510250.0
4,9,Audubon,"POLYGON ((325801.578 4617291.999, 325803.323 4...",5674,4,341387.505752,4616499.0
5,11,Benton,"POLYGON ((587335.112 4634973.661, 586720.225 4...",25575,5,577281.033972,4659083.0
6,13,Black Hawk,"POLYGON ((546226.645 4721244.274, 546286.490 4...",131144,6,556821.315153,4702196.0
7,15,Boone,"POLYGON ((403600.593 4654273.888, 403600.861 4...",26715,7,422884.534346,4654246.0
8,17,Bremer,"POLYGON ((565670.028 4721392.729, 565649.450 4...",24988,8,555787.494078,4736008.0
9,19,Buchanan,"POLYGON ((605972.713 4683490.725, 604982.657 4...",20565,9,595537.545341,4702694.0


In [7]:
# shapef_ia_fordistricting[shapef_ia_fordistricting['NAME10']=='Fremont']

In [8]:
# shapef_compactness = shapef_ia_fordistricting.merge(votes_df[['og_district','county_id']], on='county_id').copy()
shapef_compactness=shapef_ia_fordistricting.copy()
shapef_compactness['DISTRICT']=votes_df['og_district']
shapef_compactness.head()

Unnamed: 0,COUNTYFP10,NAME10,geometry,population,county_id,xcentr_lon,ycentr_lat,DISTRICT
0,1,Adair,"POLYGON ((386139.673 4557123.147, 385899.972 4...",7496,0,376909.476792,4576509.0,3
1,3,Adams,"POLYGON ((347722.464 4557927.937, 347913.212 4...",3704,1,357154.753953,4543364.0,3
2,5,Allamakee,"POLYGON ((621990.182 4817526.586, 622024.806 4...",14061,2,631594.46907,4793646.0,2
3,7,Appanoose,"POLYGON ((491767.058 4517888.762, 491767.214 4...",12317,3,511092.713138,4510250.0,3
4,9,Audubon,"POLYGON ((325801.578 4617291.999, 325803.323 4...",5674,4,341387.505752,4616499.0,4


In [9]:
shapef_compactness[shapef_compactness['NAME10']=='Fremont']

Unnamed: 0,COUNTYFP10,NAME10,geometry,population,county_id,xcentr_lon,ycentr_lat,DISTRICT
35,71,Fremont,"POLYGON ((289482.569 4530846.082, 290113.960 4...",6605,35,280092.499127,4513775.0,4


In [10]:
#dissolve by district
def dissolve_by_district(county_shapefile):
    dissolved_shapefile = county_shapefile.dissolve(
        by="DISTRICT",
        aggfunc = {
            "population": "sum",
            "xcentr_lon": "first",  #dummy, since we'll recalculate
            "ycentr_lat": "first",
            #skip the county_id_string now since we aren't slicing it
            # "county_id_string": "sum", #sum = concatenation here b/c string
            "DISTRICT": "first"
        }
    )
    # dissolved_shapefile['xcentr_lon'] = dissolved_shapefile.centroid.x
    # dissolved_shapefile['ycentr_lat'] = dissolved_shapefile.centroid.y

    #the dissolve process makes the temp_district column into the index of the dataframe
    #which then has issues when we iterate the process
    #so dump the index for a dummy one now
    dissolved_shapefile = dissolved_shapefile.reset_index(drop=True)

    return dissolved_shapefile

In [11]:
shapef_dissolve_test=dissolve_by_district(shapef_compactness)

In [12]:
color_dict = { 1 : '#3995ff',
               2 : '#ff8539',
               3 : '#ffe839',
               4 : '#d139ff',
               }


def distmap_by_county(map_data,data_label):
    plot_distmap = (
        ggplot(map_data)
    + geom_map(aes(fill='DISTRICT')
        ,show_legend=True
        ) 
    + geom_label(aes(x='xcentr_lon', y='ycentr_lat', label=data_label,size=2)
        , show_legend=False)
    + theme_minimal()
    + theme(axis_text_x=element_blank(),
            axis_text_y=element_blank(),
            axis_title_x=element_blank(),
            axis_title_y=element_blank(),
            axis_ticks=element_blank(),
            panel_grid_major=element_blank(),
            panel_grid_minor=element_blank(),
            plot_background = element_rect(fill = 'white')       #whole png area
            )
    + scale_fill_manual(values=color_dict)        #uncomment this line to make district categorical!
    )
    return plot_distmap

In [15]:
# distmap_by_county(shapef_dissolve_test, 'DISTRICT')
#failed attempt at mapping dissolved districts

In [14]:
def compactness_func(shapef_ia_redist):
    shapef_ia_compactness = shapef_ia_redist.copy()
    shapef_ia_compactness['area'] = shapef_ia_compactness['geometry'].area
    shapef_ia_compactness['perimeter'] = shapef_ia_compactness['geometry'].length
    #Polsby-Popper Score
    shapef_ia_compactness['PolsbyPopper']=4*math.pi*shapef_ia_compactness['area']/(shapef_ia_compactness['perimeter']**2)
    #radius of minimum bounding circle
    shapef_ia_compactness['min_bounding_radius']=shapef_ia_compactness['geometry'].minimum_bounding_radius()
    #Reock Score
    shapef_ia_compactness['Reock']=shapef_ia_compactness['area']/(math.pi*(shapef_ia_compactness['min_bounding_radius']**2))
    
    return shapef_ia_compactness

In [16]:
compactness_func(shapef_dissolve_test)

Unnamed: 0,geometry,population,xcentr_lon,ycentr_lat,DISTRICT,area,perimeter,PolsbyPopper,min_bounding_radius,Reock
0,"POLYGON ((597046.306 4495965.242, 595731.779 4...",797584,655223.617473,4626175.0,1,28472890000.0,1120709.0,0.284876,160047.768111,0.35382
1,"POLYGON ((558251.482 4634182.102, 558251.206 4...",797589,631594.46907,4793646.0,2,33612240000.0,1004718.0,0.418426,141576.968905,0.53378
2,"POLYGON ((413996.789 4491953.713, 413922.278 4...",797551,376909.476792,4576509.0,3,27823730000.0,997141.5,0.351651,144824.951242,0.422259
3,"POLYGON ((299084.952 4528678.815, 299079.684 4...",797645,341387.505752,4616499.0,4,55789230000.0,1595846.0,0.275282,191597.504183,0.48375
