In [1]:
import networkx as nx
from gerrychain import Graph
import math
import time
import gurobipy as gp
from gurobipy import GRB

from util import update_attributes, get_k_L_U
from cluster import is_within_window, find_cluster
from separation import labeling_contiguity_callback
# Directory containing the '<STATE>_county.json' graph files. The file name is
# appended to this string by plain concatenation when a graph is loaded, so the
# value must end with a path separator.
# NOTE(review): hard-coded absolute Windows path -- adjust for the local machine.
filepath = 'C:\\districting-data-2020\\'

In [2]:
# helper: tries to build a clustering (S,V\S) in which S is drawn from a given
#   neighborhood and contains the given seed vertices
#
def _try_neighborhood_clustering(G, seeds, neighborhood, L, U, k):
    """Try to cut a cluster out of `neighborhood` that leaves a connected complement.

    Calls find_cluster on the subgraph induced by `neighborhood`, passing `seeds`,
    L, U and k through unchanged (with verify=False).

    Returns:
        ( [cluster, complement], [size, k-size] ) when find_cluster returns a
        cluster whose complement in G is nonempty and connected; None otherwise.
    """
    (cluster, size) = find_cluster(G.subgraph( neighborhood ), seeds, L, U, k, verify=False)
    if cluster is None:
        return None

    # set membership keeps the complement construction linear in |V|
    members = set( cluster )
    complement = [ i for i in G.nodes if i not in members ]

    # NOTE(review): only connectivity of the complement is checked here; its
    #   population is presumably accounted for inside find_cluster -- confirm.
    if len(complement) > 0 and nx.is_connected( G.subgraph(complement) ):
        return ( [cluster, complement], [size, k-size] )
    return None


# finds a nontrivial county clustering (S,V\S) of the vertices 
#   (if one exists) under 1-person population deviation
#
def find_one_person_deviation_clustering(G, k):
    """Search for a two-part county clustering under 1-person deviation.

    The per-district population bounds are L = floor(P/k) and U = ceil(P/k),
    where P is the sum of the 'TOTPOP' node attribute over G.

    The search proceeds in three stages and returns at the first success:
      1. for each vertex v, look for a cluster inside the closed neighborhood of v;
      2. for each edge {u,v}, look for a cluster inside N(u) union N(v);
      3. solve a MIP, once for each candidate size 1, ..., k-1 of cluster S.

    Args:
        G: graph whose nodes carry 'TOTPOP' and 'NAME20' attributes.
        k: total number of districts.

    Returns:
        ( [cluster, complement], [size, k-size] ) if a clustering is found, where
        the two lists of vertices partition G.nodes and `size` is the number of
        districts assigned to the first cluster. If nothing is found, the trivial
        clustering ( [list(G.nodes)], [k] ) is returned.
    """

    # set L and U
    total_population = sum( G.nodes[i]['TOTPOP'] for i in G.nodes )
    L = math.floor( total_population / k )
    U = math.ceil( total_population / k )

    # first, check if some county plus a subset of its neighborhood fits the bill
    for v in G.nodes:
        neighborhood = list( G.neighbors(v) ) + [v]
        solution = _try_neighborhood_clustering(G, [v], neighborhood, L, U, k)
        if solution is not None:
            print("Found a neighborhood solution around",G.nodes[v]['NAME20'],"County.")
            return solution

    # second, check if some edge plus a subset of its neighborhood fits the bill
    #   (u and v are adjacent, so each already appears in the other's neighbor list)
    for u,v in G.edges:
        neighborhood = set( list(G.neighbors(u)) + list(G.neighbors(v)) )
        solution = _try_neighborhood_clustering(G, [u,v], neighborhood, L, U, k)
        if solution is not None:
            print("Found a neighborhood solution around",G.nodes[u]['NAME20'],"and",G.nodes[v]['NAME20'],"Counties.")
            return solution

    # build model
    m = gp.Model()

    # x[i,j]=1 if vertex i is assigned to cluster j
    x = m.addVars(G.nodes, 2, vtype=GRB.BINARY)
    m.addConstrs( x[i,0] + x[i,1] == 1 for i in G.nodes )

    # y[j] is size of cluster j
    y = m.addVars(2, vtype=GRB.INTEGER)
    m.addConstr( y[0] + y[1] == k )
    y[1].LB = 1

    # add population balance constraints
    for j in range(2):
        m.addConstr( gp.quicksum( G.nodes[i]['TOTPOP'] * x[i,j] for i in G.nodes ) >= L * y[j]  )
        m.addConstr( gp.quicksum( G.nodes[i]['TOTPOP'] * x[i,j] for i in G.nodes ) <= U * y[j]  )

    # symmetry breaking: fix a particular vertex to be in S, here the first
    #   vertex (in node order) attaining the minimum population.
    mpop = min( G.nodes[i]['TOTPOP'] for i in G.nodes )
    mpv = [ i for i in G.nodes if G.nodes[i]['TOTPOP']==mpop ][0]
    x[mpv,0].LB = 1

    # add flow-based contiguity constraints for S (which is rooted at mpv):
    #   every non-root vertex of S consumes one unit of flow, vertices outside S
    #   receive no flow, and the root receives no flow.
    M = G.number_of_nodes() - 1
    DG = nx.DiGraph(G)
    f = m.addVars(DG.edges)
    m.addConstrs( gp.quicksum( f[j,i] - f[i,j] for j in G.neighbors(i) ) == x[i,0] for i in G.nodes if i != mpv )
    m.addConstrs( gp.quicksum( f[j,i] for j in G.neighbors(i) ) <= M * x[i,0] for i in G.nodes if i != mpv )
    m.addConstr( gp.quicksum( f[j,mpv] for j in G.neighbors(mpv) ) == 0 )

    # add cut-based contiguity constraints for V\S (whose root we do not know a priori);
    #   these are supplied lazily through the callback, which reads the attributes below.
    m.Params.LazyConstraints = 1
    m._x = x
    m._DG = DG
    m._parts = 2
    m._callback = labeling_contiguity_callback

    # add compactness objective: a transportation objective for cluster S, with root at mpv.
    dist = nx.shortest_path_length(G, source=mpv)
    m.setObjective( gp.quicksum( dist[i] * dist[i] * round(G.nodes[i]['TOTPOP']/1000) * x[i,0] for i in G.nodes ), GRB.MINIMIZE )

    # solve
    m.Params.IntFeasTol = 1e-7
    m.Params.FeasibilityTol = 1e-7
    m.Params.MIPGap = 1.00  # use a large MIP gap, as we are only interested in feasibility

    # try each possible size for cluster S
    for candidate_size in range(1,k):
        y[0].LB = candidate_size
        y[0].UB = candidate_size
        m.optimize( m._callback )

        if m.solCount > 0:
            cluster = [ i for i in G.nodes if x[i,0].x > 0.5 ]
            size = round( y[0].x )
            complement = [ i for i in G.nodes if x[i,1].x > 0.5 ]
            return ( [cluster, complement], [ size, k-size ] )

    # no nontrivial clustering found: report all of G as a single cluster of size k
    return ( [list(G.nodes)], [k] )

In [3]:
from number_of_districts import congressional_districts_2020

# Alphabetical list of the keys of the congressional-districts table.
# (The loop below is restricted to WY and does not iterate over this list.)
states = sorted( congressional_districts_2020.keys() )

# results[state, district_type] = {'clusters': [...], 'sizes': [...]}
results = dict()

banner = "**********************************"

# District types to process (presumably state senate and state house).
for district_type in ['SS', 'SH']:

    # double banner announcing the district type
    print(banner)
    print(banner)
    print("District_type:",district_type)
    print(banner)
    print(banner)
    print("")

    for state in ["WY"]:

        # single banner announcing the state
        print(banner)
        print("State:",state)
        print(banner)

        # load the county graph for this state and refresh its attributes
        filename = state + '_county.json'
        GC = Graph.from_json( filepath + filename )
        update_attributes(GC, state)

        # only k is passed on; the clustering routine derives its own L and U
        (k, L, U) = get_k_L_U(GC, state, district_type)
        if k <= 1 or not nx.is_connected(GC):
            print("Skipping this state because k <= 1 or because G is disconnected.")
            continue

        (clusters, sizes) = find_one_person_deviation_clustering(GC, k)
        results[state, district_type] = {
            'clusters': clusters,
            'sizes': sizes,
        }

**********************************
**********************************
District_type: SS
**********************************
**********************************

**********************************
State: WY
**********************************
Starting WY with k = 31 and deviation = 0.1
Thus, we have L = 17678 and U = 19538
Set parameter Username
Academic license - for non-commercial use only - expires 2024-01-27
Found a neighborhood solution around Natrona and Fremont Counties.
**********************************
**********************************
District_type: SH
**********************************
**********************************

**********************************
State: WY
**********************************
Starting WY with k = 62 and deviation = 0.1
Thus, we have L = 8839 and U = 9769
Found a neighborhood solution around Weston County.


In [4]:
# One line per (state, district type): True when more than one cluster was found.
print("state type cluster?")
for (state, district_type), entry in results.items():
    sizes = entry['sizes']
    has_nontrivial_clustering = len(sizes) > 1
    print(state, district_type, has_nontrivial_clustering)

state type cluster?
WY SS True
WY SH True


In [5]:
# Dump the full results dictionary in copy-pasteable form.
print(f"results = {results}")

results = {('WY', 'SS'): {'clusters': [[0, 6, 10, 13, 16, 17, 18, 19], [1, 2, 3, 4, 5, 7, 8, 9, 11, 12, 14, 15, 20, 21, 22]], 'sizes': [11, 20]}, ('WY', 'SH'): {'clusters': [[3, 8], [0, 1, 2, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]], 'sizes': [1, 61]}}


In [6]:
# Tally, per district type, how many processed states admit a nontrivial
# clustering (more than one entry in 'sizes') out of how many were processed.
#
# The congressional (CD) tally is disabled:
# count_CD = sum( 1 for (state, district_type) in results.keys() if district_type=='CD' )
# count_CD_true = sum( 1 for (state, district_type) in results.keys() if district_type=='CD' and len(results[state,district_type]['sizes'])>1 )
# print("CD: ",count_CD_true,'/', count_CD)

# keys of results are (state, district_type) pairs
keys_SS = [ key for key in results.keys() if key[1] == 'SS' ]
count_SS = len(keys_SS)
count_SS_true = sum( 1 for key in keys_SS if len(results[key]['sizes']) > 1 )
print("SS: ",count_SS_true,'/', count_SS)

keys_SH = [ key for key in results.keys() if key[1] == 'SH' ]
count_SH = len(keys_SH)
count_SH_true = sum( 1 for key in keys_SH if len(results[key]['sizes']) > 1 )
print("SH: ",count_SH_true,'/', count_SH)

SS:  1 / 1
SH:  1 / 1


In [7]:
# make table: one LaTeX row per state, printed piece by piece as
#   state & number of counties & SS status & SH status \\
for state in ["WY"]:
    # HI is left out of the table
    if state == 'HI':
        continue

    # first column: state
    print(state, end='')

    # second column: number of counties, i.e. the total number of vertices
    # across the clusters stored for the SS run
    clusters = results[state, 'SS']['clusters']
    num_counties = sum( map(len, clusters) )
    print(" &",num_counties, end='')

    # one status column per district type
    for district_type in ['SS','SH']:

        entry = results.get( (state, district_type) )
        if entry is None:
            # no result recorded for this combination
            status = '$\\blacksquare$'
        elif len(entry['sizes']) > 1:
            status = '\\greencheck'
        else:
            status = '\\redx'

        print(" &", status, end='')

    # end of the LaTeX row
    print("\\\\")

WY & 23 & \greencheck & \greencheck\\
